Exemple #1
0
class ML_ExponentialM1_Red(ML_Function("ml_expm1")):
  def __init__(self, args):
    """ Build the expm1 meta-function from the argument structure @p args
        (for instance one returned by get_default_args) """
    # initializing base class
    ML_FunctionBasis.__init__(self, args)
    # accuracy target read from the argument structure
    self.accuracy  = args.accuracy


  @staticmethod
  def get_default_args(**kw):
    """ Build the argument template for ML_ExponentialM1_Red: the built-in
        expm1 defaults, with every entry of @p kw overriding (or extending)
        them """
    expm1_defaults = dict(
        output_file = "my_expm1.c",
        function_name = "my_expm1",
        precision = ML_Binary32,
        accuracy = ML_Faithful,
        target = GenericProcessor(),
    )
    # caller-supplied values take priority over the defaults
    return DefaultArgTemplate(**dict(expm1_defaults, **kw))

  def generate_scheme(self):
    """ Build the operation graph implementing expm1(x) = exp(x) - 1.

        The input is reduced as x = k * log(2) + r with
        k = nearestint(x / log(2)); expm1(r) is approximated by a
        polynomial and the standard result is rebuilt as
        2^k * (poly + (1 - 2^-k)). Dedicated branches handle NaN and
        infinite inputs, inputs beyond the overflow / underflow bounds and
        late overflow / underflow of the scaling by 2^k.

        @return the ConditionBlock at the root of the scheme """
    # declaring function input variable
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation: every raised return
    # carries the function input and the generated function name
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    C_m1 = Constant(-1, precision = self.precision)

    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    # sign test, only reached for infinite inputs: x > 0 selects +inf
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False)

    #  Infnty input: +inf for a positive input, -1 otherwise
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)

    # Over/Underflow Tests

    # inputs above expm1_overflow_bound return +inf directly
    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))

    # inputs below expm1_underflow_bound return -1 directly
    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))

    # only binary32 and binary64 are supported (KeyError otherwise)
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]

    # Constants

    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)

    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    # log(2) split into a short high part and a low correction part
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)


    # Reduction: k = nearestint(x / log(2)), r = x - k * log(2)
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")

    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)

    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)

    # s + t: sum of pre_sub_mul and red_coeff2 plus the recovered
    # rounding term t = red_coeff2 - (s - pre_sub_mul)
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)

    r = Addition(s, t, precision = self.precision)

    r.set_attributes(tag = "r", debug = debug_multi)

    r_interval = Interval(-log_2/S2, log_2/S2)

    local_ulp = sup(ulp(exp(r_interval), self.precision))

    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)


    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")

    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)

    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    # terms of index >= 2 go through the Horner scheme, r is added back
    # separately
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)

    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)

    # diff = 1 - 2^-k
    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi)

    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")

    # integer (floor) division: the offset is subtracted from the integer
    # exponent ik and fed to ExponentInsertion, so it has to remain an
    # integer under Python 3 where "/" is a true division
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() // 2)
    diff_k = ik - overflow_exp_offset

    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)

    # scaling by 2^k performed in two steps: 2^(k - offset) then 2^offset
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0

    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False),
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)),
        Return(late_overflow_result)
        )


    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)

    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset

    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)

    # scaling by 2^k performed in two steps: 2^(k + offset) then 2^-offset
    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0

    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)

    # the Statement holds two operations: the conditional raise on a
    # subnormal result, followed by the plain return
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)),
        Return(late_underflow_result)
        )

    # Reconstruction: 2^k * (poly + 1 - 2^-k)

    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)

    result_scheme = ConditionBlock(
        late_overflow_test,
        late_overflow_return,
        ConditionBlock(
            late_underflow_test,
            late_underflow_return,
            Return(std_result)
            )
        )

    std_return = ConditionBlock(
        overflow_test,
        overflow_return,
        ConditionBlock(
            underflow_test,
            underflow_return,
            result_scheme)
        )

    scheme = ConditionBlock(
        test_NaN_or_inf,
        Statement(specific_return),
        std_return
        )

    return scheme


  def numeric_emulate(self, input_value):
    """ Numeric reference for the generated function: expm1 applied to
        @p input_value """
    expected_value = expm1(input_value)
    return expected_value

  standard_test_cases = [[sollya.parse(x)] for x in ["0x1.9b3216p-2", "0x1.8c108p-2"]]
Exemple #2
0
class ML_UT_M128_Debug(ML_Function("ml_ut_m128_debug")):
  def __init__(self, args=DefaultArgTemplate):
    """ Build the unit-test meta-function from the argument structure
        @p args (the default value is the DefaultArgTemplate class itself) """
    # initializing base class
    ML_FunctionBasis.__init__(self, args)


  @staticmethod
  def get_default_args(**kw):
    """ Build the argument template for this unit test: the built-in
        defaults below, with every entry of @p kw overriding (or
        extending) them """
    m128_debug_defaults = dict(
        output_file = "ut_m128_conversion.c",
        function_name = "ut_m128_conversion",
        precision = ML_Binary32,
        target = X86_AVX2_Processor(),
        fast_path_extract = True,
        fuse_fma = True,
        debug = True,
        libm_compliant = True,
        pre_gen_passes = ["m128_promotion"],
    )
    # caller-supplied values take priority over the defaults
    m128_debug_defaults.update(kw)
    return DefaultArgTemplate(**m128_debug_defaults)

  def generate_scheme(self):
    """ Build a small operation graph mixing floating-point arithmetic,
        integer bit manipulation, a table load and a fused multiply-add,
        with debug attributes attached to several nodes.

        @return the Return node at the root of the scheme """
    # declaring function input variable
    vx = self.implementation.add_input_variable("x", self.precision)

    # floating-point sub-expressions: x + x, (x + x) * x and the constant 1.1
    add_xx = Addition(vx, vx, precision = self.precision)
    mult = Multiplication(add_xx, vx, precision = self.precision)
    cst  = Constant(1.1, precision = self.precision)

    index_size = 4
    table_size = 2**index_size

    # identity table: entry i holds the value i
    table = ML_NewTable(
      dimensions = [table_size],
      storage_precision = self.precision
    )
    for i in range(table_size):
      table[i] = i

    index = NearestInteger(
      vx,
      precision = ML_Int32
    )
    # index = index % table_size = index & (2**index_size - 1)
    index = BitLogicAnd(
      TypeCast(index,precision=ML_UInt32),
      Constant(2**index_size - 1, precision = ML_UInt32),
      precision=ML_UInt32,
      tag="uindex",
      debug=debug_multi
    )

    # back to a signed integer, then shifted right by one bit
    index = BitLogicRightShift(
        TypeCast(index, precision=ML_Int32),
        Constant(1, precision=ML_Int32),
        tag="index",
        debug=debug_multi,
        precision=ML_Int32
    )

    table_value = TableLoad(table, index, precision = self.precision)

    # integer expression index * (index + 7)
    int_tree = Multiplication(
        index,
        Addition(
            index,
            Constant(7, precision = ML_Int32),
            precision = ML_Int32
        ),
        precision = ML_Int32
    )

    # result = table_value * FMA(cst + float(int_tree), mult, add_xx)
    # with the Subtract specifier (presumably a * b - c; confirm against
    # the FusedMultiplyAdd definition)
    result = Multiplication(
      table_value,
      FusedMultiplyAdd(
        Addition(
            cst,
            Conversion(int_tree, precision = self.precision),
            precision=self.precision,
            debug=debug_multi,
            tag="fadd"
        ),
        mult,
        add_xx,
        specifier=FusedMultiplyAdd.Subtract,
        precision=self.precision,
        tag="fused",
        debug=debug_multi
      ),
      precision=self.precision,
      debug=debug_multi,
      tag="result"
    )

    scheme = Return(result, precision=self.precision, debug=debug_multi)

    # conv_pass = Pass_M128_Promotion(self.processor)
    # new_scheme = conv_pass.execute(scheme)

    return scheme

  def numeric_emulate(self, x):
    """ Numeric reference for the generated function, evaluated with
        sollya and rounded to nearest in self.precision after each step.

        NOTE(review): generate_scheme adds Conversion(int_tree) to the
        constant before the fused multiply-subtract, while this emulation
        multiplies by the constant alone; confirm this is intended. """
    # nearestint(x) reduced modulo the table size, then shifted right by
    # one bit
    table_value = (int(sollya.nearestint(x)) % 16) >> 1

    def round_to_precision(value):
      return sollya.round(value, self.precision.get_sollya_object(), sollya.RN)

    add_xx = round_to_precision(x + x)
    mult = round_to_precision(add_xx * x)
    cst = round_to_precision(1.1)

    product = round_to_precision(cst * mult)
    fused = round_to_precision(product - add_xx)
    return round_to_precision(table_value * fused)
Exemple #3
0
class ML_HyperbolicCosine(ML_Function("ml_cosh")):
    def __init__(self, args=DefaultArgTemplate):
        """ Build the cosh meta-function from the argument structure
        args (the default value is the DefaultArgTemplate class itself) """
        # initializing base class
        ML_FunctionBasis.__init__(self, args=args)

    @staticmethod
    def get_default_args(**args):
        """ Build the argument template for the Hyperbolic Cosine: the
        built-in defaults below, with every entry of args overriding (or
        extending) them """
        cosh_defaults = dict(
            precision=ML_Binary32,
            accuracy=ML_Faithful,
            target=GenericProcessor(),
            output_file="my_cosh.c",
            function_name="my_cosh",
            language=C_Code,
            vector_size=1,
        )
        # caller-supplied values take priority over the defaults
        for option_name, option_value in args.items():
            cosh_defaults[option_name] = option_value
        return DefaultArgTemplate(**cosh_defaults)

    def generate_scheme(self):
        """ Build the operation graph implementing cosh(x).

        |x| is reduced as r = |x| - k * log(2) / 2^index_size, exp(r) and
        exp(-r) are approximated with one polynomial evaluated at r and
        -r, and the factors 2^(+/- l / 2^index_size) are read as hi/lo
        pairs from a table. The scheme returns +infinity when |x| exceeds
        the overflow bound and the reconstructed sum otherwise.

        Returns the Statement at the root of the scheme. """
        # declaring function input variable

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        # NOTE(review): this helper is not called in the rest of the method
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # number of bits of k resolved through the table
        index_size = 3

        # from here on vx designates |x|
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2) / 2**index_size
        inv_log2_value = round(1 / arg_reg_value,
                               self.precision.get_sollya_object(), RN)
        inv_log2_cst = Constant(inv_log2_value,
                                precision=self.precision,
                                tag="inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2  for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        # NOTE(review): the 2^1024 bound is hard-coded and does not depend
        # on self.precision
        max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision(
        ) - max_k_bitsize - 1

        # log(2) / 2^index_size split into a short high part and a low
        # correction part
        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value,
                              self.precision.get_sollya_object(), RN)
        log2_hi_value_cst = Constant(log2_hi_value,
                                     tag="log2_hi_value",
                                     precision=self.precision)
        log2_lo_value_cst = Constant(log2_lo_value,
                                     tag="log2_lo_value",
                                     precision=self.precision)

        # k = trunc(|x| * 2^index_size / log(2)), kept in floating-point
        k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
        k_log2 = Multiplication(k,
                                log2_hi_value_cst,
                                precision=self.precision,
                                exact=True,
                                tag="k_log2",
                                unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        # evaluation error on r_hi, with vx and k replaced by variables
        # carrying hard-coded intervals; the value is only printed
        r_eval_error = self.get_eval_error(
            r_hi,
            variable_copy_map={
                vx:
                Variable("vx",
                         interval=Interval(0, 715),
                         precision=self.precision),
                k:
                Variable("k",
                         interval=Interval(0, 1024),
                         precision=self.precision)
            })
        print("r_eval_error: ", r_eval_error)

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        precision_list = [1] + [self.precision] * (poly_degree)

        # integer k split into high bits (power of two exponent) and
        # index_size low bits (table index)
        k_integer = Conversion(k,
                               precision=int_precision,
                               tag="k_integer",
                               debug=debug_multi)
        k_hi = BitLogicRightShift(k_integer,
                                  Constant(index_size),
                                  tag="k_int_hi",
                                  precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer,
                      2**index_size,
                      tag="k_int_lo",
                      precision=int_precision,
                      debug=debug_multi)
        # NOTE(review): pow_exp is not referenced again in this method
        pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # table columns: 0 = 2^(-v) hi, 1 = 2^(-v) lo, 2 = 2^(v) hi,
        # 3 = 2^(v) lo, with v = input_value * 2^-index_size
        exp_table = ML_NewTable(dimensions=[2 * 2**index_size, 4],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            # rows 2^index_size and above reuse the input values of the
            # first 2^index_size rows
            input_value = i - 2**index_size if i >= 2**index_size else i

            reduced_hi_prec = int(self.precision.get_mantissa_size() * 2 / 3.0)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**((input_value) *
                                                 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value) *
                                                  2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, RN)
            pos_value_lo = round(exp_value - pos_value_hi,
                                 self.precision.get_sollya_object(), RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, RN)
            neg_value_lo = round(mexp_value - neg_value_hi,
                                 self.precision.get_sollya_object(), RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # cosh(x) = 1/2 * (exp(x) + exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)  = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)  = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #
        # cosh(x) =
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        print("poly_approx_error: ", poly_approx_error,
              float(log2(poly_approx_error)))

        # the degree-0 coefficient is dropped (start_index=1); the same
        # sub-polynomial is evaluated at r and at -r
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            r,
            unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            -r,
            unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        # the low bits of k are offset by 2^index_size before indexing
        # the table
        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        # exponents h - 1 and -h - 1, both clamped from below to the
        # minimal normal exponent
        k_plus = Max(
            Subtraction(k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
        pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

        # dominant terms: table hi parts scaled by their power of two
        hi_terms = (pos_value_load_hi * pow_exp_pos +
                    neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag="hi_terms")

        # remaining terms of each exponential (polynomial and lo parts)
        pos_exp = (
            pos_value_load_hi * poly_pos +
            (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        neg_exp = (
            neg_value_load_hi * poly_neg +
            (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        result = Addition(Addition(
            pos_exp,
            neg_exp,
            precision=self.precision,
        ),
                          hi_terms,
                          precision=self.precision,
                          tag="result",
                          debug=debug_multi)

        # ov_value: acosh of the largest value of the format, rounded down
        ov_value = round(acosh(self.precision.get_max_value()),
                         self.precision.get_sollya_object(), RD)
        # vx was already rebound to Abs(vx) above, so Abs is applied twice
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ Generate the emulation code for the cosh function, based on
        a call to mpfr_cosh.

        result_ternary is assigned the integer returned by the call,
        result and mpfr_x are the two mpfr_t operands of the call (mpfr_x
        should have the right precision) and mpfr_rnd is the rounding
        mode.
        """
        mpfr_name = "mpfr_cosh"
        # the three call operands are forwarded positionally, unchanged
        positional_args = {0: FO_Arg(0), 1: FO_Arg(1), 2: FO_Arg(2)}
        mpfr_operator = FunctionOperator(mpfr_name,
                                         arg_map=positional_args,
                                         require_header=["mpfr.h"])
        mpfr_function = FunctionObject(mpfr_name,
                                       [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                       ML_Int32, mpfr_operator)
        ternary_assign = ReferenceAssign(
            result_ternary, mpfr_function(result, mpfr_x, mpfr_rnd))
        return Statement(ternary_assign)

    def numeric_emulate(self, input_value):
        """ Numeric reference for the generated function: cosh applied
        to input_value """
        expected_value = cosh(input_value)
        return expected_value

    standard_test_cases = [
        sollya_parse(x) for x in [
            "1.705527", "0.935715", "-0x1.e45322ap-1", "0x1.b8ef9f54p-1",
            "-0x1.b8ef9f54p-1", "0x1.b6fdb8a8p-1"
        ]
    ]
Exemple #4
0
class ML_Exp2(ML_Function("ml_exp2")):
    def __init__(self,
                 arg_template=DefaultArgTemplate,
                 precision=ML_Binary32,
                 accuracy=ML_Faithful,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="my_exp2.c",
                 function_name="my_exp2",
                 language=C_Code,
                 vector_size=1):
        """ Build the exp2 meta-function.

        The I/O precision is chosen by ArgDefault.select_value between
        arg_template.precision and the precision keyword (presumably the
        template value wins when it is set; confirm against ArgDefault).
        The other keywords are forwarded to the base class, together with
        arg_template itself.
        """
        # initializing I/O precision
        precision = ArgDefault.select_value(
            [arg_template.precision, precision])

        # settings handed over to the base class; the same precision is
        # used for both entries of io_precisions
        base_settings = dict(
            base_name="exp2",
            function_name=function_name,
            output_file=output_file,
            io_precisions=[precision, precision],
            abs_accuracy=None,
            libm_compliant=libm_compliant,
            processor=target,
            fuse_fma=fuse_fma,
            fast_path_extract=fast_path_extract,
            debug_flag=debug_flag,
            language=language,
            vector_size=vector_size,
            arg_template=arg_template,
        )

        # initializing base class
        ML_FunctionBasis.__init__(self, **base_settings)

        self.accuracy = accuracy
        self.precision = precision

    def generate_scheme(self):
        """ Build the operation graph implementing exp2(x) = 2^x.

        x is split as x = vx_int * 2^-index_size + vx_frac with
        vx_int = floor(x * 2^index_size). The high bits of vx_int become
        a power of two exponent, its index_size low bits select a hi/lo
        pair in a table, and 2^vx_frac - 1 is approximated by a
        polynomial. The scheme returns +infinity when the exponent part
        exceeds emax and the reconstructed product otherwise.

        Returns the Statement at the root of the scheme. """
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        # NOTE(review): this helper is not called in the rest of the method
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # number of bits of vx_int resolved through the table
        index_size = 3

        approx_interval = Interval(0.0, 2**-index_size)
        error_goal_approx = 2**-(self.precision.get_precision())
        int_precision = self.precision.get_integer_format()

        # vx_int = floor(x * 2^index_size), kept in floating-point
        vx_int = Floor(vx * 2**index_size,
                       precision=self.precision,
                       tag="vx_int",
                       debug=debug_multi)
        # remaining fractional part of x
        vx_frac = vx - (vx_int * 2**-index_size)
        vx_frac.set_attributes(tag="vx_frac",
                               debug=debug_multi,
                               unbreakable=True)
        poly_degree = sup(
            guessdegree(2**(sollya.x), approx_interval, error_goal_approx)) + 1
        precision_list = [1] + [self.precision] * (poly_degree)

        # integer vx_int split into high bits (power of two exponent) and
        # index_size low bits (table index)
        vx_integer = Conversion(vx_int,
                                precision=int_precision,
                                tag="vx_integer",
                                debug=debug_multi)
        vx_int_hi = BitLogicRightShift(vx_integer,
                                       Constant(index_size),
                                       tag="vx_int_hi",
                                       debug=debug_multi)
        vx_int_lo = Modulo(vx_integer,
                           2**index_size,
                           tag="vx_int_lo",
                           debug=debug_multi)
        pow_exp = ExponentInsertion(Conversion(vx_int_hi,
                                               precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # table columns: 0 = lo part, 1 = hi part of
        # 2^(input_value * 2^-index_size)
        exp2_table = ML_NewTable(dimensions=[2 * 2**index_size, 2],
                                 storage_precision=self.precision,
                                 tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            # rows 2^index_size and above reuse the input values of the
            # first 2^index_size rows
            input_value = i - 2**index_size if i >= 2**index_size else i
            exp2_value = SollyaObject(2)**((input_value) * 2**-index_size)
            hi_value = round(exp2_value, self.precision.get_sollya_object(),
                             RN)
            lo_value = round(exp2_value - hi_value,
                             self.precision.get_sollya_object(), RN)
            exp2_table[i][0] = lo_value
            exp2_table[i][1] = hi_value

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        # call form of print: the statement form is a syntax error under
        # Python 3
        print("poly_approx_error: ", poly_approx_error,
              float(log2(poly_approx_error)))

        # the degree-0 coefficient is dropped (start_index=1)
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         vx_frac,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        # the low bits of vx_int are offset by 2^index_size before
        # indexing the table
        table_index = Addition(vx_int_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        lo_value_load = TableLoad(exp2_table,
                                  table_index,
                                  0,
                                  tag="lo_value_load",
                                  debug=debug_multi)
        hi_value_load = TableLoad(exp2_table,
                                  table_index,
                                  1,
                                  tag="hi_value_load",
                                  debug=debug_multi)

        # (hi + lo) * (1 + poly) * 2^vx_int_hi, smallest terms added first
        result = (hi_value_load +
                  (hi_value_load * poly +
                   (lo_value_load + lo_value_load * poly))) * pow_exp
        ov_flag = Comparison(vx_int_hi,
                             Constant(self.precision.get_emax(),
                                      precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ Generate the emulation code for the exp2 function, based on
        a call to mpfr_exp2 (base-2 exponential).

        result_ternary is assigned the integer returned by the call,
        result and mpfr_x are the two mpfr_t operands of the call (mpfr_x
        should have the right precision) and mpfr_rnd is the rounding
        mode.
        """
        # mpfr_exp2 computes 2^x, matching numeric_emulate; mpfr_exp would
        # compute e^x instead
        emulate_func_name = "mpfr_exp2"
        # the three call operands are forwarded positionally, unchanged
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def numeric_emulate(self, input_value):
        """ Numeric reference for the generated function: 2 raised to
        input_value, evaluated on a sollya object """
        two = sollya.SollyaObject(2)
        return two**(input_value)
Exemple #5
0
class ML_Log10(ML_Function("log10")):
    def __init__(self, args):
        """ build a ML_Log10 generator by forwarding args to ML_FunctionBasis

        args is presumably an argument template such as the one returned by
        get_default_args -- TODO confirm against ML_FunctionBasis
        """
        # initializing base class
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Build the argument structure for ML_Log10: the default settings
        listed below, with every entry of @p kw added or taking precedence """
        log10_settings = dict(
            output_file="my_log10f.c",
            function_name="my_log10f",
            precision=ML_Binary32,
            accuracy=ML_Faithful,
            target=GenericProcessor(),
        )
        # caller-provided values override the defaults
        for key, value in kw.items():
            log10_settings[key] = value
        return DefaultArgTemplate(**log10_settings)

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for ML_Log10 functions
        result is the destination of the generated ReferenceAssign
        mpfr_x is a mpfr_t variable which should have the right precision
        mpfr_rnd is the rounding mode
        """
        mpfr_routine = "mpfr_log10"
        # arg_map lists FO_Result(0) first, then the two call arguments
        operand_map = {0: FO_Result(0), 1: FO_Arg(0), 2: FO_Arg(1)}
        mpfr_operator = FunctionOperator(mpfr_routine,
                                         arg_map=operand_map,
                                         require_header=["mpfr.h"])
        mpfr_log10 = FunctionObject(mpfr_routine, [ML_Mpfr_t, ML_Int32],
                                    ML_Mpfr_t, mpfr_operator)
        emulated_value = mpfr_log10(mpfr_x, mpfr_rnd)
        return Statement(ReferenceAssign(result, emulated_value))

    def generate_scheme(self):
        """ build and return the operation graph implementing log10

        The input variable "x" is registered on self.implementation.  The
        returned scheme is a tree of ConditionBlock nodes testing, in order:
        negative input, infinity / NaN, subnormal input (with the zero test
        nested inside that branch), x == 1.0, exponent equal to -1, and
        finally the generic path.
        """
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # NOTE(review): the four tests and return_snan below are not
        # referenced by the scheme returned at the end of this method; they
        # are kept because building a node may have side effects -- confirm
        # before removing
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # log10(2) stored as a high part rounded to a reduced precision plus
        # a low part holding the rounding residue
        log2_hi_value = round(
            log10(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log10(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: entry i holds log10(inv_approx_table[i][0]) as a
        # (high, low) pair, entry 0 is forced to (0.0, 0.0)
        table_index_size = 7
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in table_index_range:
            inv_value = inv_approx_table[i][0]
            value_high = round(
                log10(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log10(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        # determining log_table range
        high_index_function = lambda table, i: table[i][0]
        low_index_function = lambda table, i: table[i][1]
        table_high_interval = log_table.get_subset_interval(
            high_index_function, table_index_range)
        table_low_interval = log_table.get_subset_interval(
            low_index_function, table_index_range)

        def compute_log(_vx, exp_corr_factor=None):
            """ build the log10 evaluation graph for the operand _vx

            exp_corr_factor, when not None, is added to the extracted
            exponent of _vx before it is converted to self.precision.
            Returns the tuple (_result, _poly, _log_inv_lo, _log_inv_hi,
            _red_vx, _result_one, corr_exp).
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # 7-bit table index taken from the integer view of the mantissa
            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debuglx),
                self.precision.get_field_size() - 7,
                debug=debuglx),
                                      0x7f,
                                      tag="table_index",
                                      debug=debuglld)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            # index 0 uses 1.0 as seed, matching the (0.0, 0.0) table entry
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-7
            # NOTE(review): this interval is centred on 1 whereas _red_vx has
            # 1.0 subtracted and the polynomial below is approximated on
            # [-inv_err, inv_err] -- confirm which interval is intended
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_lftolx,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            print("building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log10(1 + sollya.x) / sollya.x, approx_interval,
                    S2**-(self.precision.get_field_size() + 1))) + 1
            # sollya.x is used explicitly, as in the guessdegree call above
            global_poly_object = Polynomial.build_from_approximation(
                log10(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object

            print("generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            print(global_poly_object.get_sollya_object())

            # identity test: exp_corr_factor is either None or a correction
            corr_exp = Conversion(
                _vx_exp if exp_corr_factor is None else _vx_exp +
                exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_ddtolx)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + ((_red_vx * _poly +
                                          (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part", debug=debug_lftolx)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_lftolx)
            result_one_low_part = ((red_vx_lo +
                                    (red_vx_lo * _poly +
                                     (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_lftolx)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            _result = exact_log2_hi_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one, corr_exp

        result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one, corr_exp = compute_log(
            vx)
        result.set_attributes(tag="result", debug=debug_lftolx)
        new_result_one.set_attributes(tag="new_result_one", debug=debug_lftolx)

        # building eval error map
        eval_error_map = {
            red_vx:
            Variable("red_vx",
                     precision=self.precision,
                     interval=red_vx.get_interval()),
            log_inv_hi:
            Variable("log_inv_hi",
                     precision=self.precision,
                     interval=table_high_interval),
            log_inv_lo:
            Variable("log_inv_lo",
                     precision=self.precision,
                     interval=table_low_interval),
            corr_exp:
            Variable("corr_exp_g",
                     precision=self.precision,
                     interval=self.precision.get_exponent_interval()),
        }
        # computing gappa error (only printed, not used by the scheme)
        if is_gappa_installed():
            poly_eval_error = self.get_eval_error(result, eval_error_map)
            print("poly_eval_error: ", poly_eval_error)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # exp=-1 case: the exponent contribution is folded in as -log2_hi
        # and -log2_lo instead of going through corr_exp
        print("managing exp=-1 case")

        log_subtract = -log_inv_hi - log2_hi
        log_subtract.set_attributes(tag="log_subtract", debug=debug_lftolx)
        result2 = (log_subtract) + ((poly * red_vx) - (log_inv_lo + log2_lo))
        result2.set_attributes(tag="result2", debug=debug_lftolx)

        # subnormal inputs are scaled by 2**100 and the exponent is
        # corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _, _, _ = compute_log(vx * S2100,
                                                         exp_corr_factor=m100)

        # NOTE(review): result_one, cond_one and new_result_one are built
        # but the scheme returned below does not reference them
        print("managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log10(1 + sollya.x) / sollya.x, approx_interval_one,
                S2**-(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log10(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        print("MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))))))
        scheme = pre_scheme
        return scheme

    def numeric_emulate(self, input_value):
        """ return log10(input_value); log10 is presumably sollya's base-10
        logarithm -- TODO confirm against the file imports """
        return log10(input_value)
Exemple #6
0
class ML_Log2(ML_Function("ml_log2")):
    def __init__(self,
                 arg_template=DefaultArgTemplate,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log2f.c",
                 function_name="log2f"):
        """ build a log2 generator and initialize the ML_FunctionBasis part

        The working precision is chosen by ArgDefault.select_value between
        arg_template.precision and the precision argument, then stored in
        self.precision.  The remaining arguments are forwarded to
        ML_FunctionBasis.__init__ (target is passed as processor).
        """
        # extracting precision argument from command line
        precision = ArgDefault.select_value(
            [arg_template.precision, precision])

        # NOTE(review): the abs_accuracy argument is not forwarded, the base
        # class always receives abs_accuracy=None -- confirm this is intended
        base_settings = dict(
            base_name="log2",
            function_name=function_name,
            output_file=output_file,
            # two entries, both equal to the selected precision
            io_precisions=[precision, precision],
            abs_accuracy=None,
            libm_compliant=libm_compliant,
            processor=target,
            fuse_fma=fuse_fma,
            fast_path_extract=fast_path_extract,
            debug_flag=debug_flag,
            arg_template=arg_template,
        )

        # initializing base class
        ML_FunctionBasis.__init__(self, **base_settings)

        self.precision = precision

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for ML_Log2 functions
        result is the destination of the generated ReferenceAssign
        mpfr_x is a mpfr_t variable which should have the right precision
        mpfr_rnd is the rounding mode
        """
        mpfr_routine = "mpfr_log2"
        # arg_map lists FO_Result(0) first, then the two call arguments
        operand_map = {0: FO_Result(0), 1: FO_Arg(0), 2: FO_Arg(1)}
        mpfr_operator = FunctionOperator(mpfr_routine,
                                         arg_map=operand_map,
                                         require_header=["mpfr.h"])
        mpfr_log2 = FunctionObject(mpfr_routine, [ML_Mpfr_t, ML_Int32],
                                   ML_Mpfr_t, mpfr_operator)
        emulated_value = mpfr_log2(mpfr_x, mpfr_rnd)
        return Statement(ReferenceAssign(result, emulated_value))

    def generate_scheme(self):
        """ build and return the operation graph implementing log2

        The input variable "x" is registered on self.implementation.  The
        returned Statement holds the generic result node followed by a tree
        of ConditionBlock nodes testing, in order: negative input,
        infinity / NaN, subnormal input (with the zero test nested inside
        that branch), x == 1.0, exponent equal to -1, and finally the
        generic path.
        """
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # NOTE(review): the four tests and return_snan below are not
        # referenced by the scheme returned at the end of this method; they
        # are kept because building a node may have side effects -- confirm
        # before removing
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        # the NaN is built in the working precision, like every other
        # special value returned by this scheme
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: entry i holds log2(inv_approx_table[i][0]) as a
        # (high, low) pair, entry 0 is forced to (0.0, 0.0)
        table_index_size = 7
        log_table = ML_Table(dimensions=[2**table_index_size, 2],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("inv_table"))
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            inv_value = inv_approx_table[i][0]
            value_high = round(
                log2(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log2(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        def compute_log(_vx, exp_corr_factor=None):
            """ build the log2 evaluation graph for the operand _vx

            exp_corr_factor, when not None, is added to the extracted
            exponent of _vx.  Returns the tuple (_result, _poly,
            _log_inv_lo, _log_inv_hi, _red_vx).
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # 7-bit table index taken from the integer view of the mantissa
            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debuglx),
                self.precision.get_field_size() - 7,
                debug=debuglx),
                                      0x7f,
                                      tag="table_index",
                                      debug=debuglld)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            # index 0 uses 1.0 as seed, matching the (0.0, 0.0) table entry
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            _red_vx = arg_red_index * _vx_mant - 1.0
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
            inv_err = S2**-7

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            print("building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log2(1 + sollya.x) / sollya.x, approx_interval,
                    S2**-(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log2(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=0)

            # defaults applied while the polynomial and split nodes are built
            Attributes.set_default_silent(True)
            Attributes.set_default_rounding_mode(ML_RoundToNearest)

            print("generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            print("sollya global_poly_object")
            print(global_poly_object.get_sollya_object())
            print("sollya poly_object")
            print(poly_object.get_sollya_object())

            # identity test: exp_corr_factor is either None or a correction
            corr_exp = _vx_exp if exp_corr_factor is None else _vx_exp + exp_corr_factor
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_ddtolx)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            # exact_log2_hi_exp is the same node as corr_exp, so the call
            # below sets the tag of corr_exp itself
            exact_log2_hi_exp = corr_exp
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            _result = corr_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx

        result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_lftolx)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # exp=-1 case: the exponent contribution is folded in as -1.0
        print("managing exp=-1 case")

        result2 = (-log_inv_hi - 1.0) + ((poly * red_vx) - log_inv_lo)
        result2.set_attributes(tag="result2", debug=debug_lftolx)

        # subnormal inputs are scaled by 2**100 and the exponent is
        # corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _ = compute_log(vx * S2100,
                                                   exp_corr_factor=m100)
        result_subnormal.set_attributes(tag="result_subnormal",
                                        debug=debug_lftolx)

        # NOTE(review): result_one and cond_one are built but the scheme
        # returned below does not reference them; this path approximates the
        # natural logarithm (log), not log2 -- confirm before wiring it in
        print("managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        # sollya.x / sollya.absolute are spelled out, as in compute_log
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one,
                S2**-(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        print("MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Statement(ClearException(), result_subnormal,
                                  Return(result_subnormal))),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))))))
        scheme = Statement(result, pre_scheme)
        return scheme

    def numeric_emulate(self, input_value):
        """ return log2(input_value); log2 is presumably sollya's base-2
        logarithm -- TODO confirm against the file imports """
        return log2(input_value)
class ML_Log(ML_Function("ml_log")):
    def __init__(self,
                 precision=ML_Binary64,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log_fixed.c",
                 function_name="log_fixed"):
        """ Configure the generator and forward the settings to ML_FunctionBasis.

            @p precision is used both as input and as output format and is
            also stored on the instance; every other argument is forwarded to
            the base-class constructor (@p target under the name "processor").

            NOTE(review): @p abs_accuracy is accepted but not forwarded, the
            base class always receives abs_accuracy=None -- confirm this is
            intended. """
        # keyword arguments handed over to the base-class constructor
        base_settings = {
            "base_name": "log",
            "function_name": function_name,
            "output_file": output_file,
            # one entry for the input format, one for the output format
            "io_precisions": [precision, precision],
            "abs_accuracy": None,
            "libm_compliant": libm_compliant,
            "processor": target,
            "fuse_fma": fuse_fma,
            "fast_path_extract": fast_path_extract,
            "debug_flag": debug_flag,
        }
        ML_FunctionBasis.__init__(self, **base_settings)

        self.precision = precision

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ Build the statement computing the reference result through MPFR.

            The statement assigns the value of a call to mpfr_log (declared
            in mpfr.h) on (@p mpfr_x, @p mpfr_rnd) to @p result.

            @param result node receiving the emulated value
            @param mpfr_x mpfr_t variable, which should have the right precision
            @param mpfr_rnd rounding mode
            @return a Statement wrapping the assignment """
        mpfr_name = "mpfr_log"
        # layout of the generated call: slot 0 is the result, slots 1 and 2
        # are the first and second arguments of the function object
        call_layout = {0: FO_Result(0), 1: FO_Arg(0), 2: FO_Arg(1)}
        mpfr_operator = FunctionOperator(mpfr_name,
                                         arg_map=call_layout,
                                         require_header=["mpfr.h"])
        mpfr_log_function = FunctionObject(mpfr_name, [ML_Mpfr_t, ML_Int32],
                                           ML_Mpfr_t, mpfr_operator)
        emulated_value = mpfr_log_function(mpfr_x, mpfr_rnd)
        return Statement(ReferenceAssign(result, emulated_value))

    """ evaluate one argument reduction (Tang):
      given:
        an input variable of type Fixed(0,k,False), and with some input interval
        the number of bits to read from this variable for the argument reduction
        the precision of its inverse
      it returns:
        out_interval: the output interval of the variable
        length_table: the number of elements in the table
        sizeof_table: the size in byte of the table used
  """

    def evaluate_argument_reduction(self, in_interval, in_prec, inv_size,
                                    inv_prec):
        """ Model a single argument-reduction step and bound its output
            with gappa.

            The step is described on exact nodes: x = 1 + dx is converted
            (rounding toward -inf) to a fixed-point format parameterized by
            @p inv_size to give x1, 1/x1 is converted (rounding toward +inf)
            to a format parameterized by @p inv_prec to give inv_x, and the
            reduced argument is dy = x * inv_x - 1.

            @param in_interval interval attached to the input variable dx
            @param in_prec second parameter of dx's fixed-point format
                   (presumably its number of fractional bits -- confirm)
            @param inv_size second format parameter used for x1
            @param inv_prec second format parameter used for inv_x
            @return dict with keys 'out_interval' (gappa result stored under
                    'goal'), 'length_table' and 'sizeof_table' """
        one = Constant(1, precision=ML_Exact, tag="one")

        dx = Variable("dx",
                      precision=ML_Custom_FixedPoint_Format(0, in_prec, False),
                      interval=in_interval)

        # do the argument reduction
        x = Addition(dx, one, tag="x", precision=ML_Exact)
        x1 = Conversion(x,
                        tag="x1",
                        precision=ML_Custom_FixedPoint_Format(
                            0, inv_size, False),
                        rounding_mode=ML_RoundTowardMinusInfty)
        # NOTE(review): s is only referenced by the commented-out add_goal
        # below; it is otherwise unused in this method
        s = Multiplication(dx,
                           Constant(S2**inv_size, precision=ML_Exact),
                           precision=ML_Exact,
                           tag="interval_index_table")
        inv_x1 = Division(one, x1, tag="ix1", precision=ML_Exact)
        inv_x = Conversion(inv_x1,
                           tag="ix",
                           precision=ML_Custom_FixedPoint_Format(
                               1, inv_prec, False),
                           rounding_mode=ML_RoundTowardPlusInfty)
        y = Multiplication(x, inv_x, tag="y", precision=ML_Exact)
        dy = Subtraction(y, one, tag="dy", precision=ML_Exact)

        # add the necessary goals and hints
        # dx is replaced by a fresh variable with the same interval and
        # precision in every copy handed to gappa
        dx_gappa = Variable("dx_gappa",
                            interval=dx.get_interval(),
                            precision=dx.get_precision())
        swap_map = {dx: dx_gappa}

        # goal: dy (result of the argument reduction)
        gappa_code = self.gappa_engine.get_interval_code_no_copy(
            dy.copy(swap_map), bound_list=[swap_map[dx]])
        #self.gappa_engine.add_goal(gappa_code, s.copy(swap_map)) # range of index of table
        # hints. are the ones with isApprox=True really necessary ?
        self.gappa_engine.add_hint(gappa_code,
                                   x.copy(swap_map),
                                   x1.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   inv_x1.copy(swap_map),
                                   inv_x.copy(swap_map),
                                   isApprox=True)
        # hint x1 * (1 / x1) -> 1, guarded by (1 / x1) != 0.
        # swap_map[inv_x1] presumably relies on the copy() calls above having
        # registered inv_x1 in swap_map (the literal dict only contains dx),
        # so the statement order must be kept -- TODO confirm
        self.gappa_engine.add_hint(
            gappa_code,
            Multiplication(x1, inv_x1, precision=ML_Exact).copy(swap_map), one,
            Comparison(swap_map[inv_x1],
                       Constant(0, precision=ML_Exact),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool))
        # execute and parse the result
        result = execute_gappa_script_extract(gappa_code.get(
            self.gappa_engine))
        out_interval = result['goal']
        # one table entry per value of floor(dx * 2^inv_size) up to sup(dx)
        length_table = 1 + floor(
            sup(in_interval) * S2**inv_size).getConstantAsInt()
        # per entry: 16 (presumably the bytes of a 128-bit logarithm value --
        # confirm) plus the C storage size of one inverse, in bytes
        sizeof_table = length_table * (16 + ML_Custom_FixedPoint_Format(
            1, inv_prec, False).get_c_bit_size() / 8)
        return {
            'out_interval': out_interval,
            'length_table': length_table,
            'sizeof_table': sizeof_table,
        }

    # Explore the parameters of the argument reduction to get the fastest
    # code possible under a memory constraint.
    # For all possible parameters of the argument reduction:
    # - get the final interval and the table sizes proven by gappa
    # - eliminate the ones that don't fit in the memory constraint
    # - get the smallest polynomial degree that achieves 2^-53 relative precision
    #   (or 2**-(self.precision.get_field_size()+1) depending on self.precision)
    # - get the smallest degree that achieves ~2^-128 absolute precision
    #   (TODO: get exact limit with worst cases. should be around 2^-114)
    # Of all the parameters that achieved those degrees, choose the one that
    # has the smallest table size.
    """ return the size of the tables used by the argument reduction,
      and the interval of the output variable (and some other infos about the argument reduction= """

    def eval_argument_reduction(self, size1, prec1, size2, prec2):
        """ Model two chained argument-reduction steps and bound them with
            gappa.

            The input dx lies in [0, 1 - 2^-52] and uses the fixed-point
            format (0, 52, unsigned). First step: x = 1 + dx is converted
            (toward -inf) to the format (0, size1) giving x1, 1/x1 is
            converted (toward +inf) to the format (1, prec1) giving inv_x,
            and y = x * inv_x. The second step applies the same construction
            to y with @p size2 and @p prec2, giving z = y * inv_y.

            @param size1 format parameter used for x1 (first table index)
            @param prec1 format parameter used for the first inverse
            @param size2 format parameter used for y1 (second table index)
            @param prec2 format parameter used for the second inverse
            @return dict echoing the four arguments and providing
                    'length_table1', 'length_table2', 'sizeof_table1',
                    'sizeof_table2', 'in_interval' (interval of dx),
                    'mid_interval' (gappa result for dy) and 'out_interval'
                    (gappa result for the main goal dz) """
        one = Constant(1, precision=ML_Exact, tag="one")
        dx = Variable("dx",
                      precision=ML_Custom_FixedPoint_Format(0, 52, False),
                      interval=Interval(0, 1 - S2**-52))

        # do the argument reduction
        # first step: x -> y
        x = Addition(dx, one, tag="x", precision=ML_Exact)
        x1 = Conversion(x,
                        tag="x1",
                        precision=ML_Custom_FixedPoint_Format(0, size1, False),
                        rounding_mode=ML_RoundTowardMinusInfty)
        # s = (x1 - 1) * 2^size1, its gappa range gives the first table length
        s = Multiplication(Subtraction(x1, one, precision=ML_Exact),
                           Constant(S2**size1, precision=ML_Exact),
                           precision=ML_Exact,
                           tag="indexTableX")
        inv_x1 = Division(one, x1, tag="ix1", precision=ML_Exact)
        inv_x = Conversion(inv_x1,
                           tag="ix",
                           precision=ML_Custom_FixedPoint_Format(
                               1, prec1, False),
                           rounding_mode=ML_RoundTowardPlusInfty)
        y = Multiplication(x, inv_x, tag="y", precision=ML_Exact)
        dy = Subtraction(y, one, tag="dy", precision=ML_Exact)
        # second step: y -> z
        # NOTE(review): y1 carries the tag "y", the same tag as y above;
        # "y1" may have been intended -- confirm before changing, tags are
        # used to name the gappa results
        y1 = Conversion(y,
                        tag="y",
                        precision=ML_Custom_FixedPoint_Format(0, size2, False),
                        rounding_mode=ML_RoundTowardMinusInfty)
        # t = (y1 - 1) * 2^size2, its gappa range gives the second table length
        t = Multiplication(Subtraction(y1, one, precision=ML_Exact),
                           Constant(S2**size2, precision=ML_Exact),
                           precision=ML_Exact,
                           tag="indexTableY")
        inv_y1 = Division(one, y1, tag="iy1", precision=ML_Exact)
        inv_y = Conversion(inv_y1,
                           tag="iy",
                           precision=ML_Custom_FixedPoint_Format(
                               1, prec2, False),
                           rounding_mode=ML_RoundTowardPlusInfty)
        z = Multiplication(y, inv_y, tag="z", precision=ML_Exact)
        dz = Subtraction(z, one, tag="dz", precision=ML_Exact)

        # add the necessary goals and hints
        # dx is replaced by a fresh variable with the same interval and
        # precision in every copy handed to gappa
        dx_gappa = Variable("dx_gappa",
                            interval=dx.get_interval(),
                            precision=dx.get_precision())
        swap_map = {dx: dx_gappa}
        # goals (main goal: dz, the result of the argument reduction)
        gappa_code = self.gappa_engine.get_interval_code_no_copy(
            dz.copy(swap_map), bound_list=[dx_gappa])
        self.gappa_engine.add_goal(gappa_code, dy.copy(swap_map))
        self.gappa_engine.add_goal(
            gappa_code, s.copy(swap_map))  # range of index of table 1
        self.gappa_engine.add_goal(
            gappa_code, t.copy(swap_map))  # range of index of table 2
        # hints. are the ones with isApprox=True really necessary ?
        self.gappa_engine.add_hint(gappa_code,
                                   x.copy(swap_map),
                                   x1.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   y.copy(swap_map),
                                   y1.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   inv_x1.copy(swap_map),
                                   inv_x.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   inv_y1.copy(swap_map),
                                   inv_y.copy(swap_map),
                                   isApprox=True)
        # hints x1 * (1 / x1) -> 1 and y1 * (1 / y1) -> 1, each guarded by a
        # "inverse != 0" condition.
        # swap_map[inv_x1] / swap_map[inv_y1] presumably rely on the copy()
        # calls above having registered those nodes in swap_map (the literal
        # dict only contains dx), so the statement order must be kept --
        # TODO confirm
        self.gappa_engine.add_hint(
            gappa_code,
            Multiplication(x1, inv_x1, precision=ML_Exact).copy(swap_map), one,
            Comparison(swap_map[inv_x1],
                       Constant(0, precision=ML_Exact),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool))
        self.gappa_engine.add_hint(
            gappa_code,
            Multiplication(y1, inv_y1, precision=ML_Exact).copy(swap_map), one,
            Comparison(swap_map[inv_y1],
                       Constant(0, precision=ML_Exact),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool))
        # NOTE(review): "toto" is not connected to any node of the reduction;
        # the purpose of this extra hypothesis is not evident from this
        # method -- confirm it is still needed
        toto = Variable("toto", precision=ML_Binary64)
        self.gappa_engine.add_hypothesis(gappa_code, toto,
                                         Interval(0, S2**-52))

        # execute and parse the result
        result = execute_gappa_script_extract(gappa_code.get(
            self.gappa_engine))
        self.gappa_engine.clear_memoization_map()  # avoid memory leak
        #print result['indexTableX'], result['indexTableY']
        # one table entry per integer index up to the proven upper bound
        length_table1 = 1 + floor(sup(
            result['indexTableX'])).getConstantAsInt()
        length_table2 = 1 + floor(sup(
            result['indexTableY'])).getConstantAsInt()
        # disabled consistency check ("if False"): this branch never runs
        if False and (length_table2 != 1 +
                      floor(sup(result['dy']) * S2**size2).getConstantAsInt()):
            print "(dy*2**size2:", 1 + floor(sup(
                result['dy'] * S2**size2)).getConstantAsInt(), ")"
            print "(indexTableY:", 1 + floor(sup(
                result['indexTableY'])).getConstantAsInt(), ")"
            print result['indexTableY'], result['dy']
            sys.exit(1)
        return {
            # arguments
            'size1':
            size1,
            'prec1':
            prec1,
            'size2':
            size2,
            'prec2':
            prec2,
            # size of the tables
            # per entry: 16 (presumably the bytes of a 128-bit logarithm
            # value -- confirm) plus the C storage size of one inverse
            'length_table1':
            length_table1,
            'length_table2':
            length_table2,
            'sizeof_table1':
            length_table1 * (16 + ML_Custom_FixedPoint_Format(
                1, prec1, False).get_c_bit_size() / 8),
            'sizeof_table2':
            length_table2 * (16 + ML_Custom_FixedPoint_Format(
                1, prec2, False).get_c_bit_size() / 8),
            # intervals
            'in_interval':
            dx.get_interval(),
            'mid_interval':
            result['dy'],
            'out_interval':
            result['goal'],
        }

    def generate_argument_reduction(self, memory_limit,
                                    exhaustive_search=False):
        """ Select the parameters of the two-step argument reduction.

            By default the hard-coded reduction (size1, prec1, size2, prec2) =
            (6, 10, 12, 13) with polynomial degrees 4 and 8 is evaluated and
            returned; @p memory_limit is NOT consulted in that case.

            With @p exhaustive_search set, the parameter space is explored
            under @p memory_limit instead (this can take a long time, gappa is
            run for every candidate).

            @param memory_limit maximal cumulated size of the tables, only
                   used by the exhaustive search
            @param exhaustive_search when True, run the search instead of
                   returning the hard-coded reduction
            @return the dict built by eval_argument_reduction, extended with
                    'sizeof_tables', 'degree_poly1' and 'degree_poly2'; the
                    exhaustive search returns None when no candidate is valid
        """
        if exhaustive_search:
            return self._search_argument_reduction(memory_limit)
        return self._default_argument_reduction()

    def _default_argument_reduction(self):
        """ Evaluate the hard-coded argument reduction and its degrees """
        arg_reduc = self.eval_argument_reduction(6, 10, 12, 13)
        arg_reduc['sizeof_tables'] = (arg_reduc['sizeof_table1'] +
                                      arg_reduc['sizeof_table2'])
        arg_reduc['degree_poly1'] = 4
        arg_reduc['degree_poly2'] = 8
        return arg_reduc

    def _search_argument_reduction(self, memory_limit):
        """ Explore the argument-reduction parameters under @p memory_limit
            and return the best candidate found (None when there is none) """
        # iterate through all possible parameters, and return the best argument reduction
        # the order of importance of the characteristics of a good argument reduction is:
        #   1- the argument reduction is valid
        #   2- the degrees of the polynomials obtained are minimal
        #   3- the memory used is minimal
        # An argument reduction is valid iff:
        #   - the memory used is less than memory_limit
        #   - y-1 and z-1 fit into a uint64_t
        #   - the second argument reduction is useful (ie: it adds at least 1 bit to the argument reduction)
        # From those validity constraints we deduce some bounds on the parameters to reduce the space of values searched:
        # (note that those bounds are implied by, but not equivalent to, the constraints)
        #   size1 <= log2(memory_limit/17)                                       (memory_limit on the first table)
        #   prec1 < 13 + size1                                                   (y-1 fits into a uint64_t)
        #   size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)         (memory_limit on both tables)
        #   size2 >= 1 - log2(midinterval)                                       (second arg red should be useful)
        #   prec2 < 12 - prec1 - log2((y-y1)/y1),  for all possible y            (z-1 fits into a uint64_t)
        # note: it is hard to deduce a tight bound on prec2 from the last inequality
        # a good approximation is  size2 ~= max[for y]( - log2((y-y1)/y1)), but using it may eliminate valid arg reduc
        best_arg_reduc = None

        min_size1 = 1
        max_size1 = floor(log(memory_limit / 17) / log(2)).getConstantAsInt()
        for size1 in range(max_size1, min_size1 - 1, -1):

            min_prec1 = size1
            max_prec1 = 12 + size1
            for prec1 in range(min_prec1, max_prec1 + 1):

                # we need sizeof_table1 and mid_interval for the bound on size2 and prec2
                first_arg_reduc = self.eval_argument_reduction(
                    size1, prec1, prec1, prec1)
                mid_interval = first_arg_reduc['mid_interval']
                sizeof_table1 = first_arg_reduc['sizeof_table1']

                if not (0 <= inf(mid_interval)
                        and sup(mid_interval) < S2**(64 - 52 - prec1)):
                    continue
                if not (sizeof_table1 < memory_limit):
                    continue

                min_size2 = 1 - ceil(
                    log(sup(mid_interval)) / log(2)).getConstantAsInt()
                max_size2 = floor(
                    log((memory_limit - sizeof_table1) /
                        (17 * sup(mid_interval))) / log(2)).getConstantAsInt()
                # the body of the prec2 loop can shrink the interval of valid
                # values for prec2, so min_prec2 and max_prec2 are set here,
                # outside the size2 loop, and carried over to its next iteration
                min_prec2 = 0
                max_prec2 = 12 + max_size2 - prec1
                for size2 in range(max_size2, min_size2 - 1, -1):

                    max_prec2 = min(max_prec2, 12 + size2 - prec1)
                    for prec2 in range(max_prec2, min_prec2 - 1, -1):

                        arg_reduc = self.eval_argument_reduction(
                            size1, prec1, size2, prec2)
                        out_interval = arg_reduc['out_interval']
                        sizeof_tables = (arg_reduc['sizeof_table1'] +
                                         arg_reduc['sizeof_table2'])
                        if not (0 <= inf(out_interval) and sup(out_interval) <
                                S2**(64 - 52 - prec1 - prec2)):
                            max_prec2 = prec2 - 1
                            continue
                        if memory_limit < sizeof_tables:
                            continue

                        # guess the degree of the two polynomials (relative error <= 2^-52 and absolute error <= 2^-120)
                        # note: we exclude zero from out_interval to not perturb sollya (log(1+x)/x is not well defined on 0)
                        sollya_out_interval = Interval(
                            S2**(-52 - prec1 - prec2), sup(out_interval))
                        guess_degree_poly1 = guessdegree(
                            log(1 + sollya.x) / sollya.x, sollya_out_interval,
                            S2**-52)
                        guess_degree_poly2 = guessdegree(
                            log(1 + sollya.x), sollya_out_interval, S2**-120)
                        # TODO: detect when guessdegree return multiple possible degree, and find the right one
                        degree_poly1 = sup(
                            guess_degree_poly1).getConstantAsInt() + 1
                        degree_poly2 = sup(
                            guess_degree_poly2).getConstantAsInt()

                        # a candidate needing a higher degree than the current
                        # best ends the prec2 loop and raises the prec2 floor
                        if ((best_arg_reduc is not None) and
                            (best_arg_reduc['degree_poly1'] < degree_poly1 or
                             best_arg_reduc['degree_poly2'] < degree_poly2)):
                            min_prec2 = prec2 + 1
                            break

                        # rank lexicographically: degree of the first
                        # polynomial, degree of the second one, table size
                        candidate_rank = (degree_poly1, degree_poly2,
                                          sizeof_tables)
                        if ((best_arg_reduc is None) or candidate_rank <
                            (best_arg_reduc['degree_poly1'],
                             best_arg_reduc['degree_poly2'],
                             best_arg_reduc['sizeof_tables'])):
                            arg_reduc['degree_poly1'] = degree_poly1
                            arg_reduc['degree_poly2'] = degree_poly2
                            arg_reduc['sizeof_tables'] = sizeof_tables
                            best_arg_reduc = arg_reduc
        return best_arg_reduc

    def generate_scheme(self):
        """ Build the (unfinished) evaluation scheme of the fixed-point log.

            Steps:
              1. get the argument-reduction parameters from
                 generate_argument_reduction
              2. create and fill the inverse / logarithm tables of both
                 reduction steps
              3. extract exponent and mantissa of the input, build the
                 special-case handling and the two table-driven reductions
              4. approximate log(1 + x) / x on the reduced interval and
                 evaluate it with a Horner scheme

            The returned scheme returns the value of that polynomial directly;
            the exponent, the logarithm tables and the log(2) constants are
            built but not yet combined into a final result.

            @return a ConditionBlock selecting between the special-case
                    handling and the polynomial evaluation """
        memory_limit = 2500

        # local overloading of RaiseReturn operation; input_var is bound
        # further down, before this helper is first called
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = input_var
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        def fill_reduction_tables(inv_table, log_table, length, size, prec):
            """ fill all @p length entries of one (inverse, logarithm) pair:
                entry i describes the grid point 1 + i * 2^-size """
            for i in range(length):
                grid_point = 1 + i / S2**size
                # 1 / grid_point rounded up to a multiple of 2^-prec, as in
                # the gappa model of eval_argument_reduction
                inv_table[i] = ceil(S2**prec / grid_point) * S2**-prec
                # log(grid_point) rounded down to a multiple of 2^-(128 - 11)
                log_table[i] = floor(
                    log(grid_point) * S2**(128 - 11)) * S2**(11 - 128)

        ### Constants computations ###

        # high part of log(2): log(2) rounded to a multiple of 2^-52
        # (scale up, round to nearest integer, scale back down)
        v_log2_hi = nearestint(log(2) * S2**52) * S2**-52
        v_log2_lo = round(log(2) - v_log2_hi, 64 + 53, sollya.RN)
        # NOTE(review): log2_hi / log2_lo are not used by the scheme yet
        log2_hi = Constant(v_log2_hi, precision=self.precision, tag="log2_hi")
        log2_lo = Constant(v_log2_lo, precision=self.precision, tag="log2_lo")

        print("\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)")
        arg_reduc = self.generate_argument_reduction(memory_limit)

        print("\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory".format(
            arg_reduc['size1'], arg_reduc['prec1'], arg_reduc['size2'],
            arg_reduc['prec2'], arg_reduc['degree_poly1'],
            arg_reduc['degree_poly2'], arg_reduc['sizeof_tables']))

        print("\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(
            arg_reduc['length_table1'], arg_reduc['sizeof_table1']))
        inv_table_1 = ML_NewTable(
            dimensions=[arg_reduc['length_table1']],
            storage_precision=ML_Custom_FixedPoint_Format(
                1, arg_reduc['prec1'], False),
            tag=self.uniquify_name("inv_table_1"))
        log_table_1 = ML_NewTable(
            dimensions=[arg_reduc['length_table1']],
            storage_precision=ML_Custom_FixedPoint_Format(11, 128 - 11, False),
            tag=self.uniquify_name("log_table_1"))
        fill_reduction_tables(inv_table_1, log_table_1,
                              arg_reduc['length_table1'], arg_reduc['size1'],
                              arg_reduc['prec1'])

        print("\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(
            arg_reduc['length_table2'], arg_reduc['sizeof_table2']))
        inv_table_2 = ML_NewTable(
            dimensions=[arg_reduc['length_table2']],
            storage_precision=ML_Custom_FixedPoint_Format(
                1, arg_reduc['prec2'], False),
            tag=self.uniquify_name("inv_table_2"))
        log_table_2 = ML_NewTable(
            dimensions=[arg_reduc['length_table2']],
            storage_precision=ML_Custom_FixedPoint_Format(11, 128 - 11, False),
            tag=self.uniquify_name("log_table_2"))
        # NOTE(review): the second logarithm table now mirrors the first one
        # (log of the grid point); the previous code took the log of the
        # tabulated inverse here. Neither log table is read by the scheme
        # yet -- settle the exact content (possibly -log of the stored
        # inverse) when the reconstruction step is written.
        fill_reduction_tables(inv_table_2, log_table_2,
                              arg_reduc['length_table2'], arg_reduc['size2'],
                              arg_reduc['prec2'])

        ### Evaluation Scheme ###

        print("\n\033[1mGenerate the evaluation scheme:\033[0m")
        input_var = self.implementation.add_input_variable(
            "input_var", self.precision)
        # NOTE(review): ve is not used by the scheme yet
        ve = ExponentExtraction(input_var, tag="x_exponent", debug=debugd)
        vx = MantissaExtraction(input_var,
                                tag="x_mantissa",
                                precision=ML_Custom_FixedPoint_Format(
                                    0, 52, False),
                                debug=debug_lftolx)

        print("filtering and handling special cases")
        test_is_special_cases = LogicalNot(
            Test(input_var,
                 specifier=Test.IsIEEENormalPositive,
                 likely=True,
                 debug=debugd,
                 tag="is_special_cases"))
        handling_special_cases = Statement(
            ConditionBlock(
                Test(input_var, specifier=Test.IsSignalingNaN, debug=True),
                ExpRaiseReturn(ML_FPE_Invalid,
                               return_value=FP_QNaN(self.precision))),
            ConditionBlock(Test(input_var, specifier=Test.IsNaN, debug=True),
                           Return(input_var))  #,
            # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN)
            # all that remains is x is a subnormal positive
            #Statement(
            #  ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))),
            #  ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision()))))
            #)
        )

        print("doing the argument reduction")
        # first reduction step: table index from the truncated mantissa
        v_dx = vx
        v_x1 = Conversion(v_dx,
                          tag='x1',
                          precision=ML_Custom_FixedPoint_Format(
                              0, arg_reduc['size1'], False),
                          rounding_mode=ML_RoundTowardMinusInfty)
        v_index_x = TypeCast(v_x1, tag='index_x', precision=ML_Int32)
        v_inv_x = TableLoad(inv_table_1, v_index_x, tag='inv_x')
        v_x = Addition(v_dx,
                       1,
                       tag='x',
                       precision=ML_Custom_FixedPoint_Format(1, 52, False))
        v_dy = Multiplication(v_x,
                              v_inv_x,
                              tag='dy',
                              precision=ML_Custom_FixedPoint_Format(
                                  0, 52 + arg_reduc['prec1'], False))
        # second reduction step, same construction applied to dy
        v_y1 = Conversion(v_dy,
                          tag='y1',
                          precision=ML_Custom_FixedPoint_Format(
                              0, arg_reduc['size2'], False),
                          rounding_mode=ML_RoundTowardMinusInfty)
        v_index_y = TypeCast(v_y1, tag='index_y', precision=ML_Int32)
        v_inv_y = TableLoad(inv_table_2, v_index_y, tag='inv_y')
        # NOTE(review): v_dy is declared with 52 + prec1 as second format
        # parameter while v_y uses 52 + prec2 -- confirm which is intended
        v_y = Addition(v_dy,
                       1,
                       tag='y',
                       precision=ML_Custom_FixedPoint_Format(
                           1, 52 + arg_reduc['prec2'], False))
        # note that we limit the number of bits used to represent dz to 64.
        # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2))
        v_dz = Multiplication(
            v_y,
            v_inv_y,
            tag='z',
            precision=ML_Custom_FixedPoint_Format(
                64 - 52 - arg_reduc['prec1'] - arg_reduc['prec2'],
                52 + arg_reduc['prec1'] + arg_reduc['prec2'], False))

        print("doing the first polynomial evaluation")
        global_poly1_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, arg_reduc['degree_poly1'] - 1,
            [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'],
            fixed, sollya.absolute)
        # drop the constant coefficient before building the Horner scheme
        poly1_object = global_poly1_object.sub_poly(start_index=1)
        print(global_poly1_object)
        print(poly1_object)
        poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly1_object, v_dz, unified_precision=v_dz.get_precision())
        return ConditionBlock(test_is_special_cases, handling_special_cases,
                              Return(poly1))

        #approx_interval = Interval(0, 27021597764222975*S2**-61)

        #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size())))
        #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute)
        #poly_object = global_poly_object.sub_poly(start_index = 1)
        #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
        #_poly.set_attributes(tag = "poly", debug = debug_lftolx)
        """
Exemple #8
0
class ML_UT_GappaCode(ML_Function("ml_ut_gappa_code")):
    """ Unit test driving the gappa code generation: it bounds two small
        expressions of an input x in [-1, 1], adds a manual hint and prints
        the intervals extracted from the gappa run """

    def __init__(self, args=DefaultArgTemplate):
        """ Forward @p args to the ML_FunctionBasis constructor """
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Build the argument structure of this class: the defaults below,
            overridden by the entries of @p kw """
        class_defaults = {
            "output_file": "ut_gappa_code.c",
            "function_name": "ut_gappa_code",
            "precision": ML_Binary32,
            "target": MPFRProcessor(),
            "fast_path_extract": True,
            "fuse_fma": True,
            "libm_compliant": True
        }
        return DefaultArgTemplate(**dict(class_defaults, **kw))

    def generate_scheme(self):
        """ Generate a gappa script for two expressions of x, run it, print
            the resulting intervals and return a dummy scheme returning x """
        # function input <x>, constrained to [-1, 1]
        vx = self.implementation.add_input_variable("x", ML_Binary32)
        vx.set_interval(Interval(-1, 1))

        # free variable <y>, only used by the manual hint below
        vy = Variable("y", precision=ML_Exact)

        # the two expressions of <x> to be bounded
        main_expr = vx * vx - vx * 2
        extra_expr = vx * vx - vx

        # optimization pass: assigns every unknown precision the default one
        # (+ FMA merging if enabled)
        main_goal = self.optimise_scheme(main_expr)
        extra_goal = self.optimise_scheme(extra_expr)

        # the tags are the names under which the results are looked up in
        # the dict extracted from the gappa run
        main_goal.set_tag("goal")
        extra_goal.set_tag("new_goal")

        # EXACT expression used as left-hand side of the manual hint
        exact_identity = self.opt_engine.exactify(vy * (1 / vy))

        # var_bound limits the part of the DAG explored while generating the
        # script: each key is a node to stop at, each value the Variable
        # (with a defined interval) substituted for it.
        # vx.get_handle().get_node() retrieves the node instantiating the
        # abstract node <vx> after the calls to self.optimise_scheme
        x_node = vx.get_handle().get_node()
        x_substitute = Variable("x",
                                precision=ML_Binary32,
                                interval=vx.get_interval())
        var_bound = {x_node: x_substitute}

        # gappa code determining the interval of the main goal
        gappa_code = self.gappa_engine.get_interval_code(main_goal, var_bound)

        # manual hint stating that vy * (1 / vy) -> 1 { vy <> 0 };
        hint_target = Constant(1, precision=ML_Exact)
        hint_condition = Comparison(vy,
                                    Constant(0, precision=ML_Integer),
                                    specifier=Comparison.NotEqual,
                                    precision=ML_Bool)
        self.gappa_engine.add_hint(gappa_code, exact_identity, hint_target,
                                   hint_condition)

        # second expression registered as an extra goal of the same script
        self.gappa_engine.add_goal(gappa_code, extra_goal)

        # run gappa on the generated script; the extracted results are
        # indexed by the goals' tags
        gappa_script = gappa_code.get(self.gappa_engine)
        gappa_result = execute_gappa_script_extract(gappa_script)

        print("eval error: ", gappa_result["goal"], gappa_result["new_goal"])

        # dummy scheme so that code generation has something to emit
        return Statement(Return(vx))
Exemple #9
0
class ML_Exponential(ML_Function("ml_exp")):
    """ Meta-implementation of the exponential function.

        The generated scheme handles NaN/infinity inputs, filters early
        overflow/underflow, reduces the argument as x = k.log(2) + r,
        approximates expm1(r) with a polynomial whose degree is searched
        automatically, and finally rebuilds 2^k * (1 + expm1(r)) """
    def __init__(self, args=DefaultArgTemplate):
        """ Initialize the base class from @p args and record the
            accuracy goal requested in @p args """
        # initializing base class
        ML_FunctionBasis.__init__(self, args)
        self.accuracy = args.accuracy

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_Exponential,
            builtin from a default argument mapping overloaded with @p kw """
        default_args_exp = {
            "output_file": "my_exp.c",
            "function_name": "my_exp",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_exp.update(kw)
        return DefaultArgTemplate(**default_args_exp)

    def generate_scheme(self):
        """ Build and return the operation graph implementing exp(x).

            The returned node is a ConditionBlock which dispatches between
            the special-input path (NaN / infinity), the early
            overflow/underflow paths and the standard evaluation path
            (with late overflow/underflow corrections) """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation: every raise/return
        # built below carries the function input and the function name
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # ---- special input detection (NaN, sNaN, +/- infinity) ----
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input: +inf for a positive input,
        # +0 otherwise
        infty_return = Statement(
            ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # ---- exclusion of early overflow and underflow cases ----
        # inputs above ceil(log(2^(emax+1))) are answered with +inf
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        # inputs below floor(log(2^emin_subnormal)) are answered with +0
        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # ---- constant computation ----
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        # range of the input once early cases are excluded, and the
        # corresponding range of k = nearestint(x / log(2))
        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log(2) is split into log2_hi + log2_lo, log2_hi being rounded on
        # a number of bits reduced by (bit width of max |k|) + 2
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # ---- argument reduction: r = (x - k.log2_hi) - k.log2_lo ----
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        # k is the floating-point version of the reduction index and
        # ik its integer version (used for exponent insertion)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=ML_Int32,
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        # interval of the reduced argument, and its split in three
        # sub-intervals used for the dichotomy of the error evaluation
        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        # nodes at which the gappa exploration stops, with their
        # replacement variables
        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        # evaluation error of the argument reduction (informative only)
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        # ---- error goal derived from the requested accuracy ----
        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if self.accuracy is ML_Faithful:
            error_goal = local_ulp
        elif self.accuracy is ML_CorrectlyRounded:
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            # NOTE(review): error_goal stays undefined on this path; this
            # presumably relies on Log.Error aborting - confirm
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # half of the error budget is granted to the approximation
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        # initial degree guess (at least 2), refined by the loop below
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        # ---- autonomous polynomial degree search ----
        # the degree is incremented until the sum of the approximation and
        # evaluation errors, relative to exp, is below error_goal
        while True:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            # sub-polynomial made of the monomials of degree >= 2
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            # exp(r) ~ 1 + (r_hi + (r_lo + sub_poly(r)))
            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation:
            # both parts of the reduced argument are replaced by bounded
            # variables in the gappa scripts
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)

            sub_poly_error_copy_map = {
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                # one evaluation error is computed per sub-interval of
                # the high part of the reduced argument
                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            # worst relative error among the three sub-intervals
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error

            if error_goal > global_rel_poly_error:
                break
            poly_degree += 1

        # ---- late overflow: k exceeds emax ----
        # the scaling is performed in two steps, 2^(k - offset) then
        # 2^offset, and an infinite result raises the overflow exception
        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # integer division: the offset is an exponent and must remain an
        # integer whichever Python version executes this script
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = ik - overflow_exp_offset
        diff_k.set_attributes(debug=debug_multi,
                              tag="diff_k",
                              precision=ML_Int32)
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result))

        # ---- late underflow: k at or below emin_normal ----
        # the scaling is performed in two steps, 2^(k + offset) then
        # 2^(-offset), and a subnormal result raises the underflow exception
        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(ik, underflow_exp_offset, precision=ML_Int32)
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result))

        # ---- standard reconstruction: 2^k * poly ----
        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf,
                                Statement(ClearException(), specific_return),
                                std_return)

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for the exponential function,
            as a call to mpfr_exp whose returned value is assigned
            to @p result_ternary
            result is the variable passed as first argument of mpfr_exp
            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        emulate_func_name = "mpfr_exp"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def numeric_emulate(self, input_value):
        """ Numeric emulation of exponential: return sollya.exp
            applied to @p input_value """
        return sollya.exp(input_value)
Exemple #10
0
class ML_UT_ImplicitIntervalEval(ML_Function("ml_ut_implicit_interval_eval")):
    """ Unit test comparing the interval attached to a few nodes, after an
        explicit call to evaluate_range, against hand-computed intervals """
    def __init__(self, args=DefaultArgTemplate):
        # the base class performs the whole initialization
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Return the argument structure of ML_UT_ImplicitIntervalEval:
        the defaults listed below, overloaded with the entries of @p kw """
        merged_args = dict(
            output_file="ut_implicit_interval_eval.c",
            function_name="ut_implicit_interval_eval",
            precision=ML_Binary32,
            accuracy=ML_Faithful,
            target=MPFRProcessor(),
        )
        merged_args.update(kw)
        return DefaultArgTemplate(**merged_args)

    def generate_scheme(self):
        """ Build a small expression graph, evaluate its range and report
        every node whose interval differs from the expected one.
        An empty Statement is returned as scheme """
        # input variable, declared with an explicit interval
        input_range = Interval(-1, 1)
        vx = self.implementation.add_input_variable("x",
                                                    self.precision,
                                                    interval=input_range)

        # constant node and its degenerate interval
        cst = Constant(7, tag="cst")
        seven_range = Interval(7)

        # shift node, built with an explicit interval attribute
        shl = BitLogicLeftShift(NearestInteger(vx),
                                2,
                                interval=2 * input_range,
                                tag="shl")
        shift_range = 2 * input_range

        # composite node, and the same expression evaluated on intervals
        r = vx + cst * vx + shl - cst
        r.set_attributes(tag="r")
        sum_range = (input_range + seven_range * input_range + shift_range -
                     seven_range)

        # map of expected interval values, indexed by node
        expected_interval = {
            vx: input_range,
            cst: seven_range,
            shl: shift_range,
            r: sum_range,
        }

        # NOTES: implicit interval eval is no longer enforced: explicit call
        # to evaluate_range is required
        evaluate_range(r, update_interval=True)

        mismatch_message = "unexpected interval for {}: got {}, expected {}"
        match_message = "node {}: {} vs {}"
        for node in (vx, cst, r, shl):
            if node.get_interval() != expected_interval[node]:
                Log.report(
                    Log.Error,
                    mismatch_message.format(
                        node.get_str(display_precision=True),
                        node.get_interval(), expected_interval[node]))
                continue
            Log.report(
                Log.Info,
                match_message.format(node.get_tag(), node.get_interval(),
                                     expected_interval[node]))

        return Statement()

    def numeric_emulate(self, input_value):
        """ No numeric emulation is provided for this unit test """
        raise NotImplementedError
Exemple #11
0
class ML_Cosine(ML_Function("ml_cos")):
    """ Implementation of cosinus function """
    def __init__(self,
                 precision=ML_Binary32,
                 accuracy=ML_Faithful,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="cosf.c",
                 function_name="cosf"):
        """ Forward the generation options to ML_FunctionBasis and record
            the working precision.

            @p precision is used for both entries of the I/O precision list
            @p accuracy is accepted but is not forwarded by this
            constructor (abs_accuracy is set to None)
            @p target is forwarded as the processor of the base class """
        # settings handed over to the base class constructor
        base_settings = {
            "base_name": "cos",
            "function_name": function_name,
            "output_file": output_file,
            # same precision twice in the I/O precision list
            "io_precisions": [precision] * 2,
            "abs_accuracy": None,
            "libm_compliant": libm_compliant,
            "processor": target,
            "fuse_fma": fuse_fma,
            "fast_path_extract": fast_path_extract,
            "debug_flag": debug_flag,
        }
        ML_FunctionBasis.__init__(self, **base_settings)

        self.precision = precision

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ Generate the emulation code for the cosine function, as a
            statement assigning a call to mpfr_cos to @p result.

            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        mpfr_name = "mpfr_cos"
        # operand layout of the generated call: the result comes first,
        # followed by the two function arguments
        operand_layout = {
            0: FO_Result(0),
            1: FO_Arg(0),
            2: FO_Arg(1)
        }
        mpfr_operator = FunctionOperator(mpfr_name,
                                         arg_map=operand_layout,
                                         require_header=["mpfr.h"])
        mpfr_cos_function = FunctionObject(mpfr_name, [ML_Mpfr_t, ML_Int32],
                                           ML_Mpfr_t, mpfr_operator)
        emulated_value = mpfr_cos_function(mpfr_x, mpfr_rnd)
        return Statement(ReferenceAssign(result, emulated_value))

    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = Abs(self.implementation.add_input_variable("x", self.precision),
                 tag="vx")

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        debug_precision = {
            ML_Binary32: debug_ftox,
            ML_Binary64: debug_lftolx
        }[self.precision]

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        sollya_precision = self.precision.get_sollya_object()
        hi_precision = self.precision.get_field_size() - 3

        # argument reduction
        frac_pi_index = 3
        frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
        inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)
        # computing k = E(x * frac_pi)
        vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
        k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
        fk = Conversion(k, precision=self.precision, tag="fk")

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision)

        red_vx_hi = (vx - inv_frac_pi_cst * fk)
        red_vx_hi.set_attributes(tag="red_vx_hi",
                                 debug=debug_precision,
                                 precision=self.precision)
        red_vx_lo_sub = inv_frac_pi_lo_cst * fk
        red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub",
                                     debug=debug_precision,
                                     unbreakable=True,
                                     precision=self.precision)
        vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
        pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
        pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                       precision=ML_Binary64,
                                       debug=debug_lftolx)
        pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d.set_attributes(tag="pre_red_vx_d",
                                    debug=debug_lftolx,
                                    precision=ML_Binary64)

        modk = Modulo(k,
                      2**(frac_pi_index + 1),
                      precision=ML_Int32,
                      tag="switch_value",
                      debug=True)

        sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                      2**(frac_pi_index - 1))
        red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
        red_vx.set_attributes(tag="red_vx",
                              debug=debug_precision,
                              precision=self.precision)

        red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
        red_vx_d.set_attributes(tag="red_vx_d",
                                debug=debug_lftolx,
                                precision=ML_Binary64)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        error_goal_approx = S2**-self.precision.get_precision()

        Log.report(Log.Info, "building mathematical polynomial")
        poly_degree_vector = [None] * 2**(frac_pi_index + 1)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        index_relative = []

        poly_object_vector = [None] * 2**(frac_pi_index + 1)
        for i in xrange(2**(frac_pi_index + 1)):
            sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
            degree = int(
                sup(guessdegree(sub_func, approx_interval,
                                error_goal_approx))) + 1

            degree_list = range(degree + 1)
            a_interval = approx_interval
            if i == 0:
                # ad-hoc, TODO: to be cleaned
                degree = 6
                degree_list = range(0, degree + 1, 2)
            elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
                # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
                # must be generated
                degree_list = range(1, degree + 1, 2)

            if i == 3 or i == 5 or i == 7 or i == 9:
                precision_list = [sollya.binary64
                                  ] + [sollya.binary32] * (degree)
            else:
                precision_list = [sollya.binary32] * (degree + 1)

            poly_degree_vector[i] = degree

            constraint = sollya.absolute
            delta = (2**(frac_pi_index - 3))
            centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
            if centered_i < delta and centered_i > -delta and centered_i != 0:
                constraint = sollya.relative
                index_relative.append(i)
            Log.report(
                Log.Info, "generating approximation for %d/%d" %
                (i, 2**(frac_pi_index + 1)))
            poly_object_vector[
                i], _ = Polynomial.build_from_approximation_with_error(
                    sub_func,
                    degree_list,
                    precision_list,
                    a_interval,
                    constraint,
                    error_function=error_function)

        # unified power map for red_sx^n
        upm = {}
        rel_error_list = []

        poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

        for i in xrange(2**(frac_pi_index + 1)):
            poly_object = poly_object_vector[i]
            poly_precision = self.precision
            if i == 3 or i == 5 or i == 7 or i == 9:
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * red_vx)
                poly_hi.set_precision(ML_Binary64)
                red_vx_d_2 = red_vx_d * red_vx_d
                poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2, offset=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_attributes(unbreakable=True)
            elif i == 4:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=ML_Binary64)
                poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_precision(ML_Binary64)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    red_vx,
                    unified_precision=poly_precision,
                    power_map_=upm)
            #if i == 3:
            #  c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = self.precision)
            #  c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
            #  poly_scheme = (c0 + c1 * red_vx) + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

            poly_scheme.set_attributes(tag="poly_cos%dpi%d" %
                                       (i, 2**(frac_pi_index)),
                                       debug=debug_precision)
            poly_scheme_vector[i] = poly_scheme

            #try:
            if is_gappa_installed() and i == 3:
                opt_scheme = self.opt_engine.optimization_process(
                    poly_scheme,
                    self.precision,
                    copy=True,
                    fuse_fma=self.fuse_fma)

                tag_map = {}
                self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

                gappa_vx = Variable("red_vx",
                                    precision=self.precision,
                                    interval=approx_interval)

                cg_eval_error_copy_map = {
                    tag_map["red_vx"]: gappa_vx,
                    tag_map["red_vx_d"]: gappa_vx,
                }

                eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_scheme,
                    cg_eval_error_copy_map,
                    gappa_filename="red_arg_%d.g" % i)
                poly_range = cos(approx_interval + i * pi / S2**frac_pi_index)
                rel_error_list.append(eval_error / poly_range)

        #for rel_error in rel_error_list:
        #  print sup(abs(rel_error))

        #return

        # case 17
        #poly17 = poly_object_vector[17]
        #c0 = Constant(coeff(poly17.get_sollya_object(), 0), precision = self.precision)
        #c1 = Constant(coeff(poly17.get_sollya_object(), 1), precision = self.precision)
        #poly_scheme_vector[17] = FusedMultiplyAdd(c1, red_vx, c0, specifier = FusedMultiplyAdd.Standard) + polynomial_scheme_builder(poly17.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

        half = 2**frac_pi_index
        sub_half = 2**(frac_pi_index - 1)

        # determine if the reduced input is within the second and third quarter (not first nor fourth)
        # to negate the cosine output
        factor_cond = BitLogicAnd(BitLogicXor(
            BitLogicRightShift(modk, frac_pi_index),
            BitLogicRightShift(modk, frac_pi_index - 1)),
                                  1,
                                  tag="factor_cond",
                                  debug=True)

        CM1 = Constant(-1, precision=self.precision)
        C1 = Constant(1, precision=self.precision)
        factor = Select(factor_cond,
                        CM1,
                        C1,
                        tag="factor",
                        debug=debug_precision)
        factor2 = Select(Equal(modk, Constant(sub_half)),
                         CM1,
                         C1,
                         tag="factor2",
                         debug=debug_precision)

        switch_map = {}
        if 0:
            for i in xrange(2**(frac_pi_index + 1)):
                switch_map[i] = Return(poly_scheme_vector[i])
        else:
            for i in xrange(2**(frac_pi_index - 1)):
                switch_case = (i, half - i)
                #switch_map[i]      = Return(poly_scheme_vector[i])
                #switch_map[half-i] = Return(-poly_scheme_vector[i])
                if i != 0:
                    switch_case = switch_case + (half + i, 2 * half - i)
                    #switch_map[half+i] = Return(-poly_scheme_vector[i])
                    #switch_map[2*half-i] = Return(poly_scheme_vector[i])
                if poly_scheme_vector[i].get_precision() != self.precision:
                    poly_result = Conversion(poly_scheme_vector[i],
                                             precision=self.precision)
                else:
                    poly_result = poly_scheme_vector[i]
                switch_map[switch_case] = Return(factor * poly_result)
            #switch_map[sub_half] = Return(-poly_scheme_vector[sub_half])
            #switch_map[half + sub_half] = Return(poly_scheme_vector[sub_half])
            switch_map[(sub_half, half + sub_half)] = Return(
                factor2 * poly_scheme_vector[sub_half])

        result = SwitchBlock(modk, switch_map)

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################

        # payne and hanek argument reduction for large arguments
        #red_func_name = "payne_hanek_cosfp32" # "payne_hanek_fp32_asm"
        red_func_name = "payne_hanek_fp32_asm"
        payne_hanek_func_op = FunctionOperator(
            red_func_name,
            arg_map={0: FO_Arg(0)},
            require_header=["support_lib/ml_red_arg.h"])
        payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                          ML_Binary64, payne_hanek_func_op)
        payne_hanek_func_op.declare_prototype = payne_hanek_func
        #large_arg_red = FunctionCall(payne_hanek_func, vx)
        large_arg_red = payne_hanek_func(vx)
        red_bound = S2**20

        cond = Abs(vx) >= red_bound
        cond.set_attributes(tag="cond", likely=False)

        lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
        lar_modk = Modulo(lar_neark,
                          Constant(16, precision=ML_Int64),
                          tag="lar_modk",
                          debug=True)
        # Modulo is supposed to be already performed (by payne_hanek_cosfp32)
        #lar_modk = NearestInteger(large_arg_red, precision = ML_Int64)
        pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                    precision=ML_Binary64)
        pre_lar_red_vx.set_attributes(precision=ML_Binary64,
                                      debug=debug_lftolx,
                                      tag="pre_lar_red_vx")
        lar_red_vx = Conversion(pre_lar_red_vx,
                                precision=self.precision,
                                debug=debug_precision,
                                tag="lar_red_vx")
        lar_red_vx_lo = Conversion(
            pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
            precision=self.precision)
        lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                     precision=self.precision)

        lar_k = 3
        # large arg reduction Universal Power Map
        lar_upm = {}
        lar_switch_map = {}
        approx_interval = Interval(-0.5, 0.5)
        for i in xrange(2**(lar_k + 1)):
            frac_pi = pi / S2**lar_k
            func = cos(frac_pi * i + frac_pi * x)

            degree = 6
            error_mode = sollya.absolute
            if i % 2**(lar_k) == 2**(lar_k - 1):
                # close to sin(x) cases
                func = -sin(frac_pi * x) if i == 2**(lar_k -
                                                     1) else sin(frac_pi * x)
                degree_list = range(0, degree + 1, 2)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func / x, degree_list, precision_list, approx_interval,
                    error_mode)
                poly_object = poly_object.sub_poly(offset=-1)
            else:
                degree_list = range(degree + 1)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func, degree_list, precision_list, approx_interval,
                    error_mode)

            if i == 3 or i == 5 or i == 7 or i == 9 or i == 11 or i == 13:
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * lar_red_vx)
                poly_hi.set_precision(ML_Binary64)
                pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                pre_poly_scheme.set_attributes(precision=ML_Binary64)
                poly_scheme = Conversion(pre_poly_scheme,
                                         precision=self.precision)
            elif i == 4 or i == 12:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                              precision=self.precision)
                c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                              precision=self.precision)
                poly_hi = polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=3),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                       precision=ML_Binary64)
                poly_scheme = Conversion(FusedMultiplyAdd(
                    c1, lar_red_vx, poly_hi, precision=ML_Binary64) +
                                         c1 * lar_red_vx_lo,
                                         precision=self.precision)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
            # poly_scheme = polynomial_scheme_builder(poly_object, lar_red_vx, unified_precision = self.precision, power_map_ = lar_upm)
            poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                       debug=debug_precision)
            lar_switch_map[(i, )] = Return(poly_scheme)

        lar_result = SwitchBlock(lar_modk, lar_switch_map)

        # main scheme
        #Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        # scheme = Statement(ConditionBlock(cond, lar_result, result))

        Log.report(Log.Info, "Construction of the initial MDL scheme")
        scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                           ConditionBlock(cond, lar_result, result))

        return scheme
Exemple #12
0
class ML_FastSinCos(ML_Function("ml_fast_cos")):
    """ Table-based fixed-point implementation of cosine (or sine).

    The input is converted to a fixed-point value which is split into a
    high part, used to index a fused (cos, sin) table, and a low part u.
    The result is rebuilt from the tabulated values and two short
    polynomial approximations (one for cos(u), one for sin(u)/u) using the
    angle-addition formulas.
    """
    def __init__(self,
                 precision=ML_Binary32,
                 accuracy=ML_Faithful,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 processor=None,
                 output_file="cosf.c",
                 function_name="cosf",
                 input_interval=Interval(0, 1),
                 result_precision=ML_Binary32,
                 table_size_log=8,
                 cos_output=True):
        """ Initialize the generator.

        @param precision format of the input variable
        @param accuracy accuracy object (its get_goal() is read by
               generate_scheme)
        @param processor target forwarded to the base class; when None a
               new GenericProcessor is built for this instance
        @param input_interval interval used to size the fixed-point input
               format and the table step
        @param result_precision format of the returned value
        @param table_size_log log2 of the number of rows of the fused
               (cos, sin) table
        @param cos_output True to generate cosine, False to generate sine

        The remaining arguments are forwarded unchanged to
        ML_FunctionBasis.__init__.
        """
        # a default written as GenericProcessor() in the signature would be
        # evaluated once, at class-definition time, and then shared by every
        # instance; build a fresh one per instance instead
        if processor is None:
            processor = GenericProcessor()

        # initializing I/O precision
        io_precisions = [result_precision, precision]

        # initializing base class
        ML_FunctionBasis.__init__(self,
                                  base_name="cos",
                                  function_name=function_name,
                                  output_file=output_file,
                                  io_precisions=io_precisions,
                                  abs_accuracy=None,
                                  libm_compliant=libm_compliant,
                                  processor=processor,
                                  fuse_fma=fuse_fma,
                                  fast_path_extract=fast_path_extract,
                                  debug_flag=debug_flag)
        self.precision = precision
        self.cos_output = cos_output
        self.accuracy = accuracy
        self.input_interval = input_interval
        self.table_size_log = table_size_log

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ Generate the emulation code for ML_FastSinCos functions.

        mpfr_x is a mpfr_t variable which should have the right precision,
        mpfr_rnd is the rounding mode.

        The emulation calls mpfr_cos when self.cos_output is set and
        mpfr_sin otherwise, with (result, mpfr_x, mpfr_rnd) as arguments,
        and assigns the value returned by that call to result_ternary.
        Returns the Statement wrapping this assignment.
        """
        emulate_func_name = "mpfr_cos" if self.cos_output else "mpfr_sin"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def generate_scheme(self):
        """ Build the evaluation scheme of the function.

        Steps:
          - fill a fused table with cos and sin sampled on 2**table_size_log
            points,
          - convert the input to fixed-point and split it into a high part
            (table index) and a low part (red_vx_lo),
          - build the polynomial approximations of cos(x) and sin(x)/x on
            the table step interval,
          - combine everything through generate_cos_scheme or
            generate_sin_scheme depending on self.cos_output.

        Returns a Statement returning the result converted to the output
        precision (self.io_precisions[0]).
        """
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "target: %s " % self.processor.target_name)

        # display parameter information
        Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
        Log.report(Log.Info, "input interval: %s " % self.input_interval)

        accuracy_goal = self.accuracy.get_goal()
        Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

        table_size_log = self.table_size_log
        integer_size = 31

        # number of integer bits required to represent the largest input
        max_bound = sup(abs(self.input_interval))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_Table(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table: row i holds (cos, sin) of i * 2**(max_bound_log - table_size_log)
        # (range rather than the Python-2-only xrange; the table is small)
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            # values are stored unscaled
            # NOTE(review): presumably the table rounds them to
            # storage_precision when it is emitted -- confirm
            fused_table[i][0] = cos(local_x)
            fused_table[i][1] = sin(local_x)

        # argument reduction evaluation scheme: the input is converted to a
        # signed fixed-point format with scaling_power fractional bits
        red_vx_precision = ML_Custom_FixedPoint_Format(
            integer_size - scaling_power, scaling_power, signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.io_precisions[0]
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        # mask keeping the (table_size_log + 1) most significant bits of
        # the 32-bit fixed-point word
        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        # low part u = red_vx - red_vx_hi, argument of the polynomials
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation over one table step
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(x), [0, 2], [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        # only the degree-2 coefficient is used by the evaluation schemes
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation (of sin(x)/x)
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        # the degree is an integer: report it with %d rather than %e
        Log.report(Log.Info, "sine poly degree: %d" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        # NOTE(review): the precision list below has sin_poly_degree + 2
        # entries for a 2-entry monomial list, unlike the cosine call above;
        # kept as-is, confirm which length the builder expects
        sin_poly_object, _ = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        # only the degree-2 coefficient is used by the evaluation schemes
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=output_precision)

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme

    ## generate scheme for cosine approximation of cos(X = x + u), built as
    #  cos(x) + cos(x).C2.u^2 - sin(x).u - S2.u^2.(sin(x).u)
    #  @param computation_precision ML_Format used as default precision for scheme evaluation
    #  @param tabulated_cos tabulated value of cosine(high part of vx)
    #  @param tabulated_sin tabulated value of   sine(high part of vx)
    #  @param sin_C2 polynomial coefficient of sine approximation for u^3
    #  @param cos_C2 polynomial coefficient of cosine approximation for u^2
    #  @param red_vx_lo low part of the reduced input variable (i.e. u)
    #  @return operation node evaluating the approximation
    def generate_cos_scheme(self, computation_precision, tabulated_cos,
                            tabulated_sin, sin_C2, cos_C2, red_vx_lo):
        # cos(x) * C2, kept under a distinct local name so the cos_C2
        # parameter is not rebound (the node tag is unchanged)
        cos_x_C2 = Multiplication(tabulated_cos,
                                  cos_C2,
                                  precision=ML_Custom_FixedPoint_Format(
                                      -1, 32, signed=True),
                                  tag="cos_C2")
        u2 = Multiplication(
            red_vx_lo,
            red_vx_lo,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
            tag="u2")
        # sin(x) * u
        sin_u = Multiplication(
            tabulated_sin,
            red_vx_lo,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(1, 30, signed = True)
            tag="sin_u")

        # cos(x) * C2 * u^2
        cos_C2_u2 = Multiplication(
            cos_x_C2,
            u2,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(1, 30,signed = True)
            tag="cos_C2_u2")

        S2_u2 = Multiplication(sin_C2,
                               u2,
                               precision=ML_Custom_FixedPoint_Format(
                                   -1, 32, signed=True),
                               tag="S2_u2")

        # S2 * u^2 * (sin(x) * u)
        S2_u3_sin = Multiplication(
            S2_u2,
            sin_u,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5,26, signed = True)
            tag="S2_u3_sin")

        cos_C2_u2_P_cos = Addition(
            tabulated_cos,
            cos_C2_u2,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
            tag="cos_C2_u2_P_cos")

        cos_C2_u2_P_cos_M_sin_u = Subtraction(
            cos_C2_u2_P_cos,
            sin_u,
            precision=
            computation_precision  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
        )

        scheme = Subtraction(
            cos_C2_u2_P_cos_M_sin_u,
            S2_u3_sin,
            precision=
            computation_precision  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
        )

        return scheme

    ## generate scheme for sine approximation of sin(X = x + u), built as
    #  sin(x) + cos(x).u + sin(x).C2.u^2 + S2.u^2.(cos(x).u)
    #  @param computation_precision ML_Format used as default precision for scheme evaluation
    #  @param tabulated_cos tabulated value of cosine(high part of vx)
    #  @param tabulated_sin tabulated value of   sine(high part of vx)
    #  @param coeff_S2 polynomial coefficient of sine approximation for u^3
    #  @param coeff_C2 polynomial coefficient of cosine approximation for u^2
    #  @param red_vx_lo low part of the reduced input variable (i.e. u)
    #  @return operation node evaluating the approximation
    def generate_sin_scheme(self, computation_precision, tabulated_cos,
                            tabulated_sin, coeff_S2, coeff_C2, red_vx_lo):
        # sin(x) * C2
        sin_C2 = Multiplication(tabulated_sin,
                                coeff_C2,
                                precision=ML_Custom_FixedPoint_Format(
                                    -1, 32, signed=True),
                                tag="sin_C2")
        u2 = Multiplication(
            red_vx_lo,
            red_vx_lo,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
            tag="u2")
        # cos(x) * u
        cos_u = Multiplication(
            tabulated_cos,
            red_vx_lo,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(1, 30, signed = True)
            tag="cos_u")

        S2_u2 = Multiplication(coeff_S2,
                               u2,
                               precision=ML_Custom_FixedPoint_Format(
                                   -1, 32, signed=True),
                               tag="S2_u2")

        # sin(x) * C2 * u^2
        sin_C2_u2 = Multiplication(sin_C2,
                                   u2,
                                   precision=computation_precision,
                                   tag="sin_C2_u2")

        # S2 * u^2 * (cos(x) * u)
        S2_u3_cos = Multiplication(
            S2_u2,
            cos_u,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5,26, signed = True)
            tag="S2_u3_cos")

        sin_P_cos_u = Addition(
            tabulated_sin,
            cos_u,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
            tag="sin_P_cos_u")

        sin_P_cos_u_P_C2_u2_sin = Addition(
            sin_P_cos_u,
            sin_C2_u2,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
            tag="sin_P_cos_u_P_C2_u2_sin")

        scheme = Addition(
            sin_P_cos_u_P_C2_u2_sin,
            S2_u3_cos,
            precision=
            computation_precision  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
        )

        return scheme
Exemple #13
0
class ML_Exp2(ML_Function("ml_exp2")):
    """ Meta-function generating an implementation of the base-2
        exponential 2^x """
    def __init__(self, args=DefaultArgTemplate):
        """ Initialize the meta-function from the argument structure @p args
            and record the requested accuracy """
        # initializing base class
        ML_FunctionBasis.__init__(self, args)
        self.accuracy = args.accuracy

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_Exp2,
        builtin from a default argument mapping overloaded with @p kw """
        default_args_exp2 = {
            "output_file": "my_exp2.c",
            "function_name": "exp2f",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_exp2.update(kw)
        return DefaultArgTemplate(**default_args_exp2)

    def generate_scheme(self):
        """ Build the operation graph evaluating 2^x.

            The input is split as x = vx_int + vx_r where vx_int is the
            nearest integer to x. A polynomial approximating 2^r - 1 on
            [-0.5, 0.5] is evaluated on vx_r and the result is rebuilt as
            2^vx_int * poly + 2^vx_int.
            Inputs flagged by the overflow / underflow tests are routed to a
            dedicated branch returning +infinity, a subnormal reconstruction
            or zero.

            Only ML_Binary32 and ML_Binary64 are supported (any other
            precision raises KeyError on the integer format lookup).

            @return the Statement implementing the function """
        # registering the single input variable of the function
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # range of the reduced argument vx_r
        r_interval = Interval(-0.5, 0.5)

        # approximation error target: half of the largest ulp of 2^r over
        # the reduced range
        local_ulp = sup(ulp(2**r_interval, self.precision))
        print("ulp: ", local_ulp)
        error_goal = S2**-1 * local_ulp
        print("error goal: ", error_goal)

        # integer format with the same width as the floating-point format
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        #Argument Reduction
        vx_int = NearestInteger(vx,
                                precision=int_precision,
                                tag='vx_int',
                                debug=debug_multi)
        vx_intf = Conversion(vx_int, precision=self.precision)
        vx_r = vx - vx_intf
        vx_r.set_attributes(tag="vx_r", debug=debug_multi)
        # two extra degrees are added on top of sollya's estimate
        degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2
        # first coefficient uses format 1, the others the working precision
        precision_list = [1] + [self.precision] * degree

        # 2^vx_int, built by inserting vx_int as the exponent
        exp_X = ExponentInsertion(vx_int,
                                  tag="exp_X",
                                  debug=debug_multi,
                                  precision=self.precision)

        #Polynomial Approx
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x) - 1, degree, precision_list, r_interval,
            sollya.absolute)
        Log.report(Log.Info, "Poly : %s" % poly_object)
        print("poly_error : ", poly_error)
        # only the coefficients from index 1 upward are evaluated
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         vx_r,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        #Handling special cases
        oflow_bound = Constant(self.precision.get_emax() + 1,
                               precision=self.precision)
        subnormal_bound = self.precision.get_emin_subnormal()
        uflow_bound = self.precision.get_emin_normal()
        print("oflow : ", oflow_bound)
        test_overflow = Comparison(vx,
                                   oflow_bound,
                                   specifier=Comparison.GreaterOrEqual)
        test_overflow.set_attributes(tag="oflow_test",
                                     debug=debug_multi,
                                     likely=False,
                                     precision=ML_Bool)

        test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less)
        test_underflow.set_attributes(tag="uflow_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        test_subnormal = Comparison(vx,
                                    subnormal_bound,
                                    specifier=Comparison.Greater)
        test_subnormal.set_attributes(tag="sub_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        # subnormal result: the scale 2^vx_int is split into
        # 2^(vx_int - uflow_bound) * 2^uflow_bound
        subnormal_offset = -(uflow_bound - vx_int)
        subnormal_offset.set_attributes(tag="offset", debug=debug_multi)
        exp_offset = ExponentInsertion(subnormal_offset,
                                       precision=self.precision,
                                       debug=debug_multi,
                                       tag="exp_offset")
        exp_min = ExponentInsertion(uflow_bound,
                                    precision=self.precision,
                                    debug=debug_multi,
                                    tag="exp_min")
        subnormal_result = exp_offset * exp_min * poly + exp_offset * exp_min

        # true when the input requires one of the special-case branches
        test_std = LogicalOr(test_overflow,
                             test_underflow,
                             precision=ML_Bool,
                             tag="std_test",
                             likely=False)

        #Reconstruction
        result = exp_X * poly + exp_X
        result.set_attributes(tag="result", debug=debug_multi)

        C0 = Constant(0, precision=self.precision)

        return_inf = Return(FP_PlusInfty(self.precision))
        return_C0 = Return(C0)
        return_sub = Return(subnormal_result)
        return_std = Return(result)

        # overflow -> +inf, above the subnormal bound -> subnormal result,
        # otherwise -> 0
        non_std_statement = Statement(
            ConditionBlock(
                test_overflow, return_inf,
                ConditionBlock(test_subnormal, return_sub, return_C0)))

        scheme = Statement(
            ConditionBlock(test_std, non_std_statement, return_std))

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for ML_Exp2 functions
        mpfr_x is a mpfr_t variable which should have the right precision
        mpfr_rnd is the rounding mode
    """
        # mpfr_exp2 computes 2^x (mpfr_exp would compute e^x), which is the
        # function numeric_emulate evaluates
        emulate_func_name = "mpfr_exp2"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        # the ternary value returned by the MPFR call is stored in
        # result_ternary
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def numeric_emulate(self, input_value):
        """ Return the reference value 2^input_value as a sollya object """
        return sollya.SollyaObject(2)**(input_value)

    # fixed inputs, each wrapped in a single-element argument list
    standard_test_cases = [[
        sollya.parse(x)
    ] for x in ["0x1.ffead1bac7ad2p+9", "-0x1.ee9cb4p+1", "-0x1.db0928p+3"]]
Exemple #14
0
class ML_Log1p(ML_Function("ml_log1p")):
  """ Meta-function generating an implementation of log1p(x) """
  def __init__(self, args):
    """ Initialize the meta-function from the argument structure @p args """
    ML_FunctionBasis.__init__(self, args)


  @staticmethod
  def get_default_args(**kw):
    """ Return a structure containing the arguments for ML_Log1p,
        builtin from a default argument mapping overloaded with @p kw """
    default_args_log1p = {
        "output_file": "my_log1p.c",
        "function_name": "my_log1pf",
        "precision": ML_Binary32,
        "accuracy": ML_Faithful,
        "target": GenericProcessor()
    }
    default_args_log1p.update(kw)
    return DefaultArgTemplate(**default_args_log1p)

  def generate_scheme(self):
    """ Build the operation graph evaluating log1p(x).

        Dispatch performed by the returned scheme:
          - x < -1            : raise invalid, return a quiet NaN
          - x infinite        : return +infinity
          - x NaN             : raise invalid if signaling, return a quiet NaN
          - x subnormal       : return x
          - exponent(x) < -7  : x * polynomial(x), the polynomial
                                approximating log1p(x)/x
          - otherwise         : table-based reduction of 1 + x using the
                                target reciprocal seed

        @return the ConditionBlock implementing the function """
    vx = self.implementation.add_input_variable("x", self.precision) 
    sollya_precision = self.get_input_precision().sollya_object

    # local overloading of RaiseReturn operation
    # NOTE(review): this helper is not called anywhere in this method
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)


    # log(2) split into a high part rounded on
    # field_size - (exponent_size + 1) bits and a low part holding the
    # rounding residual
    log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
    log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

    log2_hi = Constant(log2_hi_value, precision = self.precision)
    log2_lo = Constant(log2_lo_value, precision = self.precision)

    # NOTE(review): vx_exp is rebuilt with identical arguments further down,
    # after the table creation; this first node is superseded
    vx_exp  = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    int_precision = self.precision.get_integer_format()

    # retrieving processor inverse approximation table
    # (a dummy ReciprocalSeed node is used as the lookup key)
    dummy_var = Variable("dummy", precision = self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    # table creation
    # log_table[i] = (high part, low part) of log(inv_approx_table[i]);
    # entry 0 is forced to (0.0, 0.0)
    table_index_size = 7
    log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
    log_table[0][0] = 0.0
    log_table[0][1] = 0.0
    for i in range(1, 2**table_index_size):
        #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
        inv_value = inv_approx_table[i] # (1.0 + (inv_approx_table[i] / S2**9) ) * S2**-1
        value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
        log_table[i][0] = value_high
        log_table[i][1] = value_low


    vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    # case close to 0: ctz
    # selected when the exponent of x is below ctz_exp_limit
    ctz_exp_limit = -7
    ctz_cond = vx_exp < ctz_exp_limit
    ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

    # polynomial approximating log1p(x)/x on ctz_interval; one extra degree
    # is added on top of sollya's estimate
    ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
    ctz_poly_object = Polynomial.build_from_approximation(log1p(sollya.x)/sollya.x, ctz_poly_degree, [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision = self.precision)
    ctz_poly.set_attributes(tag = "ctz_poly", debug = debug_lftolx)

    # multiply back by x to get the log1p(x) approximation
    ctz_result = vx * ctz_poly

    # special-input predicates used by the final dispatch
    # NOTE(review): the comparison is strict, so x == -1 is not caught by
    # neg_input and falls through to the later branches -- confirm intended
    neg_input = Comparison(vx, -1, likely = False, specifier = Comparison.Less, debug = debugd, tag = "neg_input")
    vx_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debugd, tag = "nan_or_inf")
    vx_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False, debug = debugd, tag = "snan")
    vx_inf  = Test(vx, specifier = Test.IsInfty, likely = False, debug = debugd, tag = "inf")
    vx_subnormal = Test(vx, specifier = Test.IsSubnormal, likely = False, debug = debugd, tag = "vx_subnormal")
    
    # NOTE(review): newlog_function is built here but not referenced by the
    # scheme returned below
    log_function_code = CodeFunction("new_log", [Variable("x", precision = ML_Binary64)], output_format = ML_Binary64) 
    log_call_generator = FunctionOperator(log_function_code.get_name(), arity = 1, output_precision = ML_Binary64, declare_prototype = log_function_code)
    newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64,), ML_Binary64, log_call_generator)


    # case away from 0.0
    pre_vxp1 = vx + 1.0
    pre_vxp1.set_attributes(tag = "pre_vxp1", debug = debug_lftolx)
    pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag = "pre_vxp1_exp", debug = debugd)
    # when the exponent of 1 + x exceeds 2**(exponent_size - 2), 1 + x is
    # scaled by 2^-500, otherwise by 2^0
    # NOTE(review): -500 is outside the binary32 exponent range -- confirm
    # the behaviour when self.precision is ML_Binary32
    cm500 = Constant(-500, precision = ML_Int32)
    c0 = Constant(0, precision = ML_Int32)
    cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
    scaling_factor_exp = Select(cond_scaling, cm500, c0)
    scaling_factor = ExponentInsertion(scaling_factor_exp, precision = self.precision, tag = "scaling_factor")

    vxp1 = pre_vxp1 * scaling_factor
    vxp1.set_attributes(tag = "vxp1", debug = debug_lftolx)
    vxp1_exp = ExponentExtraction(vxp1, tag = "vxp1_exp", debug = debugd)

    # reciprocal seed of the (scaled) 1 + x
    vxp1_inv = ReciprocalSeed(vxp1, precision = self.precision, tag = "vxp1_inv", debug = debug_lftolx, silent = True)

    # 2^(-exponent(vxp1)), used as the reduction factor when table_index is 0
    vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision = self.precision, tag = "vxp1_dirty_inv", debug = debug_lftolx)

    # bit pattern of vxp1 shifted right by field_size - 7 and masked with
    # 0x7f (the hard-coded 7 presumably mirrors table_index_size)
    table_index = BitLogicAnd(BitLogicRightShift(TypeCast(vxp1, precision = int_precision, debug = debuglx), self.precision.get_field_size() - 7, debug = debuglx), 0x7f, tag = "table_index", debug = debuglx) 

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    # the seed is masked with -2, i.e. its least significant bit is cleared
    # NOTE(review): the cast goes through ML_UInt64 whatever self.precision
    # is, while table_index uses int_precision -- confirm for ML_Binary32
    pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision = ML_UInt64), Constant(-2, precision = ML_UInt64), precision = ML_UInt64), precision = self.precision, tag = "pre_arg_red_index", debug = debug_lftolx)
    arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag = "arg_red_index", debug = debug_lftolx)

    # reduced argument arg_red_index * (1 + x) - 1; without scaling it is
    # computed from x directly as (arg_red_index * x - 1) + arg_red_index
    red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
    #red_vxp1 = arg_red_index * vxp1 - 1.0
    red_vxp1.set_attributes(tag = "red_vxp1", debug = debug_lftolx)

    # high / low parts of the tabulated logarithm of the reduction factor
    log_inv_lo = TableLoad(log_table, table_index, 1, tag = "log_inv_lo", debug = debug_lftolx) 
    log_inv_hi = TableLoad(log_table, table_index, 0, tag = "log_inv_hi", debug = debug_lftolx)

    inv_err = S2**-6 # TODO: link to target DivisionSeed precision

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
    global_poly_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
    # only the coefficients from index 1 upward are evaluated; the missing
    # term is restored below by adding red_vxp1 (see step1)
    poly_object = global_poly_object.sub_poly(start_index = 1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision = self.precision)
    _poly.set_attributes(tag = "poly", debug = debug_lftolx)
    Log.report(Log.Info, global_poly_object.get_sollya_object())


    # NOTE(review): vxp1_inv_exp is not used by the returned scheme
    vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag = "vxp1_inv_exp", debug = debugd)
    # negated exponent of vxp1 with the scaling exponent added back,
    # converted to the working precision
    corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision = self.precision)# vxp1_inv_exp

    #poly = (red_vxp1) * (1 +  _poly)
    #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

    # NOTE(review): pre_result is not used by the returned scheme; the
    # step0..step2 chain below builds the returned value instead
    pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo))
    pre_result.set_attributes(tag = "pre_result", debug = debug_lftolx)
    exact_log2_hi_exp = - corr_exp * log2_hi
    exact_log2_hi_exp.set_attributes(tag = "exact_log2_hi_exp", debug = debug_lftolx, prevent_optimization = True)
    #std_result =  exact_log2_hi_exp + pre_result

    exact_log2_lo_exp = - corr_exp * log2_lo
    exact_log2_lo_exp.set_attributes(tag = "exact_log2_lo_exp", debug = debug_lftolx)#, prevent_optimization = True)
    
    # explicit summation order, each step being flagged with
    # prevent_optimization:
    #   init       = exact_log2_lo_exp - log_inv_lo
    #   step0      = red_vxp1 * _poly + init
    #   step1      = step0 + red_vxp1
    #   step2      = -log_inv_hi + step1
    #   std_result = exact_log2_hi_exp + step2
    init = exact_log2_lo_exp  - log_inv_lo
    init.set_attributes(tag = "init", debug = debug_lftolx, prevent_optimization = True)
    fma0 = (red_vxp1 * _poly + init) # - log_inv_lo)
    fma0.set_attributes(tag = "fma0", debug = debug_lftolx)
    # step0 is the same node as fma0, so this call overrides its tag
    step0 = fma0 
    step0.set_attributes(tag = "step0", debug = debug_lftolx) #, prevent_optimization = True)
    step1 = step0 + red_vxp1
    step1.set_attributes(tag = "step1", debug = debug_lftolx, prevent_optimization = True)
    step2 = -log_inv_hi + step1
    step2.set_attributes(tag = "step2", debug = debug_lftolx, prevent_optimization = True)
    std_result = exact_log2_hi_exp + step2
    std_result.set_attributes(tag = "std_result", debug = debug_lftolx, prevent_optimization = True)


    # main scheme
    Log.report(Log.Info, "MDL scheme")
    pre_scheme = ConditionBlock(neg_input,
        # x < -1: invalid operation, quiet NaN
        Statement(
            ClearException(),
            Raise(ML_FPE_Invalid),
            Return(FP_QNaN(self.precision))
        ),
        ConditionBlock(vx_nan_or_inf,
            ConditionBlock(vx_inf,
                # infinite input (negative values were caught above)
                Statement(
                    ClearException(),
                    Return(FP_PlusInfty(self.precision)),
                ),
                # NaN input: invalid is raised for signaling NaN only
                Statement(
                    ClearException(),
                    ConditionBlock(vx_snan,
                        Raise(ML_FPE_Invalid)
                    ),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(vx_subnormal,
                # subnormal input is returned unchanged
                Return(vx),
                ConditionBlock(ctz_cond,
                    # close to zero: polynomial path
                    Statement(
                        Return(ctz_result),
                    ),
                    # standard path: table-based reduction
                    Statement(
                        Return(std_result)
                    )
                )
            )
        )
    )
    scheme = pre_scheme
    return scheme

  def numeric_emulate(self, input_value):
    """ Return the reference value log1p(input_value) """
    return log1p(input_value)
Exemple #15
0
class ML_HyperbolicTangent(ML_Function("ml_tanh")):
    """ Implementation of hyperbolic tangent function """
    def __init__(self, args=DefaultArgTemplate):
        """ Initialize the meta-function from the argument structure
            @p args """
        # initializing base class
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_HyperbolicTangent,
            builtin from a default argument mapping overloaded with @p kw """
        default_args_tanh = {
            "output_file": "my_tanh.c",
            "function_name": "my_tanh",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_tanh.update(kw)
        return DefaultArgTemplate(**default_args_tanh)

    def generate_approx_poly_near_zero(self, function, high_bound, error_bound,
                                       variable):
        """ Generate a polynomial approximation scheme of @p function
            around zero.

            The quotient function(x) / x is approximated on
            [-high_bound, high_bound] and the returned scheme multiplies
            the evaluated polynomial back by @p variable.

            @param function sollya expression to approximate
            @param high_bound bound of the symmetric approximation interval
            @param error_bound error target given to sollya.guessdegree
            @param variable operation node the scheme is evaluated on
            @return (poly_scheme, approx_error) pair """
        def error_function(p, f, ai, mod, t):
            # mod and t are part of the expected callback signature but are
            # not needed to measure the approximation error
            return sollya.dirtyinfnorm(p - f, ai)

        # Some issues encountered when 0 is one of the interval bound
        # so we use a symmetric interval around it
        approx_interval = Interval(-high_bound, high_bound)
        local_function = function / sollya.x

        degree = sollya.sup(
            sollya.guessdegree(local_function, approx_interval, error_bound))
        degree_list = range(int(degree) + 1)

        # first coefficient uses format 1, the others the working precision
        poly_object, approx_error = Polynomial.build_from_approximation_with_error(
            local_function,
            degree_list, [1] + [self.precision] * (len(degree_list) - 1),
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(
            Log.Info, "approximation poly: {}\n  with error {}".format(
                poly_object, approx_error))

        # function(x) ~ x * poly(x)
        poly_scheme = Multiplication(
            variable,
            PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, variable, self.precision))
        return poly_scheme, approx_error

    def generate_scheme(self):
        """ Generating implementation script for hyperbolic tangent
            meta-function

            |x| is split into three ranges: below near_zero_bound a single
            polynomial is used, up to high_bound a piecewise polynomial
            approximation is used and above high_bound the result is 1.0.
            The sign of the input is applied to the selected value.

            @return the Return node implementing the function """
        # registering the single input variable to the function
        vx = self.implementation.add_input_variable("x", self.precision)

        #Log.set_dump_stdout(True)
        # tanh(x) = sinh(x) / cosh(x)
        #         = (e^x - e^-x) / (e^x + e^-x)
        #         = (e^(2x) - 1) / (e^(2x) + 1)
        #   when x -> +inf, tanh(x) -> 1
        #   when x -> -inf, tanh(x) -> -1
        #   ~0 e^x    ~ 1 + x + x^2 / 2 + x^3 / 6 + ...
        #      e^(-x) ~ 1 - x + x^2 / 2 - x^3 / 6 + ...
        #   when x -> 0, tanh(x) ~ (2 (x + x^3/6 + ...)) / (2 + x^2 + ...) ~ x
        # We can divide the input interval into 3 parts
        # positive, around 0, and finally negative

        # Possible argument reduction
        # x = m.2^E = k * log(2) + r
        # (k != 0) => tanh(x) = (2^(2k) * e^(2r) - 1) / (2^(2k) * e^(2r) + 1)
        #                     = (1 - e^(-2r) / 2^(2k)) / (1 + e^(-2r) / 2^(2k))
        #
        # tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
        #         = (e^(2x) + 1 - 1- 1) / (e^(2x) + 1)
        #         = 1 - 2 / (e^(2x) + 1)

        # tanh is odd so we reduce the computation to the absolute value of
        # vx
        abs_vx = Abs(vx, precision=self.precision)

        # if p is the expected output precision
        # x > (p+2) * log(2) / 2 => tanh(x) = 1 - eps
        #   where eps < 1/2 * 2^-p
        p = self.precision.get_mantissa_size()
        high_bound = (p + 2) * sollya.log(2) / 2
        near_zero_bound = 0.125
        interval_num = 1024

        # the width of an interval is snapped to a power of two, the number
        # of intervals is doubled and high_bound is recomputed so that the
        # intervals exactly tile [near_zero_bound, high_bound]
        interval_size = (high_bound - near_zero_bound) / interval_num
        new_interval_size = sollya.S2**int(sollya.log2(interval_size))
        interval_num *= 2
        high_bound = new_interval_size * interval_num + near_zero_bound

        # Near 0 approximation
        near_zero_scheme, near_zero_error = self.generate_approx_poly_near_zero(
            sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx)

        # approximation parameters
        poly_degree = 5

        # NOTE: this modifies a global sollya setting and does not restore it
        sollya.settings.points = 117

        approx_scheme, approx_error = piecewise_approximation(
            sollya.tanh,
            abs_vx,
            self.precision,
            bound_low=near_zero_bound,
            bound_high=high_bound,
            num_intervals=interval_num,
            max_degree=poly_degree,
            error_threshold=sollya.S2**-p)
        Log.report(Log.Warning, "approx_error={}".format(approx_error))

        # |x| < near_zero_bound -> near-zero polynomial
        # |x| < high_bound      -> piecewise approximation
        # otherwise             -> 1.0
        complete_scheme = Select(
            abs_vx < near_zero_bound, near_zero_scheme,
            Select(abs_vx < high_bound, approx_scheme,
                   Constant(1.0, precision=self.precision)))

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        # tanh(-x) = -tanh(x): negate the result for negative inputs
        scheme = Return(Select(vx < 0, Negation(complete_scheme),
                               complete_scheme),
                        precision=self.precision)
        return scheme

    def numeric_emulate(self, input_value):
        """ Return the reference value tanh(input_value) """
        return tanh(input_value)

    # fixed inputs, each wrapped in a single-element argument list
    standard_test_cases = [[sollya.parse(x)] for x in [
        "-0x1.572306p+0", "0x1.af0bf2p+1", "-0x1.af0bf2p+1", "-0x1.51b618p-13",
        "0x1.ffb99ep-1"
    ]]
Exemple #16
0
class ML_Log10(ML_Function("log10")):
    def __init__(self, args):
        """ Initialize the meta-function from the argument structure @p args
            and record the logarithm basis it carries """
        # initializing base class
        ML_FunctionBasis.__init__(self, args)
        # basis of the logarithm (get_default_args sets it to 10)
        self.basis = args.basis

    @staticmethod
    def get_default_args(**kw):
        """ Build the argument structure for ML_Log10: the built-in default
        mapping below, with every entry of @p kw taking precedence """
        builtin_args = dict(
            output_file="ml_log10f.c",
            function_name="ml_log10f",
            basis=10,
            precision=ML_Binary32,
            accuracy=ML_Faithful,
            target=GenericProcessor(),
        )
        # user-supplied entries override (or extend) the defaults
        merged_args = {**builtin_args, **kw}
        return DefaultArgTemplate(**merged_args)

    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        log2_hi_value = round(
            log_f(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log_f(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = self.precision.get_integer_format()

        #---------------------
        # Approximation scheme
        #---------------------
        # log10(x) = log10(m.2^e) = log10(m.2^(e-t+t))
        #           = log10(m.2^-t) + (e+t) log10(2)
        #  t = (m > sqrt(2)) ? 1 : 0  is used to avoid catastrophic cancellation
        #  when e = -1 and m ~ 2
        #
        #
        # log10(m.2^-t) = log10(m.r/r.2^-t) = log10(m.r) + log10(2^-t/r)
        #               = log10(m.r) - log10(r.2^t)
        #     where r = rcp(m) an approximation of 1/m such that r.m ~ 1

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        table_index_size = inv_approx_table.index_size
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table_tho = ML_NewTable(dimensions=[2**table_index_size, 2],
                                    storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        log_table_tho[0][0] = 0.0
        log_table_tho[0][1] = 0.0
        hi_size = self.precision.get_field_size() - (
            self.precision.get_exponent_size() + 1)
        for i in table_index_range:
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            inv_value = inv_approx_table[i]
            value_high = round(log_f(inv_value), hi_size, sollya.RN)
            value_low = round(
                log_f(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

            inv_value_tho = S2 * inv_approx_table[i]
            value_high_tho = round(log_f(inv_value_tho), hi_size, sollya.RN)
            value_low_tho = round(
                log_f(inv_value_tho) - value_high_tho, sollya_precision,
                sollya.RN)
            log_table_tho[i][0] = value_high_tho
            log_table_tho[i][1] = value_low_tho

        # determining log_table range
        high_index_function = lambda table, i: table[i][0]
        low_index_function = lambda table, i: table[i][1]
        table_high_interval = log_table.get_subset_interval(
            high_index_function, table_index_range)
        table_low_interval = log_table.get_subset_interval(
            low_index_function, table_index_range)

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_multi)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            table_index = inv_approx_table.index_function(_vx_mant)

            table_index.set_attributes(tag="table_index", debug=debug_multi)

            tho_cond = _vx_mant > Constant(sollya.sqrt(2),
                                           precision=self.precision)
            tho = Select(tho_cond,
                         Constant(1.0, precision=self.precision),
                         Constant(0.0, precision=self.precision),
                         precision=self.precision,
                         tag="tho",
                         debug=debug_multi)

            rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
            r = Multiplication(rcp,
                               _vx_mant,
                               precision=self.precision,
                               tag="r")

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_multi)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-6
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_multi,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)

            Log.report(Log.Info, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + x) / x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Info, "generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_multi)
            Log.report(Log.Info, poly_object.get_sollya_object())

            corr_exp = Conversion(_vx_exp if exp_corr_factor == None else
                                  _vx_exp + exp_corr_factor,
                                  precision=self.precision) + tho
            corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

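            # The polynomial is fitted to log(1+r)/r (not log10); any change of
            # base is applied at the end through 1 / log(self.basis).
            # _poly evaluates sub_poly(start_index=1), presumably the fit without
            # its constant term, so _red_vx * _poly + _red_vx (computed below)
            # approximates log(1 + _red_vx).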

            m0h, m0l = Mul211(_red_vx, _poly)
            m0h, m0l = Add212(_red_vx, m0h, m0l)
            m0h.set_attributes(tag="m0h", debug=debug_multi)
            m0l.set_attributes(tag="m0l")
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
            rh.set_attributes(tag="rh0", debug=debug_multi)
            rl.set_attributes(tag="rl0", debug=debug_multi)
            rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
            rh.set_attributes(tag="rh", debug=debug_multi)
            rl.set_attributes(tag="rl", debug=debug_multi)

            if sollya.log(self.basis) != 1.0:
                lbh = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis))
                lbl = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis) - lbh)
                rh, rl = Mul222(rh, rl, lbh, lbl)
                return rh
            else:
                return rh

        result = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_multi)

        if False:
            # building eval error map
            eval_error_map = {
                red_vx:
                Variable("red_vx",
                         precision=self.precision,
                         interval=red_vx.get_interval()),
                log_inv_hi:
                Variable("log_inv_hi",
                         precision=self.precision,
                         interval=table_high_interval),
                log_inv_lo:
                Variable("log_inv_lo",
                         precision=self.precision,
                         interval=table_low_interval),
                corr_exp:
                Variable("corr_exp_g",
                         precision=self.precision,
                         interval=self.precision.get_exponent_interval()),
            }
            # computing gappa error
            if is_gappa_installed():
                poly_eval_error = self.get_eval_error(result, eval_error_map)
                Log.report(Log.Info, "poly_eval_error: ", poly_eval_error)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)

        # exp=-1 case
        Log.report(Log.Info, "managing exp=-1 case")
        #red_vx_2 = arg_red_index * vx_mant * 0.5
        #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err)
        #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1
        #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute)
        #print "poly_object2: ", poly_object2.get_sollya_object()
        #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision)
        #poly2.set_attributes(tag = "poly2", debug = debug_multi)
        #result2 = (poly2 - log_inv_hi - log_inv_lo)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal = compute_log(vx * S2100, exp_corr_factor=m100)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)), Return(result))))
        scheme = pre_scheme
        return scheme

    def numeric_emulate(self, input_value):
        """Return the reference logarithm of input_value in base self.basis.

        The value is the change-of-base quotient
        sollya.log(input_value) / sollya.log(self.basis).
        """
        log_of_input = sollya.log(input_value)
        log_of_basis = sollya.log(self.basis)
        return log_of_input / log_of_basis

    # Class-level list of test cases; holds a single 2-tuple whose first
    # element is the hexadecimal floating-point literal 0x1.42af3ap-1 parsed by
    # sollya.
    # NOTE(review): the second element (None) is presumably a placeholder for
    # an expected result left to the test harness -- confirm against the
    # consumer of standard_test_cases.
    standard_test_cases = [(sollya.parse("0x1.42af3ap-1"), None)]