Code example #1
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log1pf.c",
                 function_name="log1pf"):
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # debug utilities
        debugf = ML_Debug(display_format="%f")
        debuglf = ML_Debug(display_format="%lf")
        debugx = ML_Debug(display_format="%x")
        debuglx = ML_Debug(display_format="%\"PRIx64\"")
        debugd = ML_Debug(display_format="%d",
                          pre_process=lambda v: "(int) %s" % v)
        debugld = ML_Debug(display_format="%ld")
        #debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)
        debug_ddtolx = ML_Debug(
            display_format="%\"PRIx64\" %\"PRIx64\"",
            pre_process=lambda v:
            "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" %
            (v, v))
        debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}",
                            pre_process=lambda v: "%s.hi, %s.lo" % (v, v))

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)
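
        # two-limb approximation of log(2): the hi part keeps only
        # field_size - (exponent_size + 1) significand bits so that its
        # product with a small integer exponent stays exact; the lo part
        # holds the remaining rounding error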

        log2_hi_value = round(
            log(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), sollya.RN)
        log2_lo_value = round(
            log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        table_index_size = 7
        log_table = ML_Table(dimensions=[2**table_index_size, 2],
                             storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            inv_value = (1.0 + (inv_approx_table[i][0] / S2**9)) * S2**-1
            value_high = round(
                log(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        # case close to 0: ctz
        ctz_exp_limit = -7
        ctz_cond = vx_exp < ctz_exp_limit
        ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

        ctz_poly_degree = sup(
            guessdegree(
                log1p(sollya.x) / sollya.x, ctz_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        ctz_poly_object = Polynomial.build_from_approximation(
            log1p(sollya.x) / sollya.x, ctz_poly_degree,
            [self.precision] * (ctz_poly_degree + 1), ctz_interval,
            sollya.absolute)

        print "generating polynomial evaluation scheme"
        ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            ctz_poly_object, vx, unified_precision=self.precision)
        ctz_poly.set_attributes(tag="ctz_poly", debug=debug_lftolx)

        ctz_result = vx * ctz_poly

        neg_input = Comparison(vx,
                               -1,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")

        log_function_code = CodeFunction(
            "new_log", [Variable("x", precision=ML_Binary64)],
            output_format=ML_Binary64)
        log_call_generator = FunctionOperator(
            log_function_code.get_name(),
            arity=1,
            output_precision=ML_Binary64,
            declare_prototype=log_function_code)
        newlog_function = FunctionObject(log_function_code.get_name(),
                                         (ML_Binary64, ), ML_Binary64,
                                         log_call_generator)

        # case away from 0.0
        pre_vxp1 = vx + 1.0
        pre_vxp1.set_attributes(tag="pre_vxp1", debug=debug_lftolx)
        pre_vxp1_exp = ExponentExtraction(pre_vxp1,
                                          tag="pre_vxp1_exp",
                                          debug=debugd)
        cm500 = Constant(-500, precision=ML_Int32)
        c0 = Constant(0, precision=ML_Int32)
        cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size() -
                                          2)
        scaling_factor_exp = Select(cond_scaling, cm500, c0)
        scaling_factor = ExponentInsertion(scaling_factor_exp,
                                           precision=self.precision,
                                           tag="scaling_factor")

        vxp1 = pre_vxp1 * scaling_factor
        vxp1.set_attributes(tag="vxp1", debug=debug_lftolx)
        vxp1_exp = ExponentExtraction(vxp1, tag="vxp1_exp", debug=debugd)

        vxp1_inv = DivisionSeed(vxp1,
                                precision=self.precision,
                                tag="vxp1_inv",
                                debug=debug_lftolx,
                                silent=True)

        vxp1_dirty_inv = ExponentInsertion(-vxp1_exp,
                                           precision=self.precision,
                                           tag="vxp1_dirty_inv",
                                           debug=debug_lftolx)

        table_index = BitLogicAnd(BitLogicRightShift(
            TypeCast(vxp1, precision=int_precision, debug=debuglx),
            self.precision.get_field_size() - 7,
            debug=debuglx),
                                  0x7f,
                                  tag="table_index",
                                  debug=debuglx)

        # argument reduction
        # TODO: detect if single operand inverse seed is supported by the targeted architecture
        pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv,
                                                          precision=ML_UInt64),
                                                 Constant(-2,
                                                          precision=ML_UInt64),
                                                 precision=ML_UInt64),
                                     precision=self.precision,
                                     tag="pre_arg_red_index",
                                     debug=debug_lftolx)
        arg_red_index = Select(Equal(table_index, 0),
                               vxp1_dirty_inv,
                               pre_arg_red_index,
                               tag="arg_red_index",
                               debug=debug_lftolx)

        red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0,
                          (arg_red_index * vx - 1.0) + arg_red_index)
        #red_vxp1 = arg_red_index * vxp1 - 1.0
        red_vxp1.set_attributes(tag="red_vxp1", debug=debug_lftolx)

        log_inv_lo = TableLoad(log_table,
                               table_index,
                               1,
                               tag="log_inv_lo",
                               debug=debug_lftolx)
        log_inv_hi = TableLoad(log_table,
                               table_index,
                               0,
                               tag="log_inv_hi",
                               debug=debug_lftolx)

        inv_err = S2**-6  # TODO: link to target DivisionSeed precision

        print "building mathematical polynomial"
        approx_interval = Interval(-inv_err, inv_err)
        poly_degree = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        poly_object = global_poly_object.sub_poly(start_index=1)

        print "generating polynomial evaluation scheme"
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vxp1, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        print(global_poly_object.get_sollya_object())

        vxp1_inv_exp = ExponentExtraction(vxp1_inv,
                                          tag="vxp1_inv_exp",
                                          debug=debugd)
        corr_exp = -vxp1_exp + scaling_factor_exp  # vxp1_inv_exp

        #poly = (red_vxp1) * (1 +  _poly)
        #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

        pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly +
                                    (-corr_exp * log2_lo - log_inv_lo))
        pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
        exact_log2_hi_exp = -corr_exp * log2_hi
        exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                         debug=debug_lftolx,
                                         prevent_optimization=True)
        #std_result =  exact_log2_hi_exp + pre_result

        exact_log2_lo_exp = -corr_exp * log2_lo
        exact_log2_lo_exp.set_attributes(
            tag="exact_log2_lo_exp",
            debug=debug_lftolx)  #, prevent_optimization = True)

        init = exact_log2_lo_exp - log_inv_lo
        init.set_attributes(tag="init",
                            debug=debug_lftolx,
                            prevent_optimization=True)
        fma0 = (red_vxp1 * _poly + init)  # - log_inv_lo)
        fma0.set_attributes(tag="fma0", debug=debug_lftolx)
        step0 = fma0
        step0.set_attributes(
            tag="step0", debug=debug_lftolx)  #, prevent_optimization = True)
        step1 = step0 + red_vxp1
        step1.set_attributes(tag="step1",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        step2 = -log_inv_hi + step1
        step2.set_attributes(tag="step2",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        std_result = exact_log2_hi_exp + step2
        std_result.set_attributes(tag="std_result",
                                  debug=debug_lftolx,
                                  prevent_optimization=True)

        # main scheme
        print "MDL scheme"
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal, Return(vx),
                    ConditionBlock(ctz_cond, Statement(Return(ctz_result), ),
                                   Statement(Return(std_result))))))
        scheme = pre_scheme

        #print scheme.get_str(depth = None, display_precision = True)

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        print "MDL fusing FMA"
        scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        #print scheme.get_str(depth = None, display_precision = True)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=ML_Binary32)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        print "silencing operation"
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)
        #print scheme.get_str(depth = None, display_precision = True)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        #print self.result.get(cg)
        output_stream = open("%s.c" % func_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
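
The log2_hi / log2_lo pair built near the top of this scheme is a two-limb representation of log(2): the high limb drops the low exponent_size + 1 significand bits so that its product with a small integer exponent stays exact, and the low limb captures the rounding error. The sketch below reproduces that splitting for binary64 with plain bit masking (a truncation-based stand-in for the round(..., sollya.RN) reduced-precision rounding used above; split_hi_lo is a hypothetical helper, not Metalibm API):

import math
import struct

def split_hi_lo(value, dropped_bits):
    # Clear the low dropped_bits of the significand: hi then has enough
    # trailing zeros for hi * small_int to stay exact in binary64.
    bits = struct.unpack("<Q", struct.pack("<d", value))[0]
    hi = struct.unpack("<d", struct.pack("<Q", bits & ~((1 << dropped_bits) - 1)))[0]
    lo = value - hi  # exact: the difference fits within the dropped bits
    return hi, lo

# binary64: field_size = 52, exponent_size = 11, so drop 11 + 1 = 12 bits
log2_hi, log2_lo = split_hi_lo(math.log(2.0), 12)
print(log2_hi * 500 / 500 == log2_hi)       # True: small-integer products are exact
print((log2_hi + log2_lo) - math.log(2.0))  # 0.0: the pair restores log(2)
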
Code example #2
File: ml_log1p.py Project: metalibm/metalibm
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x", self.precision)
        sollya_precision = self.get_input_precision().sollya_object

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # 2-limb approximation of log(2)
        # hi part precision is reduced to provide exact operation
        # when multiplied by an exponent value
        log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)


        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_rcp_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(dummy_rcp_seed, language = None, table_getter = lambda self: self.approx_table_map)

        # table creation
        table_index_size = inv_approx_table.index_size
        log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
        # storing accurate logarithm approximation of value returned
        # by the fast reciprocal operation
        for i in range(0, 2**table_index_size):
            inv_value = inv_approx_table[i]
            value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low


        neg_input = Comparison(vx, -1, likely=False, precision=ML_Bool, specifier=Comparison.Less, debug=debug_multi, tag="neg_input")
        vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, precision=ML_Bool, debug=debug_multi, tag="nan_or_inf")
        vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan")
        vx_inf    = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf")
        vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal")

        # for x = m.2^e, such that e >= 0
        #
        # log(1+x) = log(1 + m.2^e)
        #          = log(2^e . 2^-e + m.2^e)
        #          = log(2^e . (2^-e + m))
        #          = log(2^e) + log(2^-e + m)
        #          = e . log(2) + log (2^-e + m)
        #
        # t = (2^-e + m)
        # t = m_t . 2^e_t
        # r ~ 1 / m_t   => r.m_t ~ 1, i.e. r.m_t - 1 ~ 0
        #
        # t' = t . 2^-e_t
        #    = 2^-e-e_t + m . 2^-e_t
        #
        # if e >= 0, then 2^-e <= 1, then 1 <= m + 2^-e <= 3
        # r = m_r . 2^e_r
        #
        # log(1+x) = e.log(2) + log(r . 2^e_t . 2^-e_t . (2^-e + m) / r)
        #          = e.log(2) + log(r . 2^(-e-e_t) + r.m.2^-e_t) + e_t . log(2) - log(r)
        #          = (e+e_t).log(2) + log(r . t') - log(r)
        #          = (e+e_t).log(2) + P_log1p(r . t' - 1) - log(r)
        #
        #
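        # Worked instance: x = 6.0 gives m = 1.5, e = 2, hence
        # t = 2^-2 + 1.5 = 1.75 = m_t (with e_t = 0), and
        # e.log(2) + log(t) = log(4) + log(1.75) = log(7) = log(1 + x)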

        # argument reduction
        m = MantissaExtraction(vx, tag="vx", precision=self.precision, debug=debug_multi)
        e = ExponentExtraction(vx, tag="e", precision=int_precision, debug=debug_multi)

        # 2^-e
        TwoMinusE = ExponentInsertion(-e, tag="Two_minus_e", precision=self.precision, debug=debug_multi)
        t = Addition(TwoMinusE, m, precision=self.precision, tag="t", debug=debug_multi)

        m_t = MantissaExtraction(t, tag="m_t", precision=self.precision, debug=debug_multi)
        e_t = ExponentExtraction(t, tag="e_t", precision=int_precision, debug=debug_multi)

        # 2^(-e-e_t)
        TwoMinusEEt = ExponentInsertion(-e-e_t, tag="Two_minus_e_et", precision=self.precision)
        TwoMinusEt = ExponentInsertion(-e_t, tag="Two_minus_et", precision=self.precision, debug=debug_multi)

        rcp_mt = ReciprocalSeed(m_t, tag="rcp_mt", precision=self.precision, debug=debug_multi)

        INDEX_SIZE = table_index_size
        table_index = generic_mantissa_msb_index_fct(INDEX_SIZE, m_t)
        table_index.set_attributes(tag="table_index", debug=debug_multi)

        log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi) 
        log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi)

        inv_err = S2**-6 # TODO: link to target DivisionSeed precision

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        approx_fct = sollya.log1p(sollya.x) / (sollya.x)
        poly_degree = sup(guessdegree(approx_fct, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
        Log.report(Log.Debug, "poly_degree is {}", poly_degree)
        global_poly_object = Polynomial.build_from_approximation(approx_fct, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
        poly_object = global_poly_object # .sub_poly(start_index=1)

        EXT_PRECISION_MAP = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble,
            ML_SingleSingle: ML_TripleSingle,
            ML_DoubleDouble: ML_TripleDouble
        }
        if self.precision not in EXT_PRECISION_MAP:
            Log.report(Log.Error, "no extended precision available for {}", self.precision)

        ext_precision = EXT_PRECISION_MAP[self.precision]

        # pre_rtp = r . 2^(-e-e_t) + m .2^-e_t
        pre_rtp = Addition(
            rcp_mt * TwoMinusEEt,
            Multiplication(
                rcp_mt,
                Multiplication(
                    m,
                    TwoMinusEt,
                    precision=self.precision,
                    tag="pre_mult",
                    debug=debug_multi,
                ),
                precision=ext_precision,
                tag="pre_mult2",
                debug=debug_multi,
            ),
            precision=ext_precision,
            tag="pre_rtp",
            debug=debug_multi
        )
        pre_red_vx = Addition(
            pre_rtp,
            -1,
            precision=ext_precision,
        )

        red_vx = Conversion(pre_red_vx, precision=self.precision, tag="red_vx", debug=debug_multi)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vx, unified_precision=self.precision)

        poly.set_attributes(tag="poly", debug=debug_multi)
        Log.report(Log.Debug, "{}", global_poly_object.get_sollya_object())

        fp_e = Conversion(e + e_t, precision=self.precision, tag="fp_e", debug=debug_multi)


        ext_poly = Multiplication(red_vx, poly, precision=ext_precision)

        pre_result = Addition(
            Addition(
                fp_e * log2_hi,
                fp_e * log2_lo,
                precision=ext_precision
            ),
            Addition(
                Addition(
                    -log_inv_hi,
                    -log_inv_lo,
                    precision=ext_precision
                ),
                ext_poly,
                precision=ext_precision
            ),
            precision=ext_precision
        )

        result = Conversion(pre_result, precision=self.precision, tag="result", debug=debug_multi)


        # main scheme
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(neg_input,
            Statement(
                ClearException(),
                Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision))
            ),
            ConditionBlock(vx_nan_or_inf,
                ConditionBlock(vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan,
                            Raise(ML_FPE_Invalid)
                        ),
                        Return(FP_QNaN(self.precision))
                    )
                ),
                Return(result)
            )
        )
        scheme = pre_scheme
        return scheme
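
A quick double-precision check of the reduction identity derived in the comment block above, log(1+x) = (e+e_t).log(2) + log1p(r.t' - 1) - log(r). The identity holds for any r close enough to 1/m_t, so a crudely rounded reciprocal stands in for ReciprocalSeed in this illustrative sketch (plain Python, not Metalibm code):

import math

x = 6.0                      # x = m * 2^e with m in [1, 2)
m, e = math.frexp(x)         # frexp yields m in [0.5, 1)
m, e = 2.0 * m, e - 1        # renormalize: m = 1.5, e = 2

t = 2.0 ** -e + m            # t = 2^-e + m
m_t, e_t = math.frexp(t)
m_t, e_t = 2.0 * m_t, e_t - 1
t_prime = t * 2.0 ** -e_t    # t' = t . 2^-e_t

r = round(1.0 / m_t, 3)      # crude stand-in for ReciprocalSeed(m_t)

lhs = math.log1p(x)
rhs = ((e + e_t) * math.log(2.0)
       + math.log1p(r * t_prime - 1.0)
       - math.log(r))
print(lhs - rhs)             # ~1e-16: the identity holds up to rounding
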
Code example #3
  def generate_scheme(self):
    """Produce an abstract scheme for the logarithm.

    This abstract scheme will be used by the code generation backend.
    """
    if self.precision not in [ML_Binary32, ML_Binary64]:
        Log.report(Log.Error, "The demanded precision is not supported")

    vx = self.implementation.add_input_variable("x", self.precision)


    def default_bool_convert(optree, precision=None, **kw):
        return bool_convert(optree, precision, -1, 0, **kw) \
                if isinstance(self.processor, VectorBackend) \
                else bool_convert(optree, precision, 1, 0, **kw)

    precision = self.precision.sollya_object
    int_prec = self.precision.get_integer_format()
    Log.report(Log.Info, "int_prec is %s" % int_prec)
    uint_prec = self.precision.get_unsigned_integer_format()


    Log.report(Log.Info, "MDL constants")
    cgpe_scheme_idx = int(self.cgpe_index)
    table_index_size = int(self.tbl_index_size)
    #
    table_nb_elements = 2**(table_index_size)
    table_dimensions = [2*table_nb_elements]  # two values are stored for each element
    field_size = Constant(self.precision.get_field_size(),
                          precision = int_prec,
                          tag = 'field_size')
    if self.log_radix == EXP_1:
      log2_hi = Constant(
        round(log(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log(2) - round(log(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    elif self.log_radix == 10:
      log2_hi = Constant(
        round(log10(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log10(2) - round(log10(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    # ... if log_radix == '2' then log2(2) == 1

    # subnormal_mask aims at trapping positive subnormals except zero.
    # That's why we subtract 1 from the integer bitstring of the input, and
    # then compare for Less (strict) the resulting integer bitstring to this
    # mask, e.g.  0x7fffff for binary32.
    if self.no_subnormal == False:
      subnormal_mask = Constant((1 << self.precision.get_field_size()) - 1,
                                precision = int_prec, tag = 'subnormal_mask')
    fp_one = Constant(1.0, precision = self.precision, tag = 'fp_one')
    fp_one_as_uint = TypeCast(fp_one, precision = uint_prec,
                              tag = 'fp_one_as_uint')
    int_zero = Constant(0, precision = int_prec, tag = 'int_zero')
    int_one  = Constant(1, precision = int_prec, tag = 'int_one')
    table_mantissa_half_ulp = Constant(
            1 << (self.precision.field_size - table_index_size - 1),
            precision = int_prec
            )
    table_s_exp_index_mask = Constant(
            ~((table_mantissa_half_ulp.get_value() << 1) - 1),
            precision = uint_prec
            )

    Log.report(Log.Info, "MDL table")
    # The table holds approximations of -log(2^tau * r_i) so we first compute
    # the index value for which tau changes from 1 to 0.
    cut = sqrt(2.)
    tau_index_limit = floor(table_nb_elements * (2./cut - 1))
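    # e.g. with table_index_size = 7 (hypothetical value): tau_index_limit =
    # floor(128 * (sqrt(2) - 1)) = 53, so tau is 1 for indices 0..53 and 0 above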
    sollya_logtbl = [
      (-log1p(float(i) / table_nb_elements)
      + (0 if i <= tau_index_limit else log(2.))) / log(self.log_radix)
      for i in range(table_nb_elements)
    ]
    # ...
    init_logtbl_hi = [
            round(sollya_logtbl[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl_lo = [
            round(sollya_logtbl[i] - init_logtbl_hi[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl = [tmp[i] for i in range(len(init_logtbl_hi)) for tmp in [init_logtbl_hi, init_logtbl_lo]]
    log1p_table = ML_NewTable(dimensions = table_dimensions,
                              storage_precision = self.precision,
                              init_data = init_logtbl,
                              tag = 'ml_log1p_table')
    # ...
    if self.no_rcp:
      sollya_rcptbl = [
        (1/((1+float(i)/table_nb_elements)+2**(-1-int(self.tbl_index_size))))
        for i in range(table_nb_elements)
      ]
      init_rcptbl = [
            round(sollya_rcptbl[i],
                  int(self.tbl_index_size)+1, # self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
      ]
      rcp_table = ML_NewTable(dimensions = [table_nb_elements],
                              storage_precision = self.precision,
                              init_data = init_rcptbl,
                              tag = 'ml_rcp_table')
    # ...

    Log.report(Log.Info, 'MDL unified subnormal handling')
    vx_as_int = TypeCast(vx, precision = int_prec, tag = 'vx_as_int')
    if self.no_subnormal == False:
      vx_as_uint = TypeCast(vx, precision = uint_prec, tag = 'vx_as_uint')
      # Avoid the 0.0 case by subtracting 1 from vx_as_int
      tmp = Comparison(vx_as_int - 1, subnormal_mask,
                       specifier = Comparison.Less)
      is_subnormal = default_bool_convert(
        tmp, # Will catch negative values as well as NaNs with sign bit set
        precision = int_prec)
      is_subnormal.set_attributes(tag = "is_subnormal")
      if not(isinstance(self.processor, VectorBackend)):
        is_subnormal = Subtraction(Constant(0, precision = int_prec),
                                   is_subnormal,
                                   precision = int_prec)

      #################################################
      # Vectorizable integer based subnormal handling #
      #################################################
      # 1. lzcnt
      # custom lzcount-like for subnormal numbers using FPU (see draft article)
      Zi = BitLogicOr(vx_as_uint, fp_one_as_uint, precision = uint_prec, tag="Zi")
      Zf = Subtraction(
        TypeCast(Zi, precision = self.precision),
        fp_one,
        precision = self.precision,
        tag="Zf")
      # Zf exponent is -(nlz(x) - exponent_size).
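      # Worked binary32 instance (illustrative): x = 2^-130 has bit pattern
      # 0x00080000, i.e. nlz(x) = 12; Zi encodes 1 + 2^19 * 2^-23, so
      # Zf = 2^-4 and its exponent -4 equals -(nlz(x) - exponent_size) = -(12 - 8)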
      # 2. compute shift value
      # Vectorial comparison on x86+sse/avx is going to look like
      # '|0x00|0xff|0x00|0x00|' and that's why we use Negate.
      # But for scalar code generation, comparison will rather be either 0 or 1
      # in C. Thus mask below won't be correct for a scalar implementation.
      # FIXME: Can we know the backend that will be called and choose in
      # consequence? Should we make something arch-agnostic instead?
      #
      n_value = BitLogicAnd(
        Addition(
          DirtyExponentExtraction(Zf, self.precision),
          Constant(
            self.precision.get_bias(),
            precision = int_prec),
          precision = int_prec),
        is_subnormal,
        precision = int_prec,
        tag = "n_value")
      alpha = Negation(n_value, tag="alpha")
      #
      # 3. shift left
      # renormalized_mantissa = BitLogicLeftShift(vx_as_int, value)
      normal_vx_as_int = BitLogicLeftShift(vx_as_int, alpha)
      # 4. set exponent to the right value
      # Compute the exponent to add : (p-1)-(value) + 1 = p-1-value
      # The final "+ 1" comes from the fact that once renormalized, the
      # floating-point datum has a biased exponent of 1
      #tmp0 = Subtraction(
      #        field_size,
      #        value,
      #        precision = int_prec,
      #        tag="tmp0")
      # Set the value to 0 if the number is not subnormal
      #tmp1 = BitLogicAnd(tmp0, is_subnormal)
      #renormalized_exponent = BitLogicLeftShift(
      #        tmp1,
      #        field_size
      #        )
    else: # no_subnormal == True
      normal_vx_as_int = vx_as_int
      
    #normal_vx_as_int = renormalized_mantissa + renormalized_exponent
    normal_vx = TypeCast(normal_vx_as_int, precision = self.precision,
                         tag = 'normal_vx')

    # alpha = BitLogicAnd(field_size, is_subnormal, tag = 'alpha')
    # XXX Extract the mantissa, see if this is supported in the x86 vector
    # backend or if it still uses the support_lib.
    vx_mantissa = MantissaExtraction(normal_vx, precision = self.precision)

    Log.report(Log.Info, "MDL scheme")
    if self.force_division == True:
      rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    elif self.no_rcp == False:
      rcp_m = ReciprocalSeed(vx_mantissa, precision = self.precision)
      if not self.processor.is_supported_operation(rcp_m):
        if self.precision == ML_Binary64:
          # Try using a binary32 FastReciprocal
          binary32_m = Conversion(vx_mantissa, precision = ML_Binary32)
          rcp_m = ReciprocalSeed(binary32_m, precision = ML_Binary32)
          rcp_m = Conversion(rcp_m, precision = ML_Binary64)
        if not self.processor.is_supported_operation(rcp_m):
          # FIXME An approximation table could be used instead but for vector
          # implementations another GATHER would be required.
          # However this may well be better than a division...
          rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    else: # ... use a look-up table
      rcp_shift = BitLogicLeftShift(normal_vx_as_int, self.precision.get_exponent_size() + 1)
      rcp_idx = BitLogicRightShift(rcp_shift, self.precision.get_exponent_size() + 1 + self.precision.get_field_size() - int(self.tbl_index_size))
      rcp_m = TableLoad(rcp_table, rcp_idx, tag = 'rcp_idx',
                        debug = debug_multi)
    #  
    rcp_m.set_attributes(tag = 'rcp_m')

    # exponent is normally either 0 or -1, since m is in [1, 2). Possible
    # optimization?
    # exponent = ExponentExtraction(rcp_m, precision = self.precision,
    #         tag = 'exponent')

    ri_round = TypeCast(
            Addition(
                TypeCast(rcp_m, precision = int_prec),
                table_mantissa_half_ulp,
                precision = int_prec
                ),
            precision = uint_prec
            )
    ri_fast_rndn = BitLogicAnd(
            ri_round,
            table_s_exp_index_mask,
            tag = 'ri_fast_rndn',
            precision = uint_prec
            )
    # u = m * ri - 1
    ul = None
    if self.no_rcp == True: # ... u does not fit on a single word
      tmp_u, tmp_ul = Mul211(vx_mantissa,         
                             TypeCast(ri_fast_rndn, precision = self.precision), 
                             fma = (self.no_fma == False))
      fp_minus_one = Constant(-1.0, precision = self.precision, tag = 'fp_minus_one')
      u, ul = Add212(fp_minus_one, tmp_u, tmp_ul)      
      u.set_attributes(tag='uh')
      ul.set_attributes(tag='ul')
    elif self.no_fma == False:
      u = FusedMultiplyAdd(
        vx_mantissa,
        TypeCast(ri_fast_rndn, precision = self.precision),
        fp_one,
        specifier = FusedMultiplyAdd.Subtract,
        tag = 'u')
    else: # disable FMA
      # tmph + tmpl = m * ri, where tmph ~ 1
      tmph, tmpl = Mul211(vx_mantissa,         
                          TypeCast(ri_fast_rndn, precision = self.precision), 
                          fma = False)
      # u_tmp = tmph - 1 ... exact due to Sterbenz
      u_tmp = Subtraction(tmph, fp_one, precision = self.precision)
      # u = u_tmp - tmpl ... exact since the result u is representable as a single word
      u = Addition(u_tmp, tmpl, precision = self.precision, tag = 'u')
    
    unneeded_bits = Constant(
            self.precision.field_size - table_index_size,
            precision=uint_prec,
            tag="unneeded_bits"
            )
    assert self.precision.field_size - table_index_size >= 0
    ri_bits = BitLogicRightShift(
            ri_fast_rndn,
            unneeded_bits,
            precision = uint_prec,
            tag = "ri_bits"
            )
    # Retrieve the mantissa MSBs plus the first exponent bit, for the tau
    # computation when the exponent is 0 (i.e. biased 127, i.e. the first
    # exponent bit is set). In this particular case, i = 0 but tau is 1.
    # table_index does not need to be as long as uint_prec might be,
    # try and keep it the size of size_t.
    size_t_prec = ML_UInt32
    signed_size_t_prec = ML_Int32
    table_index_mask = Constant(
            (1 << (table_index_size + 1)) - 1,
            precision = size_t_prec
            )
    table_index = BitLogicAnd(
            Conversion(ri_bits, precision = size_t_prec),
            table_index_mask,
            tag = 'table_index',
            precision = size_t_prec
            )
    # Compute tau using the tau_index_limit value.
    tmp = default_bool_convert(
            Comparison(
                TypeCast(table_index, precision = signed_size_t_prec),
                Constant(tau_index_limit, precision = signed_size_t_prec),
                specifier = Comparison.Greater
                if isinstance(self.processor, VectorBackend)
                else Comparison.LessOrEqual
                ),
            precision = signed_size_t_prec,
            tag="tmp"
            )
    # A true tmp will typically be -1 for VectorBackends, but 1 for standard C.
    tau = Conversion(
        Addition(tmp, Constant(1, precision=signed_size_t_prec), precision = signed_size_t_prec, tag="pre_add")
            if isinstance(self.processor, VectorBackend)
            else tmp,
            precision=int_prec,
            tag="pre_tau"
        )
    tau.set_attributes(tag = 'tau')
    # Update table_index: keep only table_index_size bits
    table_index_hi = BitLogicAnd(
            table_index,
            Constant((1 << table_index_size) - 1, precision = size_t_prec),
            precision = size_t_prec
            )
    # table_index_hi = table_index_hi << 1
    table_index_hi = BitLogicLeftShift(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_hi"
            )
    # table_index_lo = table_index_hi + 1
    table_index_lo = Addition(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_lo"
            )

    tbl_hi = TableLoad(log1p_table, table_index_hi, tag = 'tbl_hi',
                       debug = debug_multi)
    tbl_lo = TableLoad(log1p_table, table_index_lo, tag = 'tbl_lo',
                       debug = debug_multi)
    # Compute exponent e + tau - alpha, but first subtract the bias.
    if self.no_subnormal == False:
      tmp_eptau = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
      exponent = Subtraction(tmp_eptau, alpha, precision = int_prec)
    else:
      exponent = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
    #
    fp_exponent = Conversion(exponent, precision = self.precision,
                             tag = 'fp_exponent')

    Log.report(Log.Info, 'MDL polynomial approximation')
    if self.log_radix == EXP_1:
      sollya_function = log(1 + sollya.x)
    elif self.log_radix == 2:
      sollya_function = log2(1 + sollya.x)
    elif self.log_radix == 10:
      sollya_function = log10(1 + sollya.x)
    # ...
    if self.force_division == True: # rcp accuracy is 2^(-p)
      boundrcp = 2**(-self.precision.get_precision())
    else:
      boundrcp = 1.5 * 2**(-12)           # ... see Intel intrinsics guide
      if self.precision in [ML_Binary64]:
        if not self.processor.is_supported_operation(rcp_m):
          boundrcp = (1+boundrcp)*(1+2**(-24)) - 1
        else:
          boundrcp = 2**(-14)             # ... see Intel intrinsics guide
    arg_red_mag = boundrcp + 2**(-table_index_size-1) + boundrcp * 2**(-table_index_size-1)
    if self.no_rcp == False:
      approx_interval = Interval(-arg_red_mag, arg_red_mag)
    else:
      approx_interval = Interval(-2**(-int(self.tbl_index_size)+1),2**(-int(self.tbl_index_size)+1))
    max_eps = 2**-(2*(self.precision.get_field_size()))
    Log.report(Log.Info, "max acceptable error for polynomial = {}".format(float.hex(max_eps)))
    poly_degree = sup(
            guessdegree(
                sollya_function,
                approx_interval,
                max_eps,
                )
            )
    Log.report(Log.Info, "poly degree is ", poly_degree)
    if self.log_radix == EXP_1:
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(2, int(poly_degree) + 1), # Force 1st 2 coeffs to 0 and 1, resp.
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree - 1),
        approx_interval,
        sollya.absolute,
        0 + sollya._x_) # Force the first 2 coefficients to 0 and 1, resp.
    else: # ... == '2' or '10'
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(1, int(poly_degree) + 1), # Force 1st coeff to 0
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree),
        approx_interval,
        sollya.absolute,
        0) # Force the first coefficients to 0

    Log.report(Log.Info, str(poly_object))

    constant_precision = ML_SingleSingle if self.precision == ML_Binary32 \
            else ML_DoubleDouble if self.precision == ML_Binary64 \
            else None
    if is_cgpe_available():
        log1pu_poly = PolynomialSchemeEvaluator.generate_cgpe_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision, scheme_id = cgpe_scheme_idx
                )
    else:
        Log.report(Log.Warning,
                "CGPE not available, falling back to std poly evaluator")
        log1pu_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision
                )

    # XXX Dirty implementation of double-(self.precision) poly
    def dirty_poly_node_conversion(node, variable_h, variable_l, use_fma):
        return dirty_multi_node_expand(
          node, self.precision, mem_map={variable_h: (variable_h, variable_l)}, fma=use_fma)
    log1pu_poly_hi, log1pu_poly_lo = dirty_poly_node_conversion(log1pu_poly, u, ul,
                                                                use_fma=(self.no_fma == False))

    log1pu_poly_hi.set_attributes(tag = 'log1pu_poly_hi')
    log1pu_poly_lo.set_attributes(tag = 'log1pu_poly_lo')

    # Compute log(2) * (e + tau - alpha)
    if self.log_radix != 2: # 'e' or '10'
      log2e_hi, log2e_lo = Mul212(fp_exponent, log2_hi, log2_lo, 
                                  fma = (self.no_fma == False))
   
    # Add log1p(u)
    if self.log_radix != 2: # 'e' or '10'
      tmp_res_hi, tmp_res_lo = Add222(log2e_hi, log2e_lo,
                                      log1pu_poly_hi, log1pu_poly_lo)
    else:
      tmp_res_hi, tmp_res_lo = Add212(fp_exponent,
                                      log1pu_poly_hi, log1pu_poly_lo)

    # Add -log(2^(tau)/m) approximation retrieved by two table lookups
    logx_hi = Add122(tmp_res_hi, tmp_res_lo, tbl_hi, tbl_lo)[0]
    logx_hi.set_attributes(tag = 'logx_hi')

    scheme = Return(logx_hi, precision = self.precision)

    return scheme
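
The ml_log1p_table above stores two-limb approximations of -log(2^tau * r_i) / log(radix), with tau dropping from 1 to 0 once the seed-like reciprocal value r_i exceeds 1/sqrt(2). The following standalone reconstruction of the natural-log entries (where log(radix) = 1) uses a hypothetical table_index_size of 4 and plain floats instead of Sollya's exact rounding:

import math

table_index_size = 4                       # hypothetical; a parameter in Metalibm
n = 2 ** table_index_size
cut = math.sqrt(2.0)
tau_index_limit = math.floor(n * (2.0 / cut - 1.0))

table = []
for i in range(n):
    r_i = (1.0 + i / n) / 2.0              # reciprocal-seed-like value in [0.5, 1)
    tau = 1 if i <= tau_index_limit else 0 # tau = 1 while r_i <= 1/sqrt(2)
    # same value as -log1p(i/n) + (0 if i <= tau_index_limit else log(2))
    table.append(-math.log(2.0 ** tau * r_i))

print(tau_index_limit)                     # 6 for n = 16
print(table[1], -math.log1p(1.0 / 16.0))   # same value, two formulations
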
Code example #4
File: ml_log1p.py Project: metalibm/metalibm
 def numeric_emulate(self, input_value):
     return log1p(input_value)
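
numeric_emulate supplies the high-accuracy reference (presumably Sollya's arbitrary-precision log1p, given the imports used elsewhere in this file) that Metalibm's test bench compares the generated implementation against. Why a dedicated log1p is worth generating at all shows up immediately in plain Python: near zero, forming 1 + x first discards most of x, while log1p keeps it:

import math

x = 1e-10
print(math.log(1.0 + x))  # ~1.00000008e-10: rounding 1 + x already lost ~8 digits
print(math.log1p(x))      # ~9.9999999995e-11: accurate to full precision
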
Code example #5
File: ml_log1p.py Project: templeblock/metalibm
  def generate_scheme(self):
    vx = self.implementation.add_input_variable("x", self.precision) 
    sollya_precision = self.get_input_precision().sollya_object

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)


    log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
    log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

    log2_hi = Constant(log2_hi_value, precision = self.precision)
    log2_lo = Constant(log2_lo_value, precision = self.precision)

    vx_exp  = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    int_precision = self.precision.get_integer_format()

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision = self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    # table creation
    table_index_size = 7
    log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
    log_table[0][0] = 0.0
    log_table[0][1] = 0.0
    for i in range(1, 2**table_index_size):
        #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
        inv_value = inv_approx_table[i] # (1.0 + (inv_approx_table[i] / S2**9) ) * S2**-1
        value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
        log_table[i][0] = value_high
        log_table[i][1] = value_low


    vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    # case close to 0: ctz
    ctz_exp_limit = -7
    ctz_cond = vx_exp < ctz_exp_limit
    ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

    ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
    ctz_poly_object = Polynomial.build_from_approximation(log1p(sollya.x)/sollya.x, ctz_poly_degree, [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision = self.precision)
    ctz_poly.set_attributes(tag = "ctz_poly", debug = debug_lftolx)

    ctz_result = vx * ctz_poly

    neg_input = Comparison(vx, -1, likely = False, specifier = Comparison.Less, debug = debugd, tag = "neg_input")
    vx_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debugd, tag = "nan_or_inf")
    vx_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False, debug = debugd, tag = "snan")
    vx_inf  = Test(vx, specifier = Test.IsInfty, likely = False, debug = debugd, tag = "inf")
    vx_subnormal = Test(vx, specifier = Test.IsSubnormal, likely = False, debug = debugd, tag = "vx_subnormal")
    
    log_function_code = CodeFunction("new_log", [Variable("x", precision = ML_Binary64)], output_format = ML_Binary64) 
    log_call_generator = FunctionOperator(log_function_code.get_name(), arity = 1, output_precision = ML_Binary64, declare_prototype = log_function_code)
    newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64,), ML_Binary64, log_call_generator)


    # case away from 0.0
    pre_vxp1 = vx + 1.0
    pre_vxp1.set_attributes(tag = "pre_vxp1", debug = debug_lftolx)
    pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag = "pre_vxp1_exp", debug = debugd)
    cm500 = Constant(-500, precision = ML_Int32)
    c0 = Constant(0, precision = ML_Int32)
    cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
    scaling_factor_exp = Select(cond_scaling, cm500, c0)
    scaling_factor = ExponentInsertion(scaling_factor_exp, precision = self.precision, tag = "scaling_factor")

    vxp1 = pre_vxp1 * scaling_factor
    vxp1.set_attributes(tag = "vxp1", debug = debug_lftolx)
    vxp1_exp = ExponentExtraction(vxp1, tag = "vxp1_exp", debug = debugd)

    vxp1_inv = ReciprocalSeed(vxp1, precision = self.precision, tag = "vxp1_inv", debug = debug_lftolx, silent = True)

    vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision = self.precision, tag = "vxp1_dirty_inv", debug = debug_lftolx)

    table_index = BitLogicAnd(BitLogicRightShift(TypeCast(vxp1, precision = int_precision, debug = debuglx), self.precision.get_field_size() - 7, debug = debuglx), 0x7f, tag = "table_index", debug = debuglx) 

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision = ML_UInt64), Constant(-2, precision = ML_UInt64), precision = ML_UInt64), precision = self.precision, tag = "pre_arg_red_index", debug = debug_lftolx)
    arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag = "arg_red_index", debug = debug_lftolx)

    red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
    #red_vxp1 = arg_red_index * vxp1 - 1.0
    red_vxp1.set_attributes(tag = "red_vxp1", debug = debug_lftolx)

    log_inv_lo = TableLoad(log_table, table_index, 1, tag = "log_inv_lo", debug = debug_lftolx) 
    log_inv_hi = TableLoad(log_table, table_index, 0, tag = "log_inv_hi", debug = debug_lftolx)

    inv_err = S2**-6 # TODO: link to target DivisionSeed precision

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
    global_poly_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
    poly_object = global_poly_object.sub_poly(start_index = 1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision = self.precision)
    _poly.set_attributes(tag = "poly", debug = debug_lftolx)
    Log.report(Log.Info, global_poly_object.get_sollya_object())


    vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag = "vxp1_inv_exp", debug = debugd)
    corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision = self.precision)  # vxp1_inv_exp

    #poly = (red_vxp1) * (1 +  _poly)
    #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

    pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo))
    pre_result.set_attributes(tag = "pre_result", debug = debug_lftolx)
    exact_log2_hi_exp = - corr_exp * log2_hi
    exact_log2_hi_exp.set_attributes(tag = "exact_log2_hi_exp", debug = debug_lftolx, prevent_optimization = True)
    #std_result =  exact_log2_hi_exp + pre_result

    exact_log2_lo_exp = - corr_exp * log2_lo
    exact_log2_lo_exp.set_attributes(tag = "exact_log2_lo_exp", debug = debug_lftolx)#, prevent_optimization = True)
    
    init = exact_log2_lo_exp  - log_inv_lo
    init.set_attributes(tag = "init", debug = debug_lftolx, prevent_optimization = True)
    fma0 = (red_vxp1 * _poly + init) # - log_inv_lo)
    fma0.set_attributes(tag = "fma0", debug = debug_lftolx)
    step0 = fma0 
    step0.set_attributes(tag = "step0", debug = debug_lftolx) #, prevent_optimization = True)
    step1 = step0 + red_vxp1
    step1.set_attributes(tag = "step1", debug = debug_lftolx, prevent_optimization = True)
    step2 = -log_inv_hi + step1
    step2.set_attributes(tag = "step2", debug = debug_lftolx, prevent_optimization = True)
    std_result = exact_log2_hi_exp + step2
    std_result.set_attributes(tag = "std_result", debug = debug_lftolx, prevent_optimization = True)


    # main scheme
    Log.report(Log.Info, "MDL scheme")
    pre_scheme = ConditionBlock(neg_input,
        Statement(
            ClearException(),
            Raise(ML_FPE_Invalid),
            Return(FP_QNaN(self.precision))
        ),
        ConditionBlock(vx_nan_or_inf,
            ConditionBlock(vx_inf,
                Statement(
                    ClearException(),
                    Return(FP_PlusInfty(self.precision)),
                ),
                Statement(
                    ClearException(),
                    ConditionBlock(vx_snan,
                        Raise(ML_FPE_Invalid)
                    ),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(vx_subnormal,
                Return(vx),
                ConditionBlock(ctz_cond,
                    Statement(
                        Return(ctz_result),
                    ),
                    Statement(
                        Return(std_result)
                    )
                )
            )
        )
    )
    scheme = pre_scheme
    return scheme
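
The close-to-zero branch above returns vx * ctz_poly, where ctz_poly is a Horner evaluation of a polynomial approximating log1p(x)/x; factoring out x keeps the relative error under control as x -> 0. Below is a plain-Python sketch of that evaluation, using truncated Taylor coefficients as stand-ins for the minimax coefficients that Polynomial.build_from_approximation obtains from Sollya:

import math

def horner(coeffs, x):
    # Evaluate c[0] + c[1]*x + c[2]*x^2 + ... innermost-first, the same
    # shape generate_horner_scheme builds as an operation graph.
    acc = 0.0
    for c in reversed(coeffs):
        acc = acc * x + c
    return acc

# log1p(x)/x = 1 - x/2 + x^2/3 - x^3/4 + x^4/5 - ...
coeffs = [1.0, -0.5, 1.0 / 3.0, -0.25, 0.2]
x = 2.0 ** -8                        # well inside the |x| < 2^-7 branch
ctz_result = x * horner(coeffs, x)
print(ctz_result - math.log1p(x))    # tiny: truncation error of the degree-4 stand-in
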