Example #1
0
    def __init__(self,
                 precision=ML_Binary64,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log_fixed.c",
                 function_name="log_fixed"):
        """Build, optimize and emit the C implementation of the function.

        The generated function takes one input ``x`` of format ``precision``
        and returns ``CountLeadingZeros(e) + e`` where ``e`` is the raw
        sign/exponent extraction of ``x`` converted to ``ML_UInt32``.

        Side effects: sets ``self.function_name``, ``self.precision``,
        ``self.processor`` and ``self.result``, prints progress messages and
        writes the generated source to ``"<name>.c"`` where ``<name>`` is
        ``func_implementation.get_name()``.

        Args:
            precision: format of the input variable and of the function
                output; also the default precision used when instantiating
                the scheme.
            abs_accuracy: accepted for interface compatibility; not read by
                this method.
            libm_compliant: forwarded to ``CCodeGenerator``.
            debug_flag: when false, debug output is disabled in the code
                generator (``disable_debug=not debug_flag``).
            fuse_fma: when true, the multiply-add fusion pass is applied.
            fast_path_extract: accepted for interface compatibility; not
                read by this method.
            target: processor handed to ``OptimizationEngine`` and
                ``CCodeGenerator``. The default instance is created once,
                when the method is defined, and is therefore shared by every
                call relying on the default.
            output_file: accepted for interface compatibility; not read by
                this method (the output path is derived from the function
                name instead).
            function_name: name of the generated function.
        """
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        # debug utility: display the value as a C int
        debugd = ML_Debug(display_format="%d",
                          pre_process=lambda v: "(int) %s" % v)

        # operation scheme: clz(exp) + exp on the raw sign/exponent field
        vx_exp = RawSignExpExtraction(vx,
                                      tag="vx_exp",
                                      precision=ML_Int32,
                                      debug=debugd)
        vx_exp_u = Conversion(vx_exp, precision=ML_UInt32)
        vx_exp_u.set_precision(ML_UInt32)
        tt = CountLeadingZeros(vx_exp_u)
        tt_u = Conversion(tt, precision=ML_UInt32)
        t = tt_u + vx_exp_u
        scheme = Statement(Return(t))

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        if fuse_fma:
            print("MDL fusing FMA")
            scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print("MDL abstract scheme")
        opt_eng.instantiate_abstract_precision(scheme, None)

        print("MDL instantiated scheme")
        opt_eng.instantiate_precision(scheme, default_precision=self.precision)

        print("subexpression sharing")
        opt_eng.subexpression_sharing(scheme)

        print("silencing operation")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")

        # Generate the source before opening the file so that a failure
        # during generation does not truncate an existing output, and use a
        # context manager so the handle is closed even if the write fails.
        generated_code = self.result.get(cg)
        output_path = "%s.c" % func_implementation.get_name()
        with open(output_path, "w") as output_stream:
            output_stream.write(generated_code)
Example #2
0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log1pf.c",
                 function_name="log1pf"):
        """Build, optimize and emit a C implementation of log1p(x).

        The returned scheme dispatches, in order, on:

        * ``x < -1``: raise the invalid exception and return a quiet NaN;
        * ``x`` infinite or NaN: return +infinity for an infinity, otherwise
          return a quiet NaN (raising invalid for a signaling NaN);
        * ``x`` subnormal: return ``x``;
        * exponent of ``x`` below ``ctz_exp_limit`` (-7): return
          ``x * P(x)`` with ``P`` approximating ``log1p(x) / x``;
        * otherwise: table-driven evaluation on ``1 + x`` (division seed
          based argument reduction, 128-entry hi/lo log table, polynomial
          correction and exponent reconstruction with a hi/lo split of
          ``log(2)``).

        Side effects: sets ``self.function_name``, ``self.precision``,
        ``self.processor`` and ``self.result``, prints progress messages and
        writes the generated source to ``"<name>.c"`` where ``<name>`` is
        ``func_implementation.get_name()``.

        Args:
            precision: format of the input, of the output and of the
                intermediate constants/tables; also the default precision
                used when instantiating the scheme.
            abs_accuracy: accepted for interface compatibility; not read by
                this method.
            libm_compliant: forwarded to ``CCodeGenerator``.
            debug_flag: when false, debug output is disabled in the code
                generator (``disable_debug=not debug_flag``).
            fuse_fma: when true, the multiply-add fusion pass is applied.
            fast_path_extract: accepted for interface compatibility; not
                read by this method.
            target: processor handed to ``OptimizationEngine`` and
                ``CCodeGenerator`` and queried for the ``DivisionSeed``
                approximation table. The default instance is created once,
                when the method is defined, and is therefore shared by every
                call relying on the default.
            output_file: accepted for interface compatibility; not read by
                this method (the output path is derived from the function
                name instead).
            function_name: name of the generated function.
        """
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # debug utilities (only the helpers referenced below)
        debuglx = ML_Debug(display_format="%\"PRIx64\"", )
        debugd = ML_Debug(display_format="%d",
                          pre_process=lambda v: "(int) %s" % v)
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)

        # log(2) split into a high part rounded to
        # field_size - (exponent_size + 1) bits and a low correction term
        log2_hi_value = round(
            log(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), sollya.RN)
        log2_lo_value = round(
            log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: entry i holds (hi, lo) parts of log(inv_value_i)
        table_index_size = 7
        log_table = ML_Table(dimensions=[2**table_index_size, 2],
                             storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            inv_value = (1.0 + (inv_approx_table[i][0] / S2**9)) * S2**-1
            value_high = round(
                log(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        # case close to 0: ctz
        ctz_exp_limit = -7
        ctz_cond = vx_exp < ctz_exp_limit
        ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

        ctz_poly_degree = sup(
            guessdegree(
                log1p(sollya.x) / sollya.x, ctz_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        ctz_poly_object = Polynomial.build_from_approximation(
            log1p(sollya.x) / sollya.x, ctz_poly_degree,
            [self.precision] * (ctz_poly_degree + 1), ctz_interval,
            sollya.absolute)

        print("generating polynomial evaluation scheme")
        ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            ctz_poly_object, vx, unified_precision=self.precision)
        ctz_poly.set_attributes(tag="ctz_poly", debug=debug_lftolx)

        ctz_result = vx * ctz_poly

        # special-case predicates
        neg_input = Comparison(vx,
                               -1,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")

        # NOTE(review): the "new_log" operator below is built but never
        # referenced by the returned scheme; kept in case its construction
        # has effects not visible from this method - confirm and remove.
        log_function_code = CodeFunction(
            "new_log", [Variable("x", precision=ML_Binary64)],
            output_format=ML_Binary64)
        log_call_generator = FunctionOperator(
            log_function_code.get_name(),
            arity=1,
            output_precision=ML_Binary64,
            declare_prototype=log_function_code)
        newlog_function = FunctionObject(log_function_code.get_name(),
                                         (ML_Binary64, ), ML_Binary64,
                                         log_call_generator)

        # case away from 0.0
        pre_vxp1 = vx + 1.0
        pre_vxp1.set_attributes(tag="pre_vxp1", debug=debug_lftolx)
        pre_vxp1_exp = ExponentExtraction(pre_vxp1,
                                          tag="pre_vxp1_exp",
                                          debug=debugd)
        cm500 = Constant(-500, precision=ML_Int32)
        c0 = Constant(0, precision=ML_Int32)
        # large values of 1 + x are scaled by 2^-500 before reduction
        cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size() -
                                          2)
        scaling_factor_exp = Select(cond_scaling, cm500, c0)
        scaling_factor = ExponentInsertion(scaling_factor_exp,
                                           precision=self.precision,
                                           tag="scaling_factor")

        vxp1 = pre_vxp1 * scaling_factor
        vxp1.set_attributes(tag="vxp1", debug=debug_lftolx)
        vxp1_exp = ExponentExtraction(vxp1, tag="vxp1_exp", debug=debugd)

        vxp1_inv = DivisionSeed(vxp1,
                                precision=self.precision,
                                tag="vxp1_inv",
                                debug=debug_lftolx,
                                silent=True)

        vxp1_dirty_inv = ExponentInsertion(-vxp1_exp,
                                           precision=self.precision,
                                           tag="vxp1_dirty_inv",
                                           debug=debug_lftolx)

        # 7-bit table index taken from the top of the mantissa field
        table_index = BitLogicAnd(BitLogicRightShift(
            TypeCast(vxp1, precision=int_precision, debug=debuglx),
            self.precision.get_field_size() - 7,
            debug=debuglx),
                                  0x7f,
                                  tag="table_index",
                                  debug=debuglx)

        # argument reduction
        # TODO: detect if single operand inverse seed is supported by the targeted architecture
        # NOTE(review): the mask below is built in ML_UInt64 whatever
        # self.precision is - confirm this is intended for 32-bit formats.
        pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv,
                                                          precision=ML_UInt64),
                                                 Constant(-2,
                                                          precision=ML_UInt64),
                                                 precision=ML_UInt64),
                                     precision=self.precision,
                                     tag="pre_arg_red_index",
                                     debug=debug_lftolx)
        arg_red_index = Select(Equal(table_index, 0),
                               vxp1_dirty_inv,
                               pre_arg_red_index,
                               tag="arg_red_index",
                               debug=debug_lftolx)

        red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0,
                          (arg_red_index * vx - 1.0) + arg_red_index)
        red_vxp1.set_attributes(tag="red_vxp1", debug=debug_lftolx)

        log_inv_lo = TableLoad(log_table,
                               table_index,
                               1,
                               tag="log_inv_lo",
                               debug=debug_lftolx)
        log_inv_hi = TableLoad(log_table,
                               table_index,
                               0,
                               tag="log_inv_hi",
                               debug=debug_lftolx)

        inv_err = S2**-6  # TODO: link to target DivisionSeed precision

        print("building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        poly_degree = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        poly_object = global_poly_object.sub_poly(start_index=1)

        print("generating polynomial evaluation scheme")
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vxp1, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        print(global_poly_object.get_sollya_object())

        # NOTE(review): vxp1_inv_exp and pre_result are built but not
        # reachable from the returned scheme - confirm and remove.
        vxp1_inv_exp = ExponentExtraction(vxp1_inv,
                                          tag="vxp1_inv_exp",
                                          debug=debugd)
        corr_exp = -vxp1_exp + scaling_factor_exp  # vxp1_inv_exp

        pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly +
                                    (-corr_exp * log2_lo - log_inv_lo))
        pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
        exact_log2_hi_exp = -corr_exp * log2_hi
        exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                         debug=debug_lftolx,
                                         prevent_optimization=True)

        exact_log2_lo_exp = -corr_exp * log2_lo
        exact_log2_lo_exp.set_attributes(
            tag="exact_log2_lo_exp",
            debug=debug_lftolx)  #, prevent_optimization = True)

        # final reconstruction, accumulated step by step
        init = exact_log2_lo_exp - log_inv_lo
        init.set_attributes(tag="init",
                            debug=debug_lftolx,
                            prevent_optimization=True)
        fma0 = (red_vxp1 * _poly + init)  # - log_inv_lo)
        fma0.set_attributes(tag="fma0", debug=debug_lftolx)
        # step0 aliases fma0: the call below re-tags the same node "step0"
        step0 = fma0
        step0.set_attributes(
            tag="step0", debug=debug_lftolx)  #, prevent_optimization = True)
        step1 = step0 + red_vxp1
        step1.set_attributes(tag="step1",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        step2 = -log_inv_hi + step1
        step2.set_attributes(tag="step2",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        std_result = exact_log2_hi_exp + step2
        std_result.set_attributes(tag="std_result",
                                  debug=debug_lftolx,
                                  prevent_optimization=True)

        # main scheme
        print("MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal, Return(vx),
                    ConditionBlock(ctz_cond, Statement(Return(ctz_result), ),
                                   Statement(Return(std_result))))))
        scheme = pre_scheme

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA (only when requested by the caller)
        if fuse_fma:
            print("MDL fusing FMA")
            scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print("MDL abstract scheme")
        opt_eng.instantiate_abstract_precision(scheme, None)

        # instantiate with the requested precision rather than a fixed
        # format so that non-default precisions are honoured
        print("MDL instantiated scheme")
        opt_eng.instantiate_precision(scheme, default_precision=self.precision)

        print("subexpression sharing")
        opt_eng.subexpression_sharing(scheme)

        print("silencing operation")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")

        # Generate the source before opening the file so that a failure
        # during generation does not truncate an existing output, and use a
        # context manager so the handle is closed even if the write fails.
        generated_code = self.result.get(cg)
        output_path = "%s.c" % func_implementation.get_name()
        with open(output_path, "w") as output_stream:
            output_stream.write(generated_code)
Example #3
0
from metalibm_core.core.ml_entity import ML_Entity, ML_EntityBasis, DefaultEntityArgTemplate
from metalibm_core.code_generation.generator_utility import FunctionOperator, FO_Result, FO_Arg

from metalibm_core.utility.ml_template import *
from metalibm_core.utility.log_report import Log
from metalibm_core.utility.debug_utils import *
from metalibm_core.utility.num_utils import ulp
from metalibm_core.utility.gappa_utils import is_gappa_installed

from metalibm_core.core.ml_hdl_format import *
from metalibm_core.core.ml_hdl_operations import *

from metalibm_hw_blocks.lzc import ML_LeadingZeroCounter

## Shared ML_Debug descriptors used when tagging signals for debug
#  Each one only differs by the display_format string it carries.
debug_dec_unsigned = ML_Debug(display_format=" -decimal -unsigned ")
debug_dec = ML_Debug(display_format=" -radix 10 ")
debug_std = ML_Debug(display_format=" -radix 2 ")


## Build a zero-extension node around an operation
# @param op operation node to widen; its precision must provide
#        get_bit_size()
# @param s number of bits to add (coerced to int)
# @return a ZeroExt node whose precision is an ML_StdLogicVectorFormat of
#         width (bit size of op + s)
def zext(op, s):
    ext_width = int(s)
    widened_format = ML_StdLogicVectorFormat(
        op.get_precision().get_bit_size() + ext_width)
    return ZeroExt(op, ext_width, precision=widened_format)

Example #4
0
    def generate_scheme(self):
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = VirtualFormat(base_format=self.precision,
                                     support_format=ML_StdLogicVectorFormat(
                                         self.precision.get_bit_size()),
                                     get_cst=get_virtual_cst)
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        vx_precision = self.precision
        vy_precision = self.precision
        result_precision = self.precision

        # precision for first operand vx which is to be statically
        # positioned
        p = vx_precision.get_mantissa_size()
        # precision for second operand vy which is to be dynamically shifted
        q = vy_precision.get_mantissa_size()
        # precision of output
        o = result_precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shift amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())

        mant_vx_precision = ML_StdLogicVectorFormat(p - 1)
        mant_vy_precision = ML_StdLogicVectorFormat(q - 1)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)

        exp_vx = RawExponentExtraction(vx, precision=exp_vx_precision)
        exp_vy = RawExponentExtraction(vy, precision=exp_vy_precision)

        # Maximum number of leading zero for normalized <vx>
        L_x = 0
        # Maximum number of leading zero for normalized <vy>
        L_y = 0

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        effective_op = BitLogicXor(sign_vx,
                                   sign_vy,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()

        exp_offset = max(o + L_y, q) + 2
        exp_bias = exp_offset + exp_vx_bias - exp_vy_bias
        # Determine a working precision to accommodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(vx_precision.get_exponent_size(),
                                     vy_precision.get_exponent_size()) + 2
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)
        # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + offset
        # exp_vx in [emin, emax]
        # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(
            Addition(zext(
                exp_vx,
                exp_precision_ext_size - vx_precision.get_exponent_size()),
                     Constant(exp_bias, precision=exp_precision_ext),
                     precision=exp_precision_ext),
            zext(exp_vy,
                 exp_precision_ext_size - vy_precision.get_exponent_size()),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_std)
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        datapath_full_width = exp_offset + max(o + L_x, p) + 2 + q
        max_exp_diff = datapath_full_width - q
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(signed_exp_diff,
                                          Constant(
                                              max_exp_diff,
                                              precision=exp_precision_ext),
                                          specifier=Comparison.Greater,
                                          precision=ML_Bool)

        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        mant_ext_size = max_exp_diff
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        shifted_mant_vy = BitLogicRightShift(rzext(mant_vy, mant_ext_size),
                                             mant_shift,
                                             precision=shift_prec,
                                             tag="shifted_mant_vy",
                                             debug=debug_std)
        # vx is right-extended by q+2 bits
        # and left extend by exp_offset
        mant_vx_ext = zext(rzext(mant_vx, q + 2), exp_offset + 1)

        add_prec = ML_StdLogicVectorFormat(datapath_full_width + 1)

        mant_vx_add_op = Select(Comparison(effective_op,
                                           Constant(1, precision=ML_StdLogic),
                                           precision=ML_Bool,
                                           specifier=Comparison.Equal),
                                Negation(mant_vx_ext,
                                         precision=add_prec,
                                         tag="neg_mant_vx"),
                                mant_vx_ext,
                                precision=add_prec,
                                tag="mant_vx_add_op",
                                debug=ML_Debug(display_format=" "))

        mant_add = Addition(zext(shifted_mant_vy, 1),
                            mant_vx_add_op,
                            precision=add_prec,
                            tag="mant_add",
                            debug=ML_Debug(display_format=" -radix 2"))

        # if the addition overflows, then it meant vx has been negated and
        # the 2's complement addition cancelled the negative MSB, thus
        # the addition result is positive, and the result is of the sign of Y
        # else the result is of opposite sign to Y
        add_is_negative = BitLogicAnd(CopySign(mant_add,
                                               precision=ML_StdLogic),
                                      effective_op,
                                      precision=ML_StdLogic,
                                      tag="add_is_negative",
                                      debug=ML_Debug(" -radix 2"))
        # Negate mantissa addition result if it is negative
        mant_add_abs = Select(Comparison(add_is_negative,
                                         Constant(1, precision=ML_StdLogic),
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              Negation(mant_add,
                                       precision=add_prec,
                                       tag="neg_mant_add",
                                       debug=debug_std),
                              mant_add,
                              precision=add_prec,
                              tag="mant_add_abs",
                              debug=debug_std)

        res_sign = BitLogicXor(add_is_negative,
                               sign_vy,
                               precision=ML_StdLogic,
                               tag="res_sign")

        # Precision for leading zero count
        lzc_width = int(floor(log2(datapath_full_width + 1)) + 1)
        lzc_prec = ML_StdLogicVectorFormat(lzc_width)

        lzc_args = ML_LeadingZeroCounter.get_default_args(
            width=(datapath_full_width + 1))
        LZC_entity = ML_LeadingZeroCounter(lzc_args)
        lzc_entity_list = LZC_entity.generate_scheme()
        lzc_implementation = LZC_entity.get_implementation()

        lzc_component = lzc_implementation.get_component_object()

        #lzc_in = SubSignalSelection(mant_add, p+1, 2*p+3)
        lzc_in = mant_add_abs  # SubSignalSelection(mant_add_abs, 0, 3*p+3, precision = ML_StdLogicVectorFormat(3*p+4))

        add_lzc = Signal("add_lzc",
                         precision=lzc_prec,
                         var_type=Signal.Local,
                         debug=debug_dec)
        add_lzc = PlaceHolder(
            add_lzc, lzc_component(io_map={
                "x": lzc_in,
                "vr_out": add_lzc
            }))

        # Index of output mantissa least significant bit
        mant_lsb_index = datapath_full_width - o + 1

        #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec)
        # CP stands for close path, the data path where X and Y are within 1 exp diff
        res_normed_mant = BitLogicLeftShift(mant_add_abs,
                                            add_lzc,
                                            precision=add_prec,
                                            tag="res_normed_mant",
                                            debug=debug_std)
        pre_mant_field = SubSignalSelection(
            res_normed_mant,
            mant_lsb_index,
            datapath_full_width - 1,
            precision=ML_StdLogicVectorFormat(o - 1))

        ## Helper function to extract a single bit
        #  from a vector of bits signal
        def BitExtraction(optree, index, **kw):
            return VectorElementSelection(optree,
                                          index,
                                          precision=ML_StdLogic,
                                          **kw)

        def IntCst(value):
            return Constant(value, precision=ML_Integer)

        round_bit = BitExtraction(res_normed_mant, IntCst(mant_lsb_index - 1))
        mant_lsb = BitExtraction(res_normed_mant, IntCst(mant_lsb_index))
        sticky_prec = ML_StdLogicVectorFormat(datapath_full_width - o)
        sticky_input = SubSignalSelection(res_normed_mant,
                                          0,
                                          datapath_full_width - o - 1,
                                          precision=sticky_prec)
        sticky_bit = Select(Comparison(sticky_input,
                                       Constant(0, precision=sticky_prec),
                                       specifier=Comparison.NotEqual,
                                       precision=ML_Bool),
                            Constant(1, precision=ML_StdLogic),
                            Constant(0, precision=ML_StdLogic),
                            precision=ML_StdLogic,
                            tag="sticky_bit",
                            debug=debug_std)

        # increment selection for rounding to nearest (ties to even)
        round_increment_RN = BitLogicAnd(round_bit,
                                         BitLogicOr(sticky_bit,
                                                    mant_lsb,
                                                    precision=ML_StdLogic),
                                         precision=ML_StdLogic,
                                         tag="round_increment_RN",
                                         debug=debug_std)

        rounded_mant = Addition(zext(pre_mant_field, 1),
                                round_increment_RN,
                                precision=ML_StdLogicVectorFormat(o),
                                tag="rounded_mant",
                                debug=debug_std)
        rounded_overflow = BitExtraction(rounded_mant,
                                         IntCst(o - 1),
                                         tag="rounded_overflow",
                                         debug=debug_std)
        res_mant_field = Select(Comparison(rounded_overflow,
                                           Constant(1, precision=ML_StdLogic),
                                           specifier=Comparison.Equal,
                                           precision=ML_Bool),
                                SubSignalSelection(rounded_mant, 1, o - 1),
                                SubSignalSelection(rounded_mant, 0, o - 2),
                                precision=ML_StdLogicVectorFormat(o - 1),
                                tag="final_mant",
                                debug=debug_std)

        res_exp_tmp_size = max(vx_precision.get_exponent_size(),
                               vy_precision.get_exponent_size()) + 2

        res_exp_tmp_prec = ML_StdLogicVectorFormat(res_exp_tmp_size)

        exp_vy_biased = Addition(zext(
            exp_vy, res_exp_tmp_size - vy_precision.get_exponent_size()),
                                 Constant(vy_precision.get_bias() + 1,
                                          precision=res_exp_tmp_prec),
                                 precision=res_exp_tmp_prec,
                                 tag="exp_vy_biased",
                                 debug=debug_dec)
        # vx's exponent is biased with the format bias
        # plus the exponent offset so it is left align to datapath MSB
        exp_vx_biased = Addition(
            zext(exp_vx, res_exp_tmp_size - vx_precision.get_exponent_size()),
            Constant(vx_precision.get_bias() + exp_offset + 1,
                     precision=res_exp_tmp_prec),
            precision=res_exp_tmp_prec,
            tag="exp_vx_biased",
            debug=debug_dec)

        # If exp diff is less than 0, then we must consider that vy's exponent is
        # the meaningful one and thus compute result exponent with respect
        # to vy's exponent value
        res_exp_base = Select(exp_diff_lt_0,
                              exp_vy_biased,
                              exp_vx_biased,
                              precision=res_exp_tmp_prec,
                              tag="res_exp_base",
                              debug=debug_dec)

        # Eventually we add the result exponent base
        # with the exponent offset and the leading zero count
        res_exp_ext = Addition(Subtraction(
            Addition(zext(res_exp_base, 0),
                     Constant(-result_precision.get_bias(),
                              precision=res_exp_tmp_prec),
                     precision=res_exp_tmp_prec),
            zext(add_lzc, res_exp_tmp_size - lzc_width),
            precision=res_exp_tmp_prec),
                               rounded_overflow,
                               precision=res_exp_tmp_prec,
                               tag="res_exp_ext",
                               debug=debug_std)

        res_exp_prec = ML_StdLogicVectorFormat(
            result_precision.get_exponent_size())

        res_exp = Truncate(res_exp_ext,
                           precision=res_exp_prec,
                           tag="res_exp",
                           debug=debug_dec_unsigned)

        vr_out = TypeCast(FloatBuild(
            res_sign,
            res_exp,
            res_mant_field,
            precision=self.precision,
        ),
                          precision=io_precision,
                          tag="result",
                          debug=debug_std)

        self.implementation.add_output_signal("vr_out", vr_out)

        return lzc_entity_list + [self.implementation]
Example #5
0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
###############################################################################

from metalibm_core.core.attributes import ML_Debug, ML_AdvancedDebug, ML_MultiDebug
from metalibm_core.core.ml_formats import *

# Debug attribute presets: each ML_Debug pairs a display format string
# with an optional pre-processing applied to the displayed expression.

# floating-point value (single or double precision), "%f" format
debugf = ML_Debug(
    display_format="%f",
)

# floating-point value, "%lf" format
debuglf = ML_Debug(
    display_format="%lf",
)

# integer shown in hexadecimal
debugx = ML_Debug(
    display_format="%x",
)

# 64-bit integer shown in hexadecimal (PRIx64 format macro)
debuglx = ML_Debug(
    display_format="%\"PRIx64\"",
)

# integer shown in decimal; the expression is cast to int beforehand
debugd = ML_Debug(
    display_format="%d",
    pre_process=lambda expr: "(int) %s" % expr,
)

# long integer shown in decimal
debugld = ML_Debug(
    display_format="%ld",
)
Example #6
0
    def generate_scheme(self):
        """Build the operation graph of the fixed-point accumulator MPFMA.

        The mantissas of the floating-point inputs ``x`` and ``y`` (format
        ``self.precision``) are multiplied, the product is right-shifted to
        its position inside a fixed-width accumulator datapath and then
        added to the ``acc`` input.

        The accumulator covers digit weights from
        ``2 * emin_subnormal`` (LSB) up to
        ``2 * emax + 1 + self.extra_digit`` (MSB).

        Two accumulator encodings are supported, selected by
        ``self.sign_magnitude``:

        * sign-magnitude: the sign comes from an extra ``sign_acc`` input;
          the outputs are ``vr_sign`` and ``vr_acc``;
        * otherwise: the sign is extracted from ``acc`` itself, the shifted
          product is negated when the product sign bit is set and added to
          ``acc``; the only output is ``vr_acc``.

        When ``self.pipelined`` is set, a new pipeline stage is started
        after the ``x``/``y`` inputs are declared, after the product has been
        shifted and just before the outputs are registered.  Nodes are
        attached to stages in creation order, so statement order matters.

        Returns:
            A list whose single element is ``self.implementation``.
        """
        ## Generate the fused multiply-and-add computing <x> . <y> + <acc>
        Log.report(
            Log.Info,
            "generating fixed MPFMA with {ed} extra digit(s) and sign-magnitude accumulator: {sm}"
            .format(ed=self.extra_digit, sm=self.sign_magnitude))

        # Constant builder helper: encodes @p value with the base format's
        # integer coding and wraps it as a support-format constant.
        # NOTE(review): this helper is not referenced anywhere else in
        # this method.
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## I/O format: HDL virtual format wrapping the floating-point
        #  precision of the operator
        io_precision = HdlVirtualFormat(self.precision)
        # no clock input is declared here (kept for reference):
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # the reset input is declared further down, once the accumulator
        # format is known

        # maximum weight for a mantissa product digit
        max_prod_exp = self.precision.get_emax() * 2 + 1
        # minimum weight for a mantissa product digit
        min_prod_exp = self.precision.get_emin_subnormal() * 2

        ## Most and least significant digit index for the
        #  accumulator (the MSB side is widened by self.extra_digit)
        acc_msb_index = max_prod_exp + self.extra_digit
        acc_lsb_index = min_prod_exp

        # number of digits between LSB and MSB, both included
        acc_width = acc_msb_index - min_prod_exp + 1
        # precision of the accumulator
        acc_prec = ML_StdLogicVectorFormat(acc_width)

        # reset is declared as an entity input but is not used by the
        # datapath built below
        reset = self.implementation.add_input_signal("reset", ML_StdLogic)

        # declaring main input variables
        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        # accumulator input (declared after the optional stage boundary)
        acc = self.implementation.add_input_signal("acc", acc_prec)
        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation:
            # its sign is provided as a separate input
            sign_acc = self.implementation.add_input_signal(
                "sign_acc", ML_StdLogic)
        else:
            # otherwise the sign is extracted from the accumulator itself
            sign_acc = CopySign(acc,
                                precision=ML_StdLogic,
                                tag="sign_acc",
                                debug=debug_std)

        # both operands share the operator precision
        vx_precision = self.precision
        vy_precision = self.precision
        # NOTE(review): result_precision is not used afterwards
        result_precision = acc_prec

        # mantissa size of the first operand vx
        p = vx_precision.get_mantissa_size()
        # mantissa size of the second operand vy
        q = vy_precision.get_mantissa_size()

        # formats of the extracted exponent fields
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())

        # formats requested for the extracted mantissas
        mant_vx_precision = ML_StdLogicVectorFormat(p - 1)
        mant_vy_precision = ML_StdLogicVectorFormat(q - 1)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)

        exp_vx = ExponentExtraction(vx,
                                    precision=exp_vx_precision,
                                    tag="exp_vx",
                                    debug=debug_dec)
        exp_vy = ExponentExtraction(vy,
                                    precision=exp_vy_precision,
                                    tag="exp_vy",
                                    debug=debug_dec)

        # NOTE(review): L_x, L_y and L_xy are not used afterwards
        # Maximum number of leading zero for normalized <vx> mantissa
        L_x = 0
        # Maximum number of leading zero for normalized <vy> mantissa
        L_y = 0
        # Maximum number of leading zero for the product of <x>.<y>
        # mantissa.
        L_xy = L_x + L_y + 1

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # sign of the product x.y
        sign_xy = BitLogicXor(sign_vx,
                              sign_vy,
                              precision=ML_StdLogic,
                              tag="sign_xy",
                              debug=ML_Debug(display_format="-radix 2"))
        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1'): product and accumulator
        # signs differ
        effective_op = BitLogicXor(sign_xy,
                                   sign_acc,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()

        # <acc> is statically positioned in the datapath,
        # it may even constitute the whole datapath
        #
        # the product is shifted with respect to the fixed accumulator

        # combined bias of both operand exponents
        exp_bias = (exp_vx_bias + exp_vy_bias)

        # because of the mantissa range [1, 2[, the product exponent
        # is located one bit to the right (lower) of the product MSB
        prod_exp_offset = 1

        # Determine a working precision to accommodate exponent difference:
        # the largest of the exponent field sizes and of the bit counts
        # of the constants involved, plus 2 extra bits
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(
            vx_precision.get_exponent_size(),
            vy_precision.get_exponent_size(),
            abs(ceil(log2(abs(acc_msb_index)))),
            abs(ceil(log2(abs(acc_lsb_index)))),
            abs(ceil(log2(abs(exp_bias + prod_exp_offset)))),
        ) + 2
        Log.report(Log.Info,
                   "exp_precision_ext_size={}".format(exp_precision_ext_size))
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)

        # static accumulator exponent (index of the accumulator MSB)
        exp_acc = Constant(acc_msb_index,
                           precision=exp_precision_ext,
                           tag="exp_acc",
                           debug=debug_cst_dec)

        # The product is right-shifted from the accumulator MSB by
        #   exp_diff = exp_acc - (exp_vx + exp_vy + exp_bias + prod_exp_offset)
        # where both exponent fields are zero-extended to the working
        # precision exp_precision_ext before being summed
        exp_diff = Subtraction(
            exp_acc,
            Addition(Addition(zext(
                exp_vy,
                exp_precision_ext_size - vy_precision.get_exponent_size()),
                              zext(
                                  exp_vx, exp_precision_ext_size -
                                  vx_precision.get_exponent_size()),
                              precision=exp_precision_ext),
                     Constant(exp_bias + prod_exp_offset,
                              precision=exp_precision_ext,
                              tag="diff_bias",
                              debug=debug_cst_dec),
                     precision=exp_precision_ext,
                     tag="pre_exp_diff",
                     debug=debug_dec),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_dec)
        # signed view of exp_diff, used for the range comparisons below
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        datapath_full_width = acc_width
        # the maximum exp diff is the size of the datapath
        # minus the bit size of the product
        max_exp_diff = datapath_full_width - (p + q)
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(signed_exp_diff,
                                          Constant(
                                              max_exp_diff,
                                              precision=exp_precision_ext),
                                          specifier=Comparison.Greater,
                                          precision=ML_Bool)

        # just enough bits to encode max_exp_diff
        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        # shift amount saturated to [0, max_exp_diff]:
        # 0 when exp_diff is negative, max_exp_diff when it exceeds it,
        # the truncated exp_diff otherwise
        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        # mantissa product, on p + q bits
        prod_prec = ML_StdLogicVectorFormat(p + q)
        prod = Multiplication(mant_vx,
                              mant_vy,
                              precision=prod_prec,
                              tag="prod",
                              debug=debug_std)

        # attempt at pipelining the operator
        # self.implementation.start_new_stage()

        # the product is extended on the right up to the datapath width
        # (same value as max_exp_diff), then shifted right by mant_shift
        mant_ext_size = datapath_full_width - (p + q)
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        shifted_prod = BitLogicRightShift(rzext(prod, mant_ext_size),
                                          mant_shift,
                                          precision=shift_prec,
                                          tag="shifted_prod",
                                          debug=debug_std)

        ## Inserting a pipeline stage after the product shifting
        if self.pipelined: self.implementation.start_new_stage()

        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation

            # acc is bitwise negated when product and accumulator signs
            # differ (effective subtraction)
            acc_negated = Select(Comparison(sign_xy,
                                            sign_acc,
                                            specifier=Comparison.Equal,
                                            precision=ML_Bool),
                                 acc,
                                 BitLogicNegate(acc, precision=acc_prec),
                                 precision=acc_prec)

            # one extra MSB bit is added to the final addition
            # to detect overflows
            add_width = acc_width + 1
            add_prec = ML_StdLogicVectorFormat(add_width)

            # both sum and sum + 1 are computed
            # FIXME: implement with a proper compound adder
            mant_add_p0_ext = Addition(zext(shifted_prod, 1),
                                       zext(acc_negated, 1),
                                       precision=add_prec)
            # NOTE(review): this node shares the "mant_add" tag with the
            # final selection below
            mant_add_p1_ext = Addition(
                mant_add_p0_ext,
                Constant(1, precision=ML_StdLogic),
                precision=add_prec,
                tag="mant_add",
                debug=ML_Debug(display_format=" -radix 2"))
            # discarding carry overflow bit
            mant_add_p0 = SubSignalSelection(mant_add_p0_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)
            mant_add_p1 = SubSignalSelection(mant_add_p1_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)

            # sign (MSB) of the extended sum + 1
            mant_add_pre_sign = CopySign(mant_add_p1_ext,
                                         precision=ML_StdLogic,
                                         tag="mant_add_pre_sign",
                                         debug=debug_std)
            # result magnitude:
            #  - same signs: plain sum (mant_add_p0)
            #  - opposite signs and mant_add_pre_sign = '1': sum + 1
            #  - opposite signs otherwise: bitwise negation of the sum
            mant_add = Select(Comparison(sign_xy,
                                         sign_acc,
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              mant_add_p0,
                              Select(
                                  Comparison(mant_add_pre_sign,
                                             Constant(1,
                                                      precision=ML_StdLogic),
                                             specifier=Comparison.Equal,
                                             precision=ML_Bool),
                                  mant_add_p1,
                                  BitLogicNegate(mant_add_p0,
                                                 precision=acc_prec),
                                  precision=acc_prec,
                              ),
                              precision=acc_prec,
                              tag="mant_add")

            # if both operands had the same sign, then
            # mant_add is necessarily positive and the result
            # sign matches the input sign
            # if both operands had opposite signs, then
            # the result sign matches the product sign
            # if mant_add is positive, else the accumulator sign
            output_sign = Select(
                Comparison(effective_op,
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool),
                # the effective op is a subtraction (prod - acc): the
                # accumulator sign is flipped when mant_add_pre_sign is set
                BitLogicXor(sign_acc, mant_add_pre_sign,
                            precision=ML_StdLogic),
                # the effective op is an addition, thus result and
                # acc share sign
                sign_acc,
                precision=ML_StdLogic,
                tag="output_sign")

            # optional stage boundary before the outputs
            if self.pipelined: self.implementation.start_new_stage()

            # adding outputs: result sign and result magnitude
            self.implementation.add_output_signal("vr_sign", output_sign)
            self.implementation.add_output_signal("vr_acc", mant_add)

        else:
            # 2's complement encoding of the accumulator,
            # the accumulator is never negated, only the product
            # is negated if negative

            # negate shifted prod when required
            shifted_prod_op = Select(Comparison(sign_xy,
                                                Constant(
                                                    1, precision=ML_StdLogic),
                                                specifier=Comparison.Equal,
                                                precision=ML_Bool),
                                     Negation(shifted_prod,
                                              precision=shift_prec),
                                     shifted_prod,
                                     precision=shift_prec)

            add_prec = shift_prec  # NOTE(review): not used afterwards in this branch

            # accumulation on the accumulator width (no extra carry bit)
            mant_add = Addition(shifted_prod_op,
                                acc,
                                precision=acc_prec,
                                tag="mant_add",
                                debug=ML_Debug(display_format=" -radix 2"))

            # optional stage boundary before the output
            if self.pipelined: self.implementation.start_new_stage()

            self.implementation.add_output_signal("vr_acc", mant_add)

        return [self.implementation]
Example #7
0
    def generate_scheme(self):
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = VirtualFormat(base_format=self.precision,
                                     support_format=ML_StdLogicVectorFormat(
                                         self.precision.get_bit_size()),
                                     get_cst=get_virtual_cst)
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        p = self.precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shift amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)
        exp_precision = ML_StdLogicVectorFormat(
            self.precision.get_exponent_size())

        mant_precision = ML_StdLogicVectorFormat(
            self.precision.get_field_size())

        mant_vx = MantissaExtraction(vx, precision=mant_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_precision)

        exp_vx = ExponentExtraction(vx, precision=exp_precision)
        exp_vy = ExponentExtraction(vy, precision=exp_precision)

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        effective_op = BitLogicXor(sign_vx,
                                   sign_vy,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        ## Wrapper for zero extension
        # @param op the input operation tree
        # @param s integer size of the extension
        # @return the Zero extended operation node
        def zext(op, s):
            op_size = op.get_precision().get_bit_size()
            ext_precision = ML_StdLogicVectorFormat(op_size + s)
            return ZeroExt(op, s, precision=ext_precision)

        ## Generate the right zero extended output from @p optree
        def rzext(optree, ext_size):
            op_size = optree.get_precision().get_bit_size()
            ext_format = ML_StdLogicVectorFormat(ext_size)
            out_format = ML_StdLogicVectorFormat(op_size + ext_size)
            return Concatenation(optree,
                                 Constant(0, precision=ext_format),
                                 precision=out_format)

        exp_bias = p + 2
        exp_precision_ext = ML_StdLogicVectorFormat(
            self.precision.get_exponent_size() + 2)
        # Y is first aligned p+2 bit to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + precision + 2
        # exp_vx in [emin, emax]
        # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(Addition(zext(exp_vx, 2),
                                        Constant(exp_bias,
                                                 precision=exp_precision_ext),
                                        precision=exp_precision_ext),
                               zext(exp_vy, 2),
                               precision=exp_precision_ext,
                               tag="exp_diff")
        exp_diff_lt_0 = Comparison(exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool)
        exp_diff_gt_2pp4 = Comparison(exp_diff,
                                      Constant(2 * p + 4,
                                               precision=exp_precision_ext),
                                      specifier=Comparison.Greater,
                                      precision=ML_Bool)

        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(2 * p + 4)) + 1))

        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_2pp4,
                                   Constant(2 * p + 4,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        mant_ext_size = 2 * p + 4
        shift_prec = ML_StdLogicVectorFormat(3 * p + 4)
        shifted_mant_vy = BitLogicRightShift(rzext(mant_vy, mant_ext_size),
                                             mant_shift,
                                             precision=shift_prec,
                                             tag="shifted_mant_vy",
                                             debug=debug_std)
        mant_vx_ext = zext(rzext(mant_vx, p + 2), p + 2 + 1)

        add_prec = ML_StdLogicVectorFormat(3 * p + 5)

        mant_vx_add_op = Select(Comparison(effective_op,
                                           Constant(1, precision=ML_StdLogic),
                                           precision=ML_Bool,
                                           specifier=Comparison.Equal),
                                Negation(mant_vx_ext,
                                         precision=add_prec,
                                         tag="neg_mant_vx"),
                                mant_vx_ext,
                                precision=add_prec,
                                tag="mant_vx_add_op",
                                debug=ML_Debug(display_format=" "))

        mant_add = Addition(zext(shifted_mant_vy, 1),
                            mant_vx_add_op,
                            precision=add_prec,
                            tag="mant_add",
                            debug=ML_Debug(display_format=" -radix 2"))

        # if the addition overflows, then it meant vx has been negated and
        # the 2's complement addition cancelled the negative MSB, thus
        # the addition result is positive, and the result is of the sign of Y
        # else the result is of opposite sign to Y
        add_is_negative = BitLogicAnd(CopySign(mant_add,
                                               precision=ML_StdLogic),
                                      effective_op,
                                      precision=ML_StdLogic,
                                      tag="add_is_negative",
                                      debug=ML_Debug(" -radix 2"))
        # Negate mantissa addition result if it is negative
        mant_add_abs = Select(Comparison(add_is_negative,
                                         Constant(1, precision=ML_StdLogic),
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              Negation(mant_add,
                                       precision=add_prec,
                                       tag="neg_mant_add"),
                              mant_add,
                              precision=add_prec,
                              tag="mant_add_abs")

        res_sign = BitLogicXor(add_is_negative,
                               sign_vy,
                               precision=ML_StdLogic,
                               tag="res_sign")

        # Precision for leading zero count
        lzc_width = int(floor(log2(3 * p + 5)) + 1)
        lzc_prec = ML_StdLogicVectorFormat(lzc_width)

        lzc_args = ML_LeadingZeroCounter.get_default_args(width=(3 * p + 5))
        LZC_entity = ML_LeadingZeroCounter(lzc_args)
        lzc_entity_list = LZC_entity.generate_scheme()
        lzc_implementation = LZC_entity.get_implementation()

        lzc_component = lzc_implementation.get_component_object()

        #lzc_in = SubSignalSelection(mant_add, p+1, 2*p+3)
        lzc_in = mant_add_abs  # SubSignalSelection(mant_add_abs, 0, 3*p+3, precision = ML_StdLogicVectorFormat(3*p+4))

        add_lzc = Signal("add_lzc",
                         precision=lzc_prec,
                         var_type=Signal.Local,
                         debug=debug_dec)
        add_lzc = PlaceHolder(
            add_lzc, lzc_component(io_map={
                "x": lzc_in,
                "vr_out": add_lzc
            }))

        #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec)
        # CP stands for close path, the data path where X and Y are within 1 exp diff
        res_normed_mant = BitLogicLeftShift(mant_add,
                                            add_lzc,
                                            precision=add_prec,
                                            tag="res_normed_mant",
                                            debug=debug_std)
        pre_mant_field = SubSignalSelection(
            res_normed_mant,
            2 * p + 5,
            3 * p + 3,
            precision=ML_StdLogicVectorFormat(p - 1))

        ## Helper function to extract a single bit
        #  from a vector of bits signal
        def BitExtraction(optree, index, **kw):
            return VectorElementSelection(optree,
                                          index,
                                          precision=ML_StdLogic,
                                          **kw)

        def IntCst(value):
            return Constant(value, precision=ML_Integer)

        round_bit = BitExtraction(res_normed_mant, IntCst(2 * p + 4))
        mant_lsb = BitExtraction(res_normed_mant, IntCst(2 * p + 5))
        sticky_prec = ML_StdLogicVectorFormat(2 * p + 4)
        sticky_input = SubSignalSelection(res_normed_mant,
                                          0,
                                          2 * p + 3,
                                          precision=sticky_prec)
        sticky_bit = Select(Comparison(sticky_input,
                                       Constant(0, precision=sticky_prec),
                                       specifier=Comparison.NotEqual,
                                       precision=ML_Bool),
                            Constant(1, precision=ML_StdLogic),
                            Constant(0, precision=ML_StdLogic),
                            precision=ML_StdLogic,
                            tag="sticky_bit",
                            debug=debug_std)

        # increment selection for rouding to nearest (tie to even)
        round_increment_RN = BitLogicAnd(round_bit,
                                         BitLogicOr(sticky_bit,
                                                    mant_lsb,
                                                    precision=ML_StdLogic),
                                         precision=ML_StdLogic,
                                         tag="round_increment_RN",
                                         debug=debug_std)

        rounded_mant = Addition(zext(pre_mant_field, 1),
                                round_increment_RN,
                                precision=ML_StdLogicVectorFormat(p),
                                tag="rounded_mant",
                                debug=debug_std)
        rounded_overflow = BitExtraction(rounded_mant,
                                         IntCst(p - 1),
                                         tag="rounded_overflow",
                                         debug=debug_std)
        res_mant_field = Select(Comparison(rounded_overflow,
                                           Constant(1, precision=ML_StdLogic),
                                           specifier=Comparison.Equal,
                                           precision=ML_Bool),
                                SubSignalSelection(rounded_mant, 1, p - 1),
                                SubSignalSelection(rounded_mant, 0, p - 2),
                                precision=ML_StdLogicVectorFormat(p - 1),
                                tag="final_mant",
                                debug=debug_std)

        res_exp_prec_size = self.precision.get_exponent_size() + 2
        res_exp_prec = ML_StdLogicVectorFormat(res_exp_prec_size)

        res_exp_ext = Addition(Subtraction(
            Addition(zext(exp_vx, 2),
                     Constant(3 + p, precision=res_exp_prec),
                     precision=res_exp_prec),
            zext(add_lzc, res_exp_prec_size - lzc_width),
            precision=res_exp_prec),
                               rounded_overflow,
                               precision=res_exp_prec,
                               tag="res_exp_ext",
                               debug=debug_std)

        res_exp = Truncate(res_exp_ext,
                           precision=ML_StdLogicVectorFormat(
                               self.precision.get_exponent_size()),
                           tag="res_exp",
                           debug=debug_dec)

        vr_out = TypeCast(FloatBuild(
            res_sign,
            res_exp,
            res_mant_field,
            precision=self.precision,
        ),
                          precision=io_precision,
                          tag="result",
                          debug=debug_std)

        self.implementation.add_output_signal("vr_out", vr_out)

        return lzc_entity_list + [self.implementation]
Example #8
0
    def generate_scheme(self):
        """Build the operation graph of a mixed-precision fused multiply-add.

        The entity computes <x> . <y> + <z>: inputs "x" and "y" are declared
        with self.precision, input "z" and output "vr_out" with
        self.acc_precision. An extra "reset" input port is also declared.

        When self.pipelined is set, a new pipeline stage is started after
        each major step: input, product / addend alignment, addition,
        leading zero count, normalization shift and rounding.

        Returns:
            The entity list generated by the leading zero counter
            sub-entity, followed by self.implementation.
        """
        ## Generate Fused multiply and add computing <x> . <y> + <z>
        Log.report(
            Log.Info,
            "generating MPFMA with acc precision {acc_precision} and precision {precision}"
            .format(acc_precision=self.acc_precision,
                    precision=self.precision))

        # Constant builder handed to the VirtualFormat objects below: encodes
        # @p value with the base format's integer coding and wraps it as a
        # constant of the support format
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        # Virtual format for the product operands: base format is
        # self.precision, supported by a std_logic_vector of the same bit size
        prod_input_precision = VirtualFormat(
            base_format=self.precision,
            support_format=ML_StdLogicVectorFormat(
                self.precision.get_bit_size()),
            get_cst=get_virtual_cst)

        # Virtual format for the addend and the result: base format is
        # self.acc_precision, supported by a std_logic_vector of the same
        # bit size
        accumulator_precision = VirtualFormat(
            base_format=self.acc_precision,
            support_format=ML_StdLogicVectorFormat(
                self.acc_precision.get_bit_size()),
            get_cst=get_virtual_cst)

        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", prod_input_precision)
        vy = self.implementation.add_input_signal("y", prod_input_precision)
        vz = self.implementation.add_input_signal("z", accumulator_precision)

        # extra reset input port
        reset = self.implementation.add_input_signal("reset", ML_StdLogic)

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        vx_precision = self.precision
        vy_precision = self.precision
        vz_precision = self.acc_precision
        result_precision = self.acc_precision

        # mantissa size of the first product operand vx
        p = vx_precision.get_mantissa_size()
        # mantissa size of the second product operand vy
        q = vy_precision.get_mantissa_size()
        # mantissa size of the addend vz (the operand which is dynamically
        # shifted, see shifted_mant_vz below)
        r = vz_precision.get_mantissa_size()
        # mantissa size of the output
        o = result_precision.get_mantissa_size()

        # Formats of the raw exponent fields of each operand
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())
        exp_vz_precision = ML_StdLogicVectorFormat(
            vz_precision.get_exponent_size())

        # MantissaExtraction performs the implicit
        # digit computation and concatenation
        mant_vx_precision = ML_StdLogicVectorFormat(p)
        mant_vy_precision = ML_StdLogicVectorFormat(q)
        mant_vz_precision = ML_StdLogicVectorFormat(r)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)
        mant_vz = MantissaExtraction(vz, precision=mant_vz_precision)

        exp_vx = ExponentExtraction(vx, precision=exp_vx_precision)
        exp_vy = ExponentExtraction(vy, precision=exp_vy_precision)
        exp_vz = ExponentExtraction(vz, precision=exp_vz_precision)

        # Maximum number of leading zero for normalized <vx> mantissa
        L_x = 0
        # Maximum number of leading zero for normalized <vy> mantissa
        L_y = 0
        # Maximum number of leading zero for normalized <vz> mantissa
        L_z = 0
        # Maximum number of leading zero for the product of <x>.<y>
        # mantissa.
        L_xy = L_x + L_y + 1

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)
        sign_vz = CopySign(vz, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1'): the product sign is
        # sign_vx xor sign_vy, and it is compared (xor) with the addend sign
        sign_xy = BitLogicXor(sign_vx,
                              sign_vy,
                              precision=ML_StdLogic,
                              tag="sign_xy",
                              debug=ML_Debug(display_format="-radix 2"))
        effective_op = BitLogicXor(sign_xy,
                                   sign_vz,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()
        exp_vz_bias = vz_precision.get_bias()

        # x.y is statically positioned in the datapath
        # while z is shifted
        # This is justified by the fact that z alignment may be performed
        # in parallel with the multiplication of x and y mantissas
        # The product is positioned <exp_offset>-bit to the right of datapath MSB
        # (without including an extra carry bit)
        exp_offset = max(o + L_z, r) + 2
        exp_bias = exp_offset + (exp_vx_bias + exp_vy_bias) - exp_vz_bias

        # because of the mantissa range [1, 2[, the product exponent
        # is located one bit to the right (lower) of the product MSB
        prod_exp_offset = 1

        # Determine a working precision to accommodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(vx_precision.get_exponent_size(),
                                     vy_precision.get_exponent_size(),
                                     vz_precision.get_exponent_size()) + 2
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)
        # exp_diff is the candidate right-shift amount for z's mantissa:
        #   exp_diff = exp_vy + exp_vx + (exp_bias + prod_exp_offset) - exp_vz
        # Each exponent field is first extended (zext) to
        # exp_precision_ext_size bits
        exp_diff = Subtraction(Addition(Addition(
            zext(exp_vy,
                 exp_precision_ext_size - vy_precision.get_exponent_size()),
            zext(exp_vx,
                 exp_precision_ext_size - vx_precision.get_exponent_size()),
            precision=exp_precision_ext),
                                        Constant(exp_bias + prod_exp_offset,
                                                 precision=exp_precision_ext),
                                        precision=exp_precision_ext),
                               zext(
                                   exp_vz, exp_precision_ext_size -
                                   vz_precision.get_exponent_size()),
                               precision=exp_precision_ext,
                               tag="exp_diff",
                               debug=debug_std)
        # Signed view of exp_diff, used for the range comparisons below
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        # Width of the alignment datapath; the adder uses one more bit
        # (see add_prec below)
        datapath_full_width = exp_offset + max(o + L_xy, p + q) + 2 + r
        # Largest shift amount applied to z's mantissa (saturation value
        # of mant_shift)
        max_exp_diff = datapath_full_width - r
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(signed_exp_diff,
                                          Constant(
                                              max_exp_diff,
                                              precision=exp_precision_ext),
                                          specifier=Comparison.Greater,
                                          precision=ML_Bool)

        # Format just wide enough to encode max_exp_diff
        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        # Shift amount clamped to [0, max_exp_diff]:
        #   0 if exp_diff < 0, max_exp_diff if exp_diff > max_exp_diff,
        #   else the low bits of exp_diff
        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        # Product of the two mantissas, on p + q bits
        prod_prec = ML_StdLogicVectorFormat(p + q)
        prod = Multiplication(mant_vx,
                              mant_vy,
                              precision=prod_prec,
                              tag="prod",
                              debug=debug_std)

        mant_ext_size = max_exp_diff
        # Generation-time traces of the datapath dimensions
        print("mant_ext_size: %d" % max_exp_diff)
        print("datapath_full_width: %d" % datapath_full_width)
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        # z's mantissa is extended by mant_ext_size bits (rzext) to
        # datapath_full_width bits, then right-shifted by mant_shift
        mant_vz_ext = rzext(mant_vz, mant_ext_size)
        shifted_mant_vz = BitLogicRightShift(mant_vz_ext,
                                             mant_shift,
                                             precision=shift_prec,
                                             tag="shifted_mant_vz",
                                             debug=debug_std)

        # Inserting  pipeline stage
        # after production computation
        # and addend alignment shift
        if self.pipelined: self.implementation.start_new_stage()

        # the product is extended by r + 2 bits with rzext
        # and by exp_offset + 1 bits with zext
        # (presumably right and left zero-extension respectively,
        #  going by the helper names -- confirm against their definitions)
        prod_ext = zext(rzext(prod, r + 2), exp_offset + 1)

        # Adder format: one bit wider than the alignment datapath
        add_prec = ML_StdLogicVectorFormat(datapath_full_width + 1)

        ## Here we make the supposition that
        #  the product is slower to compute than
        #  aligning <vz> and negating it if necessary
        #  which means that mant_add has the same sign as the product
        #prod_add_op = Select(
        #  Comparison(
        #    effective_op,
        #    Constant(1, precision = ML_StdLogic),
        #    precision = ML_Bool,
        #    specifier = Comparison.Equal
        #  ),
        #  Negation(prod_ext, precision = add_prec, tag = "neg_prod"),
        #  prod_ext,
        #  precision = add_prec,
        #  tag = "prod_add_op",
        #  debug = ML_Debug(display_format = " ")
        #)
        # For an effective subtraction the (extended) aligned addend is
        # bitwise-negated; otherwise it is used unchanged
        addend_op = Select(Comparison(effective_op,
                                      Constant(1, precision=ML_StdLogic),
                                      precision=ML_Bool,
                                      specifier=Comparison.Equal),
                           BitLogicNegate(zext(shifted_mant_vz, 1),
                                          precision=add_prec,
                                          tag="neg_addend_Op"),
                           zext(shifted_mant_vz, 1),
                           precision=add_prec,
                           tag="addend_op",
                           debug=debug_std)

        prod_add_op = prod_ext

        # Compound Addition: both addend_op + prod_add_op (mant_add_p0)
        # and addend_op + prod_add_op + 1 (mant_add_p1) are built
        mant_add_p1 = Addition(Addition(addend_op,
                                        prod_add_op,
                                        precision=add_prec),
                               Constant(1, precision=ML_StdLogic),
                               precision=add_prec,
                               tag="mant_add_p1",
                               debug=ML_Debug(display_format=" -radix 2"))
        mant_add_p0 = Addition(addend_op,
                               prod_add_op,
                               precision=add_prec,
                               tag="mant_add_p0",
                               debug=ML_Debug(display_format=" -radix 2"))

        # add_is_negative is set when the operation is an effective
        # subtraction (effective_op = '1') and the sign bit (CopySign) of
        # mant_add_p1 is set
        add_is_negative = BitLogicAnd(CopySign(mant_add_p1,
                                               precision=ML_StdLogic),
                                      effective_op,
                                      precision=ML_StdLogic,
                                      tag="add_is_negative",
                                      debug=ML_Debug(" -radix 2"))
        # Absolute value of the addition result: when it is negative the
        # bitwise negation of mant_add_p0 is selected, else mant_add_p1.
        # (mant_add_p1 = mant_add_p0 + 1, and in two's complement
        #  -(v + 1) == ~v, so ~mant_add_p0 is the opposite of mant_add_p1)
        mant_add_abs = Select(Comparison(add_is_negative,
                                         Constant(1, precision=ML_StdLogic),
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              BitLogicNegate(mant_add_p0,
                                             precision=add_prec,
                                             tag="neg_mant_add_p0",
                                             debug=debug_std),
                              mant_add_p1,
                              precision=add_prec,
                              tag="mant_add_abs",
                              debug=debug_std)

        # determining result sign: the product sign, flipped when
        # the addition result is negative
        res_sign = BitLogicXor(add_is_negative,
                               sign_xy,
                               precision=ML_StdLogic,
                               tag="res_sign")

        print("pre lzc stage: %d " % self.implementation.get_current_stage())
        # adding pipeline stage after addition computation
        if self.pipelined: self.implementation.start_new_stage()

        print("lzc stage: %d " % self.implementation.get_current_stage())

        # Precision for leading zero count: wide enough to encode
        # the adder width (datapath_full_width + 1)
        lzc_width = int(floor(log2(datapath_full_width + 1)) + 1)
        lzc_prec = ML_StdLogicVectorFormat(lzc_width)

        # current_stage is only used by the trace below (the
        # set_current_stage call further down is commented out)
        current_stage = self.implementation.get_current_stage()
        print("saving current_stage: %d" % current_stage)

        # Generate a leading zero counter sub-entity matching the adder
        # width and retrieve its component object for instantiation
        lzc_args = ML_LeadingZeroCounter.get_default_args(
            width=(datapath_full_width + 1))
        LZC_entity = ML_LeadingZeroCounter(lzc_args)
        lzc_entity_list = LZC_entity.generate_scheme()
        lzc_implementation = LZC_entity.get_implementation()

        lzc_component = lzc_implementation.get_component_object()

        #self.implementation.set_current_stage(current_stage)
        # Attributes dynamic field (init_stage and init_op)
        # constructors must be initialized back after
        # building a sub-operator inside this operator
        self.implementation.instanciate_dyn_attributes()

        # lzc_in = mant_add_abs

        # Local signal receiving the counter output; the PlaceHolder ties
        # it to the component instance mapping "x" to mant_add_abs and
        # "vr_out" to that signal
        add_lzc_sig = Signal("add_lzc",
                             precision=lzc_prec,
                             var_type=Signal.Local,
                             debug=debug_dec)
        add_lzc = PlaceHolder(add_lzc_sig,
                              lzc_component(io_map={
                                  "x": mant_add_abs,
                                  "vr_out": add_lzc_sig
                              },
                                            tag="lzc_i"),
                              tag="place_holder")

        # adding pipeline stage after leading zero count
        if self.pipelined: self.implementation.start_new_stage()

        # Index of output mantissa least significant bit
        mant_lsb_index = datapath_full_width - o + 1

        #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec)
        # Normalization: the absolute addition result is left-shifted by
        # the leading zero count
        res_normed_mant = BitLogicLeftShift(mant_add_abs,
                                            add_lzc,
                                            precision=add_prec,
                                            tag="res_normed_mant",
                                            debug=debug_std)
        # Bits [mant_lsb_index, datapath_full_width - 1] of the normalized
        # result, i.e. o - 1 bits (the bit at index datapath_full_width is
        # left out)
        pre_mant_field = SubSignalSelection(
            res_normed_mant,
            mant_lsb_index,
            datapath_full_width - 1,
            precision=ML_StdLogicVectorFormat(o - 1))

        ## Helper function to extract a single bit
        #  from a vector of bits signal
        def BitExtraction(optree, index, **kw):
            return VectorElementSelection(optree,
                                          index,
                                          precision=ML_StdLogic,
                                          **kw)

        ## Helper function building an ML_Integer constant
        def IntCst(value):
            return Constant(value, precision=ML_Integer)

        # adding pipeline stage after normalization shift
        if self.pipelined: self.implementation.start_new_stage()

        # Rounding information: round bit (just below the mantissa LSB),
        # mantissa LSB, and sticky bit (set when any of the
        # datapath_full_width - o lowest bits is non-zero)
        round_bit = BitExtraction(res_normed_mant, IntCst(mant_lsb_index - 1))
        mant_lsb = BitExtraction(res_normed_mant, IntCst(mant_lsb_index))
        sticky_prec = ML_StdLogicVectorFormat(datapath_full_width - o)
        sticky_input = SubSignalSelection(res_normed_mant,
                                          0,
                                          datapath_full_width - o - 1,
                                          precision=sticky_prec)
        sticky_bit = Select(Comparison(sticky_input,
                                       Constant(0, precision=sticky_prec),
                                       specifier=Comparison.NotEqual,
                                       precision=ML_Bool),
                            Constant(1, precision=ML_StdLogic),
                            Constant(0, precision=ML_StdLogic),
                            precision=ML_StdLogic,
                            tag="sticky_bit",
                            debug=debug_std)

        # increment selection for rounding to nearest (tie to even):
        # round_bit and (sticky_bit or mant_lsb)
        round_increment_RN = BitLogicAnd(round_bit,
                                         BitLogicOr(sticky_bit,
                                                    mant_lsb,
                                                    precision=ML_StdLogic),
                                         precision=ML_StdLogic,
                                         tag="round_increment_RN",
                                         debug=debug_std)

        # The mantissa field is extended by one bit before adding the
        # rounding increment, so that bit o - 1 of the sum can be read
        # as the rounding overflow
        rounded_mant = Addition(zext(pre_mant_field, 1),
                                round_increment_RN,
                                precision=ML_StdLogicVectorFormat(o),
                                tag="rounded_mant",
                                debug=debug_std)
        rounded_overflow = BitExtraction(rounded_mant,
                                         IntCst(o - 1),
                                         tag="rounded_overflow",
                                         debug=debug_std)
        # On rounding overflow the field is taken one bit higher
        # (bits [1, o - 1]) instead of bits [0, o - 2]
        res_mant_field = Select(Comparison(rounded_overflow,
                                           Constant(1, precision=ML_StdLogic),
                                           specifier=Comparison.Equal,
                                           precision=ML_Bool),
                                SubSignalSelection(rounded_mant, 1, o - 1),
                                SubSignalSelection(rounded_mant, 0, o - 2),
                                precision=ML_StdLogicVectorFormat(o - 1),
                                tag="final_mant",
                                debug=debug_std)

        # Working size for the result exponent computation: two bits more
        # than the widest operand exponent field
        res_exp_tmp_size = max(vx_precision.get_exponent_size(),
                               vy_precision.get_exponent_size(),
                               vz_precision.get_exponent_size()) + 2

        res_exp_tmp_prec = ML_StdLogicVectorFormat(res_exp_tmp_size)

        # Product biased exponent
        # is computed from both x and y exponent:
        #   (exp_vy + bias_vy) + (exp_vx + bias_vx) + (exp_offset + 1)
        exp_xy_biased = Addition(Addition(
            Addition(zext(exp_vy,
                          res_exp_tmp_size - vy_precision.get_exponent_size()),
                     Constant(vy_precision.get_bias(),
                              precision=res_exp_tmp_prec),
                     precision=res_exp_tmp_prec,
                     tag="exp_vy_biased",
                     debug=debug_dec),
            Addition(zext(exp_vx,
                          res_exp_tmp_size - vx_precision.get_exponent_size()),
                     Constant(vx_precision.get_bias(),
                              precision=res_exp_tmp_prec),
                     precision=res_exp_tmp_prec,
                     tag="exp_vx_biased",
                     debug=debug_dec),
            precision=res_exp_tmp_prec),
                                 Constant(
                                     exp_offset + 1,
                                     precision=res_exp_tmp_prec,
                                 ),
                                 precision=res_exp_tmp_prec,
                                 tag="exp_xy_biased",
                                 debug=debug_dec)
        # vz's exponent is biased with the format bias plus one
        # (the exponent offset is not added here, see the disabled
        #  term on the constant below)
        exp_vz_biased = Addition(
            zext(exp_vz, res_exp_tmp_size - vz_precision.get_exponent_size()),
            Constant(
                vz_precision.get_bias() + 1,  # + exp_offset + 1,
                precision=res_exp_tmp_prec),
            precision=res_exp_tmp_prec,
            tag="exp_vz_biased",
            debug=debug_dec)

        # If exp diff is less than 0, then we must consider that vz's exponent is
        # the meaningful one and thus compute result exponent with respect
        # to vz's exponent value
        res_exp_base = Select(exp_diff_lt_0,
                              exp_vz_biased,
                              exp_xy_biased,
                              precision=res_exp_tmp_prec,
                              tag="res_exp_base",
                              debug=debug_dec)

        # Eventually the result exponent is
        #   res_exp_base - result bias - leading zero count
        #   + rounded_overflow
        res_exp_ext = Addition(Subtraction(
            Addition(zext(res_exp_base, 0),
                     Constant(-result_precision.get_bias(),
                              precision=res_exp_tmp_prec),
                     precision=res_exp_tmp_prec),
            zext(add_lzc, res_exp_tmp_size - lzc_width),
            precision=res_exp_tmp_prec),
                               rounded_overflow,
                               precision=res_exp_tmp_prec,
                               tag="res_exp_ext",
                               debug=debug_std)

        # The extended exponent is truncated to the exponent field size
        # of the result format
        res_exp_prec = ML_StdLogicVectorFormat(
            result_precision.get_exponent_size())

        res_exp = Truncate(res_exp_ext,
                           precision=res_exp_prec,
                           tag="res_exp",
                           debug=debug_dec_unsigned)

        # Assemble sign, exponent and mantissa fields into the result
        vr_out = TypeCast(FloatBuild(
            res_sign,
            res_exp,
            res_mant_field,
            precision=accumulator_precision,
        ),
                          precision=accumulator_precision,
                          tag="result",
                          debug=debug_std)

        # adding pipeline stage after rounding
        if self.pipelined: self.implementation.start_new_stage()

        self.implementation.add_output_signal("vr_out", vr_out)

        return lzc_entity_list + [self.implementation]
Example #9
0
from metalibm_core.core.polynomials import *
from metalibm_core.core.ml_entity import ML_Entity, ML_EntityBasis, DefaultEntityArgTemplate
from metalibm_core.code_generation.generator_utility import FunctionOperator, FO_Result, FO_Arg

from metalibm_core.utility.ml_template import *
from metalibm_core.utility.log_report import Log
from metalibm_core.utility.debug_utils import *
from metalibm_core.utility.num_utils import ulp
from metalibm_core.utility.gappa_utils import is_gappa_installed

from metalibm_core.core.ml_hdl_format import *
from metalibm_core.core.ml_hdl_operations import *

from metalibm_hw_blocks.lzc import ML_LeadingZeroCounter

# Module-level ML_Debug descriptors shared by the operation nodes below:
# debug_std uses the display format " -radix 2 " and debug_dec uses
# " -radix 10 "
debug_std = ML_Debug(display_format=" -radix 2 ")
debug_dec = ML_Debug(display_format=" -radix 10 ")


class FP_Adder(ML_Entity("fp_adder")):
    def __init__(
        self,
        arg_template=DefaultEntityArgTemplate,
        precision=ML_Binary32,
        libm_compliant=True,
        debug_flag=False,
        target=VHDLBackend(),
        output_file="fp_adder.vhd",
        entity_name="fp_adder",
        language=VHDL_Code,
    ):
Example #10
0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 num_iter=3,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="__divsf3.c",
                 function_name="__divsf3"):
        """Build a floating-point division scheme, emit C code, run Gappa.

        The constructor performs the whole generation flow:

        1. declares a ``CodeFunction`` taking two inputs ``x`` and ``y``;
        2. builds a division seed followed by ``num_iter`` Newton-Raphson
           refinements of the reciprocal approximation and FMA-based
           correction steps of the quotient (see ``compute_div``);
        3. wraps the computation into a special-case handling scheme, which
           differs depending on whether ``target`` is a ``K1B_Processor``;
        4. runs the optimization passes, generates the C source and writes
           it to ``output_file``;
        5. generates a Gappa script for the difference between the final
           reciprocal approximation and the exact ``1 / y`` and prints the
           resulting interval (or an error message if that step fails).

        Args:
            precision: format of both inputs and of the result.
            abs_accuracy: accepted for interface compatibility; not read by
                this constructor.
            libm_compliant: forwarded to ``CCodeGenerator``.
            debug_flag: when false, debug output is disabled in the C code
                generator (``disable_debug=not debug_flag``).
            fuse_fma: when true, the scheme goes through
                ``OptimizationEngine.fuse_multiply_add``.
            num_iter: number of Newton-Raphson iterations applied to the
                seed.
            fast_path_extract: accepted for interface compatibility; not
                read by this constructor.
            target: processor description used for support checks and code
                generation.
            output_file: path of the C file to write.
            function_name: name of the generated function.
        """
        # declaring CodeFunction and retrieving input variable
        self.precision = precision
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name,
                                          output_format=precision)
        vx = exp_implementation.add_input_variable("x", precision)
        vy = exp_implementation.add_input_variable("y", precision)
        processor = target

        class NR_Iteration(object):
            """One refinement step ``approx + (1 - divisor * approx) * approx``.

            The same expression is built either with explicit
            ``FusedMultiplyAdd`` nodes (``force_fma=True``) or with plain
            operators, leaving any fusion to the optimization engine.
            """

            def __init__(self, approx, divisor, force_fma=False):
                self.approx = approx
                self.divisor = divisor
                self.force_fma = force_fma
                if force_fma:
                    # NOTE(review): SubtractNegate is used here as the fused
                    # counterpart of ``1.0 - divisor * approx`` (see the
                    # non-fused branch) -- confirm against FusedMultiplyAdd.
                    self.error = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                    self.new_approx = FusedMultiplyAdd(
                        self.error,
                        self.approx,
                        self.approx,
                        specifier=FusedMultiplyAdd.Standard)
                else:
                    self.error = 1 - divisor * approx
                    self.new_approx = self.approx + self.error * self.approx

            def get_new_approx(self):
                """Return the node holding the refined approximation."""
                return self.new_approx

            def get_hint_rules(self, gcg, gappa_code, exact):
                """Register the Gappa rewriting hints of this iteration.

                ``gcg`` is the Gappa code generator, ``gappa_code`` the code
                object the hints are added to and ``exact`` the node standing
                for the exact value being approximated.
                """
                divisor = self.divisor.get_handle().get_node()
                approx = self.approx.get_handle().get_node()
                new_approx = self.new_approx.get_handle().get_node()

                # hint expressions are built with the exact format as default
                Attributes.set_default_precision(ML_Exact)

                if self.force_fma:
                    rule0 = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                else:
                    rule0 = 1.0 - divisor * approx
                rule1 = 1.0 - divisor * (approx - exact) - 1.0

                rule2 = new_approx - exact
                subrule = approx * (2 - divisor * approx)
                rule3 = (new_approx - subrule
                         ) - (approx - exact) * (approx - exact) * divisor

                if self.force_fma:
                    new_error = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                    rule4 = FusedMultiplyAdd(new_error, approx, approx)
                else:
                    rule4 = approx + (1 - divisor * approx) * approx

                Attributes.unset_default_precision()

                # registering hints
                gcg.add_hint(gappa_code, rule0, rule1)
                gcg.add_hint(gappa_code, rule2, rule3)
                gcg.add_hint(gappa_code, subrule, rule4)

        # debug display descriptors attached to intermediate nodes
        debugf = ML_Debug(display_format="%f")
        debuglf = ML_Debug(display_format="%lf")
        debugx = ML_Debug(display_format="%x")
        debuglx = ML_Debug(display_format="%lx")
        debugd = ML_Debug(display_format="%d")
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)
        debug_ddtolx = ML_Debug(
            display_format="%\"PRIx64\" %\"PRIx64\"",
            pre_process=lambda v:
            "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" %
            (v, v))
        debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}",
                            pre_process=lambda v: "%s.hi, %s.lo" % (v, v))

        # input exponents, clamped to [-1020, 1020]
        ex = Max(Min(ExponentExtraction(vx), 1020),
                 -1020,
                 tag="ex",
                 debug=debugd)
        ey = Max(Min(ExponentExtraction(vy), 1020),
                 -1020,
                 tag="ey",
                 debug=debugd)

        exact_ex = ExponentExtraction(vx, tag="exact_ex")
        exact_ey = ExponentExtraction(vy, tag="exact_ey")

        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        # computing the division seed
        init_approx = None

        scaling_factor_x = ExponentInsertion(-ex, tag="sfx_ei")
        scaling_factor_y = ExponentInsertion(-ey, tag="sfy_ei")

        scaled_vx = vx * scaling_factor_x
        scaled_vy = vy * scaling_factor_y

        scaled_vx.set_attributes(debug=debug_lftolx, tag="scaled_vx")
        scaled_vy.set_attributes(debug=debug_lftolx, tag="scaled_vy")

        scaled_vx.set_precision(ML_Binary64)
        scaled_vy.set_precision(ML_Binary64)

        # forcing vx precision to make processor support test
        init_approx_precision = DivisionSeed(scaled_vx,
                                             scaled_vy,
                                             precision=self.precision,
                                             tag="seed",
                                             debug=debug_lftolx)
        if processor.is_supported_operation(init_approx_precision):
            init_approx = init_approx_precision
        elif self.precision != ML_Binary32:
            # no seed in the working precision: fall back on a binary32 seed
            # converted back to the working precision
            px = Conversion(scaled_vx,
                            precision=ML_Binary32,
                            tag="px",
                            debug=debugf)
            py = Conversion(scaled_vy,
                            precision=ML_Binary32,
                            tag="py",
                            debug=debugf)

            init_approx_fp32 = Conversion(DivisionSeed(
                px, py, precision=ML_Binary32, tag="seed", debug=debugf),
                                          precision=self.precision,
                                          tag="seed_ext",
                                          debug=debug_lftolx)
            if processor.is_supported_operation(init_approx_fp32):
                init_approx = init_approx_fp32
            else:
                Log.report(
                    Log.Error,
                    "The target %s does not implement division seed" %
                    processor)
        else:
            Log.report(
                Log.Error,
                "The target %s does not implement division seed" % processor)

        current_approx_std = init_approx
        # correctly-rounded inverse computation
        num_iteration = num_iter

        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()

        def compute_div(_init_approx, _vx=None, _vy=None, scale_result=None):
            """Build the quotient computation starting from a seed.

            Args:
                _init_approx: seed node refined by the Newton-Raphson steps.
                _vx: dividend node.
                _vy: divisor node.
                scale_result: integer exponent node by which the unscaled
                    quotient must be rescaled (split into three clamped
                    factors), or None when no rescaling is required.

            Returns:
                A 6-tuple ``(result, subnormal_result, final_approx,
                iterations, yerr_last, pre_result)`` where ``final_approx``
                is the last refined approximation, ``iterations`` the list of
                ``NR_Iteration`` objects, ``yerr_last`` the last residual
                node and ``pre_result`` the unscaled quotient node.
            """
            inv_iteration_list = []
            Attributes.set_default_rounding_mode(ML_RoundToNearest)
            Attributes.set_default_silent(True)
            _current_approx = _init_approx
            for i in range(num_iteration):
                # only the last refinement uses explicit FMA nodes
                new_iteration = NR_Iteration(
                    _current_approx,
                    _vy,
                    force_fma=(i == num_iteration - 1))
                inv_iteration_list.append(new_iteration)
                _current_approx = new_iteration.get_new_approx()
                _current_approx.set_attributes(tag="iter_%d" % i,
                                               debug=debug_lftolx)

            def dividend_mult(div_approx,
                              inv_approx,
                              dividend,
                              divisor,
                              index,
                              force_fma=False):
                """Return ``div_approx`` corrected by one residual step."""
                # yerr = dividend - div_approx * divisor
                yerr = FMSN(div_approx, divisor, dividend)
                yerr.set_attributes(tag="yerr%d" % index, debug=debug_lftolx)
                # new_div = div_approx + yerr * inv_approx
                new_div = FMA(yerr, inv_approx, div_approx)
                new_div.set_attributes(tag="new_div%d" % index,
                                       debug=debug_lftolx)
                return new_div

            # multiplication correction iteration
            # to get correctly rounded full division
            _current_approx.set_attributes(tag="final_approx",
                                           debug=debug_lftolx)
            current_div_approx = _vx * _current_approx
            num_dividend_mult_iteration = 1
            for i in range(num_dividend_mult_iteration):
                current_div_approx = dividend_mult(current_div_approx,
                                                   _current_approx, _vx, _vy,
                                                   i)

            # last iteration
            yerr_last = FMSN(current_div_approx, _vy, _vx)
            # defaults are released before the final FMA so that it carries
            # its own explicit rounding mode
            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()
            last_div_approx = FMA(yerr_last,
                                  _current_approx,
                                  current_div_approx,
                                  rounding_mode=ML_GlobalRoundMode)

            yerr_last.set_attributes(tag="yerr_last", debug=debug_lftolx)

            pre_result = last_div_approx
            pre_result.set_attributes(tag="unscaled_div_result",
                                      debug=debug_lftolx)
            if scale_result is not None:
                # the scaling exponent is split into three factors, the first
                # two being clamped to [-950, 950]
                scale_factor_0 = Max(Min(scale_result, 950),
                                     -950,
                                     tag="scale_factor_0",
                                     debug=debugd)
                scale_factor_1 = Max(Min(scale_result - scale_factor_0, 950),
                                     -950,
                                     tag="scale_factor_1",
                                     debug=debugd)
                scale_factor_2 = scale_result - (scale_factor_1 +
                                                 scale_factor_0)
                scale_factor_2.set_attributes(debug=debugd,
                                              tag="scale_factor_2")

                result = ((pre_result * ExponentInsertion(scale_factor_0)) *
                          ExponentInsertion(scale_factor_1)
                          ) * ExponentInsertion(scale_factor_2)
            else:
                result = pre_result
            result.set_attributes(tag="result", debug=debug_lftolx)

            # extended-precision variant of the last step, used as input of
            # the Subnormalize operation
            ext_pre_result = FMA(yerr_last,
                                 _current_approx,
                                 current_div_approx,
                                 precision=ML_DoubleDouble,
                                 tag="ext_pre_result",
                                 debug=debug_ddtolx)
            subnormal_pre_result = SpecificOperation(
                ext_pre_result,
                ex - ey,
                precision=self.precision,
                specifier=SpecificOperation.Subnormalize,
                tag="subnormal_pre_result",
                debug=debug_lftolx)
            sub_scale_factor = ex - ey
            sub_scale_factor_0 = Max(Min(sub_scale_factor, 950),
                                     -950,
                                     tag="sub_scale_factor_0",
                                     debug=debugd)
            sub_scale_factor_1 = Max(Min(sub_scale_factor - sub_scale_factor_0,
                                         950),
                                     -950,
                                     tag="sub_scale_factor_1",
                                     debug=debugd)
            sub_scale_factor_2 = sub_scale_factor - (sub_scale_factor_1 +
                                                     sub_scale_factor_0)
            sub_scale_factor_2.set_attributes(debug=debugd,
                                              tag="sub_scale_factor_2")
            subnormal_result = (
                subnormal_pre_result *
                ExponentInsertion(sub_scale_factor_0)) * ExponentInsertion(
                    sub_scale_factor_1,
                    tag="sr_ey_ei") * ExponentInsertion(sub_scale_factor_2)
            subnormal_result.set_attributes(debug=debug_lftolx,
                                            tag="subnormal_result")
            return (result, subnormal_result, _current_approx,
                    inv_iteration_list, yerr_last, pre_result)

        def bit_match(fp_optree, bit_id, likely=False, **kwords):
            """Return a node testing bit ``bit_id`` of ``fp_optree``'s encoding."""
            return NotEqual(BitLogicAnd(
                TypeCast(fp_optree, precision=ML_Int64), 1 << bit_id),
                            0,
                            likely=likely,
                            **kwords)

        def extract_and_inject_sign(sign_source,
                                    sign_dest,
                                    int_precision=ML_Int64,
                                    fp_precision=self.precision,
                                    **kwords):
            """Return ``sign_dest`` OR-ed with the sign bit of ``sign_source``."""
            int_sign_dest = sign_dest if isinstance(
                sign_dest.get_precision(), ML_Fixed_Format) else TypeCast(
                    sign_dest, precision=int_precision)
            return TypeCast(BitLogicOr(
                BitLogicAnd(TypeCast(sign_source, precision=int_precision),
                            1 << (self.precision.bit_size - 1)),
                int_sign_dest),
                            precision=fp_precision)

        # special-value predicates on the inputs
        x_zero = Test(vx, specifier=Test.IsZero, likely=False)
        y_zero = Test(vy, specifier=Test.IsZero, likely=False)

        comp_sign = Test(vx,
                         vy,
                         specifier=Test.CompSign,
                         tag="comp_sign",
                         debug=debuglx)

        y_nan = Test(vy, specifier=Test.IsNaN, likely=False)

        x_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False)
        y_snan = Test(vy, specifier=Test.IsSignalingNaN, likely=False)

        x_inf = Test(vx, specifier=Test.IsInfty, likely=False, tag="x_inf")
        y_inf = Test(vy,
                     specifier=Test.IsInfty,
                     likely=False,
                     tag="y_inf",
                     debug=debugd)

        scheme = None
        gappa_vx, gappa_vy = None, None
        gappa_init_approx = None
        gappa_current_approx = None

        if isinstance(processor, K1B_Processor):
            print("K1B specific generation")

            gappa_vx = vx
            gappa_vy = vy

            fast_init_approx = DivisionSeed(vx,
                                            vy,
                                            precision=self.precision,
                                            tag="fast_init_approx",
                                            debug=debug_lftolx)
            slow_init_approx = DivisionSeed(scaled_vx,
                                            scaled_vy,
                                            precision=self.precision,
                                            tag="slow_init_approx",
                                            debug=debug_lftolx)

            gappa_init_approx = fast_init_approx

            # individual bits of the seed encoding are read as status flags
            specific_case = bit_match(fast_init_approx,
                                      0,
                                      tag="b0_specific_case_bit",
                                      debug=debugd)
            y_subnormal_or_zero = bit_match(fast_init_approx,
                                            1,
                                            tag="b1_y_sub_or_zero",
                                            debug=debugd)
            x_subnormal_or_zero = bit_match(fast_init_approx,
                                            2,
                                            tag="b2_x_sub_or_zero",
                                            debug=debugd)
            y_inf_or_nan = bit_match(fast_init_approx,
                                     3,
                                     tag="b3_y_inf_or_nan",
                                     debug=debugd)
            inv_underflow = bit_match(fast_init_approx,
                                      4,
                                      tag="b4_inv_underflow",
                                      debug=debugd)
            x_inf_or_nan = bit_match(fast_init_approx,
                                     5,
                                     tag="b5_x_inf_or_nan",
                                     debug=debugd)
            mult_error_underflow = bit_match(fast_init_approx,
                                             6,
                                             tag="b6_mult_error_underflow",
                                             debug=debugd)
            mult_dividend_underflow = bit_match(
                fast_init_approx,
                7,
                tag="b7_mult_dividend_underflow",
                debug=debugd)
            mult_dividend_overflow = bit_match(fast_init_approx,
                                               8,
                                               tag="b8_mult_dividend_overflow",
                                               debug=debugd)
            direct_result_flag = bit_match(fast_init_approx,
                                           9,
                                           tag="b9_direct_result_flag",
                                           debug=debugd)
            div_overflow = bit_match(fast_init_approx,
                                     10,
                                     tag="b10_div_overflow",
                                     debug=debugd)

            # slow path works on the scaled inputs and rescales by ex - ey,
            # fast path works directly on the inputs
            (slow_result, slow_result_subnormal, _, _, _,
             _) = compute_div(slow_init_approx,
                              scaled_vx,
                              scaled_vy,
                              scale_result=ex - ey)
            (fast_result, _, fast_current_approx, inv_iteration_list, _,
             _) = compute_div(fast_init_approx, vx, vy, scale_result=None)
            gappa_current_approx = fast_current_approx

            pre_scheme = ConditionBlock(
                NotEqual(specific_case,
                         0,
                         tag="specific_case",
                         likely=True,
                         debug=debugd),
                Return(fast_result),
                ConditionBlock(
                    Equal(direct_result_flag, 0, tag="direct_result_case"),
                    Return(fast_init_approx),
                    ConditionBlock(
                        x_subnormal_or_zero | y_subnormal_or_zero
                        | inv_underflow | mult_error_underflow
                        | mult_dividend_overflow | mult_dividend_underflow,
                        ConditionBlock(
                            x_zero | y_zero,
                            Return(fast_init_approx),
                            ConditionBlock(
                                Test(slow_result, specifier=Test.IsSubnormal),
                                Return(slow_result_subnormal),
                                Return(slow_result)),
                        ),
                        ConditionBlock(
                            x_inf_or_nan,
                            Return(fast_init_approx),
                            ConditionBlock(
                                y_inf_or_nan,
                                Return(fast_init_approx),
                                ConditionBlock(
                                    NotEqual(div_overflow,
                                             0,
                                             tag="div_overflow_case"),
                                    Return(
                                        RoundedSignedOverflow(
                                            fast_init_approx,
                                            tag="signed_inf")),
                                    Return(FP_SNaN(self.precision))))))))

            scheme = Statement(fast_result, pre_scheme)

        else:
            print("generic generation")

            x_inf_or_nan = Test(vx, specifier=Test.IsInfOrNaN, likely=False)
            y_inf_or_nan = Test(vy,
                                specifier=Test.IsInfOrNaN,
                                likely=False,
                                tag="y_inf_or_nan",
                                debug=debugd)

            # compute_div expects a single integer exponent for scale_result
            # (same convention as the K1B slow path); it also hands back the
            # residual and unscaled quotient nodes needed by the scheme below
            (result, subnormal_result, gappa_current_approx,
             inv_iteration_list, yerr_last,
             pre_result) = compute_div(current_approx_std,
                                       scaled_vx,
                                       scaled_vy,
                                       scale_result=ex - ey)
            gappa_vx = scaled_vx
            gappa_vy = scaled_vy
            gappa_init_approx = init_approx

            # x inf and y inf
            pre_scheme = ConditionBlock(
                x_inf_or_nan,
                ConditionBlock(
                    x_inf,
                    ConditionBlock(
                        y_inf_or_nan,
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision)),
                        ),
                        ConditionBlock(comp_sign,
                                       Return(FP_MinusInfty(self.precision)),
                                       Return(FP_PlusInfty(self.precision)))),
                    Statement(ConditionBlock(x_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    x_zero,
                    ConditionBlock(
                        y_zero | y_nan,
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision))), Return(vx)),
                    ConditionBlock(
                        y_inf_or_nan,
                        ConditionBlock(
                            y_inf,
                            Return(
                                Select(comp_sign, FP_MinusZero(self.precision),
                                       FP_PlusZero(self.precision))),
                            Statement(
                                ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                                Return(FP_QNaN(self.precision)))),
                        ConditionBlock(
                            y_zero,
                            Statement(
                                Raise(ML_FPE_DivideByZero),
                                ConditionBlock(
                                    comp_sign,
                                    Return(FP_MinusInfty(self.precision)),
                                    Return(FP_PlusInfty(self.precision)))),
                            ConditionBlock(
                                Test(result,
                                     specifier=Test.IsSubnormal,
                                     likely=False),
                                Statement(
                                    ConditionBlock(
                                        Comparison(
                                            yerr_last,
                                            0,
                                            specifier=Comparison.NotEqual,
                                            likely=True),
                                        Statement(
                                            Raise(ML_FPE_Inexact,
                                                  ML_FPE_Underflow))),
                                    Return(subnormal_result),
                                ),
                                Statement(
                                    ConditionBlock(
                                        Comparison(
                                            yerr_last,
                                            0,
                                            specifier=Comparison.NotEqual,
                                            likely=True),
                                        Raise(ML_FPE_Inexact)),
                                    Return(result)))))))
            rnd_mode = GetRndMode()
            scheme = Statement(rnd_mode, SetRndMode(ML_RoundToNearest),
                               yerr_last, SetRndMode(rnd_mode), pre_result,
                               ClearException(), result, pre_scheme)

        opt_eng = OptimizationEngine(processor)

        # fusing FMA
        if fuse_fma:
            print("MDL fusing FMA")
            scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print("MDL abstract scheme")
        opt_eng.instantiate_abstract_precision(scheme, None)

        print("MDL instantiated scheme")
        opt_eng.instantiate_precision(scheme, default_precision=self.precision)

        print("subexpression sharing")
        opt_eng.subexpression_sharing(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        print("checking processor support")
        opt_eng.check_processor_support(scheme)

        print("Gappa script generation")

        cg = CCodeGenerator(processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = exp_implementation.get_definition(cg,
                                                        C_Code,
                                                        static_cst=True)
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        self.result.add_header("support_lib/ml_special_values.h")

        # the context manager closes the file even if generation fails
        with open(output_file, "w") as output_stream:
            output_stream.write(self.result.get(cg))

        # Gappa evaluation of the final approximation against exact 1 / y:
        # seed and inputs are replaced by interval-constrained variables
        seed_var = Variable("seed",
                            precision=self.precision,
                            interval=Interval(0.5, 1))
        cg_eval_error_copy_map = {
            gappa_init_approx.get_handle().get_node():
            seed_var,
            gappa_vx.get_handle().get_node():
            Variable("x", precision=self.precision, interval=Interval(1, 2)),
            gappa_vy.get_handle().get_node():
            Variable("y", precision=self.precision, interval=Interval(1, 2)),
        }
        G1 = Constant(1, precision=ML_Exact)
        exact = G1 / gappa_vy
        exact.set_precision(ML_Exact)
        exact.set_tag("div_exact")
        gappa_goal = gappa_current_approx.get_handle().get_node() - exact
        gappa_goal.set_precision(ML_Exact)
        gappacg = GappaCodeGenerator(target,
                                     declare_cst=False,
                                     disable_debug=True)
        gappa_code = gappacg.get_interval_code(gappa_goal,
                                               cg_eval_error_copy_map)

        new_exact_node = exact.get_handle().get_node()

        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_node)

        seed_wrt_exact = seed_var - new_exact_node
        seed_wrt_exact.set_precision(ML_Exact)
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact,
                               Interval(-S2**-7, S2**-7))

        # the Gappa run is best-effort: a failure is reported, not raised
        try:
            eval_error = execute_gappa_script_extract(
                gappa_code.get(gappacg))["goal"]
        except Exception as gappa_failure:
            print("error during gappa run: %s" % (gappa_failure, ))
        else:
            print("eval_error:  %s" % (eval_error, ))
Example #11
0
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        #try:
        if 1:
            #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            Log.report(Log.Info, "eval error: %s" % eval_error)
        #except:
        #    Log.report(Log.Info, "gappa error evaluation failed")
        print r.get_str(depth = None, display_precision = True, display_attribute = True)
        print opt_r.get_str(depth = None, display_precision = True, display_attribute = True)

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print "ulp: ", local_ulp 
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree

        return


        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            #print "poly: ", poly.get_str(depth = None, display_precision = True)
            #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print "global_poly_error: ", global_poly_error, global_rel_poly_error 
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print "test: ", flag
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        #print scheme.get_str(depth = None, display_precision = True)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        #self.result.add_header("support_lib/ml_types.h")
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
Example #12
0
    def __init__(self,
                 precision = ML_Binary32,
                 abs_accuracy = S2**-24,
                 libm_compliant = True,
                 debug_flag = False,
                 fuse_fma = True,
                 num_iter = 3,
                 fast_path_extract = True,
                 target = GenericProcessor(),
                 output_file = "__divsf3.c",
                 function_name = "__divsf3"):
        """Generate a C implementation of the division x / y.

        The method builds the operation graph (input scaling, division seed,
        Newton-Raphson refinement of the reciprocal, correction steps on the
        quotient, special-case handling), runs the optimization passes,
        writes the generated C source to `output_file`, and finally runs a
        Gappa evaluation of the reciprocal approximation error, which is
        printed on standard output.

        Args:
            precision: format of both inputs and of the result. It must be a
                key of the extended-precision map built below (ML_Binary32
                or ML_Binary64), otherwise a KeyError is raised.
            abs_accuracy: not referenced by this method.
            libm_compliant: forwarded to CCodeGenerator.
            debug_flag: debug output is disabled in the C code generator
                when this is false.
            fuse_fma: when true, the FMA fusing pass is applied to the scheme.
            num_iter: number of Newton-Raphson iterations applied to the
                reciprocal approximation; the last one is built from
                explicit FMA nodes.
            fast_path_extract: not referenced by this method (the fast path
                factorization call is commented out).
            target: processor object handed to OptimizationEngine,
                CCodeGenerator and GappaCodeGenerator.
            output_file: path of the C file to write.
            function_name: name given to the generated function.

        NOTE(review): the default `target` instance is created once, when the
        method is defined, and is therefore shared by every call relying on
        the default.
        """
        # declaring CodeFunction and retrieving input variable
        self.precision = precision
        self.function_name = function_name
        div_implementation = CodeFunction(self.function_name, output_format = precision)
        vx = div_implementation.add_input_variable("x", precision)
        vy = div_implementation.add_input_variable("y", precision)

        class NR_Iteration(object):
            """One Newton-Raphson refinement step of an approximation of 1 / divisor."""

            def __init__(self, approx, divisor, force_fma = False):
                self.approx = approx
                self.divisor = divisor
                self.force_fma = force_fma
                if force_fma:
                    # same two steps as the else branch, built from explicit FMA nodes
                    self.error = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                    self.new_approx = FusedMultiplyAdd(self.error, self.approx, self.approx, specifier = FusedMultiplyAdd.Standard)
                else:
                    self.error = 1 - divisor * approx
                    self.new_approx = self.approx + self.error * self.approx

            def get_new_approx(self):
                """Return the node holding the refined approximation."""
                return self.new_approx

            def get_hint_rules(self, gcg, gappa_code, exact):
                """Register the Gappa rewriting hints associated with this step.

                `gcg` is the Gappa code generator, `gappa_code` the code
                object receiving the hints and `exact` the node standing for
                the exact reciprocal.
                """
                divisor = self.divisor.get_handle().get_node()
                approx = self.approx.get_handle().get_node()
                new_approx = self.new_approx.get_handle().get_node()

                # hint expressions are built with the exact format as default
                # precision; the default is restored even if building fails
                Attributes.set_default_precision(ML_Exact)
                try:
                    if self.force_fma:
                        rule0 = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                    else:
                        rule0 = 1.0 - divisor * approx
                    rule1 = 1.0 - divisor * (approx - exact) - 1.0

                    rule2 = new_approx - exact
                    subrule = approx * (2 - divisor * approx)
                    rule3 = (new_approx - subrule) - (approx - exact) * (approx - exact) * divisor

                    if self.force_fma:
                        new_error = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                        rule4 = FusedMultiplyAdd(new_error, approx, approx)
                    else:
                        rule4 = approx + (1 - divisor * approx) * approx
                finally:
                    Attributes.unset_default_precision()

                # registering hints
                gcg.add_hint(gappa_code, rule0, rule1)
                gcg.add_hint(gappa_code, rule2, rule3)
                gcg.add_hint(gappa_code, subrule, rule4)

        # debug display descriptors attached to the nodes below
        debugf        = ML_Debug(display_format = "%f")
        debuglf       = ML_Debug(display_format = "%lf")
        debugx        = ML_Debug(display_format = "%x")
        debuglx       = ML_Debug(display_format = "%lx")
        debugd        = ML_Debug(display_format = "%d")
        debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_ddtolx  = ML_Debug(display_format = "%\"PRIx64\" %\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" % (v, v))
        debug_dd      = ML_Debug(display_format = "{.hi=%lf, .lo=%lf}", pre_process = lambda v: "%s.hi, %s.lo" % (v, v))

        # exponents of both inputs, clamped from above
        # NOTE(review): the 1020 bound looks tailored to binary64 while the
        # default precision is binary32 -- confirm
        ex = Min(ExponentExtraction(vx, tag = "ex", debug = debugd), 1020)
        ey = Min(ExponentExtraction(vy, tag = "ey", debug = debugd), 1020)

        scaling_factor_x = ExponentInsertion(-ex) #ConditionalAllocation(Abs(ex) > 100, -ex, 0)
        scaling_factor_y = ExponentInsertion(-ey) #ConditionalAllocation(Abs(ey) > 100, -ey, 0)

        # inputs multiplied by 2^-ex and 2^-ey respectively
        scaled_vx = vx * scaling_factor_x
        scaled_vy = vy * scaling_factor_y

        scaled_vx.set_attributes(debug = debug_lftolx, tag = "scaled_vx")
        scaled_vy.set_attributes(debug = debug_lftolx, tag = "scaled_vy")

        # binary32 operands for the division seed
        # NOTE(review): when precision is ML_Binary32 the *unscaled* inputs
        # are used here, whereas the iterations below work on scaled_vy --
        # confirm this is intended
        px = Conversion(scaled_vx, precision = ML_Binary32, tag = "px", debug=debugf) if self.precision != ML_Binary32 else vx
        py = Conversion(scaled_vy, precision = ML_Binary32, tag = "py", debug=debugf) if self.precision != ML_Binary32 else vy

        pre_init_approx = DivisionSeed(px, py, precision = ML_Binary32, tag = "seed", debug = debugf)
        init_approx = Conversion(pre_init_approx, precision = self.precision, tag = "seedd", debug = debug_lftolx) if self.precision != ML_Binary32 else pre_init_approx

        current_approx = init_approx
        # correctly-rounded inverse computation
        num_iteration = num_iter
        inv_iteration_list = []

        # nodes created from here until the matching unset calls use
        # round-to-nearest and the silent attribute as defaults
        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        for i in range(num_iteration):
            # only the last iteration is built from explicit FMA nodes
            new_iteration = NR_Iteration(current_approx, scaled_vy, force_fma = (i == num_iteration - 1))
            inv_iteration_list.append(new_iteration)
            current_approx = new_iteration.get_new_approx()
            current_approx.set_attributes(tag = "iter_%d" % i, debug = debug_lftolx)


        def dividend_mult(div_approx, inv_approx, dividend, divisor, index, force_fma = False):
            """Return a corrected quotient: div_approx + (dividend - div_approx * divisor) * inv_approx.

            `force_fma` is accepted but not used.
            """
            yerr = dividend - div_approx * divisor
            #yerr = FMSN(div_approx, divisor, dividend)
            yerr.set_attributes(tag = "yerr%d" % index, debug = debug_lftolx)
            new_div = div_approx + yerr * inv_approx
            #new_div = FMA(yerr, inv_approx, div_approx)
            new_div.set_attributes(tag = "new_div%d" % index, debug = debug_lftolx)
            return new_div

        # multiplication correction iteration
        # to get correctly rounded full division
        current_approx.set_attributes(tag = "final_approx", debug = debug_lftolx)
        current_div_approx = scaled_vx * current_approx
        num_dividend_mult_iteration = 1
        for i in range(num_dividend_mult_iteration):
            current_div_approx = dividend_mult(current_div_approx, current_approx, scaled_vx, scaled_vy, i)


        # last iteration
        yerr_last = FMSN(current_div_approx, scaled_vy, scaled_vx) #, clearprevious = True)
        # the final FMA is created after the defaults are released
        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()
        last_div_approx = FMA(yerr_last, current_approx, current_div_approx)

        yerr_last.set_attributes(tag = "yerr_last", debug = debug_lftolx)

        pre_result = last_div_approx
        pre_result.set_attributes(tag = "unscaled_div_result", debug = debug_lftolx)
        # undo the input scaling: multiply by 2^ex then by 2^-ey
        result = pre_result * ExponentInsertion(ex) * ExponentInsertion(-ey)
        result.set_attributes(tag = "result", debug = debug_lftolx)


        # predicates on the inputs used by the special-case tree
        x_inf_or_nan = Test(vx, specifier = Test.IsInfOrNaN, likely = False)
        y_inf_or_nan = Test(vy, specifier = Test.IsInfOrNaN, likely = False, tag = "y_inf_or_nan", debug = debugd)
        comp_sign = Test(vx, vy, specifier = Test.CompSign, tag = "comp_sign", debug = debuglx )
        x_zero = Test(vx, specifier = Test.IsZero, likely = False)
        y_zero = Test(vy, specifier = Test.IsZero, likely = False)

        y_nan = Test(vy, specifier = Test.IsNaN, likely = False)

        x_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False)
        y_snan = Test(vy, specifier = Test.IsSignalingNaN, likely = False)

        x_inf = Test(vx, specifier = Test.IsInfty, likely = False, tag = "x_inf")
        y_inf = Test(vy, specifier = Test.IsInfty, likely = False, tag = "y_inf", debug = debugd)

        # determining an extended precision
        ext_precision_map = {
            ML_Binary32: ML_Binary64,
            ML_Binary64: ML_DoubleDouble,
        }
        ext_precision = ext_precision_map[self.precision]

        # final correction recomputed in the extended format, used for the
        # result returned on the subnormal path
        ext_pre_result = FMA(yerr_last, current_approx, current_div_approx, precision = ext_precision, tag = "ext_pre_result", debug = debug_ddtolx)
        subnormal_result = None
        if isinstance(ext_precision, ML_Compound_FP_Format):
            subnormal_pre_result = SpecificOperation(ext_pre_result, ex - ey, precision = self.precision, specifier = SpecificOperation.Subnormalize, tag = "subnormal_pre_result", debug = debug_lftolx)
            subnormal_result = (subnormal_pre_result * ExponentInsertion(ex)) * ExponentInsertion(-ey)
        else:
            subnormal_result = Conversion(ext_pre_result * ExponentInsertion(ex - ey, tag = "final_scaling_factor", precision = ext_precision), precision = self.precision)


        # special-case tree: x inf/NaN first, then x zero, then y inf/NaN,
        # then y zero, and finally the subnormal / standard result paths
        pre_scheme = ConditionBlock(x_inf_or_nan,
            ConditionBlock(x_inf,
                ConditionBlock(y_inf_or_nan,
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision)),
                    ),
                    ConditionBlock(comp_sign, Return(FP_MinusInfty(self.precision)), Return(FP_PlusInfty(self.precision)))
                ),
                Statement(
                    ConditionBlock(x_snan, Raise(ML_FPE_Invalid)),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(x_zero,
                ConditionBlock(y_zero | y_nan,
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision))
                    ),
                    Return(vx)
                ),
                ConditionBlock(y_inf_or_nan,
                    ConditionBlock(y_inf,
                        Return(Select(comp_sign, FP_MinusZero(self.precision), FP_PlusZero(self.precision))),
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision))
                        )
                    ),
                    ConditionBlock(y_zero,
                        Statement(
                            Raise(ML_FPE_DivideByZero),
                            ConditionBlock(comp_sign,
                                Return(FP_MinusInfty(self.precision)),
                                Return(FP_PlusInfty(self.precision))
                            )
                        ),
                        ConditionBlock(Test(result, specifier = Test.IsSubnormal, likely = False),
                            Statement(
                                # a non-zero last residual marks the result as inexact
                                ConditionBlock(Comparison(yerr_last, 0, specifier = Comparison.NotEqual, likely = True),
                                    Statement(Raise(ML_FPE_Inexact, ML_FPE_Underflow))
                                ),
                                Return(subnormal_result),
                            ),
                            Statement(
                                ConditionBlock(Comparison(yerr_last, 0, specifier = Comparison.NotEqual, likely = True),
                                    Raise(ML_FPE_Inexact)
                                ),
                                Return(result)
                            )
                        )
                    )
                )
            )
        )

        # the last residual is evaluated between a switch to round-to-nearest
        # and the restoration of the rounding mode read beforehand
        rnd_mode = GetRndMode()
        scheme = Statement(rnd_mode, SetRndMode(ML_RoundToNearest), yerr_last, SetRndMode(rnd_mode), pre_result, ClearException(), result, pre_scheme)


        processor = target

        opt_eng = OptimizationEngine(processor)

        # fusing FMA
        if fuse_fma:
            print("MDL fusing FMA")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        print("MDL abstract scheme")
        opt_eng.instantiate_abstract_precision(scheme, None)


        print("MDL instantiated scheme")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        print("subexpression sharing")
        opt_eng.subexpression_sharing(scheme)

        #print "silencing operation"
        #opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        div_implementation.set_scheme(scheme)

        #print scheme.get_str(depth = None, display_precision = True)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        #opt_eng.factorize_fast_path(scheme)

        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = div_implementation.get_definition(cg, C_Code, static_cst = True)
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        self.result.add_header("support_lib/ml_special_values.h")

        # the context manager closes the file even if generation or write fails
        with open(output_file, "w") as output_stream:
            output_stream.write(self.result.get(cg))

        # Gappa evaluation of the reciprocal approximation error: the seed
        # and the scaled inputs are replaced by interval-constrained variables
        seed_var = Variable("seed", precision = self.precision, interval = Interval(0.5, 1))
        cg_eval_error_copy_map = {
            init_approx.get_handle().get_node(): seed_var,
            scaled_vx.get_handle().get_node(): Variable("x", precision = self.precision, interval = Interval(1, 2)),
            scaled_vy.get_handle().get_node(): Variable("y", precision = self.precision, interval = Interval(1, 2)),
        }
        G1 = Constant(1, precision = ML_Exact)
        exact = G1 / scaled_vy
        exact.set_precision(ML_Exact)
        exact.set_tag("div_exact")
        # goal: distance between the final reciprocal approximation and 1 / y
        gappa_goal = current_approx.get_handle().get_node() - exact
        gappa_goal.set_precision(ML_Exact)
        gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
        gappa_code = gappacg.get_interval_code(gappa_goal, cg_eval_error_copy_map)

        new_exact_node = exact.get_handle().get_node()

        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_node)

        # hypothesis: the seed lies within 2^-7 of the exact reciprocal
        seed_wrt_exact = seed_var - new_exact_node
        seed_wrt_exact.set_precision(ML_Exact)
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact, Interval(-S2**-7, S2**-7))

        eval_error = execute_gappa_script_extract(gappa_code.get(gappacg))["goal"]
        # single-argument form: same output as the former two-item print
        # statement, and valid under both Python 2 and Python 3
        print("eval_error:  %s" % (eval_error,))
Example #13
0
# -*- coding: utf-8 -*-

from metalibm_core.core.attributes import ML_Debug, ML_AdvancedDebug, ML_MultiDebug
from metalibm_core.core.ml_formats import *

# Debug display descriptors.
# Each one carries the printf-style conversion used to display a value;
# pre_process, when given, presumably rewrites the displayed expression
# text first (confirm against ML_Debug).

# floating-point value displayed with "%f"
debugf = ML_Debug(display_format="%f")

# floating-point value displayed with "%lf"
debuglf = ML_Debug(display_format="%lf")

# integer displayed in hexadecimal
debugx = ML_Debug(display_format="%x")

# 64-bit integer displayed in hexadecimal through the PRIx64 macro
debuglx = ML_Debug(display_format="%\"PRIx64\"")

# integer displayed with "%d", the expression being cast to int beforehand
debugd = ML_Debug(display_format="%d", pre_process=lambda v: "(int) %s" % v)

# integer displayed with "%ld"
debugld = ML_Debug(display_format="%ld")

# integer displayed with "%lld"
debuglld = ML_Debug(display_format="%lld")


def fixed_point_pre_process(value, optree):
    """Build the pair of display arguments for a fixed-point value.

    The returned text holds two comma-separated expressions: `value` cast to
    double and multiplied by 2^-frac_size (frac_size being read from the
    precision of `optree`), followed by `value` itself.
    """
    frac_size = optree.get_precision().get_frac_size()
    weight = S2**-frac_size
    return "(%e * (double)%s), %s" % (weight, value, value)

# fixed-point debug display: the "%e(%d)" format is fed by the two
# expressions produced by fixed_point_pre_process
debug_fixed32 = ML_AdvancedDebug(
    display_format="%e(%d)",
    pre_process=fixed_point_pre_process,
)