Python floor 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: sollya

메소드/함수: floor

hotexamples.com에서의 예제들: 30

Python floor - 예제 30개를 찾았습니다. 오픈소스 프로젝트에서 추출한 Python sollya.floor의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: ml2_wide_sin.py 프로젝트: IanBriggs/OpTuner

        def split_domain(starting_domain, slivers):
            """Cut ``starting_domain`` into sub-intervals, in three passes.

            1. An interval that strictly straddles zero is split at zero.
            2. Each interval is cut next to a multiple of ``n_pi`` until
               ``floor(x * n_invpi)`` takes a single value over it (the pair
               ``-1`` / ``0`` is accepted as well).
            3. Every remaining interval is bisected ``slivers`` times.

            ``n_pi`` and ``n_invpi`` are taken from the enclosing scope.

            Returns the distinct intervals with non-equal bounds, sorted by
            lower bound.
            """
            # pass 1: separate the negative and positive sides
            sign_split = []
            for domain in [starting_domain]:
                if sollya.inf(domain) < 0 and sollya.sup(domain) > 0:
                    sign_split.append(sollya.Interval(sollya.inf(domain), 0))
                    sign_split.append(sollya.Interval(0, sollya.sup(domain)))
                else:
                    sign_split.append(domain)

            # pass 2: cut until the rounded-down multiple is unique
            pending = sign_split
            accepted = []
            while pending:
                piece = pending.pop()
                scaled = piece * n_invpi
                k_low = sollya.floor(sollya.inf(scaled))
                k_high = sollya.floor(sollya.sup(scaled))
                if k_low == k_high or (k_low == -1 and k_high == 0):
                    accepted.append(piece)
                    continue

                # the cut sits at the next multiple of n_pi above k_low; the
                # other edge of the gap is that value shrunk by a 2**-53
                # relative step, placed on the side facing zero
                boundary = (k_low + 1) * n_pi
                shrunk = boundary - boundary * 2**-53
                if sollya.sup(piece) <= 0:
                    cut_low, cut_high = boundary, shrunk
                else:
                    cut_low, cut_high = shrunk, boundary

                # lower part is pushed first, so the upper part is popped next
                pending.append(sollya.Interval(sollya.inf(piece), cut_low))
                pending.append(sollya.Interval(cut_high, sollya.sup(piece)))

            # pass 3: each round halves every interval at its midpoint
            current = accepted
            for _ in range(slivers):
                halved = []
                for piece in current:
                    center = sollya.mid(piece)
                    halved.extend((
                        sollya.Interval(sollya.inf(piece), center),
                        sollya.Interval(center, sollya.sup(piece)),
                    ))
                current = halved

            # deduplicate, order by lower bound, drop point intervals
            ordered = sorted(set(current),
                             key=lambda dom: float(sollya.inf(dom)))
            return [dom for dom in ordered
                    if sollya.inf(dom) != sollya.sup(dom)]

예제 #2

파일 보기

        def split_domain(starting_domain, slivers):
            """Cut ``starting_domain`` so that ``floor(log2(x))`` is constant
            on each piece, then bisect every piece ``slivers`` times.

            Returns the distinct intervals with non-equal bounds, sorted by
            lower bound.
            """
            pending = [starting_domain]
            accepted = []
            while pending:
                piece = pending.pop()
                log_range = sollya.log2(piece)
                exp_low = sollya.floor(sollya.inf(log_range))
                exp_high = sollya.floor(sollya.sup(log_range))
                if exp_low == exp_high:
                    accepted.append(piece)
                    continue

                # bisect 2**[exp_low, exp_low + 1] for 100 rounds, keeping the
                # half in which floor(log2(.)) stops being exp_low
                bracket = 2**sollya.Interval(exp_low, exp_low + 1)
                for _ in range(100):
                    center = sollya.mid(bracket)
                    if sollya.floor(sollya.log2(center)) == exp_low:
                        bracket = sollya.Interval(center, sollya.sup(bracket))
                    else:
                        bracket = sollya.Interval(sollya.inf(bracket), center)
                cut_high = sollya.sup(bracket)
                cut_low = sollya.inf(bracket)

                below = sollya.Interval(sollya.inf(piece), cut_low)
                above = sollya.Interval(cut_high, sollya.sup(piece))
                # upper part is pushed first, so the lower part is popped next
                pending.append(above)
                pending.append(below)

            # each round halves every interval at its midpoint
            current = accepted
            for _ in range(slivers):
                halved = []
                for piece in current:
                    center = sollya.mid(piece)
                    halved.extend((
                        sollya.Interval(sollya.inf(piece), center),
                        sollya.Interval(center, sollya.sup(piece)),
                    ))
                current = halved

            # deduplicate, order by lower bound, drop point intervals
            ordered = sorted(set(current),
                             key=lambda dom: float(sollya.inf(dom)))
            return [dom for dom in ordered
                    if sollya.inf(dom) != sollya.sup(dom)]

예제 #3

파일 보기

def solve_format_CLZ(optree):
    """ Determine a legal result format for a CountLeadingZeros node

        Args:
            optree (CountLeadingZeros): node whose format must be solved

        Returns:
            ML_Format: an unsigned fixed-point format without fractional
                bits when the operand is fixed-point, otherwise the
                node's current precision (after a warning)
    """
    assert isinstance(optree, CountLeadingZeros)
    operand_format = optree.get_input(0).get_precision()

    if not is_fixed_point(operand_format):
        Log.report(Log.Warning , "unsupported format in solve_format_CLZ")
        return optree.get_precision()

    if operand_format.get_signed():
        Log.report(Log.Warning , "signed format in solve_format_CLZ")

    # floor(log2(n)) + 1 bits can hold every count from 0 up to n included
    operand_width = operand_format.get_bit_size()
    result_width = int(sollya.floor(sollya.log2(operand_width))) + 1
    return fixed_point(result_width, 0, signed=False)

예제 #4

파일 보기

    def get_lzc_output_width(width):
        """ Compute the bit width of a standard leading zero count result
        for a width-bit input

        @param width [int] input width
        @return output width (in bits) """
        # index of the most significant bit needed to write <width> itself
        msb_index = floor(log2(width))
        return int(msb_index) + 1

예제 #5

파일 보기

 def get_integer_coding(self, value, language=C_Code):
     """ Encode value as one integer packing mantissa, exponent and sign

         Special values and +/- ml_infty are delegated to
         get_special_value_coding. Any other value is rounded to this
         format (round-to-nearest) and packed as
         mantissa | exponent << field_size | sign << (field_size + exponent_size).

         @param value number or special value to encode
         @param language target language, only forwarded to
                get_special_value_coding
         @return integer encoding of value """
     if FP_SpecialValue.is_special_value(value):
         return self.get_special_value_coding(value, language)
     if value == ml_infty:
         return self.get_special_value_coding(FP_PlusInfty(self), language)
     if value == -ml_infty:
         return self.get_special_value_coding(FP_MinusInfty(self), language)

     rounded = sollya.round(value, self.get_sollya_object(), sollya.RN)
     # FIXME: managing negative zero
     sign = int(1 if rounded < 0 else 0)
     magnitude = abs(rounded)
     if magnitude == 0.0:
         Log.report(Log.Warning,
                    "+0.0 forced during get_integer_coding conversion")
         exp_biased = 0
         mant = 0
     else:
         exp = int(sollya.floor(sollya.log2(magnitude)))
         exp_biased = int(exp - self.get_bias())
         if exp < self.get_emin_normal():
             # below the normal range: exponent field is zero and the
             # mantissa is the value counted in units of 2**emin_subnormal
             exp_biased = 0
             mant = int((magnitude / S2**self.get_emin_subnormal()))
         else:
             # normal range: drop the leading one, keep the fraction
             fraction = magnitude / S2**exp - 1.0
             mant = int(fraction * (S2**self.get_field_size()))

     exponent_offset = self.get_field_size()
     sign_offset = self.get_field_size() + self.get_exponent_size()
     return mant | (exp_biased << exponent_offset) | (sign << sign_offset)

예제 #6

파일 보기

 def round_sollya_object(self, value, round_mode=sollya.RN):
     """ Round value to a multiple of 1 / S2**self.get_frac_size()

         value is scaled up by S2**frac_size, rounded to an integer with
         the function matching round_mode, then scaled back down.

         @param value number to round
         @param round_mode one of sollya.RN, sollya.RD, sollya.RU, sollya.RZ;
                any other key raises KeyError
         @return the rounded value """
     def toward_zero(x):
         # truncation: round down strictly positive values, up otherwise
         if x > 0:
             return sollya.floor(x)
         return sollya.ceil(x)

     integer_rounding = {
         sollya.RN: sollya.nearestint,
         sollya.RD: sollya.floor,
         sollya.RU: sollya.ceil,
         sollya.RZ: toward_zero,
     }
     to_integer = integer_rounding[round_mode]
     scale_factor = S2**self.get_frac_size()
     scaled = scale_factor * value
     return to_integer(scaled) / scale_factor

예제 #7

파일 보기

파일: implementpoly.py 프로젝트: metalibm/metalibm

def get_accuracy_from_epsilon(epsilon):
    """ translate a relative error bound into a count of accurate bits,
        computed as floor(-log2(|epsilon|))

        :param epsilon: error to convert
        :type epsilon: number
        :return: accuracy corresponding to the error
        :rtype: SollyaObject
    """
    error_magnitude = abs(epsilon)
    bit_count = -sollya.log2(error_magnitude)
    return sollya.floor(bit_count)

예제 #8

파일 보기

def get_fixed_type_from_interval(interval, precision):
    """ build a fixed-point format able to encode every value of
        @p interval without overflow, and which spans
        @p precision bits """
    lower_bound = inf(interval)
    upper_bound = sup(interval)
    if lower_bound < 0:
        signed, extra_digit = True, 1
    else:
        signed, extra_digit = False, 0
    # one past the index of the top bit of the largest magnitude
    largest_magnitude = max(abs(lower_bound), abs(upper_bound))
    msb_index = int(floor(sollya.log2(largest_magnitude))) + 1
    # a signed format gets one extra leading digit; the second field
    # receives whatever remains of <precision> below msb_index
    first_field = msb_index + extra_digit
    second_field = -(msb_index - precision)
    return fixed_point(first_field, second_field, signed=signed)

예제 #9

파일 보기

  def evaluate_argument_reduction(self, in_interval, in_prec, inv_size, inv_prec):
    """ Evaluate one argument-reduction configuration with gappa

        Builds the operation graph
            x = dx + 1, x1 = x rounded toward -infinity, inv_x = (1 / x1)
            rounded toward +infinity, y = x * inv_x, dy = y - 1
        then asks gappa for the interval of dy.

        @param in_interval interval attached to the input variable dx
        @param in_prec second argument of the fixed-point format of dx
               (presumably its fractional size -- TODO confirm)
        @param inv_size second argument of the fixed-point format of x1,
               also used as exponent of the table index scale S2**inv_size
        @param inv_prec second argument of the fixed-point format of inv_x
        @return dict with keys 'out_interval' (gappa result for the goal),
                'length_table' and 'sizeof_table' """
    one = Constant(1, precision = ML_Exact, tag = "one")
    
    dx =     Variable("dx",
                      precision = ML_Custom_FixedPoint_Format(0, in_prec, False),
                      interval = in_interval)
    
    # do the argument reduction
    x =       Addition(dx, one, tag = "x",
                       precision = ML_Exact)
    x1 =    Conversion(x, tag = "x1",
                       precision = ML_Custom_FixedPoint_Format(0, inv_size, False),
                       rounding_mode = ML_RoundTowardMinusInfty)
    # s is only referenced by the commented-out add_goal call below
    s = Multiplication(dx, Constant(S2**inv_size, precision = ML_Exact),
                       precision = ML_Exact,
                       tag = "interval_index_table")
    inv_x1 =  Division(one, x1, tag = "ix1",
                       precision = ML_Exact)
    inv_x = Conversion(inv_x1,  tag = "ix",
                       precision = ML_Custom_FixedPoint_Format(1, inv_prec, False),
                       rounding_mode = ML_RoundTowardPlusInfty)
    y = Multiplication(x, inv_x, tag = "y",
                       precision = ML_Exact)
    dy =   Subtraction(y, one,  tag = "dy", 
                       precision = ML_Exact)
    
    # add the necessary goals and hints
    # dx is replaced by a fresh variable with the same interval and precision
    # in every graph copy handed to gappa
    dx_gappa = Variable("dx_gappa", interval = dx.get_interval(), precision = dx.get_precision())
    swap_map = {dx: dx_gappa}

    # goal: dy (result of the argument reduction)
    gappa_code = self.gappa_engine.get_interval_code_no_copy(dy.copy(swap_map), bound_list = [swap_map[dx]])
    #self.gappa_engine.add_goal(gappa_code, s.copy(swap_map)) # range of index of table
    # hints. are the ones with isApprox=True really necessary ?
    self.gappa_engine.add_hint(gappa_code, x.copy(swap_map), x1.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, inv_x1.copy(swap_map), inv_x.copy(swap_map), isApprox = True)
    # hint x1 * inv_x1 -> 1, with the extra condition inv_x1 != 0
    # NOTE(review): swap_map[inv_x1] presumably relies on the copy() calls
    # above having registered inv_x1 in swap_map -- confirm
    self.gappa_engine.add_hint(gappa_code,
                               Multiplication(x1, inv_x1, precision = ML_Exact).copy(swap_map), one,
                               Comparison(swap_map[inv_x1], Constant(0, precision = ML_Exact),
                                          specifier = Comparison.NotEqual, precision = ML_Bool))
    # execute and parse the result
    result = execute_gappa_script_extract(gappa_code.get(self.gappa_engine))
    out_interval = result['goal']
    # number of table entries: 1 + floor(sup(in_interval) * S2**inv_size)
    length_table = 1 + floor(sup(in_interval) * S2**inv_size).getConstantAsInt()
    # per entry: a constant 16 plus the C bit size of the inv_x format / 8
    # NOTE(review): "/" is a true division under Python 3, so sizeof_table
    # may be a float there -- confirm which interpreter this targets
    sizeof_table = length_table * (16 + ML_Custom_FixedPoint_Format(1, inv_prec, False).get_c_bit_size()/8)
    return {
      'out_interval': out_interval,
      'length_table': length_table,
      'sizeof_table': sizeof_table,
    }

예제 #10

파일 보기

    def generate_scheme(self):
        """ Build the leading zero counter description

        Declares an input signal "x" of self.width bits and an output
        "vr_out" of floor(log2(self.width)) + 1 bits. A process, sensitive
        to x, first stores self.width in a temporary, then runs an
        increasing loop over indexes 0 .. self.width - 1 which stores
        (self.width - 1 - i) each time bit i of x equals 1, and finally
        copies the temporary to the output.

        @return list containing self.implementation """
        lzc_width = int(floor(log2(self.width))) + 1
        Log.report(Log.Info, "width of lzc out is {}".format(lzc_width))
        input_precision = ML_StdLogicVectorFormat(self.width)
        precision = ML_StdLogicVectorFormat(lzc_width)

        # main input, output signal and process-local variables
        vx = self.implementation.add_input_signal("x", input_precision)
        vr_out = Signal("lzc", precision=precision, var_type=Variable.Local)
        tmp_lzc = Variable("tmp_lzc",
                           precision=precision,
                           var_type=Variable.Local)
        iterator = Variable("i", precision=ML_Integer, var_type=Variable.Local)

        # loop body: when x[i] == 1, tmp_lzc <- (width - 1) - i
        index_range = Interval(0, self.width - 1)
        bit_is_set = Comparison(
            VectorElementSelection(vx, iterator, precision=ML_StdLogic),
            Constant(1, precision=ML_StdLogic),
            specifier=Comparison.Equal,
            precision=ML_Bool)
        distance_to_top = Subtraction(
            Constant(self.width - 1, precision=ML_Integer),
            iterator,
            precision=ML_Integer)
        store_count = ReferenceAssign(
            tmp_lzc, Conversion(distance_to_top, precision=precision))
        lzc_loop = RangeLoop(
            iterator,
            index_range,
            ConditionBlock(bit_is_set, store_count),
            specifier=RangeLoop.Increasing)

        # process: default to <width>, scan the bits, publish the result
        store_default = ReferenceAssign(
            tmp_lzc, Constant(self.width, precision=precision))
        publish_result = ReferenceAssign(vr_out, tmp_lzc)
        lzc_process = Process(
            Statement(store_default, lzc_loop, publish_result),
            sensibility_list=[vx])

        self.implementation.add_process(lzc_process)

        self.implementation.add_output_signal("vr_out", vr_out)

        return [self.implementation]

예제 #11

파일 보기

def generate_payne_hanek(vx,
                         frac_pi,
                         precision,
                         n=100,
                         k=4,
                         chunk_num=None,
                         debug=False):
    """ generate payne and hanek argument reduction for frac_pi * variable

        The constant frac_pi is cut into chunks of chunk_size bits stored
        in a table; the generated loop multiplies the high and low halves
        of vx by each selected chunk and accumulates the integer and
        fractional parts of the products separately.

        @param vx input node to be reduced
        @param frac_pi constant multiplicand
        @param precision floating-point format, must be ML_Binary32 or
               ML_Binary64 (it is used as a key of lookup dicts)
        @param n offset subtracted when computing lsb_exp, which selects
               the last chunk index used
        @param k offset added when computing msb_exp; the integer
               accumulator is reduced modulo 2**(k + 1)
        @param chunk_num not referenced in this function
        @param debug when True, attach debug attributes to the nodes
        @return tuple (statement running the reduction loop,
                fractional accumulator variable,
                integer accumulator variable) """
    # determining integer format corresponding to
    # floating point precision argument
    int_precision = {ML_Binary64: ML_Int64, ML_Binary32: ML_Int32}[precision]

    # index of the most significant bit of the constant
    cst_msb = floor(log2(abs(frac_pi)))
    cst_exp_range = cst_msb - precision.get_emin_subnormal() + 1

    # chunk size has to be such that the multiplication by a split <v>
    # (vx_hi or vx_lo) is exact
    chunk_size = 20  # precision.get_field_size() / 2 - 2
    chunk_number = int(ceil((cst_exp_range + chunk_size - 1) / chunk_size))
    # NOTE(review): chunk_size / 2 presumably relies on Python 2 integer
    # division; under Python 3 it yields a float -- confirm
    scaling_factor = S2**-(chunk_size / 2)

    chunk_size_cst = Constant(chunk_size, precision=ML_Int32)
    cst_msb_node = Constant(cst_msb, precision=ML_Int32)

    p = precision.get_field_size()

    # adapting debug format to precision argument
    debug_precision = {
        ML_Binary32: debug_ftox,
        ML_Binary64: debug_lftolx
    }[precision] if debug else None

    # saving sollya's global precision
    # NOTE(review): it is restored only on the normal exit path at the end
    # of this function (no try/finally)
    old_global_prec = get_prec()
    prec(cst_exp_range + 100)

    # table to store chunk of constant multiplicand
    cst_table = ML_Table(dimensions=[chunk_number, 1],
                         storage_precision=precision,
                         tag="PH_cst_table")
    # table to store sqrt(scaling_factor) corresponding to the cst multiplicand chunks
    scale_table = ML_Table(dimensions=[chunk_number, 1],
                           storage_precision=precision,
                           tag="PH_scale_table")
    tmp_cst = frac_pi

    # this loop divide the digits of frac_pi into chunks
    # the chunk lsb weight is given by a shift from
    # cst_msb, multiple of the chunk index
    # NOTE(review): xrange only exists in Python 2
    for i in xrange(chunk_number):
        value_div_factor = S2**(chunk_size * (i + 1) - cst_msb)
        # truncate the remaining constant at the lsb weight of chunk i
        local_cst = int(tmp_cst * value_div_factor) / value_div_factor
        local_scale = (scaling_factor**i)
        # storing scaled constant chunks
        cst_table[i][0] = local_cst / (local_scale**2)
        scale_table[i][0] = local_scale
        # what is left of the constant goes to the following chunks
        tmp_cst = tmp_cst - local_cst

    vx_exp = ExponentExtraction(vx)
    msb_exp = -vx_exp + p - 1 + k
    msb_exp.set_attributes(tag="msb_exp", debug=(debugd if debug else None))

    # index of the first chunk to process (0 when cst_msb < msb_exp)
    msb_index = Select(cst_msb_node < msb_exp, 0,
                       (cst_msb_node - msb_exp) / chunk_size_cst)
    msb_index.set_attributes(tag="msb_index",
                             debug=(debugd if debug else None))

    lsb_exp = -vx_exp + p - 1 - n
    lsb_exp.set_attributes(tag="lsb_exp", debug=(debugd if debug else None))

    # index of the last chunk to process
    lsb_index = (cst_msb_node - lsb_exp) / chunk_size_cst
    lsb_index.set_attributes(tag="lsb_index",
                             debug=(debugd if debug else None))

    half_size = precision.get_field_size() / 2 + 1

    # vx_hi: vx with its half_size low-order bits masked out
    # NOTE(review): the cast and mask use ML_Int64 even when precision is
    # ML_Binary32 -- confirm this is intended
    vx_hi = TypeCast(BitLogicAnd(
        TypeCast(vx, precision=ML_Int64),
        Constant(~(2**half_size - 1), precision=ML_Int64)),
                     precision=precision)
    vx_hi.set_attributes(tag="vx_hi", debug=debug_precision)

    # vx_lo: remainder of vx once vx_hi is removed
    vx_lo = vx - vx_hi
    vx_lo.set_attributes(tag="vx_lo", debug=debug_precision)

    vi = Variable("i", precision=ML_Int32, var_type=Variable.Local)

    # NOTE(review): half_scaling is not referenced again in this function
    half_scaling = Constant(S2**(-chunk_size / 2), precision=precision)

    i1 = Constant(1, precision=ML_Int32)

    # acc holds the fractional part, acc_int the integer part
    acc = Variable("acc", precision=precision, var_type=Variable.Local)
    acc_int = Variable("acc_int",
                       precision=int_precision,
                       var_type=Variable.Local)

    # NOTE(review): acc_int is initialised with a constant of <precision>
    # although the variable is declared with int_precision -- confirm
    init_loop = Statement(
        vx_hi,
        vx_lo,
        ReferenceAssign(vi, msb_index),
        ReferenceAssign(acc, Constant(0, precision=precision)),
        ReferenceAssign(acc_int, Constant(0, precision=precision)),
    )

    # chunk and scale loaded for the current loop index
    cst_load = TableLoad(cst_table,
                         vi,
                         0,
                         tag="cst_load",
                         debug=debug_precision)
    sca_load = TableLoad(scale_table,
                         vi,
                         0,
                         tag="sca_load",
                         debug=debug_precision)

    # high half product, split into nearest integer and remainder
    hi_mult = (vx_hi * sca_load) * (cst_load * sca_load)
    hi_mult.set_attributes(tag="hi_mult", debug=debug_precision)
    pre_hi_mult_int = NearestInteger(hi_mult,
                                     precision=int_precision,
                                     tag="hi_mult_int",
                                     debug=(debuglld if debug else None))
    hi_mult_int_f = Conversion(pre_hi_mult_int,
                               precision=precision,
                               tag="hi_mult_int_f",
                               debug=debug_precision)
    pre_hi_mult_red = (hi_mult - hi_mult_int_f).modify_attributes(
        tag="hi_mult_red", debug=debug_precision)

    # for the first chunks (vx_hi * <constant chunk>) exceeds 2**k+1 and may
    # be discarded (otherwise it may lead to overflow during integer
    # conversion)
    pre_exclude_hi = ((cst_msb_node - (vi + i1) * chunk_size + i1) +
                      (vx_exp + Constant(-half_size + 1, precision=ML_Int32))
                      ).modify_attributes(tag="pre_exclude_hi",
                                          debug=(debugd if debug else None))
    pre_exclude_hi.propagate_precision(ML_Int32,
                                       [cst_msb_node, vi, vx_exp, i1])
    Ck = Constant(k, precision=ML_Int32)
    # the high product is kept when this condition holds, else replaced by 0
    exclude_hi = pre_exclude_hi <= Ck
    exclude_hi.set_attributes(tag="exclude_hi",
                              debug=(debugd if debug else None))

    hi_mult_red = Select(exclude_hi, pre_hi_mult_red,
                         Constant(0, precision=precision))
    hi_mult_int = Select(exclude_hi, pre_hi_mult_int,
                         Constant(0, precision=int_precision))

    # low half product, split into nearest integer and remainder
    lo_mult = (vx_lo * sca_load) * (cst_load * sca_load)
    lo_mult.set_attributes(tag="lo_mult", debug=debug_precision)
    lo_mult_int = NearestInteger(lo_mult,
                                 precision=int_precision,
                                 tag="lo_mult_int",
                                 debug=(debuglld if debug else None))
    lo_mult_int_f = Conversion(lo_mult_int,
                               precision=precision,
                               tag="lo_mult_int_f",
                               debug=debug_precision)
    lo_mult_red = (lo_mult - lo_mult_int_f).modify_attributes(
        tag="lo_mult_red", debug=debug_precision)

    # per-iteration accumulation; the integer part is kept modulo 2**(k + 1)
    acc_expr = (acc + hi_mult_red) + lo_mult_red
    int_expr = ((acc_int + hi_mult_int) + lo_mult_int) % 2**(k + 1)

    # NOTE(review): CF1 and CI1 are not referenced again in this function
    CF1 = Constant(1, precision=precision)
    CI1 = Constant(1, precision=int_precision)

    acc_expr_int = NearestInteger(acc_expr, precision=int_precision)

    # move the integer part of the fractional accumulator into acc_int
    normalization = Statement(
        ReferenceAssign(
            acc, acc_expr - Conversion(acc_expr_int, precision=precision)),
        ReferenceAssign(acc_int, int_expr + acc_expr_int),
    )

    acc_expr.set_attributes(tag="acc_expr", debug=debug_precision)
    int_expr.set_attributes(tag="int_expr",
                            debug=(debuglld if debug else None))

    # loop over chunk indexes from msb_index (set in init_loop) to lsb_index
    red_loop = Loop(
        init_loop,
        vi <= lsb_index,
        Statement(
            acc_expr,
            int_expr,
            normalization,
            #ReferenceAssign(acc, acc_expr),
            #ReferenceAssign(acc_int, int_expr),
            ReferenceAssign(vi, vi + 1)))
    result = Statement(lsb_index, msb_index, red_loop)

    # restoring sollya's global precision
    prec(old_global_prec)

    return result, acc, acc_int

예제 #12

파일 보기

파일: ml_fp_gen_adder.py 프로젝트: metalibm/metalibm

    def generate_scheme(self):
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = VirtualFormat(base_format=self.precision,
                                     support_format=ML_StdLogicVectorFormat(
                                         self.precision.get_bit_size()),
                                     get_cst=get_virtual_cst)
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        vx_precision = self.precision
        vy_precision = self.precision
        result_precision = self.precision

        # precision for first operand vx which is to be statically
        # positionned
        p = vx_precision.get_mantissa_size()
        # precision for second operand vy which is to be dynamically shifted
        q = vy_precision.get_mantissa_size()
        # precision of output
        o = result_precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shit amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())

        mant_vx_precision = ML_StdLogicVectorFormat(p - 1)
        mant_vy_precision = ML_StdLogicVectorFormat(q - 1)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)

        exp_vx = RawExponentExtraction(vx, precision=exp_vx_precision)
        exp_vy = RawExponentExtraction(vy, precision=exp_vy_precision)

        # Maximum number of leading zero for normalized <vx>
        L_x = 0
        # Maximum number of leading zero for normalized <vy>
        L_y = 0

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        effective_op = BitLogicXor(sign_vx,
                                   sign_vy,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()

        exp_offset = max(o + L_y, q) + 2
        exp_bias = exp_offset + exp_vx_bias - exp_vy_bias
        # Determine a working precision to accomodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(vx_precision.get_exponent_size(),
                                     vy_precision.get_exponent_size()) + 2
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)
        # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + offset
        # exp_vx in [emin, emax]
        # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(
            Addition(zext(
                exp_vx,
                exp_precision_ext_size - vx_precision.get_exponent_size()),
                     Constant(exp_bias, precision=exp_precision_ext),
                     precision=exp_precision_ext),
            zext(exp_vy,
                 exp_precision_ext_size - vy_precision.get_exponent_size()),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_std)
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        datapath_full_width = exp_offset + max(o + L_x, p) + 2 + q
        max_exp_diff = datapath_full_width - q
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(signed_exp_diff,
                                          Constant(
                                              max_exp_diff,
                                              precision=exp_precision_ext),
                                          specifier=Comparison.Greater,
                                          precision=ML_Bool)

        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        mant_ext_size = max_exp_diff
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        shifted_mant_vy = BitLogicRightShift(rzext(mant_vy, mant_ext_size),
                                             mant_shift,
                                             precision=shift_prec,
                                             tag="shifted_mant_vy",
                                             debug=debug_std)
        # vx is right-extended by q+2 bits
        # and left extend by exp_offset
        mant_vx_ext = zext(rzext(mant_vx, q + 2), exp_offset + 1)

        add_prec = ML_StdLogicVectorFormat(datapath_full_width + 1)

        mant_vx_add_op = Select(Comparison(effective_op,
                                           Constant(1, precision=ML_StdLogic),
                                           precision=ML_Bool,
                                           specifier=Comparison.Equal),
                                Negation(mant_vx_ext,
                                         precision=add_prec,
                                         tag="neg_mant_vx"),
                                mant_vx_ext,
                                precision=add_prec,
                                tag="mant_vx_add_op",
                                debug=ML_Debug(display_format=" "))

        mant_add = Addition(zext(shifted_mant_vy, 1),
                            mant_vx_add_op,
                            precision=add_prec,
                            tag="mant_add",
                            debug=ML_Debug(display_format=" -radix 2"))

        # if the addition overflows, then it meant vx has been negated and
        # the 2's complement addition cancelled the negative MSB, thus
        # the addition result is positive, and the result is of the sign of Y
        # else the result is of opposite sign to Y
        add_is_negative = BitLogicAnd(CopySign(mant_add,
                                               precision=ML_StdLogic),
                                      effective_op,
                                      precision=ML_StdLogic,
                                      tag="add_is_negative",
                                      debug=ML_Debug(" -radix 2"))
        # Negate mantissa addition result if it is negative
        mant_add_abs = Select(Comparison(add_is_negative,
                                         Constant(1, precision=ML_StdLogic),
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              Negation(mant_add,
                                       precision=add_prec,
                                       tag="neg_mant_add",
                                       debug=debug_std),
                              mant_add,
                              precision=add_prec,
                              tag="mant_add_abs",
                              debug=debug_std)

        res_sign = BitLogicXor(add_is_negative,
                               sign_vy,
                               precision=ML_StdLogic,
                               tag="res_sign")

        # Precision for leading zero count
        lzc_width = int(floor(log2(datapath_full_width + 1)) + 1)
        lzc_prec = ML_StdLogicVectorFormat(lzc_width)

        lzc_args = ML_LeadingZeroCounter.get_default_args(
            width=(datapath_full_width + 1))
        LZC_entity = ML_LeadingZeroCounter(lzc_args)
        lzc_entity_list = LZC_entity.generate_scheme()
        lzc_implementation = LZC_entity.get_implementation()

        lzc_component = lzc_implementation.get_component_object()

        #lzc_in = SubSignalSelection(mant_add, p+1, 2*p+3)
        lzc_in = mant_add_abs  # SubSignalSelection(mant_add_abs, 0, 3*p+3, precision = ML_StdLogicVectorFormat(3*p+4))

        add_lzc = Signal("add_lzc",
                         precision=lzc_prec,
                         var_type=Signal.Local,
                         debug=debug_dec)
        add_lzc = PlaceHolder(
            add_lzc, lzc_component(io_map={
                "x": lzc_in,
                "vr_out": add_lzc
            }))

        # Index of output mantissa least significant bit
        mant_lsb_index = datapath_full_width - o + 1

        #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec)
        # CP stands for close path, the data path where X and Y are within 1 exp diff
        res_normed_mant = BitLogicLeftShift(mant_add_abs,
                                            add_lzc,
                                            precision=add_prec,
                                            tag="res_normed_mant",
                                            debug=debug_std)
        pre_mant_field = SubSignalSelection(
            res_normed_mant,
            mant_lsb_index,
            datapath_full_width - 1,
            precision=ML_StdLogicVectorFormat(o - 1))

        ## Helper function to extract a single bit
        #  from a vector of bits signal
        def BitExtraction(optree, index, **kw):
            return VectorElementSelection(optree,
                                          index,
                                          precision=ML_StdLogic,
                                          **kw)

        def IntCst(value):
            return Constant(value, precision=ML_Integer)

        round_bit = BitExtraction(res_normed_mant, IntCst(mant_lsb_index - 1))
        mant_lsb = BitExtraction(res_normed_mant, IntCst(mant_lsb_index))
        sticky_prec = ML_StdLogicVectorFormat(datapath_full_width - o)
        sticky_input = SubSignalSelection(res_normed_mant,
                                          0,
                                          datapath_full_width - o - 1,
                                          precision=sticky_prec)
        sticky_bit = Select(Comparison(sticky_input,
                                       Constant(0, precision=sticky_prec),
                                       specifier=Comparison.NotEqual,
                                       precision=ML_Bool),
                            Constant(1, precision=ML_StdLogic),
                            Constant(0, precision=ML_StdLogic),
                            precision=ML_StdLogic,
                            tag="sticky_bit",
                            debug=debug_std)

        # increment selection for rouding to nearest (tie to even)
        round_increment_RN = BitLogicAnd(round_bit,
                                         BitLogicOr(sticky_bit,
                                                    mant_lsb,
                                                    precision=ML_StdLogic),
                                         precision=ML_StdLogic,
                                         tag="round_increment_RN",
                                         debug=debug_std)

        rounded_mant = Addition(zext(pre_mant_field, 1),
                                round_increment_RN,
                                precision=ML_StdLogicVectorFormat(o),
                                tag="rounded_mant",
                                debug=debug_std)
        rounded_overflow = BitExtraction(rounded_mant,
                                         IntCst(o - 1),
                                         tag="rounded_overflow",
                                         debug=debug_std)
        res_mant_field = Select(Comparison(rounded_overflow,
                                           Constant(1, precision=ML_StdLogic),
                                           specifier=Comparison.Equal,
                                           precision=ML_Bool),
                                SubSignalSelection(rounded_mant, 1, o - 1),
                                SubSignalSelection(rounded_mant, 0, o - 2),
                                precision=ML_StdLogicVectorFormat(o - 1),
                                tag="final_mant",
                                debug=debug_std)

        res_exp_tmp_size = max(vx_precision.get_exponent_size(),
                               vy_precision.get_exponent_size()) + 2

        res_exp_tmp_prec = ML_StdLogicVectorFormat(res_exp_tmp_size)

        exp_vy_biased = Addition(zext(
            exp_vy, res_exp_tmp_size - vy_precision.get_exponent_size()),
                                 Constant(vy_precision.get_bias() + 1,
                                          precision=res_exp_tmp_prec),
                                 precision=res_exp_tmp_prec,
                                 tag="exp_vy_biased",
                                 debug=debug_dec)
        # vx's exponent is biased with the format bias
        # plus the exponent offset so it is left align to datapath MSB
        exp_vx_biased = Addition(
            zext(exp_vx, res_exp_tmp_size - vx_precision.get_exponent_size()),
            Constant(vx_precision.get_bias() + exp_offset + 1,
                     precision=res_exp_tmp_prec),
            precision=res_exp_tmp_prec,
            tag="exp_vx_biased",
            debug=debug_dec)

        # If exp diff is less than 0, then we must consider that vy's exponent is
        # the meaningful one and thus compute result exponent with respect
        # to vy's exponent value
        res_exp_base = Select(exp_diff_lt_0,
                              exp_vy_biased,
                              exp_vx_biased,
                              precision=res_exp_tmp_prec,
                              tag="res_exp_base",
                              debug=debug_dec)

        # Eventually we add the result exponent base
        # with the exponent offset and the leading zero count
        res_exp_ext = Addition(Subtraction(
            Addition(zext(res_exp_base, 0),
                     Constant(-result_precision.get_bias(),
                              precision=res_exp_tmp_prec),
                     precision=res_exp_tmp_prec),
            zext(add_lzc, res_exp_tmp_size - lzc_width),
            precision=res_exp_tmp_prec),
                               rounded_overflow,
                               precision=res_exp_tmp_prec,
                               tag="res_exp_ext",
                               debug=debug_std)

        res_exp_prec = ML_StdLogicVectorFormat(
            result_precision.get_exponent_size())

        res_exp = Truncate(res_exp_ext,
                           precision=res_exp_prec,
                           tag="res_exp",
                           debug=debug_dec_unsigned)

        vr_out = TypeCast(FloatBuild(
            res_sign,
            res_exp,
            res_mant_field,
            precision=self.precision,
        ),
                          precision=io_precision,
                          tag="result",
                          debug=debug_std)

        self.implementation.add_output_signal("vr_out", vr_out)

        return lzc_entity_list + [self.implementation]

예제 #13

파일 보기

    def generate_scheme(self):
        """Build the multi-precision fused multiply-add (MPFMA) scheme.

        The generated entity computes x * y + z, where the input ports "x"
        and "y" use ``self.precision`` and the input port "z" uses
        ``self.acc_precision``. An extra "reset" input port is declared as
        well. The result is rounded to nearest (ties to even), assembled
        in the accumulator precision and exposed on the "vr_out" output port.

        When ``self.pipelined`` is set, a new pipeline stage is started
        after the inputs, after the product / addend alignment, after the
        addition, after the leading zero count, after the normalization
        shift and after rounding.

        Returns:
            The entity list produced by the leading zero counter's
            ``generate_scheme()`` followed by ``self.implementation``.
        """
        ## Generate Fused multiply and add computing <x> . <y> + <z>
        Log.report(
            Log.Info,
            "generating MPFMA with acc precision {acc_precision} and precision {precision}"
            .format(acc_precision=self.acc_precision,
                    precision=self.precision))

        # NOTE(review): this helper is not called anywhere in this method
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        # x and y (the product operands) use the operator precision,
        # z (the addend) and the result use the accumulator precision
        prod_input_precision = self.precision

        accumulator_precision = self.acc_precision

        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", prod_input_precision)
        vy = self.implementation.add_input_signal("y", prod_input_precision)
        vz = self.implementation.add_input_signal("z", accumulator_precision)

        # extra reset input port
        # NOTE(review): the returned signal is not read later in this method
        reset = self.implementation.add_input_signal("reset", ML_StdLogic)

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        vx_precision = self.precision.get_base_format()
        vy_precision = self.precision.get_base_format()
        vz_precision = self.acc_precision.get_base_format()
        result_precision = self.acc_precision.get_base_format()

        # mantissa size of first product operand vx
        p = vx_precision.get_mantissa_size()
        # mantissa size of second product operand vy
        q = vy_precision.get_mantissa_size()
        # mantissa size of the addend vz (the operand which is
        # dynamically shifted, see shifted_mant_vz below)
        r = vz_precision.get_mantissa_size()
        # mantissa size of the output
        o = result_precision.get_mantissa_size()

        # bit-vector formats holding the exponent field of each operand
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())
        exp_vz_precision = ML_StdLogicVectorFormat(
            vz_precision.get_exponent_size())

        # MantissaExtraction performs the implicit
        # digit computation and concatenation
        mant_vx_precision = ML_StdLogicVectorFormat(p)
        mant_vy_precision = ML_StdLogicVectorFormat(q)
        mant_vz_precision = ML_StdLogicVectorFormat(r)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)
        mant_vz = MantissaExtraction(vz, precision=mant_vz_precision)

        exp_vx = RawExponentExtraction(vx, precision=exp_vx_precision)
        exp_vy = RawExponentExtraction(vy, precision=exp_vy_precision)
        exp_vz = RawExponentExtraction(vz, precision=exp_vz_precision)

        # Maximum number of leading zero for normalized <vx> mantissa
        L_x = 0
        # Maximum number of leading zero for normalized <vy> mantissa
        L_y = 0
        # Maximum number of leading zero for normalized <vz> mantissa
        L_z = 0
        # Maximum number of leading zero for the product of <x>.<y>
        # mantissa.
        L_xy = L_x + L_y + 1

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)
        sign_vz = CopySign(vz, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1'): the sign of the product
        # (sign_x xor sign_y) is compared with the sign of z
        sign_xy = BitLogicXor(sign_vx,
                              sign_vy,
                              precision=ML_StdLogic,
                              tag="sign_xy",
                              debug=debug_std)
        effective_op = BitLogicXor(sign_xy,
                                   sign_vz,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=debug_std)

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()
        exp_vz_bias = vz_precision.get_bias()

        # x.y is statically positioned in the datapath
        # while z is shifted
        # This is justified by the fact that z alignment may be performed
        # in parallel with the multiplication of x and y mantissas
        # The product is positioned <exp_offset>-bit to the right of datapath MSB
        # (without including an extra carry bit)
        exp_offset = max(o + L_z, r) + 2
        exp_bias = exp_offset + (exp_vx_bias + exp_vy_bias) - exp_vz_bias

        # because of the mantissa range [1, 2[, the product exponent
        # is located one bit to the right (lower) of the product MSB
        prod_exp_offset = 1

        # Determine a working precision to accommodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(vx_precision.get_exponent_size(),
                                     vy_precision.get_exponent_size(),
                                     vz_precision.get_exponent_size()) + 2
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)
        # exp_diff = exp_vx + exp_vy + (exp_bias + prod_exp_offset) - exp_vz
        # computed on exp_precision_ext_size bits (each exponent being
        # extended with zext first); the mantissa of z is shifted right by
        # a clamped version of this value (see mant_shift below)
        exp_diff = UnsignedSubtraction(
            UnsignedAddition(UnsignedAddition(
                zext(exp_vy, exp_precision_ext_size -
                     vy_precision.get_exponent_size()),
                zext(exp_vx, exp_precision_ext_size -
                     vx_precision.get_exponent_size()),
                precision=exp_precision_ext),
                             Constant(exp_bias + prod_exp_offset,
                                      precision=exp_precision_ext),
                             precision=exp_precision_ext),
            zext(exp_vz,
                 exp_precision_ext_size - vz_precision.get_exponent_size()),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_std)
        # exp_diff is re-interpreted as a signed value to be compared
        # against 0 and against the maximal shift amount
        exp_precision_ext_signed = get_signed_precision(exp_precision_ext)
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext_signed)
        # full datapath width and largest useful shift amount for z
        datapath_full_width = exp_offset + max(o + L_xy, p + q) + 2 + r
        max_exp_diff = datapath_full_width - r
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(
                                       0, precision=exp_precision_ext_signed),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(
            signed_exp_diff,
            Constant(max_exp_diff, precision=exp_precision_ext_signed),
            specifier=Comparison.Greater,
            precision=ML_Bool)

        # number of bits required to encode max_exp_diff
        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        # shift amount for z's mantissa: 0 when exp_diff is negative,
        # max_exp_diff when exp_diff exceeds it, else exp_diff truncated
        # to the shift amount format
        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=debug_dec)

        # mantissa product, on p + q bits
        prod_prec = ML_StdLogicVectorFormat(p + q)
        prod = UnsignedMultiplication(mant_vx,
                                      mant_vy,
                                      precision=prod_prec,
                                      tag="prod",
                                      debug=debug_std)

        # z's mantissa is extended by max_exp_diff bits with rzext
        # (presumably a zero extension on the right side - confirm against
        # the helper definition) and then shifted right by mant_shift
        mant_ext_size = max_exp_diff
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        mant_vz_ext = rzext(mant_vz, mant_ext_size)
        shifted_mant_vz = BitLogicRightShift(mant_vz_ext,
                                             mant_shift,
                                             precision=shift_prec,
                                             tag="shifted_mant_vz",
                                             debug=debug_std)

        # Inserting  pipeline stage
        # after product computation
        # and addend alignment shift
        if self.pipelined: self.implementation.start_new_stage()

        # the product is extended by r + 2 bits with rzext
        # and by exp_offset + 1 bits with zext
        prod_ext = zext(rzext(prod, r + 2), exp_offset + 1)

        # the adder works on one more bit than the datapath width
        add_prec = ML_StdLogicVectorFormat(datapath_full_width + 1)

        ## Here we make the supposition that
        #  the product is slower to compute than
        #  aligning <vz> and negating it if necessary
        #  which means that mant_add as the same sign as the product
        #prod_add_op = Select(
        #  Comparison(
        #    effective_op,
        #    Constant(1, precision = ML_StdLogic),
        #    precision = ML_Bool,
        #    specifier = Comparison.Equal
        #  ),
        #  Negation(prod_ext, precision = add_prec, tag = "neg_prod"),
        #  prod_ext,
        #  precision = add_prec,
        #  tag = "prod_add_op",
        #  debug = debug_cst_dec
        #)
        # for an effective subtraction the aligned addend is bitwise
        # negated, otherwise it is used unchanged (extended by one bit)
        addend_op = Select(Comparison(effective_op,
                                      Constant(1, precision=ML_StdLogic),
                                      precision=ML_Bool,
                                      specifier=Comparison.Equal),
                           BitLogicNegate(zext(shifted_mant_vz, 1),
                                          precision=add_prec,
                                          tag="neg_addend_Op"),
                           zext(shifted_mant_vz, 1),
                           precision=add_prec,
                           tag="addend_op",
                           debug=debug_std)

        # the product operand is never negated
        prod_add_op = prod_ext

        # Compound Addition: both (addend + product + 1) and
        # (addend + product) are computed
        mant_add_p1 = UnsignedAddition(UnsignedAddition(addend_op,
                                                        prod_add_op,
                                                        precision=add_prec),
                                       Constant(1, precision=ML_StdLogic),
                                       precision=add_prec,
                                       tag="mant_add_p1",
                                       debug=debug_std)
        mant_add_p0 = UnsignedAddition(addend_op,
                                       prod_add_op,
                                       precision=add_prec,
                                       tag="mant_add_p0",
                                       debug=debug_std)

        # The addition result is flagged as negative when the operation is
        # an effective subtraction (effective_op = '1') and the sign
        # extracted from mant_add_p1 by CopySign is set.
        # NOTE(review): the original remark here mentioned vx being negated
        # and the sign of Y; in this code the negated operand is the
        # addend z and the reference sign is sign_xy (see res_sign below)
        add_is_negative = BitLogicAnd(CopySign(mant_add_p1,
                                               precision=ML_StdLogic),
                                      effective_op,
                                      precision=ML_StdLogic,
                                      tag="add_is_negative",
                                      debug=debug_std)
        # Absolute value of the mantissa addition result: when negative,
        # the bitwise negation of mant_add_p0 is selected (as mant_add_p1
        # is mant_add_p0 + 1, this is the two's complement opposite of
        # mant_add_p1), otherwise mant_add_p1 is selected
        mant_add_abs = Select(Comparison(add_is_negative,
                                         Constant(1, precision=ML_StdLogic),
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              BitLogicNegate(mant_add_p0,
                                             precision=add_prec,
                                             tag="neg_mant_add_p0",
                                             debug=debug_std),
                              mant_add_p1,
                              precision=add_prec,
                              tag="mant_add_abs",
                              debug=debug_std)

        # determining result sign: the sign of the product, inverted
        # when the addition result is negative
        res_sign = BitLogicXor(add_is_negative,
                               sign_xy,
                               precision=ML_StdLogic,
                               tag="res_sign")

        # adding pipeline stage after addition computation
        if self.pipelined: self.implementation.start_new_stage()

        # Precision for leading zero count
        lzc_width = int(floor(log2(datapath_full_width + 1)) + 1)
        lzc_prec = ML_StdLogicVectorFormat(lzc_width)

        # NOTE(review): current_stage is only referenced by the
        # commented-out set_current_stage call below
        current_stage = self.implementation.get_current_stage()

        # generating the leading zero counter sub-entity, whose input
        # width matches the adder result width
        lzc_args = ML_LeadingZeroCounter.get_default_args(
            width=(datapath_full_width + 1))
        LZC_entity = ML_LeadingZeroCounter(lzc_args)
        lzc_entity_list = LZC_entity.generate_scheme()
        lzc_implementation = LZC_entity.get_implementation()

        lzc_component = lzc_implementation.get_component_object()

        #self.implementation.set_current_stage(current_stage)
        # Attributes dynamic field (init_stage and init_op)
        # constructors must be initialized back after
        # building a sub-operator inside this operator
        self.implementation.instanciate_dyn_attributes()

        # lzc_in = mant_add_abs

        # local signal receiving the leading zero count, driven by
        # an instance of the LZC component fed with mant_add_abs
        add_lzc_sig = Signal("add_lzc",
                             precision=lzc_prec,
                             var_type=Signal.Local,
                             debug=debug_dec)
        add_lzc = PlaceHolder(add_lzc_sig,
                              lzc_component(io_map={
                                  "x": mant_add_abs,
                                  "vr_out": add_lzc_sig
                              },
                                            tag="lzc_i"),
                              tag="place_holder")

        # adding pipeline stage after leading zero count
        if self.pipelined: self.implementation.start_new_stage()

        # Index of output mantissa least significant bit
        mant_lsb_index = datapath_full_width - o + 1

        #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec)
        # Normalization: the absolute value of the sum is shifted left
        # by its leading zero count
        res_normed_mant = BitLogicLeftShift(mant_add_abs,
                                            add_lzc,
                                            precision=add_prec,
                                            tag="res_normed_mant",
                                            debug=debug_std)
        # (o - 1)-bit mantissa field before rounding, taken between bit
        # indexes mant_lsb_index and datapath_full_width - 1
        pre_mant_field = SubSignalSelection(
            res_normed_mant,
            mant_lsb_index,
            datapath_full_width - 1,
            precision=ML_StdLogicVectorFormat(o - 1))

        ## Helper function to extract a single bit
        #  from a vector of bits signal
        def BitExtraction(optree, index, **kw):
            return VectorElementSelection(optree,
                                          index,
                                          precision=ML_StdLogic,
                                          **kw)

        ## Helper function to build an ML_Integer constant
        def IntCst(value):
            return Constant(value, precision=ML_Integer)

        # adding pipeline stage after normalization shift
        if self.pipelined: self.implementation.start_new_stage()

        # round bit: the bit just below the mantissa LSB
        round_bit = BitExtraction(res_normed_mant, IntCst(mant_lsb_index - 1))
        mant_lsb = BitExtraction(res_normed_mant, IntCst(mant_lsb_index))
        # sticky bit: set when any of the bits below the round bit
        # (indexes 0 to datapath_full_width - o - 1) is non-zero
        sticky_prec = ML_StdLogicVectorFormat(datapath_full_width - o)
        sticky_input = SubSignalSelection(res_normed_mant,
                                          0,
                                          datapath_full_width - o - 1,
                                          precision=sticky_prec)
        sticky_bit = Select(Comparison(sticky_input,
                                       Constant(0, precision=sticky_prec),
                                       specifier=Comparison.NotEqual,
                                       precision=ML_Bool),
                            Constant(1, precision=ML_StdLogic),
                            Constant(0, precision=ML_StdLogic),
                            precision=ML_StdLogic,
                            tag="sticky_bit",
                            debug=debug_std)

        # increment selection for rounding to nearest (tie to even):
        # round_bit and (sticky_bit or mant_lsb)
        round_increment_RN = BitLogicAnd(round_bit,
                                         BitLogicOr(sticky_bit,
                                                    mant_lsb,
                                                    precision=ML_StdLogic),
                                         precision=ML_StdLogic,
                                         tag="round_increment_RN",
                                         debug=debug_std)

        # the mantissa field is extended by one bit before adding the
        # rounding increment, bit (o - 1) of the sum is used as the
        # rounding overflow flag
        rounded_mant = UnsignedAddition(zext(pre_mant_field, 1),
                                        round_increment_RN,
                                        precision=ML_StdLogicVectorFormat(o),
                                        tag="rounded_mant",
                                        debug=debug_std)
        rounded_overflow = BitExtraction(rounded_mant,
                                         IntCst(o - 1),
                                         tag="rounded_overflow",
                                         debug=debug_std)
        # on rounding overflow bits [1, o - 1] are selected,
        # otherwise bits [0, o - 2]
        res_mant_field = Select(Comparison(rounded_overflow,
                                           Constant(1, precision=ML_StdLogic),
                                           specifier=Comparison.Equal,
                                           precision=ML_Bool),
                                SubSignalSelection(rounded_mant, 1, o - 1),
                                SubSignalSelection(rounded_mant, 0, o - 2),
                                precision=ML_StdLogicVectorFormat(o - 1),
                                tag="final_mant",
                                debug=debug_std)

        # working size for the result exponent computation
        res_exp_tmp_size = max(vx_precision.get_exponent_size(),
                               vy_precision.get_exponent_size(),
                               vz_precision.get_exponent_size()) + 2

        res_exp_tmp_prec = ML_StdLogicVectorFormat(res_exp_tmp_size)

        # Product biased exponent
        # is computed from both x and y exponent:
        # (exp_vy + bias_y) + (exp_vx + bias_x) + (exp_offset + 1)
        exp_xy_biased = UnsignedAddition(UnsignedAddition(
            UnsignedAddition(zext(
                exp_vy, res_exp_tmp_size - vy_precision.get_exponent_size()),
                             Constant(vy_precision.get_bias(),
                                      precision=res_exp_tmp_prec),
                             precision=res_exp_tmp_prec,
                             tag="exp_vy_biased",
                             debug=debug_dec),
            UnsignedAddition(zext(
                exp_vx, res_exp_tmp_size - vx_precision.get_exponent_size()),
                             Constant(vx_precision.get_bias(),
                                      precision=res_exp_tmp_prec),
                             precision=res_exp_tmp_prec,
                             tag="exp_vx_biased",
                             debug=debug_dec),
            precision=res_exp_tmp_prec),
                                         Constant(
                                             exp_offset + 1,
                                             precision=res_exp_tmp_prec,
                                         ),
                                         precision=res_exp_tmp_prec,
                                         tag="exp_xy_biased",
                                         debug=debug_dec)
        # vz's exponent is biased with the format bias plus 1
        # (the exponent offset term is commented out in the constant below)
        exp_vz_biased = UnsignedAddition(
            zext(exp_vz, res_exp_tmp_size - vz_precision.get_exponent_size()),
            Constant(
                vz_precision.get_bias() + 1,  # + exp_offset + 1,
                precision=res_exp_tmp_prec),
            precision=res_exp_tmp_prec,
            tag="exp_vz_biased",
            debug=debug_dec)

        # If exp diff is less than 0, then we must consider that vz's exponent is
        # the meaningful one and thus compute result exponent with respect
        # to vz's exponent value
        res_exp_base = Select(exp_diff_lt_0,
                              exp_vz_biased,
                              exp_xy_biased,
                              precision=res_exp_tmp_prec,
                              tag="res_exp_base",
                              debug=debug_dec)

        # Eventually the result exponent is obtained from the exponent base
        # by adding the opposite of the result format bias, subtracting
        # the leading zero count and adding the rounding overflow bit
        res_exp_ext = UnsignedAddition(UnsignedSubtraction(
            UnsignedAddition(zext(res_exp_base, 0),
                             Constant(-result_precision.get_bias(),
                                      precision=res_exp_tmp_prec),
                             precision=res_exp_tmp_prec),
            zext(add_lzc, res_exp_tmp_size - lzc_width),
            precision=res_exp_tmp_prec),
                                       rounded_overflow,
                                       precision=res_exp_tmp_prec,
                                       tag="res_exp_ext",
                                       debug=debug_std)

        res_exp_prec = ML_StdLogicVectorFormat(
            result_precision.get_exponent_size())

        # the extended exponent is truncated to the result exponent size
        res_exp = Truncate(res_exp_ext,
                           precision=res_exp_prec,
                           tag="res_exp",
                           debug=debug_dec_unsigned)

        # assembling sign, exponent and mantissa fields
        # into the accumulator precision
        vr_out = TypeCast(FloatBuild(
            res_sign,
            res_exp,
            res_mant_field,
            precision=accumulator_precision,
        ),
                          precision=accumulator_precision,
                          tag="result",
                          debug=debug_std)

        # adding pipeline stage after rounding
        if self.pipelined: self.implementation.start_new_stage()

        self.implementation.add_output_signal("vr_out", vr_out)

        # the leading zero counter entities are returned along
        # with this operator's implementation
        return lzc_entity_list + [self.implementation]

예제 #14

파일 보기

파일: ml_fixed_mpfma.py 프로젝트: templeblock/metalibm

    def generate_scheme(self):
        ## Generate Fused multiply and add comput <x> . <y> + <z>
        Log.report(
            Log.Info,
            "generating fixed MPFMA with {ed} extra digit(s) and sign-magnitude accumulator: {sm}"
            .format(ed=self.extra_digit, sm=self.sign_magnitude))

        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = HdlVirtualFormat(self.precision)
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable

        # maximum weigth for a mantissa product digit
        max_prod_exp = self.precision.get_emax() * 2 + 1
        # minimum wieght for a mantissa product digit
        min_prod_exp = self.precision.get_emin_subnormal() * 2

        ## Most and least significant digit index for the
        #  accumulator
        acc_msb_index = max_prod_exp + self.extra_digit
        acc_lsb_index = min_prod_exp

        acc_width = acc_msb_index - min_prod_exp + 1
        # precision of the accumulator
        acc_prec = ML_StdLogicVectorFormat(acc_width)

        reset = self.implementation.add_input_signal("reset", ML_StdLogic)

        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        acc = self.implementation.add_input_signal("acc", acc_prec)
        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation
            sign_acc = self.implementation.add_input_signal(
                "sign_acc", ML_StdLogic)
        else:
            sign_acc = CopySign(acc,
                                precision=ML_StdLogic,
                                tag="sign_acc",
                                debug=debug_std)

        vx_precision = self.precision
        vy_precision = self.precision
        result_precision = acc_prec

        # precision for first operand vx which is to be statically
        # positionned
        p = vx_precision.get_mantissa_size()
        # precision for second operand vy which is to be dynamically shifted
        q = vy_precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shift amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())

        mant_vx_precision = ML_StdLogicVectorFormat(p - 1)
        mant_vy_precision = ML_StdLogicVectorFormat(q - 1)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)

        exp_vx = ExponentExtraction(vx,
                                    precision=exp_vx_precision,
                                    tag="exp_vx",
                                    debug=debug_dec)
        exp_vy = ExponentExtraction(vy,
                                    precision=exp_vy_precision,
                                    tag="exp_vy",
                                    debug=debug_dec)

        # Maximum number of leading zero for normalized <vx> mantissa
        L_x = 0
        # Maximum number of leading zero for normalized <vy> mantissa
        L_y = 0
        # Maximum number of leading zero for the product of <x>.<y>
        # mantissa.
        L_xy = L_x + L_y + 1

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        sign_xy = BitLogicXor(sign_vx,
                              sign_vy,
                              precision=ML_StdLogic,
                              tag="sign_xy",
                              debug=ML_Debug(display_format="-radix 2"))
        effective_op = BitLogicXor(sign_xy,
                                   sign_acc,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()

        # <acc> is statically positionned in the datapath,
        # it may even constitute the whole datapath
        #
        # the product is shifted with respect to the fix accumulator

        exp_bias = (exp_vx_bias + exp_vy_bias)

        # because of the mantissa range [1, 2[, the product exponent
        # is located one bit to the right (lower) of the product MSB
        prod_exp_offset = 1

        # Determine a working precision to accomodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(
            vx_precision.get_exponent_size(),
            vy_precision.get_exponent_size(),
            abs(ceil(log2(abs(acc_msb_index)))),
            abs(ceil(log2(abs(acc_lsb_index)))),
            abs(ceil(log2(abs(exp_bias + prod_exp_offset)))),
        ) + 2
        Log.report(Log.Info,
                   "exp_precision_ext_size={}".format(exp_precision_ext_size))
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)

        # static accumulator exponent
        exp_acc = Constant(acc_msb_index,
                           precision=exp_precision_ext,
                           tag="exp_acc",
                           debug=debug_cst_dec)

        # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + offset
        # exp_vx in [emin, emax]
        # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(
            exp_acc,
            Addition(Addition(zext(
                exp_vy,
                exp_precision_ext_size - vy_precision.get_exponent_size()),
                              zext(
                                  exp_vx, exp_precision_ext_size -
                                  vx_precision.get_exponent_size()),
                              precision=exp_precision_ext),
                     Constant(exp_bias + prod_exp_offset,
                              precision=exp_precision_ext,
                              tag="diff_bias",
                              debug=debug_cst_dec),
                     precision=exp_precision_ext,
                     tag="pre_exp_diff",
                     debug=debug_dec),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_dec)
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        datapath_full_width = acc_width
        # the maximum exp diff is the size of the datapath
        # minus the bit size of the product
        max_exp_diff = datapath_full_width - (p + q)
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(signed_exp_diff,
                                          Constant(
                                              max_exp_diff,
                                              precision=exp_precision_ext),
                                          specifier=Comparison.Greater,
                                          precision=ML_Bool)

        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        prod_prec = ML_StdLogicVectorFormat(p + q)
        prod = Multiplication(mant_vx,
                              mant_vy,
                              precision=prod_prec,
                              tag="prod",
                              debug=debug_std)

        # attempt at pipelining the operator
        # self.implementation.start_new_stage()

        mant_ext_size = datapath_full_width - (p + q)
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        shifted_prod = BitLogicRightShift(rzext(prod, mant_ext_size),
                                          mant_shift,
                                          precision=shift_prec,
                                          tag="shifted_prod",
                                          debug=debug_std)

        ## Inserting a pipeline stage after the product shifting
        if self.pipelined: self.implementation.start_new_stage()

        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation

            acc_negated = Select(Comparison(sign_xy,
                                            sign_acc,
                                            specifier=Comparison.Equal,
                                            precision=ML_Bool),
                                 acc,
                                 BitLogicNegate(acc, precision=acc_prec),
                                 precision=acc_prec)

            # one extra MSB bit is added to the final addition
            # to detect overflows
            add_width = acc_width + 1
            add_prec = ML_StdLogicVectorFormat(add_width)

            # FIXME: implement with a proper compound adder
            mant_add_p0_ext = Addition(zext(shifted_prod, 1),
                                       zext(acc_negated, 1),
                                       precision=add_prec)
            mant_add_p1_ext = Addition(
                mant_add_p0_ext,
                Constant(1, precision=ML_StdLogic),
                precision=add_prec,
                tag="mant_add",
                debug=ML_Debug(display_format=" -radix 2"))
            # discarding carry overflow bit
            mant_add_p0 = SubSignalSelection(mant_add_p0_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)
            mant_add_p1 = SubSignalSelection(mant_add_p1_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)

            mant_add_pre_sign = CopySign(mant_add_p1_ext,
                                         precision=ML_StdLogic,
                                         tag="mant_add_pre_sign",
                                         debug=debug_std)
            mant_add = Select(Comparison(sign_xy,
                                         sign_acc,
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              mant_add_p0,
                              Select(
                                  Comparison(mant_add_pre_sign,
                                             Constant(1,
                                                      precision=ML_StdLogic),
                                             specifier=Comparison.Equal,
                                             precision=ML_Bool),
                                  mant_add_p1,
                                  BitLogicNegate(mant_add_p0,
                                                 precision=acc_prec),
                                  precision=acc_prec,
                              ),
                              precision=acc_prec,
                              tag="mant_add")

            # if both operands had the same sign, then
            # mant_add is necessarily positive and the result
            # sign matches the input sign
            # if both operands had opposite signs, then
            # the result sign matches the product sign
            # if mant_add is positive, else the accumulator sign
            output_sign = Select(
                Comparison(effective_op,
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool),
                # if the effective op is a subtraction (prod - acc)
                BitLogicXor(sign_acc, mant_add_pre_sign,
                            precision=ML_StdLogic),
                # the effective op is an addition, thus result and
                # acc share sign
                sign_acc,
                precision=ML_StdLogic,
                tag="output_sign")

            if self.pipelined: self.implementation.start_new_stage()

            # adding output
            self.implementation.add_output_signal("vr_sign", output_sign)
            self.implementation.add_output_signal("vr_acc", mant_add)

        else:
            # 2s complement encoding of the accumulator,
            # the accumulator is never negated, only the producted
            # is negated if negative

            # negate shifted prod when required
            shifted_prod_op = Select(Comparison(sign_xy,
                                                Constant(
                                                    1, precision=ML_StdLogic),
                                                specifier=Comparison.Equal,
                                                precision=ML_Bool),
                                     Negation(shifted_prod,
                                              precision=shift_prec),
                                     shifted_prod,
                                     precision=shift_prec)

            add_prec = shift_prec  # ML_StdLogicVectorFormat(datapath_full_width + 1)

            mant_add = Addition(shifted_prod_op,
                                acc,
                                precision=acc_prec,
                                tag="mant_add",
                                debug=ML_Debug(display_format=" -radix 2"))

            if self.pipelined: self.implementation.start_new_stage()

            self.implementation.add_output_signal("vr_acc", mant_add)

        return [self.implementation]

예제 #15

파일 보기

파일: ml_exp_adaptative.py 프로젝트: templeblock/metalibm

    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        #try:
        if 1:
            #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            Log.report(Log.Info, "eval error: %s" % eval_error)
        #except:
        #    Log.report(Log.Info, "gappa error evaluation failed")
        print r.get_str(depth = None, display_precision = True, display_attribute = True)
        print opt_r.get_str(depth = None, display_precision = True, display_attribute = True)

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print "ulp: ", local_ulp 
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree

        return


        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            #print "poly: ", poly.get_str(depth = None, display_precision = True)
            #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print "global_poly_error: ", global_poly_error, global_rel_poly_error 
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print "test: ", flag
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        #print scheme.get_str(depth = None, display_precision = True)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        #self.result.add_header("support_lib/ml_types.h")
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()

예제 #16

파일 보기

def generate_payne_hanek(vx,
                         frac_pi,
                         precision,
                         n=100,
                         k=4,
                         chunk_num=None,
                         debug=False):
    """ generate payne and hanek argument reduction for frac_pi * variable """

    sollya.roundingwarnings = sollya.off
    debug_precision = debug_multi
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[precision]

    p = precision.get_field_size()

    # weight of the most significant digit of the constant
    cst_msb = floor(log2(abs(frac_pi)))
    # length of exponent range which must be covered by the approximation
    # of the constant
    cst_exp_range = cst_msb - precision.get_emin_subnormal() + 1

    # chunk size has to be such that multiplication by a split <v>
    # (vx_hi or vx_lo) is exact
    chunk_size = precision.get_field_size() / 2 - 2
    chunk_number = int(ceil((cst_exp_range + chunk_size - 1) / chunk_size))
    scaling_factor = S2**-(chunk_size / 2)

    chunk_size_cst = Constant(chunk_size, precision=ML_Int32)
    cst_msb_node = Constant(cst_msb, precision=ML_Int32)

    # Saving sollya's global precision
    old_global_prec = sollya.settings.prec
    sollya.settings.prec(cst_exp_range + n)

    # table to store chunk of constant multiplicand
    cst_table = ML_NewTable(dimensions=[chunk_number, 1],
                            storage_precision=precision,
                            tag="PH_cst_table")
    # table to store sqrt(scaling_factor) corresponding to the
    # cst multiplicand chunks
    scale_table = ML_NewTable(dimensions=[chunk_number, 1],
                              storage_precision=precision,
                              tag="PH_scale_table")
    tmp_cst = frac_pi

    # cst_table stores normalized constant chunks (they have been
    # scale back to close to 1.0 interval)
    #
    # scale_table stores the scaling factors corresponding to the
    # denormalization of cst_table coefficients

    # this loop divide the digits of frac_pi into chunks
    # the chunk lsb weight is given by a shift from
    # cst_msb, multiple of the chunk index
    for i in range(chunk_number):
        value_div_factor = S2**(chunk_size * (i + 1) - cst_msb)
        local_cst = int(tmp_cst * value_div_factor) / value_div_factor
        local_scale = (scaling_factor**i)
        # storing scaled constant chunks
        cst_table[i][0] = local_cst / (local_scale**2)
        scale_table[i][0] = local_scale
        # Updating constant value
        tmp_cst = tmp_cst - local_cst

    # Computing which part of the constant we do not need to multiply
    # In the following comments, vi represents the bit of frac_pi of weight 2**-i

    # Bits vi so that i <= (vx_exp - p + 1 -k)  are not needed, because they result
    # in a multiple of 2pi and do not contribute to trig functions.

    vx_exp = ExponentExtraction(
        vx, precision=vx.get_precision().get_integer_format())
    vx_exp = Conversion(vx_exp, precision=ML_Int32)

    msb_exp = -(vx_exp - p + 1 - k)
    msb_exp.set_attributes(tag="msb_exp", debug=debug_multi)
    msb_exp = Conversion(msb_exp, precision=ML_Int32)

    # Select the highest index where the reduction should start
    msb_index = Select(cst_msb_node < msb_exp, 0,
                       (cst_msb_node - msb_exp) / chunk_size_cst)
    msb_index.set_attributes(tag="msb_index", debug=debug_multi)

    # For a desired accuracy of 2**-n, bits vi so that i >= (vx_exp + n + 4)  are not needed, because they contribute less than
    # 2**-n to the result

    lsb_exp = -(vx_exp + n + 4)
    lsb_exp.set_attributes(tag="lsb_exp", debug=debug_multi)
    lsb_exp = Conversion(lsb_exp, precision=ML_Int32)

    # Index of the corresponding chunk
    lsb_index = (cst_msb_node - lsb_exp) / chunk_size_cst
    lsb_index.set_attributes(tag="lsb_index", debug=debug_multi)

    # Splitting vx
    half_size = precision.get_field_size() / 2 + 1

    # hi part (most significant digit) of vx input
    vx_hi = TypeCast(BitLogicAnd(
        TypeCast(vx, precision=int_precision),
        Constant(~int(2**half_size - 1), precision=int_precision)),
                     precision=precision)
    vx_hi.set_attributes(tag="vx_hi_ph")  #, debug = debug_multi)

    vx_lo = vx - vx_hi
    vx_lo.set_attributes(tag="vx_lo_ph")  #, debug = debug_multi)

    # loop iterator variable
    vi = Variable("i", precision=ML_Int32, var_type=Variable.Local)
    # step scaling factor
    half_scaling = Constant(S2**(-chunk_size / 2), precision=precision)

    i1 = Constant(1, precision=ML_Int32)

    # accumulator to the output precision
    acc = Variable("acc", precision=precision, var_type=Variable.Local)
    # integer accumulator
    acc_int = Variable("acc_int",
                       precision=int_precision,
                       var_type=Variable.Local)

    init_loop = Statement(
        vx_hi,
        vx_lo,
        ReferenceAssign(vi, msb_index),
        ReferenceAssign(acc, Constant(0, precision=precision)),
        ReferenceAssign(acc_int, Constant(0, precision=int_precision)),
    )

    cst_load = TableLoad(cst_table,
                         vi,
                         0,
                         tag="cst_load",
                         debug=debug_precision)
    sca_load = TableLoad(scale_table,
                         vi,
                         0,
                         tag="sca_load",
                         debug=debug_precision)
    # loop body
    # hi_mult = vx_hi * <scale_factor> * <cst>
    hi_mult = (vx_hi * sca_load) * (cst_load * sca_load)
    hi_mult.set_attributes(tag="hi_mult", debug=debug_precision)
    pre_hi_mult_int = NearestInteger(hi_mult,
                                     precision=int_precision,
                                     tag="hi_mult_int",
                                     debug=(debuglld if debug else None))
    hi_mult_int_f = Conversion(pre_hi_mult_int,
                               precision=precision,
                               tag="hi_mult_int_f",
                               debug=debug_precision)
    pre_hi_mult_red = (hi_mult - hi_mult_int_f).modify_attributes(
        tag="hi_mult_red", debug=debug_precision)

    # for the first chunks (vx_hi * <constant chunk>) exceeds 2**k+1 and may be
    # discard (whereas it may lead to overflow during integer conversion
    pre_exclude_hi = ((cst_msb_node - (vi + i1) * chunk_size + i1) +
                      (vx_exp + Constant(-half_size + 1, precision=ML_Int32))
                      ).modify_attributes(tag="pre_exclude_hi",
                                          debug=(debugd if debug else None))
    pre_exclude_hi.propagate_precision(ML_Int32,
                                       [cst_msb_node, vi, vx_exp, i1])
    Ck = Constant(k, precision=ML_Int32)
    exclude_hi = pre_exclude_hi <= Ck
    exclude_hi.set_attributes(tag="exclude_hi", debug=debug_multi)

    hi_mult_red = Select(exclude_hi, pre_hi_mult_red,
                         Constant(0, precision=precision))
    hi_mult_int = Select(exclude_hi, pre_hi_mult_int,
                         Constant(0, precision=int_precision))

    # lo part of the chunk reduction
    lo_mult = (vx_lo * sca_load) * (cst_load * sca_load)
    lo_mult.set_attributes(tag="lo_mult")  #, debug = debug_multi)
    lo_mult_int = NearestInteger(lo_mult,
                                 precision=int_precision,
                                 tag="lo_mult_int")  #, debug = debug_multi
    lo_mult_int_f = Conversion(lo_mult_int,
                               precision=precision,
                               tag="lo_mult_int_f")  #, debug = debug_multi)
    lo_mult_red = (lo_mult - lo_mult_int_f).modify_attributes(
        tag="lo_mult_red")  #, debug = debug_multi)

    # accumulating fractional part
    acc_expr = (acc + hi_mult_red) + lo_mult_red
    # accumulating integer part
    int_expr = ((acc_int + hi_mult_int) + lo_mult_int) % 2**(k + 1)

    CF1 = Constant(1, precision=precision)
    CI1 = Constant(1, precision=int_precision)

    # extracting exceeding integer part in fractionnal accumulator
    acc_expr_int = NearestInteger(acc_expr, precision=int_precision)
    # normalizing integer and fractionnal accumulator by subtracting then
    # adding exceeding integer part
    normalization = Statement(
        ReferenceAssign(
            acc, acc_expr - Conversion(acc_expr_int, precision=precision)),
        ReferenceAssign(acc_int, int_expr + acc_expr_int),
    )

    acc_expr.set_attributes(tag="acc_expr")  #, debug = debug_multi)
    int_expr.set_attributes(tag="int_expr")  #, debug = debug_multi)

    red_loop = Loop(
        init_loop, vi <= lsb_index,
        Statement(acc_expr, int_expr, normalization,
                  ReferenceAssign(vi, vi + 1)))

    result = Statement(lsb_index, msb_index, red_loop)

    # restoring sollya's global precision
    sollya.settings.prec = old_global_prec

    return result, acc, acc_int

예제 #17

파일 보기

  def eval_argument_reduction(self, size1, prec1, size2, prec2):
    """Evaluate a two-step table-driven argument reduction with Gappa.

    The method builds the expression graph
      x  = 1 + dx
      x1 = x converted to ML_Custom_FixedPoint_Format(0, size1, False)
      y  = x * inv_x, inv_x being 1/x1 converted to a prec1-sized format
      y1 = y converted to ML_Custom_FixedPoint_Format(0, size2, False)
      z  = y * inv_y, inv_y being 1/y1 converted to a prec2-sized format
    and asks Gappa for the ranges of dz = z - 1 (main goal), dy = y - 1 and
    of the two table indices s ("indexTableX") and t ("indexTableY").

    Args:
      size1: size parameter of the first index format (x1).
      prec1: size parameter of the first stored inverse (inv_x).
      size2: size parameter of the second index format (y1).
      prec2: size parameter of the second stored inverse (inv_y).

    Returns:
      A dict holding the four arguments, the number of entries and byte size
      of both tables, and the input / intermediate / output intervals.
    """
    one = Constant(1, precision = ML_Exact, tag = "one")
    dx =     Variable("dx",
                      precision = ML_Custom_FixedPoint_Format(0, 52, False),
                      interval = Interval(0, 1 - S2**-52))

    # do the argument reduction
    x =       Addition(dx, one, tag = "x",
                       precision = ML_Exact)
    x1 =    Conversion(x, tag = "x1",
                       precision = ML_Custom_FixedPoint_Format(0, size1, False),
                       rounding_mode = ML_RoundTowardMinusInfty)
    # index into the first table: (x1 - 1) * 2**size1
    s = Multiplication(Subtraction(x1, one, precision = ML_Exact),
                       Constant(S2**size1, precision = ML_Exact),
                       precision = ML_Exact,
                       tag = "indexTableX")
    inv_x1 =  Division(one, x1, tag = "ix1",
                       precision = ML_Exact)
    inv_x = Conversion(inv_x1,  tag = "ix",
                       precision = ML_Custom_FixedPoint_Format(1, prec1, False),
                       rounding_mode = ML_RoundTowardPlusInfty)
    y = Multiplication(x, inv_x, tag = "y",
                       precision = ML_Exact)
    dy =   Subtraction(y, one,  tag = "dy",
                       precision = ML_Exact)
    # NOTE(review): y1 reuses the tag "y" already given to y above; this looks
    # like it was meant to be "y1" -- confirm before changing, since tags may
    # name the variables of the generated Gappa script.
    y1 =    Conversion(y, tag = "y",
                       precision = ML_Custom_FixedPoint_Format(0,size2,False),
                       rounding_mode = ML_RoundTowardMinusInfty)
    # index into the second table: (y1 - 1) * 2**size2
    t = Multiplication(Subtraction(y1, one, precision = ML_Exact),
                       Constant(S2**size2, precision = ML_Exact),
                       precision = ML_Exact,
                       tag = "indexTableY")
    inv_y1 =  Division(one, y1, tag = "iy1",
                       precision = ML_Exact)
    inv_y = Conversion(inv_y1, tag = "iy",
                       precision = ML_Custom_FixedPoint_Format(1,prec2,False),
                       rounding_mode = ML_RoundTowardPlusInfty)
    z = Multiplication(y, inv_y, tag = "z",
                       precision = ML_Exact)
    dz =   Subtraction(z, one, tag = "dz",
                       precision = ML_Exact)


    # add the necessary goals and hints
    # dx is swapped for a fresh variable in every copied expression handed to
    # the Gappa engine
    dx_gappa = Variable("dx_gappa", interval = dx.get_interval(), precision = dx.get_precision())
    swap_map = {dx: dx_gappa}
    # goals (main goal: dz, the result of the argument reduction)
    gappa_code = self.gappa_engine.get_interval_code_no_copy(dz.copy(swap_map), bound_list = [dx_gappa])
    self.gappa_engine.add_goal(gappa_code, dy.copy(swap_map))
    self.gappa_engine.add_goal(gappa_code, s.copy(swap_map)) # range of index of table 1
    self.gappa_engine.add_goal(gappa_code, t.copy(swap_map)) # range of index of table 2
    # hints. are the ones with isApprox=True really necessary ?
    self.gappa_engine.add_hint(gappa_code, x.copy(swap_map), x1.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, y.copy(swap_map), y1.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, inv_x1.copy(swap_map), inv_x.copy(swap_map), isApprox = True)
    self.gappa_engine.add_hint(gappa_code, inv_y1.copy(swap_map), inv_y.copy(swap_map), isApprox = True)
    # x1 * (1/x1) -> 1 and y1 * (1/y1) -> 1, each guarded by a non-zero
    # condition; swap_map is read here after the copies above have been made
    # (presumably copy() records the copied nodes in it -- confirm).
    self.gappa_engine.add_hint(gappa_code,
                               Multiplication(x1, inv_x1, precision = ML_Exact).copy(swap_map), one,
                               Comparison(swap_map[inv_x1], Constant(0, precision = ML_Exact),
                                          specifier = Comparison.NotEqual, precision = ML_Bool))
    self.gappa_engine.add_hint(gappa_code,
                               Multiplication(y1, inv_y1, precision = ML_Exact).copy(swap_map), one,
                               Comparison(swap_map[inv_y1], Constant(0, precision = ML_Exact),
                                          specifier = Comparison.NotEqual, precision = ML_Bool))
    # NOTE(review): "toto" is not referenced by any expression built above;
    # presumably a placeholder hypothesis -- confirm it is still required.
    toto = Variable("toto", precision = ML_Binary64)
    self.gappa_engine.add_hypothesis(gappa_code, toto, Interval(0, S2**-52))

    # execute and parse the result
    result = execute_gappa_script_extract(gappa_code.get(self.gappa_engine))
    self.gappa_engine.clear_memoization_map() # avoid memory leak
    # number of table entries = largest reachable index + 1
    length_table1 = 1 + floor(sup(result['indexTableX'])).getConstantAsInt()
    length_table2 = 1 + floor(sup(result['indexTableY'])).getConstantAsInt()
    # (A disabled `if False and ...` consistency check that only printed debug
    # output and exited used to live here; it could never execute.)
    return {
      # arguments
      'size1': size1, 'prec1': prec1, 'size2': size2, 'prec2': prec2,
      # size of the tables
      'length_table1': length_table1,
      'length_table2': length_table2,
      # floor division keeps the integer byte count this code produced with
      # Python 2's `/`, whichever interpreter runs it
      'sizeof_table1': length_table1 * (16 + ML_Custom_FixedPoint_Format(1,prec1,False).get_c_bit_size()//8),
      'sizeof_table2': length_table2 * (16 + ML_Custom_FixedPoint_Format(1,prec2,False).get_c_bit_size()//8),
      # intervals
      'in_interval': dx.get_interval(),
      'mid_interval': result['dy'],
      'out_interval': result['goal'],
    }

예제 #18

파일 보기

파일: ml2_exp.py 프로젝트: IanBriggs/OpTuner

        def generate_reduction_fptaylor(x):
            """Bound the error of the reduced exp scheme over the interval x.

            The reduction index k = floor(x * n_invlog2) must be the same at
            both endpoints of x (the pair k_low == -1 with sup(x) == 0 is also
            accepted). The rounding error is obtained from an FPTaylor query
            and the approximation error of the polynomial against exp over
            r = x - k * n_log2 from sollya.supnorm.

            Args:
                x: sollya interval to analyse.

            Returns:
                Tuple (rel_err, abs_err). Either component is float("inf")
                when the matching rounding-error bound could not be obtained;
                rel_err is also float("inf") when supnorm reports "error".

            Raises:
                AssertionError: if x straddles a multiple of log(2).
            """

            # get k, must be the same at endpoints
            unround_k = x * n_invlog2
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if not (k_low == k_high or (k_low == -1 and sollya.sup(x) == 0)):
                assert False, "Interval must not straddle multples of log(2)"
            k = int(k_low)
            r = x - k * n_log2

            twok = 2**k

            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  whole rnd64= {} * {};".format(k, n_log2),
                "  r rnd64= x - whole;", "  poly rnd64= {};".format(poly_expr),
                "  retval rnd64= poly*{};".format(twok), "Expressions",
                "  retval;"
            ])

            # First attempt: ask FPTaylor for both error kinds at once.
            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                # the relative error entry was missing; the absolute one may
                # still be present in the same result
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            # Second attempt: absolute error only.
            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            # Approximation (algorithmic) error of the polynomial over r.
            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), r, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), r, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            # Both FPTaylor runs may fail and leave rnd_abs_err as None; report
            # an unbounded error, as done for rel_err, instead of raising
            # TypeError on `None + ...`.
            if rnd_abs_err is None:
                abs_err = float("inf")
            else:
                abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err

예제 #19

파일 보기

파일: ml2_exp.py 프로젝트: IanBriggs/OpTuner

        def split_domain(starting_domain, slivers):
            """Cut starting_domain into sub-intervals suited to the reduction.

            The domain is first split at zero, then at every point where
            floor(x * n_invlog2) changes, and finally each piece is halved
            `slivers` times. The returned list holds the distinct,
            non-degenerate pieces sorted by lower bound.
            """
            # Stage 1: separate the negative and positive sides.
            pending = []
            for dom in [starting_domain]:
                lo, hi = sollya.inf(dom), sollya.sup(dom)
                if lo < 0 and hi > 0:
                    pending.append(sollya.Interval(lo, 0))
                    pending.append(sollya.Interval(0, hi))
                else:
                    pending.append(dom)

            # Stage 2: make the reduction index constant on each piece.
            settled = []
            while pending:
                dom = pending.pop()
                scaled = dom * n_invlog2
                k_lo = sollya.floor(sollya.inf(scaled))
                k_hi = sollya.floor(sollya.sup(scaled))
                if k_lo == k_hi or (k_lo == -1 and k_hi == 0):
                    settled.append(dom)
                    continue

                # Bisect for the point where the index leaves k_lo, starting
                # from the image of [k_lo, k_lo + 1.5] under n_log2.
                bracket = sollya.Interval(k_lo, k_lo + 1.5) * n_log2
                for _ in range(100):
                    pivot = sollya.mid(bracket)
                    if sollya.floor(pivot * n_invlog2) == k_lo:
                        bracket = sollya.Interval(pivot, sollya.sup(bracket))
                    else:
                        bracket = sollya.Interval(sollya.inf(bracket), pivot)

                below = sollya.Interval(sollya.inf(dom), sollya.inf(bracket))
                above = sollya.Interval(sollya.sup(bracket), sollya.sup(dom))
                # pushed last, so the lower piece is examined first
                pending.append(above)
                pending.append(below)

            # Stage 3: halve every piece `slivers` times.
            pieces = settled
            for _ in range(slivers):
                halved = []
                for dom in pieces:
                    centre = sollya.mid(dom)
                    halved.append(sollya.Interval(sollya.inf(dom), centre))
                    halved.append(sollya.Interval(centre, sollya.sup(dom)))
                pieces = halved

            # Stage 4: deduplicate, order by lower bound, drop point intervals.
            ordered = sorted(set(pieces), key=lambda x: float(sollya.inf(x)))
            return [d for d in ordered if sollya.inf(d) != sollya.sup(d)]

예제 #20

파일 보기

    def generate_scheme(self):
        """Build a small fixed-point graph and check its range evaluation.

        Three ranged inputs (x, y, z) feed a handful of operations; for each
        checked node the interval returned by evaluate_range is printed and
        asserted equal to the interval computed by hand here.
        """
        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        # node -> interval that evaluate_range is expected to return
        reference = {}

        def add_ranged_input(name, lower, upper):
            # declare an input signal, attach its interval and record it
            signal = self.implementation.add_input_signal(name, input_precision)
            bounds = Interval(lower, upper)
            signal.set_interval(bounds)
            reference[signal] = bounds
            return signal, bounds

        var_x, x_interval = add_ranged_input("x", -10.3, 10.7)
        var_y, y_interval = add_ranged_input("y", -17.9, 17.2)
        var_z, z_interval = add_ranged_input("z", -7.3, 7.7)

        # constant node
        cst = Constant(42.5, tag="cst")
        reference[cst] = Interval(42.5)

        # rounding nodes
        conv_ceil = Ceil(var_x, tag="ceil")
        reference[conv_ceil] = sollya.ceil(x_interval)

        conv_floor = Floor(var_y, tag="floor")
        reference[conv_floor] = sollya.floor(y_interval)

        # product z * x
        mult = var_z * var_x
        mult.set_tag("mult")
        mult_interval = z_interval * x_interval
        reference[mult] = mult_interval

        # (x + y) - z * x
        large_add = (var_x + var_y) - mult
        large_add.set_attributes(tag="large_add")
        large_add_interval = (x_interval + y_interval) - mult_interval
        reference[large_add] = large_add_interval

        # clamp to [0, 13]
        reduced_result = Max(0, Min(large_add, 13))
        reduced_result.set_tag("reduced_result")
        clamped_above = interval_min(large_add_interval, Interval(13))
        reduced_result_interval = interval_max(Interval(0), clamped_above)
        reference[reduced_result] = reduced_result_interval

        # selection between the clamped value and z
        select_result = Select(var_x > var_y,
                               reduced_result,
                               var_z,
                               tag="select_result")
        reference[select_result] = interval_union(reduced_result_interval,
                                                  z_interval)

        # checking interval evaluation
        checked_nodes = [
            cst, var_x, var_y, mult, large_add, reduced_result,
            select_result, conv_ceil, conv_floor
        ]
        for node in checked_nodes:
            computed = evaluate_range(node)
            wanted = reference[node]
            print("{}: {} vs expected {}".format(node.get_tag(), computed,
                                                 wanted))
            assert computed is not None
            assert computed == wanted

        return [self.implementation]

예제 #21

파일 보기

    def generate_scheme(self):
        """Build the operation graph of a floating-point adder.

        Two inputs "x" and "y" of format self.precision are declared, their
        sign / exponent / mantissa fields are extracted, the mantissa of y is
        aligned against the mantissa of x, both are added (x is negated when
        the signs differ), the sum is normalised with a leading-zero count,
        rounded to nearest (ties to even) and re-assembled into the output
        signal "vr_out".

        No explicit handling of special values (NaN, infinities, subnormals)
        is visible in this method.

        Returns:
            A one-element list containing self.implementation.
        """

        def get_virtual_cst(prec, value, language):
            """Return the constant of prec's support format encoding value.

            Not called anywhere in this method.
            """
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))
        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = self.precision
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # the reset signal is declared on the entity but not used below
        reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        base_precision = self.precision.get_base_format()
        # p: mantissa size of the base format
        p = base_precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shift amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)
        exp_precision     = ML_StdLogicVectorFormat(base_precision.get_exponent_size())

        mant_precision    = ML_StdLogicVectorFormat(base_precision.get_mantissa_size())

        mant_vx = MantissaExtraction(vx, precision = mant_precision)
        mant_vy = MantissaExtraction(vy, precision = mant_precision)

        exp_vx = RawExponentExtraction(vx, precision = exp_precision)
        exp_vy = RawExponentExtraction(vy, precision = exp_precision)

        sign_vx = CopySign(vx, precision = ML_StdLogic)
        sign_vy = CopySign(vy, precision = ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        effective_op = BitLogicXor(sign_vx, sign_vy, precision = ML_StdLogic, tag = "effective_op", debug=debug_std)

        ## Wrapper for zero extension
        # @param op the input operation tree
        # @param s integer size of the extension
        # @return the Zero extended operation node
        def zext(op,s):
          op_size = op.get_precision().get_bit_size()
          ext_precision  = ML_StdLogicVectorFormat(op_size + s)
          return ZeroExt(op, s, precision = ext_precision)
        ## Generate the right zero extended output from @p optree
        # (concatenates ext_size zero bits after @p optree)
        def rzext(optree, ext_size):
          op_size = optree.get_precision().get_bit_size()
          ext_format = ML_StdLogicVectorFormat(ext_size)
          out_format = ML_StdLogicVectorFormat(op_size + ext_size)
          return Concatenation(optree, Constant(0, precision = ext_format), precision = out_format)

        exp_bias = p + 2
        exp_precision_ext = fixed_point(base_precision.get_exponent_size() + 2, 0)
        # exp_precision is rebound here: from now on it is an unsigned
        # fixed-point format used to reinterpret the raw exponent vectors
        exp_precision = fixed_point(base_precision.get_exponent_size(), 0, signed=False)
        # Y is first aligned p+2 bit to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + precision + 2
        # exp_vx in [emin, emax]
        # exp_vx - exp_vy + p + 2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(
            Addition(
                TypeCast(exp_vx, precision=exp_precision),
                Constant(exp_bias, precision=exp_precision_ext),
            ),
            TypeCast(exp_vy, precision=exp_precision),
        )
        exp_diff_lt_0 = Comparison(exp_diff, Constant(0, precision=exp_precision_ext), specifier = Comparison.Less, precision = ML_Bool)
        exp_diff_gt_2pp4 = Comparison(exp_diff, Constant(2*p+4, precision = exp_precision_ext), specifier = Comparison.Greater, precision = ML_Bool)

        # number of bits needed to encode a shift amount up to 2*p+4
        shift_amount_size = int(floor(log2(2*p+4))+1)
        shift_amount_prec = ML_StdLogicVectorFormat(shift_amount_size)

        # shift amount: 0 when exp_diff < 0, 2*p+4 when exp_diff > 2*p+4,
        # exp_diff otherwise
        mant_shift = Select(
          exp_diff_lt_0,
          0,
          Select(
            exp_diff_gt_2pp4,
            Constant(2*p+4),
            exp_diff,
          ),
          tag = "mant_shift",
          debug = debug_dec
        )

        # convert the shift amount to an unsigned integer of
        # shift_amount_size bits, then view it as a bit vector
        mant_shift = TypeCast(
            Conversion(mant_shift, precision=fixed_point(shift_amount_size, 0, signed=False)),
            precision=shift_amount_prec
        )

        mant_ext_size = 2*p+4
        shift_prec = ML_StdLogicVectorFormat(3*p+4)
        # mantissa of y, padded with 2*p+4 zeros on the right then shifted right
        shifted_mant_vy = BitLogicRightShift(rzext(mant_vy, mant_ext_size), mant_shift, precision = shift_prec, tag = "shifted_mant_vy", debug = debug_std)
        # mantissa of x, padded with p+2 zeros on the right and p+3 on the left
        mant_vx_ext = zext(rzext(mant_vx, p+2), p+2+1)
        mant_vx_ext.set_attributes(tag="mant_vx_ext")

        add_prec = ML_StdLogicVectorFormat(3*p+5)

        # operand derived from x: negated for an effective subtraction
        mant_vx_add_op = Select(
          Comparison(
            effective_op,
            Constant(1, precision = ML_StdLogic),
            precision = ML_Bool,
            specifier = Comparison.Equal
          ),
          Negation(mant_vx_ext, precision = add_prec, tag = "neg_mant_vx"),
          mant_vx_ext,
          precision = add_prec,
          tag = "mant_vx_add_op",
          debug=debug_cst_dec
        )


        mant_add = UnsignedAddition(
                     zext(shifted_mant_vy, 1),
                     mant_vx_add_op,
                     precision = add_prec,
                     tag = "mant_add",
                     debug=debug_std
                  )

        # if the addition overflows, then it meant vx has been negated and
        # the 2's complement addition cancelled the negative MSB, thus
        # the addition result is positive, and the result is of the sign of Y
        # else the result is of opposite sign to Y
        add_is_negative = BitLogicAnd(
            CopySign(mant_add, precision = ML_StdLogic),
            effective_op,
            precision = ML_StdLogic,
            tag = "add_is_negative",
            debug = debug_std
          )
        # Negate mantissa addition result if it is negative
        mant_add_abs = Select(
          Comparison(
            add_is_negative,
            Constant(1, precision = ML_StdLogic),
            specifier = Comparison.Equal,
            precision = ML_Bool
          ),
          Negation(mant_add, precision = add_prec, tag = "neg_mant_add"),
          mant_add,
          precision = add_prec,
          tag = "mant_add_abs"
        )

        # result sign: sign of y, flipped when the sum came out negative
        res_sign = BitLogicXor(add_is_negative, sign_vy, precision = ML_StdLogic, tag = "res_sign")

        # Precision for leading zero count
        # (number of bits needed to encode a count up to 3*p+5)
        lzc_width = int(floor(log2(3*p+5)) + 1)
        lzc_prec = ML_StdLogicVectorFormat(lzc_width)


        add_lzc = CountLeadingZeros(
            mant_add_abs,
            precision=lzc_prec,
            tag="add_lzc",
            debug=debug_dec_unsigned
        )

        #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec)
        # CP stands for close path, the data path where X and Y are within 1 exp diff
        # NOTE(review): the leading-zero count is taken on mant_add_abs but the
        # left shift is applied to mant_add, not mant_add_abs -- confirm that
        # this is intended when the sum is negative.
        res_normed_mant = BitLogicLeftShift(mant_add, add_lzc, precision = add_prec, tag = "res_normed_mant", debug = debug_std)
        # p-1 bits of the normalised sum, indices 2*p+5 to 3*p+3
        pre_mant_field = SubSignalSelection(res_normed_mant, 2*p+5, 3*p+3, precision = ML_StdLogicVectorFormat(p-1))

        ## Helper function to extract a single bit
        #  from a vector of bits signal
        def BitExtraction(optree, index, **kw):
            return VectorElementSelection(optree, index, precision = ML_StdLogic, **kw)
        ## Helper building an ML_Integer constant node
        def IntCst(value):
            return Constant(value, precision = ML_Integer)

        # round bit: bit just below the kept field; mant_lsb: lowest kept bit
        round_bit = BitExtraction(res_normed_mant, IntCst(2*p+4))
        mant_lsb  = BitExtraction(res_normed_mant, IntCst(2*p+5))
        # sticky bit: set when any of the bits 0 .. 2*p+3 is non-zero
        sticky_prec = ML_StdLogicVectorFormat(2*p+4)
        sticky_input = SubSignalSelection(
          res_normed_mant, 0, 2*p+3,
          precision =  sticky_prec
        )
        sticky_bit = Select(
          Comparison(
            sticky_input,
            Constant(0, precision = sticky_prec),
            specifier = Comparison.NotEqual,
            precision = ML_Bool
          ),
          Constant(1, precision = ML_StdLogic),
          Constant(0, precision = ML_StdLogic),
          precision = ML_StdLogic,
          tag = "sticky_bit",
          debug = debug_std
        )

        # increment selection for rounding to nearest (tie to even)
        # increment = round_bit and (sticky_bit or mant_lsb)
        round_increment_RN = BitLogicAnd(
          round_bit,
          BitLogicOr(
            sticky_bit,
            mant_lsb,
            precision = ML_StdLogic
          ),
          precision = ML_StdLogic,
          tag = "round_increment_RN",
          debug = debug_std
        )

        # the field is widened by one bit so a carry out of the increment
        # remains visible
        rounded_mant = UnsignedAddition(
          zext(pre_mant_field, 1),
          round_increment_RN,
          precision = ML_StdLogicVectorFormat(p),
          tag = "rounded_mant",
          debug = debug_std
        )
        # top bit of the widened sum: carry produced by the rounding increment
        rounded_overflow = BitExtraction(
          rounded_mant,
          IntCst(p-1),
          tag = "rounded_overflow",
          debug = debug_std
        )
        # on rounding overflow keep bits 1 .. p-1, otherwise bits 0 .. p-2
        res_mant_field = Select(
          Comparison(
            rounded_overflow,
            Constant(1, precision = ML_StdLogic),
            specifier = Comparison.Equal,
            precision = ML_Bool
          ),
          SubSignalSelection(rounded_mant, 1, p-1),
          SubSignalSelection(rounded_mant, 0, p-2),
          precision = ML_StdLogicVectorFormat(p-1),
          tag = "final_mant",
          debug = debug_std
        )

        res_exp_prec_size = base_precision.get_exponent_size() + 2
        res_exp_prec = ML_StdLogicVectorFormat(res_exp_prec_size)

        # extended result exponent:
        #   exp_vx + (3 + p) - add_lzc + rounded_overflow
        res_exp_ext = UnsignedAddition(
          UnsignedSubtraction(
            UnsignedAddition(
              zext(exp_vx, 2),
              Constant(3+p, precision = res_exp_prec),
              precision = res_exp_prec
            ),
            zext(add_lzc, res_exp_prec_size - lzc_width),
            precision = res_exp_prec
          ),
          rounded_overflow,
          precision = res_exp_prec,
          tag = "res_exp_ext",
          debug = debug_std
        )

        # truncate the extended exponent back to the exponent field width
        res_exp = Truncate(res_exp_ext, precision = ML_StdLogicVectorFormat(base_precision.get_exponent_size()), tag = "res_exp", debug = debug_dec)

        # assemble sign, exponent and mantissa fields into the output value
        vr_out = TypeCast(
          FloatBuild(
            res_sign,
            res_exp,
            res_mant_field,
            precision = base_precision,
          ),
          precision = io_precision,
          tag = "result",
          debug = debug_std
        )

        self.implementation.add_output_signal("vr_out", vr_out)

        return [self.implementation]

예제 #22

파일 보기

파일: ml_vectorizable_log.py 프로젝트: nibrunie/metalibm

  def generate_scheme(self):
    """Produce an abstract scheme for the logarithm.

    This abstract scheme will be used by the code generation backend.
    """
    if self.precision not in [ML_Binary32, ML_Binary64]:
        Log.report(Log.Error, "The demanded precision is not supported")

    vx = self.implementation.add_input_variable("x", self.precision)


    def default_bool_convert(optree, precision=None, **kw):
        return bool_convert(optree, precision, -1, 0, **kw) \
                if isinstance(self.processor, VectorBackend) \
                else bool_convert(optree, precision, 1, 0, **kw)

    precision = self.precision.sollya_object
    int_prec = self.precision.get_integer_format()
    Log.report(Log.Info, "int_prec is %s" % int_prec)
    uint_prec = self.precision.get_unsigned_integer_format()


    Log.report(Log.Info, "MDL constants")
    cgpe_scheme_idx = int(self.cgpe_index)
    table_index_size = int(self.tbl_index_size)
    #
    table_nb_elements = 2**(table_index_size)
    table_dimensions = [2*table_nb_elements]  # two values are stored for each element
    field_size = Constant(self.precision.get_field_size(),
                          precision = int_prec,
                          tag = 'field_size')
    if self.log_radix == EXP_1:
      log2_hi = Constant(
        round(log(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log(2) - round(log(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    elif self.log_radix == 10:
      log2_hi = Constant(
        round(log10(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log10(2) - round(log10(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    # ... if log_radix == '2' then log2(2) == 1

    # subnormal_mask aims at trapping positive subnormals except zero.
    # That's why we will subtract 1 to the integer bitstring of the input, and
    # then compare for Less (strict) the resulting integer bitstring to this
    # mask, e.g.  0x7fffff for binary32.
    if self.no_subnormal == False:
      subnormal_mask = Constant((1 << self.precision.get_field_size()) - 1,
                                precision = int_prec, tag = 'subnormal_mask')
    fp_one = Constant(1.0, precision = self.precision, tag = 'fp_one')
    fp_one_as_uint = TypeCast(fp_one, precision = uint_prec,
                              tag = 'fp_one_as_uint')
    int_zero = Constant(0, precision = int_prec, tag = 'int_zero')
    int_one  = Constant(1, precision = int_prec, tag = 'int_one')
    table_mantissa_half_ulp = Constant(
            1 << (self.precision.field_size - table_index_size - 1),
            precision = int_prec
            )
    table_s_exp_index_mask = Constant(
            ~((table_mantissa_half_ulp.get_value() << 1) - 1),
            precision = uint_prec
            )

    Log.report(Log.Info, "MDL table")
    # The table holds approximations of -log(2^tau * r_i) so we first compute
    # the index value for which tau changes from 1 to 0.
    cut = sqrt(2.)
    tau_index_limit = floor(table_nb_elements * (2./cut - 1))
    sollya_logtbl = [
      (-log1p(float(i) / table_nb_elements)
      + (0 if i <= tau_index_limit else log(2.))) / log(self.log_radix)
      for i in range(table_nb_elements)
    ]
    # ...
    init_logtbl_hi = [
            round(sollya_logtbl[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl_lo = [
            round(sollya_logtbl[i] - init_logtbl_hi[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl = [tmp[i] for i in range(len(init_logtbl_hi)) for tmp in [init_logtbl_hi, init_logtbl_lo]]
    log1p_table = ML_NewTable(dimensions = table_dimensions,
                              storage_precision = self.precision,
                              init_data = init_logtbl,
                              tag = 'ml_log1p_table')
    # ...
    if self.no_rcp:
      sollya_rcptbl = [
        (1/((1+float(i)/table_nb_elements)+2**(-1-int(self.tbl_index_size))))
        for i in range(table_nb_elements)
      ]
      init_rcptbl = [
            round(sollya_rcptbl[i],
                  int(self.tbl_index_size)+1, # self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
      ]
      rcp_table = ML_NewTable(dimensions = [table_nb_elements],
                              storage_precision = self.precision,
                              init_data = init_rcptbl,
                              tag = 'ml_rcp_table')
    # ...

    Log.report(Log.Info, 'MDL unified subnormal handling')
    vx_as_int = TypeCast(vx, precision = int_prec, tag = 'vx_as_int')
    if self.no_subnormal == False:
      vx_as_uint = TypeCast(vx, precision = uint_prec, tag = 'vx_as_uint')
      # Avoid the 0.0 case by subtracting 1 from vx_as_int
      tmp = Comparison(vx_as_int - 1, subnormal_mask,
                       specifier = Comparison.Less)
      is_subnormal = default_bool_convert(
        tmp, # Will catch negative values as well as NaNs with sign bit set
        precision = int_prec)
      is_subnormal.set_attributes(tag = "is_subnormal")
      if not(isinstance(self.processor, VectorBackend)):
        is_subnormal = Subtraction(Constant(0, precision = int_prec),
                                   is_subnormal,
                                   precision = int_prec)

      #################################################
      # Vectorizable integer based subnormal handling #
      #################################################
      # 1. lzcnt
      # custom lzcount-like for subnormal numbers using FPU (see draft article)
      Zi = BitLogicOr(vx_as_uint, fp_one_as_uint, precision = uint_prec, tag="Zi")
      Zf = Subtraction(
        TypeCast(Zi, precision = self.precision),
        fp_one,
        precision = self.precision,
        tag="Zf")
      # Zf exponent is -(nlz(x) - exponent_size).
      # 2. compute shift value
      # Vectorial comparison on x86+sse/avx is going to look like
      # '|0x00|0xff|0x00|0x00|' and that's why we use Negate.
      # But for scalar code generation, comparison will rather be either 0 or 1
      # in C. Thus mask below won't be correct for a scalar implementation.
      # FIXME: Can we know the backend that will be called and choose in
      # consequence? Should we make something arch-agnostic instead?
      #
      n_value = BitLogicAnd(
        Addition(
          DirtyExponentExtraction(Zf, self.precision),
          Constant(
            self.precision.get_bias(),
            precision = int_prec),
          precision = int_prec),
        is_subnormal,
        precision = int_prec,
        tag = "n_value")
      alpha = Negation(n_value, tag="alpha")
      #
      # 3. shift left
      # renormalized_mantissa = BitLogicLeftShift(vx_as_int, value)
      normal_vx_as_int = BitLogicLeftShift(vx_as_int, alpha)
      # 4. set exponent to the right value
      # Compute the exponent to add : (p-1)-(value) + 1 = p-1-value
      # The final "+ 1" comes from the fact that once renormalized, the
      # floating-point datum has a biased exponent of 1
      #tmp0 = Subtraction(
      #        field_size,
      #        value,
      #        precision = int_prec,
      #        tag="tmp0")
      # Set the value to 0 if the number is not subnormal
      #tmp1 = BitLogicAnd(tmp0, is_subnormal)
      #renormalized_exponent = BitLogicLeftShift(
      #        tmp1,
      #        field_size
      #        )
    else: # no_subnormal == True
      normal_vx_as_int = vx_as_int
      
    #normal_vx_as_int = renormalized_mantissa + renormalized_exponent
    normal_vx = TypeCast(normal_vx_as_int, precision = self.precision,
                         tag = 'normal_vx')

    # alpha = BitLogicAnd(field_size, is_subnormal, tag = 'alpha')
    # XXX Extract the mantissa, see if this is supported in the x86 vector
    # backend or if it still uses the support_lib.
    vx_mantissa = MantissaExtraction(normal_vx, precision = self.precision)

    Log.report(Log.Info, "MDL scheme")
    if self.force_division == True:
      rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    elif self.no_rcp == False:
      rcp_m = ReciprocalSeed(vx_mantissa, precision = self.precision)
      if not self.processor.is_supported_operation(rcp_m):
        if self.precision == ML_Binary64:
          # Try using a binary32 FastReciprocal
          binary32_m = Conversion(vx_mantissa, precision = ML_Binary32)
          rcp_m = ReciprocalSeed(binary32_m, precision = ML_Binary32)
          rcp_m = Conversion(rcp_m, precision = ML_Binary64)
        if not self.processor.is_supported_operation(rcp_m):
          # FIXME An approximation table could be used instead but for vector
          # implementations another GATHER would be required.
          # However this may well be better than a division...
          rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    else: # ... use a look-up table
      rcp_shift = BitLogicLeftShift(normal_vx_as_int, self.precision.get_exponent_size() + 1)
      rcp_idx = BitLogicRightShift(rcp_shift, self.precision.get_exponent_size() + 1 + self.precision.get_field_size() - int(self.tbl_index_size))
      rcp_m = TableLoad(rcp_table, rcp_idx, tag = 'rcp_idx',
                        debug = debug_multi)
    #  
    rcp_m.set_attributes(tag = 'rcp_m')

    # exponent is normally either 0 or -1, since m is in [1, 2). Possible
    # optimization?
    # exponent = ExponentExtraction(rcp_m, precision = self.precision,
    #         tag = 'exponent')

    ri_round = TypeCast(
            Addition(
                TypeCast(rcp_m, precision = int_prec),
                table_mantissa_half_ulp,
                precision = int_prec
                ),
            precision = uint_prec
            )
    ri_fast_rndn = BitLogicAnd(
            ri_round,
            table_s_exp_index_mask,
            tag = 'ri_fast_rndn',
            precision = uint_prec
            )
    # u = m * ri - 1
    ul = None
    if self.no_rcp == True: # ... u does not fit on a single word
      tmp_u, tmp_ul = Mul211(vx_mantissa,         
                             TypeCast(ri_fast_rndn, precision = self.precision), 
                             fma = (self.no_fma == False))
      fp_minus_one = Constant(-1.0, precision = self.precision, tag = 'fp_minus_one')
      u, ul = Add212(fp_minus_one, tmp_u, tmp_ul)      
      u.set_attributes(tag='uh')
      ul.set_attributes(tag='ul')
    elif self.no_fma == False:
      u = FusedMultiplyAdd(
        vx_mantissa,
        TypeCast(ri_fast_rndn, precision = self.precision),
        fp_one,
        specifier = FusedMultiplyAdd.Subtract,
        tag = 'u')
    else: # disable FMA
      # tmph + tmpl = m * ri, where tmph ~ 1
      tmph, tmpl = Mul211(vx_mantissa,         
                          TypeCast(ri_fast_rndn, precision = self.precision), 
                          fma = False)
      # u_tmp = tmph - 1 ... exact due to Sterbenz
      u_tmp = Subtraction(tmph, fp_one, precision = self.precision)
      # u = u_tmp - tmpl ... exact since the result u is representable as a single word
      u = Addition(u_tmp, tmpl, precision = self.precision, tag = 'u')
    
    unneeded_bits = Constant(
            self.precision.field_size - table_index_size,
            precision=uint_prec,
            tag="unneeded_bits"
            )
    assert self.precision.field_size - table_index_size >= 0
    ri_bits = BitLogicRightShift(
            ri_fast_rndn,
            unneeded_bits,
            precision = uint_prec,
            tag = "ri_bits"
            )
    # Retrieve mantissa's MSBs + first bit of exponent, for tau computation in case
    # exponent is 0 (i.e. biased 127, i.e. first bit of exponent is set.).
    # In this particular case, i = 0 but tau is 1
    # table_index does not need to be as long as uint_prec might be,
    # try and keep it the size of size_t.
    size_t_prec = ML_UInt32
    signed_size_t_prec = ML_Int32
    table_index_mask = Constant(
            (1 << (table_index_size + 1)) - 1,
            precision = size_t_prec
            )
    table_index = BitLogicAnd(
            Conversion(ri_bits, precision = size_t_prec),
            table_index_mask,
            tag = 'table_index',
            precision = size_t_prec
            )
    # Compute tau using the tau_index_limit value.
    tmp = default_bool_convert(
            Comparison(
                TypeCast(table_index, precision = signed_size_t_prec),
                Constant(tau_index_limit, precision = signed_size_t_prec),
                specifier = Comparison.Greater
                if isinstance(self.processor, VectorBackend)
                else Comparison.LessOrEqual
                ),
            precision = signed_size_t_prec,
            tag="tmp"
            )
    # A true tmp will typically be -1 for VectorBackends, but 1 for standard C.
    tau = Conversion(
        Addition(tmp, Constant(1, precision=signed_size_t_prec), precision = signed_size_t_prec, tag="pre_add")
            if isinstance(self.processor, VectorBackend)
            else tmp,
            precision=int_prec,
            tag="pre_tau"
        )
    tau.set_attributes(tag = 'tau')
    # Update table_index: keep only table_index_size bits
    table_index_hi = BitLogicAnd(
            table_index,
            Constant((1 << table_index_size) - 1, precision = size_t_prec),
            precision = size_t_prec
            )
    # table_index_hi = table_index_hi << 1
    table_index_hi = BitLogicLeftShift(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_hi"
            )
    # table_index_lo = table_index_hi + 1
    table_index_lo = Addition(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_lo"
            )

    tbl_hi = TableLoad(log1p_table, table_index_hi, tag = 'tbl_hi',
                       debug = debug_multi)
    tbl_lo = TableLoad(log1p_table, table_index_lo, tag = 'tbl_lo',
                       debug = debug_multi)
    # Compute exponent e + tau - alpha, but first subtract the bias.
    if self.no_subnormal == False:
      tmp_eptau = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
      exponent = Subtraction(tmp_eptau, alpha, precision = int_prec)
    else:
      exponent = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
    #
    fp_exponent = Conversion(exponent, precision = self.precision,
                             tag = 'fp_exponent')

    Log.report(Log.Info, 'MDL polynomial approximation')
    if self.log_radix == EXP_1:
      sollya_function = log(1 + sollya.x)
    elif self.log_radix == 2:
      sollya_function = log2(1 + sollya.x)
    elif self.log_radix == 10:
      sollya_function = log10(1 + sollya.x)
    # ...
    if self.force_division == True: # rcp accuracy is 2^(-p)
      boundrcp = 2**(-self.precision.get_precision())
    else:
      boundrcp = 1.5 * 2**(-12)           # ... see Intel intrinsics guide
      if self.precision in [ML_Binary64]:
        if not self.processor.is_supported_operation(rcp_m):
          boundrcp = (1+boundrcp)*(1+2**(-24)) - 1
        else:
          boundrcp = 2**(-14)             # ... see Intel intrinsics guide
    arg_red_mag = boundrcp + 2**(-table_index_size-1) + boundrcp * 2**(-table_index_size-1)
    if self.no_rcp == False:
      approx_interval = Interval(-arg_red_mag, arg_red_mag)
    else:
      approx_interval = Interval(-2**(-int(self.tbl_index_size)+1),2**(-int(self.tbl_index_size)+1))
    max_eps = 2**-(2*(self.precision.get_field_size()))
    Log.report(Log.Info, "max acceptable error for polynomial = {}".format(float.hex(max_eps)))
    poly_degree = sup(
            guessdegree(
                sollya_function,
                approx_interval,
                max_eps,
                )
            )
    Log.report(Log.Info, "poly degree is ", poly_degree)
    if self.log_radix == EXP_1:
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(2, int(poly_degree) + 1), # Force 1st 2 coeffs to 0 and 1, resp.
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree - 1),
        approx_interval,
        sollya.absolute,
        0 + sollya._x_) # Force the first 2 coefficients to 0 and 1, resp.
    else: # ... == '2' or '10'
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(1, int(poly_degree) + 1), # Force 1st coeff to 0
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree),
        approx_interval,
        sollya.absolute,
        0) # Force the first coefficients to 0

    Log.report(Log.Info, str(poly_object))

    constant_precision = ML_SingleSingle if self.precision == ML_Binary32 \
            else ML_DoubleDouble if self.precision == ML_Binary64 \
            else None
    if is_cgpe_available():
        log1pu_poly = PolynomialSchemeEvaluator.generate_cgpe_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision, scheme_id = cgpe_scheme_idx
                )
    else:
        Log.report(Log.Warning,
                "CGPE not available, falling back to std poly evaluator")
        log1pu_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision
                )

    # XXX Dirty implementation of double-(self.precision) poly
    def dirty_poly_node_conversion(node, variable_h, variable_l, use_fma):
        return dirty_multi_node_expand(
          node, self.precision, mem_map={variable_h: (variable_h, variable_l)}, fma=use_fma)
    log1pu_poly_hi, log1pu_poly_lo = dirty_poly_node_conversion(log1pu_poly, u, ul,
                                                                use_fma=(self.no_fma == False))

    log1pu_poly_hi.set_attributes(tag = 'log1pu_poly_hi')
    log1pu_poly_lo.set_attributes(tag = 'log1pu_poly_lo')

    # Compute log(2) * (e + tau - alpha)
    if self.log_radix != 2: # 'e' or '10'
      log2e_hi, log2e_lo = Mul212(fp_exponent, log2_hi, log2_lo, 
                                  fma = (self.no_fma == False))
   
    # Add log1p(u)
    if self.log_radix != 2: # 'e' or '10'
      tmp_res_hi, tmp_res_lo = Add222(log2e_hi, log2e_lo,
                                      log1pu_poly_hi, log1pu_poly_lo)
    else:
      tmp_res_hi, tmp_res_lo = Add212(fp_exponent,
                                      log1pu_poly_hi, log1pu_poly_lo)

    # Add -log(2^(tau)/m) approximation retrieved by two table lookups
    logx_hi = Add122(tmp_res_hi, tmp_res_lo, tbl_hi, tbl_lo)[0]
    logx_hi.set_attributes(tag = 'logx_hi')

    scheme = Return(logx_hi, precision = self.precision)

    return scheme

예제 #23

파일 보기

파일: range_eval.py 프로젝트: metalibm/metalibm

    def generate_scheme(self):
        """Build the interval-evaluation self-check scheme.

        A collection of operation nodes is created (fixed-point input
        signals, a constant, ceil/floor, arithmetic, leading-zero count,
        min/max clamping, a select, and floating-point mantissa/exponent
        nodes).  For each node the interval it is expected to carry is
        stored, and ``evaluate_range`` is then asserted to return exactly
        that interval for every one of them.

        Returns:
            A one-element list containing ``self.implementation``.
        """
        integer_bits = 3
        fraction_bits = self.width - integer_bits

        in_format = fixed_point(integer_bits, fraction_bits)
        # Constructed as in the reference code although nothing reads it.
        out_format = fixed_point(integer_bits, fraction_bits)

        # node -> interval that evaluate_range is expected to report for it
        expected_interval = {}

        def expect(node, node_range):
            """Store node_range as the expected interval of node; return node."""
            expected_interval[node] = node_range
            return node

        def bounded_input(signal_name, signal_range):
            """Declare an input signal constrained to signal_range."""
            signal = self.implementation.add_input_signal(signal_name, in_format)
            signal.set_interval(signal_range)
            return expect(signal, signal_range)

        # main input signals and their declared ranges
        x_range = Interval(-10.3,10.7)
        var_x = bounded_input("x", x_range)

        y_range = Interval(-17.9,17.2)
        var_y = bounded_input("y", y_range)

        z_range = Interval(-7.3,7.7)
        var_z = bounded_input("z", z_range)

        cst = expect(Constant(42.5, tag = "cst"), Interval(42.5))

        # rounding operations
        ceil_node = expect(Ceil(var_x, tag = "ceil"), sollya.ceil(x_range))
        floor_node = expect(Floor(var_y, tag = "floor"), sollya.floor(y_range))

        # z * x
        product = var_z * var_x
        product.set_tag("mult")
        product_range = z_range * x_range
        expected_interval[product] = product_range

        # (x + y) - z * x
        wide_sum = (var_x + var_y) - product
        wide_sum.set_attributes(tag = "large_add")
        wide_sum_range = (x_range + y_range) - product_range
        expected_interval[wide_sum] = wide_sum_range

        var_x_lzc = expect(
            CountLeadingZeros(var_x, tag="var_x_lzc"),
            Interval(0, in_format.get_bit_size())
        )

        # clamp the wide sum into [0, 13]
        clamped = Max(0, Min(wide_sum, 13))
        clamped.set_tag("reduced_result")
        clamped_range = interval_max(
            Interval(0),
            interval_min(wide_sum_range, Interval(13))
        )
        expected_interval[clamped] = clamped_range

        selected = Select(
            var_x > var_y,
            clamped,
            var_z,
            tag = "select_result"
        )
        expected_interval[selected] = interval_union(clamped_range, z_range)

        # floating-point operations on mantissa and exponent
        fp_x_range = Interval(-0.01, 100)

        unbound_fp_var = Variable("fp_x", precision=ML_Binary32, interval=fp_x_range)
        mant_fp_x = MantissaExtraction(unbound_fp_var, tag="mant_fp_x", precision=ML_Binary32)
        exp_fp_x = ExponentExtraction(unbound_fp_var, tag="exp_fp_x", precision=ML_Int32)
        ins_exp_fp_x = ExponentInsertion(exp_fp_x, tag="ins_exp_fp_x", precision=ML_Binary32)

        expected_interval[unbound_fp_var] = fp_x_range
        exponent_low = sollya.floor(sollya.log2(sollya.inf(abs(fp_x_range))))
        exponent_high = sollya.floor(sollya.log2(sollya.sup(abs(fp_x_range))))
        exponent_range = Interval(exponent_low, exponent_high)
        expected_interval[exp_fp_x] = exponent_range
        expected_interval[mant_fp_x] = Interval(1, 2)
        expected_interval[ins_exp_fp_x] = Interval(
            S2**sollya.inf(exponent_range),
            S2**sollya.sup(exponent_range)
        )

        # compare the evaluated range of every node with its expectation
        checked_nodes = [
            var_x_lzc, exp_fp_x, unbound_fp_var, mant_fp_x, ins_exp_fp_x,
            cst, var_x, var_y, product, wide_sum, clamped, selected,
            ceil_node, floor_node,
        ]
        for node in checked_nodes:
            computed = evaluate_range(node)
            reference = expected_interval[node]
            print("{}: {}".format(node.get_tag(), computed))
            print("  vs expected {}".format(reference))
            assert computed is not None
            assert computed == reference

        return [self.implementation]

예제 #24

파일 보기

  def generate_argument_reduction(self, memory_limit):
    """Return the argument-reduction parameters used by the scheme.

    As written, this method always returns the result of
    ``self.eval_argument_reduction(6, 10, 12, 13)`` (the arguments are
    size1, prec1, size2, prec2, going by the call made in the search loop
    below), extended with three extra keys:

      - 'sizeof_tables': 'sizeof_table1' + 'sizeof_table2'
      - 'degree_poly1':  4
      - 'degree_poly2':  8

    NOTE: the unconditional ``return`` right after those assignments makes
    the exhaustive parameter search that follows unreachable, so
    ``memory_limit`` is not read on the live path.  The search code is kept
    for reference.
    """
    best_arg_reduc = None

    # Hard-coded parameter set; the result is returned immediately.
    best_arg_reduc = self.eval_argument_reduction(6,10,12,13)
    best_arg_reduc['sizeof_tables'] = best_arg_reduc['sizeof_table1'] + best_arg_reduc['sizeof_table2']
    best_arg_reduc['degree_poly1'] = 4
    best_arg_reduc['degree_poly2'] = 8
    return best_arg_reduc
    # ---- everything below this point is unreachable (see the return above) ----
    # iterate through all possible parameters, and return the best argument reduction
    # the order of importance of the characteristics of a good argument reduction is:
    #   1- the argument reduction is valid
    #   2- the degrees of the polynomials obtained are minimal
    #   3- the memory used is minimal
    # An argument reduction is valid iff:
    #   - the memory used is less than memory_limit
    #   - y-1 and z-1  fit into a uint64_t
    #   - the second argument reduction should be useful (ie: it should add at least 1 bit to the argument reduction)
    # From those validity constraints we deduce some bounds on the parameters to reduce the space of values searched:
    # (note that those bounds are implied by, but not equivalent to, the constraints)
    #   size1 <= log2(memory_limit/17)                                       (memory_limit on the first table)
    #   prec1 < 13 + size1                                                   (y-1 fits into a uint64_t)
    #   size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)          (memory_limit on both tables)
    #   size2 >= 1 - log2(midinterval)                                       (second arg red should be useful)
    #   prec2 < 12 - prec1 - log2((y-y1)/y1),  for all possible y            (z-1 fits into a uint64_t)
    # note: it is hard to deduce a tight bound on prec2 from the last inequality
    # a good approximation is  size2 ~= max[for y]( - log2((y-y1)/y1)), but using it may eliminate valid arg reduc

    #self.eval_argument_reduction(12, 20, 22, 14)

    min_size1 = 1
    max_size1 = floor(log(memory_limit/17)/log(2)).getConstantAsInt()
    # size1 is scanned from the largest candidate down to min_size1
    for size1 in xrange(max_size1, min_size1-1, -1):
      
      min_prec1 = size1
      max_prec1 = 12 + size1
      for prec1 in xrange(min_prec1,max_prec1+1):
        
        # we need sizeof_table1 and mid_interval for the bound on size2 and prec2
        first_arg_reduc = self.eval_argument_reduction(size1, prec1, prec1, prec1)
        mid_interval = first_arg_reduc['mid_interval']
        sizeof_table1 = first_arg_reduc['sizeof_table1']

        # skip (size1, prec1) pairs whose intermediate interval is out of range
        if not(0 <= inf(mid_interval) and sup(mid_interval) < S2**(64 - 52 - prec1)):
          continue
        # skip pairs whose first table alone does not fit under memory_limit
        if not(first_arg_reduc['sizeof_table1'] < memory_limit):
          continue
        
        min_size2 = 1 - ceil(log(sup(mid_interval))/log(2)).getConstantAsInt()
        max_size2 = floor(log((memory_limit - sizeof_table1)/(17 * sup(mid_interval)))/log(2)).getConstantAsInt()
        # during execution of the prec2 loop, the interval of valid values for prec2 can shrink,
        # so min_prec2 and max_prec2 are set here and not just before the prec2 loop
        # (because they are modified inside the body of the loop, for the next iteration of size2)
        min_prec2 = 0
        max_prec2 = 12 + max_size2 - prec1
        for size2 in xrange(max_size2,min_size2-1,-1):
          
          max_prec2 = min(max_prec2, 12 + size2 - prec1)
          for prec2 in xrange(max_prec2,min_prec2-1,-1):
            
            #print '=====\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{})\t====='.format(size1,min_size1,max_size1,prec1,min_prec1,max_prec1,size2,min_size2,max_size2,prec2,min_prec2,max_prec2)
            #print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss #memory used by the programm

            arg_reduc = self.eval_argument_reduction(size1, prec1, size2, prec2)
            mid_interval = arg_reduc['mid_interval']
            out_interval = arg_reduc['out_interval']
            sizeof_tables = arg_reduc['sizeof_table1'] + arg_reduc['sizeof_table2']
            # output interval out of range: lower the prec2 ceiling for later size2 values
            if not(0 <= inf(out_interval) and sup(out_interval) < S2**(64-52-prec1-prec2)):
              max_prec2 = prec2 - 1
              continue
            # both tables together must fit under memory_limit
            if memory_limit < sizeof_tables:
              continue
            #assert(prec2 < 12 + size2 - prec1) # test the approximation size2 ~= max[for y]( - log2((y-y1)/y1))

            # guess the degree of the two polynomials (relative error <= 2^-52 and absolute error <= 2^-120)
            # note: we exclude zero from out_interval to not perturb sollya (log(1+x)/x is not well defined on 0)
            sollya_out_interval = Interval(S2**(-52-prec1-prec2), sup(out_interval))
            guess_degree_poly1 = guessdegree(log(1+sollya.x)/sollya.x, sollya_out_interval, S2**-52)
            guess_degree_poly2 = guessdegree(log(1+sollya.x), sollya_out_interval, S2**-120)
            # TODO: detect when guessdegree returns multiple possible degrees, and find the right one
            # (the two diagnostics below are disabled by the leading "False and")
            if False and inf(guess_degree_poly1) <> sup(guess_degree_poly1):
              print "improvable guess_degree_poly1:", guess_degree_poly1
            if False and inf(guess_degree_poly2) <> sup(guess_degree_poly2):
              print "improvable guess_degree_poly2:", guess_degree_poly2
            degree_poly1 = sup(guess_degree_poly1).getConstantAsInt() + 1
            degree_poly2 = sup(guess_degree_poly2).getConstantAsInt()
            
            # candidate needs a higher degree than the current best for either polynomial:
            # raise the prec2 floor and stop scanning prec2 for this size2
            if ((best_arg_reduc is not None)
            and (best_arg_reduc['degree_poly1'] < degree_poly1 or best_arg_reduc['degree_poly2'] < degree_poly2)):
              min_prec2 = prec2 + 1
              break

            # keep the candidate if it beats the current best, comparing in order:
            # degree_poly1, then degree_poly2, then total table size
            if ((best_arg_reduc is None)
             or (best_arg_reduc['degree_poly1'] > degree_poly1)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] > degree_poly2)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] == degree_poly2 and best_arg_reduc['sizeof_tables'] > sizeof_tables)):
              arg_reduc['degree_poly1'] = degree_poly1
              arg_reduc['degree_poly2'] = degree_poly2
              arg_reduc['sizeof_tables'] = sizeof_tables
              best_arg_reduc = arg_reduc
              #print "\n   --new best--  \n", arg_reduc, "\n"
    #print "\nBest arg reduc: \n", best_arg_reduc, "\n"
    return best_arg_reduc

예제 #25

파일 보기

    def generate_scalar_scheme(self, vx):
        """Build the scalar evaluation scheme for erf, evaluated on abs(vx).

        The returned scheme is a cascade of conditions on ``abs(vx)``:

          - below ``eps`` (``S2**eps_exp``): a polynomial with odd monomial
            degrees, evaluated as ``poly(|x|) * |x| + |x|``;
          - below ``near_indexing.get_max_bound()``: the approximation built
            by ``generic_poly_split`` over the near indexing;
          - below ``medium_indexing.get_max_bound()``: the approximation
            built by ``generic_poly_split`` over the medium indexing;
          - otherwise: the constant 1.0.

        Args:
            vx: input operation node.

        Returns:
            The top-level ``ConditionBlock`` of the scheme.

        Raises:
            KeyError: if ``self.precision`` is neither ``ML_Binary32`` nor
                ``ML_Binary64`` (the empirical threshold table only covers
                those two formats).
        """
        abs_vx = Abs(vx, precision=self.precision)

        FCT_LIMIT = 1.0

        # threshold searched in [1.0, 10.0] for erf against FCT_LIMIT; its
        # exponent bounds the medium indexing below
        one_limit = search_bound_threshold(sollya.erf, FCT_LIMIT, 1.0, 10.0,
                                           self.precision)
        one_limit_exp = int(sollya.floor(sollya.log2(one_limit)))
        Log.report(Log.Debug, "erf(x) = 1.0 limit is {}, with exp={}",
                   one_limit, one_limit_exp)

        # empirical numbers
        eps_exp = {ML_Binary32: -3, ML_Binary64: -5}[self.precision]
        eps = S2**eps_exp

        # error target shared by the degree guess and both piecewise
        # approximations
        eps_target = S2**-(self.precision.get_field_size() + 5)

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(0, eps)
        # the function approximated on [0, eps] is erf(x) - x, using odd
        # monomial degrees only; x is added back by the final FMA
        approx_fct = sollya.erf(sollya.x) - (sollya.x)
        poly_degree = int(
            sup(guessdegree(approx_fct, approx_interval, eps_target))) + 1

        poly_degree_list = list(range(1, poly_degree, 2))
        Log.report(Log.Debug, "poly_degree is {} and list {}", poly_degree,
                   poly_degree_list)
        global_poly_object = Polynomial.build_from_approximation(
            approx_fct, poly_degree_list,
            [self.precision] * len(poly_degree_list), approx_interval,
            sollya.relative)
        Log.report(
            Log.Debug, "inform is {}",
            dirtyinfnorm(approx_fct - global_poly_object.get_sollya_object(),
                         approx_interval))
        # NOTE(review): presumably drops one power of x so that the FMA
        # below can multiply it back -- confirm against Polynomial.sub_poly
        poly_object = global_poly_object.sub_poly(start_index=1, offset=1)

        pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, abs_vx, unified_precision=self.precision)

        # result = pre_poly * |x| + |x|
        result = FMA(pre_poly, abs_vx, abs_vx)
        result.set_attributes(tag="result", debug=debug_multi)

        def offset_function(fct):
            """Return a builder mapping an offset to fct(x + offset)."""
            return lambda offset: fct(sollya.x + offset)

        near_indexing = SubFPIndexing(eps_exp, 0, 6, self.precision)
        near_approx = generic_poly_split(offset_function(sollya.erf),
                                         near_indexing, eps_target,
                                         self.precision, abs_vx)
        near_approx.set_attributes(tag="near_approx", debug=debug_multi)

        medium_indexing = SubFPIndexing(1, one_limit_exp, 7, self.precision)

        medium_approx = generic_poly_split(offset_function(sollya.erf),
                                           medium_indexing, eps_target,
                                           self.precision, abs_vx)
        medium_approx.set_attributes(tag="medium_approx", debug=debug_multi)

        # approximation for positive values
        scheme = ConditionBlock(
            abs_vx < eps, Return(result),
            ConditionBlock(
                abs_vx < near_indexing.get_max_bound(), Return(near_approx),
                ConditionBlock(abs_vx < medium_indexing.get_max_bound(),
                               Return(medium_approx),
                               Return(Constant(1.0,
                                               precision=self.precision)))))
        return scheme

예제 #26

파일 보기

  def generate_scheme(self):
    memory_limit = 2500

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = input_var
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    ### Constants computations ###

    v_log2_hi = nearestint(log(2) * 2**-52) * 2**52
    v_log2_lo = round(log(2) - v_log2_hi, 64+53, sollya.RN)
    log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi")
    log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo")
   
    print "\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)"
    arg_reduc = self.generate_argument_reduction(memory_limit)

    print "\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory".format(arg_reduc['size1'],arg_reduc['prec1'],arg_reduc['size2'],arg_reduc['prec2'],arg_reduc['degree_poly1'],arg_reduc['degree_poly2'],arg_reduc['sizeof_tables']) 
    
    print "\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table1'], arg_reduc['sizeof_table1'])
    inv_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False),
                           tag = self.uniquify_name("inv_table_1"))
    log_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_1"))
    for i in xrange(0, arg_reduc['length_table1']-1):
      x1 = 1 + i/S2*arg_reduc['size1']
      inv_x1 = ceil(S2**arg_reduc['prec1']/x1)*S2**arg_reduc['prec1']
      log_x1 = floor(log(x1) * S2**(128-11))*S2**(11-128)
      inv_table_1[i] = inv_x1 #Constant(inv_x1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False))
      log_table_1[i] = log_x1 #Constant(log_x1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))

    print "\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table2'], arg_reduc['sizeof_table2'])
    inv_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False),
                           tag = self.uniquify_name("inv_table_2"))
    log_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_2"))
    for i in xrange(0, arg_reduc['length_table2']-1):
      y1 = 1 + i/S2**arg_reduc['size2']
      inv_y1 = ceil(S2**arg_reduc['prec2']/x1) * S2**arg_reduc['prec2']
      log_y1 = floor(log(inv_y1) * S2**(128-11))*S2**(11-128)
      inv_table_2[i] = inv_y1 #Constant(inv_y1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False))
      log_table_2[i] = log_y1 #Constant(log_y1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))
    
    ### Evaluation Scheme ###
    
    print "\n\033[1mGenerate the evaluation scheme:\033[0m"
    input_var = self.implementation.add_input_variable("input_var", self.precision) 
    ve = ExponentExtraction(input_var, tag = "x_exponent", debug = debugd)
    vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = ML_Custom_FixedPoint_Format(0,52,False), debug = debug_lftolx)
    #vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = self.precision, debug = debug_lftolx)

    print "filtering and handling special cases"
    test_is_special_cases = LogicalNot(Test(input_var, specifier = Test.IsIEEENormalPositive, likely = True, debug = debugd, tag = "is_special_cases"))
    handling_special_cases = Statement(
      ConditionBlock(
        Test(input_var, specifier = Test.IsSignalingNaN, debug = True),
        ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision))
      ),
      ConditionBlock(
        Test(input_var, specifier = Test.IsNaN, debug = True),
        Return(input_var)
      )#,
      # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN)
      # all that remains is x is a subnormal positive
      #Statement(
      #  ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))),
      #  ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision()))))
      #)
    )
    
    print "doing the argument reduction"
    v_dx = vx
    v_x1 = Conversion(v_dx, tag = 'x1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size1'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_x = TypeCast(v_x1, tag = 'index_x',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_x1.get_precision().get_c_bit_size(), 0, False))
    v_inv_x = TableLoad(inv_table_1, v_index_x, tag = 'inv_x')
    v_x = Addition(v_dx, 1, tag = 'x',
                   precision = ML_Custom_FixedPoint_Format(1,52,False))
    v_dy = Multiplication(v_x, v_inv_x, tag = 'dy',
                          precision = ML_Custom_FixedPoint_Format(0,52+arg_reduc['prec1'],False))
    v_y1 = Conversion(v_dy, tag = 'y1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size2'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_y = TypeCast(v_y1, tag = 'index_y',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_y1.get_precision().get_c_bit_size(), 0, False))
    v_inv_y = TableLoad(inv_table_2, v_index_y, tag = 'inv_y')
    v_y = Addition(v_dy, 1, tag = 'y',
                   precision = ML_Custom_FixedPoint_Format(1,52+arg_reduc['prec2'],False))
    # note that we limit the number of bits used to represent dz to 64.
    # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2))
    v_dz = Multiplication(v_y, v_inv_y, tag = 'z',
                          precision = ML_Custom_FixedPoint_Format(64-52-arg_reduc['prec1']-arg_reduc['prec2'],52+arg_reduc['prec1']+arg_reduc['prec2'],False))
    # reduce the number of bits used to represent dz. we can do that
    
    print "doing the first polynomial evaluation"
    global_poly1_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, arg_reduc['degree_poly1']-1, [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'], fixed, sollya.absolute)
    poly1_object = global_poly1_object.sub_poly(start_index = 1)
    print global_poly1_object
    print poly1_object
    poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(poly1_object, v_dz, unified_precision = v_dz.get_precision())
    return ConditionBlock(test_is_special_cases, handling_special_cases, Return(poly1))

    #approx_interval = Interval(0, 27021597764222975*S2**-61)
    
    #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size())))
    #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute)
    #poly_object = global_poly_object.sub_poly(start_index = 1)
    #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
    #_poly.set_attributes(tag = "poly", debug = debug_lftolx)

    """

예제 #27

파일 보기

파일: ml2_wide_sin.py 프로젝트: IanBriggs/OpTuner

        def generate_reduction_fptaylor(x):
            """Bound the evaluation error of the reduced polynomial on ``x``.

            An FPTaylor query is built that models, in ``rnd64`` arithmetic,
            ``abs_x``, ``whole = k * n_pi``, ``r = abs_x - whole``, the
            polynomial ``poly_expr`` and the final sign corrections.  The
            rounding-error bounds reported by FPTaylor are then added to the
            approximation error of ``self.poly_object`` against
            ``sin`` over the reduced interval, as computed by
            ``sollya.supnorm``.

            Args:
                x: sollya interval; it must not straddle 0, and
                    ``floor(|x| * n_invpi)`` must be the same at both
                    endpoints.

            Returns:
                A ``(rel_err, abs_err)`` pair.  ``rel_err`` is
                ``float("inf")`` when no relative rounding bound was obtained
                or when supnorm reports ``error``.  ``abs_err`` is
                ``float("inf")`` when no absolute rounding bound was obtained.

            Raises:
                AssertionError: if the interval straddles 0 or a multiple
                    of pi.
            """

            def final_total(result, kind):
                # Extract the "final_total" bound of the requested error kind.
                return float(result.result[kind]["final_total"]["value"])

            # get sign and abs_x, must be the same at endpoints
            if sollya.sup(x) <= 0:
                sign_x_expr = "-1.0"
                abs_x_expr = "-x"
                abs_x = -x
            elif sollya.inf(x) >= 0:
                sign_x_expr = "1.0"
                abs_x_expr = "x"
                abs_x = x
            else:
                assert False, "Interval must not straddle 0"

            # get k, must be the same at endpoints
            unround_k = abs_x * n_invpi
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if k_low != k_high:
                assert False, "Interval must not straddle multples of pi"
            k = int(k_low)
            part = k % 2

            # reduced argument: symbolic form for FPTaylor, interval for sollya
            r_expr = "abs_x - whole"
            r = abs_x - k * n_pi

            z_expr = "r"
            z = r

            # the polynomial is negated when k is odd
            if part == 1:
                flipped_poly_expr = "-poly"
            else:
                flipped_poly_expr = "poly"

            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  abs_x rnd64= {};".format(abs_x_expr),
                "  whole rnd64= {} * {};".format(k, n_pi),
                "  r rnd64= {};".format(r_expr),
                "  z rnd64= {};".format(z_expr),
                "  poly rnd64= {};".format(poly_expr),
                "  flipped_poly rnd64= {};".format(flipped_poly_expr),
                "  retval rnd64= flipped_poly*{};".format(sign_x_expr),
                "Expressions", "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            # First attempt: ask for both relative and absolute bounds.
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = final_total(res, "relative_errors")
                rnd_abs_err = final_total(res, "absolute_errors")
            except AssertionError:
                # NOTE(review): presumably fptaylor.Result asserts when the
                # run fails; this is treated as "no bound" -- confirm.
                pass
            except KeyError:
                # The relative bound is missing; still try the absolute one.
                try:
                    rnd_abs_err = final_total(res, "absolute_errors")
                except KeyError:
                    pass

            # Second attempt: absolute error only.
            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = final_total(res, "absolute_errors")
                except AssertionError:
                    pass

            # approximation error of the polynomial against sin over z
            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), z, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), z, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            # Both FPTaylor attempts may fail; report an unbounded error in
            # that case instead of trying to add None to a sollya value.
            if rnd_abs_err is None:
                abs_err = float("inf")
            else:
                abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err

예제 #28

파일 보기

파일: implementpoly.py 프로젝트: metalibm/metalibm

        approx_interval = Interval(-S2**-5, S2**-5)
        ctx = MLL_Context(ML_Binary64, approx_interval)
        vx = Variable("x",
                      precision=ctx.variableFormat,
                      interval=approx_interval)
        # guessing the best degree
        poly_degree = int(
            sup(
                sollya.guessdegree(sollya.exp(sollya.x), approx_interval,
                                   eps_target)))
        # asking sollya to provide the approximation
        poly_object = Polynomial.build_from_approximation(
            sollya.exp(sollya.x), poly_degree,
            [sollya.doubledouble] * (poly_degree + 1), vx.interval)
        print("poly object is {}".format(poly_object))
        poly_graph, poly_epsilon = mll_implementpoly_horner(
            ctx, poly_object, eps_target, vx)
        print("poly_graph is {}".format(
            poly_graph.get_str(depth=None, display_precision=True)))
        print("poly epsilon is {}".format(float(poly_epsilon)))
        print("poly accuracy is {}".format(
            get_accuracy_from_epsilon(poly_epsilon)))
        implem_results.append(
            (eps_target, poly_degree, poly_object, poly_graph, poly_epsilon))

    for result in implem_results:
        eps_target, poly_degree, poly_object, poly_graph, poly_epsilon = result
        epsilon_log2 = int(sollya.floor(sollya.log2(poly_epsilon)))
        print("epsilon for eps_target={} (degree={}) is {}".format(
            eps_target, poly_degree, epsilon_log2))

예제 #29

파일 보기

    def generate_scheme(self):
        """Build and return the operation graph of the implementation.

        The returned scheme dispatches on the input ``x``:

        * NaN or infinite input: a signaling NaN goes through
          ``ExpRaiseReturn(ML_FPE_Invalid)`` with a quiet NaN, a quiet NaN
          returns a quiet NaN, an infinity returns +inf when ``x >= 0`` and
          +0 otherwise.
        * ``x`` greater than ``ceil(log(2 * 2**emax))``: early overflow,
          returns +inf.
        * ``x`` less than ``floor(log(2**emin_subnormal))``: early
          underflow, returns +0.
        * otherwise: ``k = nearest_integer(x * invlog2)``, the reduced
          argument is split as ``exact_hi = x - k * log2_hi`` and
          ``exact_lo = -k * log2_lo``, and the result is ``2**k * poly``
          where ``poly = 1 + (exact_hi + (exact_lo + sub_poly(r)))``.
          Dedicated late overflow / underflow paths rescale the exponent.

        The polynomial approximates ``expm1`` on ``[-log(2)/2, log(2)/2]``.
        When gappa is installed its degree is raised until the combined
        approximation and evaluation error, relative to ``exp`` on each of
        three sub-intervals, is below the error goal derived from
        ``self.accuracy``.

        Returns:
            The top-level ``ConditionBlock`` of the scheme.
        """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            # In libm-compliant mode exceptions are raised; otherwise only
            # the requested return value is produced.
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        # range of x that reaches the main path, and the matching range of k
        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log2_hi is rounded to fewer bits than the format's field size, the
        # reduction depending on the magnitude of k
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        # k is kept both in the floating-point format and as an integer (ik)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        # three sub-intervals used for the dichotomy of the error evaluation
        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            # NOTE(review): error_goal is left unset on this branch;
            # presumably Log.report(Log.Error, ...) aborts -- confirm.
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        # degree search: one iteration per candidate polynomial degree
        while True:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            # constant and linear terms are added explicitly from the
            # hi / lo parts of the reduced argument
            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                # one gappa case per sub-interval of exact_hi
                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            # keep the worst relative error over the three sub-intervals
            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                # identity test: the stored value is a sollya object after
                # the first iteration, so "== None" is avoided
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # floor division keeps the exponent offset an integer on Python 3
        # (true division would yield a float, fractional for odd field sizes)
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        # the scaling is applied in two steps: 2**(ik - offset), then 2**offset
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        # same two-step scaling: 2**(ik + offset), then 2**(-offset)
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme

예제 #30

파일 보기

    def generate_scheme(self):
        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = self.precision

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        # size of most significant table index (for linear slope tabulation)
        alpha = self.alpha  # 6
        # size of medium significant table index (for initial value table index LSB)
        beta = self.beta  # 5
        # size of least significant table index (for linear offset tabulation)
        gamma = self.gamma  # 5

        guard_bits = self.guard_bits  # 3

        vx.set_interval(self.interval)

        range_hi = sollya.sup(self.interval)
        range_lo = sollya.inf(self.interval)
        f_hi = self.function(range_hi)
        f_lo = self.function(range_lo)
        # fixed by format used for reduced_x
        range_size = range_hi - range_lo
        range_size_log2 = int(sollya.log2(range_size))
        assert 2**range_size_log2 == range_size

        print("range_size_log2={}".format(range_size_log2))

        reduced_x = Conversion(BitLogicRightShift(vx - range_lo,
                                                  range_size_log2),
                               precision=fixed_point(0,
                                                     alpha + beta + gamma,
                                                     signed=False),
                               tag="reduced_x",
                               debug=debug_fixed)

        alpha_index = get_fixed_slice(reduced_x,
                                      0,
                                      alpha - 1,
                                      align_hi=FixedPointPosition.FromMSBToLSB,
                                      align_lo=FixedPointPosition.FromMSBToLSB,
                                      tag="alpha_index",
                                      debug=debug_std)
        gamma_index = get_fixed_slice(reduced_x,
                                      gamma - 1,
                                      0,
                                      align_hi=FixedPointPosition.FromLSBToLSB,
                                      align_lo=FixedPointPosition.FromLSBToLSB,
                                      tag="gamma_index",
                                      debug=debug_std)

        beta_index = get_fixed_slice(reduced_x,
                                     alpha,
                                     gamma,
                                     align_hi=FixedPointPosition.FromMSBToLSB,
                                     align_lo=FixedPointPosition.FromLSBToLSB,
                                     tag="beta_index",
                                     debug=debug_std)

        # Assuming monotonic function
        f_absmax = max(abs(f_hi), abs(f_lo))
        f_absmin = min(abs(f_hi), abs(f_lo))

        f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
        f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
        storage_lsb = f_lsb - io_precision.get_bit_size() - guard_bits

        f_int_size = f_msb
        f_frac_size = -storage_lsb

        storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
        Log.report(Log.Info, "storage_format is {}".format(storage_format))

        # table of initial value index
        tiv_index = Concatenation(alpha_index,
                                  beta_index,
                                  tag="tiv_index",
                                  debug=debug_std)
        # table of offset value index
        to_index = Concatenation(alpha_index,
                                 gamma_index,
                                 tag="to_index",
                                 debug=debug_std)

        tiv_index_size = alpha + beta
        to_index_size = alpha + gamma

        Log.report(Log.Info, "initial table structures")
        table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                               storage_precision=storage_format,
                               tag="tiv")
        table_offset = ML_NewTable(dimensions=[2**to_index_size],
                                   storage_precision=storage_format,
                                   tag="to")

        slope_table = [None] * (2**alpha)
        slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
        delta_u = range_size * slope_delta * 2**-15
        Log.report(Log.Info, "computing slope value")
        for i in range(2**alpha):
            # slope is computed at the middle of range_size interval
            slope_x = range_lo + (i + 0.5) * range_size * slope_delta
            # TODO: gross approximation of derivatives
            f_xpu = self.function(slope_x + delta_u / 2)
            f_xmu = self.function(slope_x - delta_u / 2)
            slope = (f_xpu - f_xmu) / delta_u
            slope_table[i] = slope

        range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
        Log.report(Log.Info, "computing value for initial-value table")
        for i in range(2**tiv_index_size):
            slope_index = i / 2**beta
            iv_x = range_lo + i * range_rcp_steps * range_size
            offset_x = 0.5 * range_rcp_steps * range_size
            # initial value is computed so that the piecewise linear
            # approximation intersects the function at iv_x + offset_x
            iv_y = self.function(
                iv_x + offset_x) - offset_x * slope_table[int(slope_index)]
            initial_value = storage_format.round_sollya_object(iv_y)
            table_iv[i] = initial_value

        # determining table of initial value interval
        tiv_min = table_iv[0]
        tiv_max = table_iv[0]
        for i in range(1, 2**tiv_index_size):
            tiv_min = min(tiv_min, table_iv[i])
            tiv_max = max(tiv_max, table_iv[i])
        table_iv.set_interval(Interval(tiv_min, tiv_max))

        offset_step = range_size / S2**(alpha + beta + gamma)
        for i in range(2**alpha):
            Log.report(Log.Info,
                       "computing offset value for sub-table {}".format(i))
            for j in range(2**gamma):
                to_i = i * 2**gamma + j
                offset = slope_table[i] * j * offset_step
                table_offset[to_i] = offset

        # determining table of offset interval
        to_min = table_offset[0]
        to_max = table_offset[0]
        for i in range(1, 2**(alpha + gamma)):
            to_min = min(to_min, table_offset[i])
            to_max = max(to_max, table_offset[i])
        offset_interval = Interval(to_min, to_max)
        table_offset.set_interval(offset_interval)

        initial_value = TableLoad(table_iv,
                                  tiv_index,
                                  precision=storage_format,
                                  tag="initial_value",
                                  debug=debug_fixed)

        offset_precision = get_fixed_type_from_interval(offset_interval, 16)
        print("offset_precision is {} ({} bits)".format(
            offset_precision, offset_precision.get_bit_size()))
        table_offset.get_precision().storage_precision = offset_precision

        # rounding table value
        for i in range(1, 2**(alpha + gamma)):
            table_offset[i] = offset_precision.round_sollya_object(
                table_offset[i])

        offset_value = TableLoad(table_offset,
                                 to_index,
                                 precision=offset_precision,
                                 tag="offset_value",
                                 debug=debug_fixed)

        Log.report(
            Log.Verbose,
            "initial_value's interval: {}, offset_value's interval: {}".format(
                evaluate_range(initial_value), evaluate_range(offset_value)))

        final_add = initial_value + offset_value
        round_bit = final_add  # + FixedPointPosition(final_add, io_precision.get_bit_size(), align=FixedPointPosition.FromMSBToLSB)

        vr_out = Conversion(initial_value + offset_value,
                            precision=io_precision,
                            tag="vr_out",
                            debug=debug_fixed)

        self.implementation.add_output_signal("vr_out", vr_out)

        # Approximation error evaluation
        approx_error = 0.0
        for i in range(2**alpha):
            for j in range(2**beta):
                tiv_i = (i * 2**beta + j)
                # = range_lo + tiv_i * range_rcp_steps * range_size
                iv = table_iv[tiv_i]
                for k in range(2**gamma):
                    to_i = i * 2**gamma + k
                    offset = table_offset[to_i]
                    approx_value = offset + iv
                    table_x = range_lo + range_size * (
                        (i * 2**beta + j) * 2**gamma + k) / S2**(alpha + beta +
                                                                 gamma)
                    local_error = abs(1 / (table_x) - approx_value)
                    approx_error = max(approx_error, local_error)
        error_log2 = float(sollya.log2(approx_error))
        print("approx_error is {}, error_log2 is {}".format(
            float(approx_error), error_log2))

        # table size
        table_iv_size = 2**(alpha + beta)
        table_offset_size = 2**(alpha + gamma)
        print("tables' size are {} entries".format(table_iv_size +
                                                   table_offset_size))

        return [self.implementation]