Python ceil Examples

Programming Language: Python

Namespace/Package Name: sollya

Method/Function: ceil

Examples at hotexamples.com: 22

Python ceil - 22 examples found. These are the top-rated real-world Python examples of sollya.ceil, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: random_gen.py Project: metalibm/metalibm

 def __init__(self, inf_bound, sup_bound):
     """ Store the interval bounds and derive its binary exponent range.

         min_exp is None when 0 belongs to [inf_bound, sup_bound] """
     self.inf_bound = inf_bound
     self.sup_bound = sup_bound
     self.zero_in_interval = 0 in sollya.Interval(inf_bound, sup_bound)
     # ceil(log2(|bound|)) of each bound, shared by min_exp and max_exp
     bound_exps = [
         sollya.ceil(sollya.log2(abs(bound)))
         for bound in (inf_bound, sup_bound)
     ]
     if self.zero_in_interval:
         self.min_exp = None
     else:
         self.min_exp = min(bound_exps)
     self.max_exp = max(bound_exps)

Example #2

Show file

def solve_format_Constant(optree):
    """ Determine the format of a Constant node.

        A special value, or a node whose precision is already set, keeps
        the precision it carries; otherwise a fixed-point format is built
        from the constant value """
    assert isinstance(optree, Constant)
    cst_value = optree.get_value()
    if FP_SpecialValue.is_special_value(cst_value):
        return optree.get_precision()
    if optree.get_precision() is not None:
        # precision was set beforehand (manually forced): returned as is
        return optree.get_precision()

    # fixed-point format solving
    # maximum number of fractional bits to be tested
    FRAC_THRESHOLD = 100
    # TODO: fix
    # smallest bit count i such that cst_value * 2**i is an integer,
    # -1 when no count below FRAC_THRESHOLD fits
    frac_size = next(
        (i for i in range(FRAC_THRESHOLD)
         if int(cst_value * 2**i) == cst_value * 2**i),
        -1)
    if frac_size < 0:
        Log.report(Log.Error, "value {} is not an integer, from node:\n{}", cst_value, optree)
    magnitude = abs(cst_value)
    is_signed = cst_value < 0
    # integer part width, extended by one bit for signed values
    int_size = max(int(sollya.ceil(sollya.log2(magnitude + 1))), 0)
    if is_signed:
        int_size += 1
    # a format can not be empty: at least one integer bit is kept
    if frac_size == 0 and int_size == 0:
        int_size = 1
    return fixed_point(int_size, frac_size, signed=is_signed)

Example #3

Show file

 def round_sollya_object(self, value, round_mode=sollya.RN):
     """ Round value according to round_mode: value is scaled by
         S2**frac_size, rounded to an integer, then scaled back.

         round_mode must be one of sollya.RN, sollya.RD, sollya.RU
         or sollya.RZ (any other key raises KeyError) """
     def round_toward_zero(x):
         # floor for strictly positive values, ceil otherwise
         if x > 0:
             return sollya.floor(x)
         return sollya.ceil(x)

     integer_rounding = {
         sollya.RN: sollya.nearestint,
         sollya.RD: sollya.floor,
         sollya.RU: sollya.ceil,
         sollya.RZ: round_toward_zero,
     }
     rnd_function = integer_rounding[round_mode]
     scale_factor = S2**self.get_frac_size()
     scaled_value = scale_factor * value
     return rnd_function(scaled_value) / scale_factor

Example #4

Show file

 def __init__(self, low_exp_value, max_exp_value, field_bits, precision):
     """ Store the exponent range and field width, and derive from them
         the exponent bit count (exp_bits) and the number of
         splits (split_num) """
     self.field_bits = field_bits
     self.low_exp_value = low_exp_value
     self.max_exp_value = max_exp_value
     # number of exponent values in [low_exp_value, max_exp_value]
     exp_count = max_exp_value - low_exp_value + 1
     exp_bits = int(sollya.ceil(sollya.log2(exp_count)))
     # both widths must be non-negative and at least one bit must remain
     assert exp_bits >= 0
     assert field_bits >= 0
     assert (exp_bits + field_bits) > 0
     self.exp_bits = exp_bits
     self.split_num = exp_count * 2**(self.field_bits)
     Log.report(Log.Debug, "split_num={}", self.split_num)
     self.precision = precision

Example #5

Show file

def solve_format_Constant(optree):
    """ Determine the format of a Constant node.

        A special value, or a node whose precision is already set, keeps
        the precision it carries; otherwise an integer fixed-point format
        (no fractional bit) is built from the constant value """
    assert isinstance(optree, Constant)
    cst_value = optree.get_value()
    if FP_SpecialValue.is_special_value(cst_value):
        return optree.get_precision()
    if optree.get_precision() is not None:
        # precision was set beforehand (manually forced): returned as is
        return optree.get_precision()

    # fixed-point format solving, integer values only
    assert int(cst_value) == cst_value
    magnitude = abs(cst_value)
    is_signed = cst_value < 0
    sign_bits = 1 if is_signed else 0
    int_size = max(int(sollya.ceil(sollya.log2(magnitude + 1))), 0) + sign_bits
    frac_size = 0
    # frac_size is always 0 here: at least one integer bit is kept so
    # that the format is not empty
    int_size = max(int_size, 1)
    return fixed_point(int_size, frac_size, signed=is_signed)

Example #6

Show file

File: implementpoly.py Project: metalibm/metalibm

 def computeNeededVariableFormat(self, I, epsTarget, variableFormat):
     """ Return the format to use for a variable given the error
         target epsTarget.

         variableFormat is returned unchanged unless a format derived
         from epsTarget has a strictly smaller bit size """
     if not epsTarget > 0:
         return variableFormat

     # TODO: fix to support ML_Binary32
     if epsTarget >= self.MIN_LIMB_ERROR or variableFormat.mp_node.precision is self.limb_format:
         # FIXME: default to minimal precision (self.limb_format)
         return variableFormat

     # number of accurate bits required to reach epsTarget
     target_accuracy = sollya.ceil(-sollya.log2(epsTarget))
     target_format = self.get_format_from_accuracy(
         target_accuracy,
         eps_target=epsTarget,
         interval=variableFormat.mp_node.interval)
     target_bits = target_format.mp_node.precision.get_bit_size()
     current_bits = variableFormat.mp_node.precision.get_bit_size()
     if target_bits < current_bits:
         return target_format
     # variableFormat is not larger than the derived format: it is kept
     return variableFormat

Example #7

Show file

File: p_size_datapath.py Project: metalibm/metalibm

def determine_minimal_fixed_format_cst(value):
    """ Determine the minimal size format which can encode
        exactly the constant value value.

        :param value: numerical constant to be encoded
        :return: the result of fixed_point(int_size, frac_size, signed=...)

        An error is reported through Log.report when no fractional size
        below FRAC_THRESHOLD allows an exact encoding of value """
    # fixed-point format solving
    frac_size = -1
    FRAC_THRESHOLD = 100  # maximum number of frac bit to be tested
    # TODO: fix
    # looking for the smallest i such that value * 2**i is an integer
    for i in range(FRAC_THRESHOLD):
        scaled_value = value * 2**i
        if int(scaled_value) == scaled_value:
            frac_size = i
            break
    if frac_size < 0:
        # only the value is known in this function (there is no node to
        # report), so the message is built from value and the threshold
        # NOTE(review): presumably Log.report raises on Log.Error, otherwise
        # frac_size is still -1 in the statements below -- confirm
        Log.report(
            Log.Error,
            "value {} can not be encoded exactly with less than {} fractional bits",
            value, FRAC_THRESHOLD)
    abs_value = abs(value)
    signed = value < 0
    # integer part width, extended by one bit for signed values
    int_size = max(int(sollya.ceil(sollya.log2(abs_value + 1))),
                   0) + (1 if signed else 0)
    # a format can not be empty: at least one integer bit is kept
    if frac_size == 0 and int_size == 0:
        int_size = 1
    return fixed_point(int_size, frac_size, signed=signed)

Example #8

Show file

    def generate_scheme(self):
        """ Build a small operation graph and check, for each of its
            nodes, that evaluate_range returns the expected interval """
        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        # not referenced below
        output_precision = fixed_point(int_size, frac_size)

        # node -> interval expected from evaluate_range
        expected_interval = {}

        def declare_input(name, lo, hi):
            """ declare input signal <name> ranging over [lo, hi] """
            signal = self.implementation.add_input_signal(name, input_precision)
            bounds = Interval(lo, hi)
            signal.set_interval(bounds)
            expected_interval[signal] = bounds
            return signal, bounds

        # declaring input variables
        var_x, x_interval = declare_input("x", -10.3, 10.7)
        var_y, y_interval = declare_input("y", -17.9, 17.2)
        var_z, z_interval = declare_input("z", -7.3, 7.7)

        cst = Constant(42.5, tag="cst")
        expected_interval[cst] = Interval(42.5)

        # rounding nodes
        conv_ceil = Ceil(var_x, tag="ceil")
        expected_interval[conv_ceil] = sollya.ceil(x_interval)

        conv_floor = Floor(var_y, tag="floor")
        expected_interval[conv_floor] = sollya.floor(y_interval)

        # arithmetic nodes
        mult = var_z * var_x
        mult.set_tag("mult")
        mult_interval = z_interval * x_interval
        expected_interval[mult] = mult_interval

        large_add = (var_x + var_y) - mult
        large_add.set_attributes(tag="large_add")
        large_add_interval = (x_interval + y_interval) - mult_interval
        expected_interval[large_add] = large_add_interval

        # result clamped to [0, 13]
        upper_clamped = Min(large_add, 13)
        reduced_result = Max(0, upper_clamped)
        reduced_result.set_tag("reduced_result")
        lower_bound = Interval(0)
        upper_clamped_interval = interval_min(large_add_interval, Interval(13))
        reduced_result_interval = interval_max(lower_bound,
                                               upper_clamped_interval)
        expected_interval[reduced_result] = reduced_result_interval

        select_result = Select(var_x > var_y,
                               reduced_result,
                               var_z,
                               tag="select_result")
        select_interval = interval_union(reduced_result_interval, z_interval)
        expected_interval[select_result] = select_interval

        # checking interval evaluation
        checked_nodes = [
            cst, var_x, var_y, mult, large_add, reduced_result,
            select_result, conv_ceil, conv_floor
        ]
        for node in checked_nodes:
            computed = evaluate_range(node)
            expected = expected_interval[node]
            print("{}: {} vs expected {}".format(node.get_tag(), computed,
                                                 expected))
            assert computed is not None
            assert computed == expected

        return [self.implementation]

Example #9

Show file

  def generate_argument_reduction(self, memory_limit):
    """ Return the argument reduction parameters to be used.

        The result of self.eval_argument_reduction(6,10,12,13) is returned,
        extended with the total table size ('sizeof_tables') and with
        hard-coded polynomial degrees ('degree_poly1' = 4, 'degree_poly2' = 8).

        Every statement located after this first return is unreachable:
        it is an exhaustive parameter search, and memory_limit is only
        referenced by that unreachable search. """
    best_arg_reduc = None

    # hard-coded parameters, returned without performing any search
    best_arg_reduc = self.eval_argument_reduction(6,10,12,13)
    best_arg_reduc['sizeof_tables'] = best_arg_reduc['sizeof_table1'] + best_arg_reduc['sizeof_table2']
    best_arg_reduc['degree_poly1'] = 4
    best_arg_reduc['degree_poly2'] = 8
    return best_arg_reduc
    # NOTE: everything below is unreachable because of the return above
    # iterate through all possible parameters, and return the best argument reduction
    # the order of importance of the characteristics of a good argument reduction is:
    #   1- the argument reduction is valid
    #   2- the degrees of the polynomials obtained are minimal
    #   3- the memory used is minimal
    # An argument reduction is valid iff:
    #   - the memory used is less than memory_limit
    #   - y-1 and z-1  fit into a uint64_t
    #   - the second argument reduction should be useful (ie: it should add at least 1 bit to the argument reduction)
    # From those validity constraints we deduce some bounds on the parameters to reduce the space of values searched:
    # (note that those bounds are implied by, but not equivalent to, the constraints)
    #   size1 <= log2(memory_limit/17)                                       (memory_limit on the first table)
    #   prec1 < 13 + size1                                                   (y-1 fits into a uint64_t)
    #   size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)          (memory_limit on both tables)
    #   size2 >= 1 - log2(midinterval)                                       (second arg red should be useful)
    #   prec2 < 12 - prec1 - log2((y-y1)/y1),  for all possible y            (z-1 fits into a uint64_t)
    # note: it is hard to deduce a tight bound on prec2 from the last inequality
    # a good approximation is  size2 ~= max[for y]( - log2((y-y1)/y1)), but using it may eliminate valid arg reduc

    #self.eval_argument_reduction(12, 20, 22, 14)

    min_size1 = 1
    max_size1 = floor(log(memory_limit/17)/log(2)).getConstantAsInt()
    # size1 is explored from the largest to the smallest value
    for size1 in xrange(max_size1, min_size1-1, -1):

      min_prec1 = size1
      max_prec1 = 12 + size1
      for prec1 in xrange(min_prec1,max_prec1+1):

        # we need sizeof_table1 and mid_interval for the bound on size2 and prec2
        first_arg_reduc = self.eval_argument_reduction(size1, prec1, prec1, prec1)
        mid_interval = first_arg_reduc['mid_interval']
        sizeof_table1 = first_arg_reduc['sizeof_table1']

        # skipping (size1, prec1) pairs whose mid_interval is out of range
        if not(0 <= inf(mid_interval) and sup(mid_interval) < S2**(64 - 52 - prec1)):
          continue
        # skipping pairs whose first table alone reaches the memory limit
        if not(first_arg_reduc['sizeof_table1'] < memory_limit):
          continue

        min_size2 = 1 - ceil(log(sup(mid_interval))/log(2)).getConstantAsInt()
        max_size2 = floor(log((memory_limit - sizeof_table1)/(17 * sup(mid_interval)))/log(2)).getConstantAsInt()
        # during execution of the prec2 loop, the interval of valid values for prec2 can be reduced
        # so min_prec2 and max_prec2 are set here and not just before the prec2 loop
        # (because they are modified inside the body of the loop, for the next iteration of size2)
        min_prec2 = 0
        max_prec2 = 12 + max_size2 - prec1
        for size2 in xrange(max_size2,min_size2-1,-1):

          max_prec2 = min(max_prec2, 12 + size2 - prec1)
          for prec2 in xrange(max_prec2,min_prec2-1,-1):

            #print '=====\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{})\t====='.format(size1,min_size1,max_size1,prec1,min_prec1,max_prec1,size2,min_size2,max_size2,prec2,min_prec2,max_prec2)
            #print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss #memory used by the program

            arg_reduc = self.eval_argument_reduction(size1, prec1, size2, prec2)
            mid_interval = arg_reduc['mid_interval']
            out_interval = arg_reduc['out_interval']
            sizeof_tables = arg_reduc['sizeof_table1'] + arg_reduc['sizeof_table2']
            # out_interval out of range: the upper bound on prec2 is lowered
            if not(0 <= inf(out_interval) and sup(out_interval) < S2**(64-52-prec1-prec2)):
              max_prec2 = prec2 - 1
              continue
            if memory_limit < sizeof_tables:
              continue
            #assert(prec2 < 12 + size2 - prec1) # test the approximation size2 ~= max[for y]( - log2((y-y1)/y1))

            # guess the degree of the two polynomials (relative error <= 2^-52 and absolute error <= 2^-120)
            # note: we exclude zero from out_interval to not perturb sollya (log(1+x)/x is not well defined on 0)
            sollya_out_interval = Interval(S2**(-52-prec1-prec2), sup(out_interval))
            guess_degree_poly1 = guessdegree(log(1+sollya.x)/sollya.x, sollya_out_interval, S2**-52)
            guess_degree_poly2 = guessdegree(log(1+sollya.x), sollya_out_interval, S2**-120)
            # TODO: detect when guessdegree returns multiple possible degrees, and find the right one
            # (both reports below are disabled by the leading False)
            if False and inf(guess_degree_poly1) <> sup(guess_degree_poly1):
              print "improvable guess_degree_poly1:", guess_degree_poly1
            if False and inf(guess_degree_poly2) <> sup(guess_degree_poly2):
              print "improvable guess_degree_poly2:", guess_degree_poly2
            degree_poly1 = sup(guess_degree_poly1).getConstantAsInt() + 1
            degree_poly2 = sup(guess_degree_poly2).getConstantAsInt()

            # a degree is larger than the best one found so far: the lower
            # bound on prec2 is raised and the prec2 loop is left
            if ((best_arg_reduc is not None)
            and (best_arg_reduc['degree_poly1'] < degree_poly1 or best_arg_reduc['degree_poly2'] < degree_poly2)):
              min_prec2 = prec2 + 1
              break

            # new best candidate, ordered by degree_poly1, then degree_poly2,
            # then total table size
            if ((best_arg_reduc is None)
             or (best_arg_reduc['degree_poly1'] > degree_poly1)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] > degree_poly2)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] == degree_poly2 and best_arg_reduc['sizeof_tables'] > sizeof_tables)):
              arg_reduc['degree_poly1'] = degree_poly1
              arg_reduc['degree_poly2'] = degree_poly2
              arg_reduc['sizeof_tables'] = sizeof_tables
              best_arg_reduc = arg_reduc
              #print "\n   --new best--  \n", arg_reduc, "\n"
    #print "\nBest arg reduc: \n", best_arg_reduc, "\n"
    return best_arg_reduc

Example #10

Show file

    def generate_scalar_scheme(self, vx):
        """ Build the operation graph of the scalar implementation.

            The sign of the input is extracted and vx is replaced by Abs(vx).
            The reduced argument r = vx - k * log2_value is computed with a
            hi/lo split of log2_value = log(2) / 2**index_size; exp(r) and
            exp(-r) are approximated by a polynomial and combined with table
            values of 2**(+/- l * 2**-index_size), following the derivation
            given in the comments of the body
            (sinh(x) = 1/2 * (exp(x) - exp(-x))).

            :param vx: input node
            :return: a Statement returning sign * result, or
                     sign * FP_PlusInfty(self.precision) when ov_flag is set """
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
                Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # log2 of the number of table entries per sign (the table
        # declared below has 2 * 2**index_size rows)
        index_size = 5

        comp_lo = (vx < 0)
        comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
        sign = Select(comp_lo, -1, 1, precision = self.precision)

        # as sinh is an odd function, we can simplify the input to its absolute
        # value once the sign has been extracted
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2)/2**index_size
        inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
        inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2    for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        # NOTE(review): the bound above is stated for cosh while the other
        # comments of this scheme refer to sinh -- confirm it is intended
        max_k_approx    = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

        # hi/lo split of log(2) / 2**index_size
        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
        log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
        log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
        k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag = "r", debug = debug_multi)

        # the evaluation error of r_hi is only computed (and logged) when
        # gappa is available
        if is_gappa_installed():
                r_eval_error = self.get_eval_error(r_hi, variable_copy_map =
                    {
                        vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
                        k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
                    })
                Log.report(Log.Verbose, "r_eval_error: ", r_eval_error)

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        # degree guessed by sollya for exp over approx_interval, plus 3
        poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
        precision_list = [1] + [self.precision] * (poly_degree)

        # k_integer is split into k_hi (bits above index_size) and
        # k_lo (k_integer modulo 2**index_size)
        k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi)
        k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi)
        # NOTE(review): pow_exp is not referenced again in this function
        pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi)

        # table row i stores, as hi/lo pairs, 2**(-input_value * 2**-index_size)
        # in columns 0-1 and 2**(input_value * 2**-index_size) in columns 2-3
        exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value    = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
            pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
            neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # sinh(x) = 1/2 * (exp(x) - exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)    = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)    = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #

        # approximation error measured as the infinity norm of (f - p)
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function)

        Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error))))

        # the constant coefficient is dropped (sub_poly from index 1): the
        # same polynomial is evaluated on r and on -r
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision)
        poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

        poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision)
        poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

        # table rows are addressed by k_lo offset by 2**index_size
        table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

        # exponents (k_hi - 1) and (-k_hi - 1), bounded from below by the
        # minimal normal exponent of the format
        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))

        # 2^(h-1)
        pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi)
        # 2^(-h-1)
        pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi)

        # difference of the leading (hi) table terms
        hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)


        # remaining terms of the positive and negative exponentials
        pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

        neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

        result = Addition(
            Subtraction(
                pos_exp,
                neg_exp,
                precision=self.precision,
            ),
            hi_terms,
            precision=self.precision,
            tag="result",
            debug=debug_multi
        )

        # ov_value
        # overflow threshold: asinh of the format maximal value, rounded down
        ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD)
        ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater)

        # main scheme
        scheme = Statement(
            Return(
                Select(
                    ov_flag,
                    sign*FP_PlusInfty(self.precision),
                    sign*result
                )))

        return scheme

Example #11

Show file

def generate_payne_hanek(vx,
                         frac_pi,
                         precision,
                         n=100,
                         k=4,
                         chunk_num=None,
                         debug=False):
    """ generate payne and hanek argument reduction for frac_pi * variable

        :param vx: input node to be reduced
        :param frac_pi: constant multiplicand, split into chunks which are
                        stored in a table
        :param precision: floating-point format, ML_Binary32 or ML_Binary64
                          (the only keys of the integer format mapping)
        :param n: accuracy parameter, used for sollya's working precision
                  (cst_exp_range + n) and for the lsb chunk selection
                  (vx_exp + n + 4)
        :param k: used in msb_exp, as the exclude_hi threshold and in the
                  modulo 2**(k + 1) applied to the integer accumulator
        :param chunk_num: not referenced in the function body
        :param debug: when set, debug attributes (debuglld / debugd) are
                      attached to hi_mult_int and pre_exclude_hi
        :return: tuple (result, acc, acc_int): result is the Statement
                 containing the reduction loop, acc the floating-point
                 accumulator variable and acc_int the integer accumulator
                 variable """

    sollya.roundingwarnings = sollya.off
    debug_precision = debug_multi
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[precision]

    p = precision.get_field_size()

    # weight of the most significant digit of the constant
    cst_msb = floor(log2(abs(frac_pi)))
    # length of exponent range which must be covered by the approximation
    # of the constant
    cst_exp_range = cst_msb - precision.get_emin_subnormal() + 1

    # chunk size has to be such that multiplication by a split <v>
    # (vx_hi or vx_lo) is exact
    # NOTE(review): `/` yields a float under Python 3 -- confirm whether an
    # integer division is expected for chunk_size (and half_size below)
    chunk_size = precision.get_field_size() / 2 - 2
    chunk_number = int(ceil((cst_exp_range + chunk_size - 1) / chunk_size))
    scaling_factor = S2**-(chunk_size / 2)

    chunk_size_cst = Constant(chunk_size, precision=ML_Int32)
    cst_msb_node = Constant(cst_msb, precision=ML_Int32)

    # Saving sollya's global precision
    # NOTE(review): prec is read as an attribute here, called on the next
    # line and assigned at the end of the function -- confirm which form
    # the sollya binding expects
    old_global_prec = sollya.settings.prec
    sollya.settings.prec(cst_exp_range + n)

    # table to store chunk of constant multiplicand
    cst_table = ML_NewTable(dimensions=[chunk_number, 1],
                            storage_precision=precision,
                            tag="PH_cst_table")
    # table to store sqrt(scaling_factor) corresponding to the
    # cst multiplicand chunks
    scale_table = ML_NewTable(dimensions=[chunk_number, 1],
                              storage_precision=precision,
                              tag="PH_scale_table")
    tmp_cst = frac_pi

    # cst_table stores normalized constant chunks (they have been
    # scaled back close to the 1.0 interval)
    #
    # scale_table stores the scaling factors corresponding to the
    # denormalization of cst_table coefficients

    # this loop divides the digits of frac_pi into chunks
    # the chunk lsb weight is given by a shift from
    # cst_msb, multiple of the chunk index
    for i in range(chunk_number):
        value_div_factor = S2**(chunk_size * (i + 1) - cst_msb)
        # leading digits of the remaining constant, truncated at the
        # lsb weight of chunk i
        local_cst = int(tmp_cst * value_div_factor) / value_div_factor
        local_scale = (scaling_factor**i)
        # storing scaled constant chunks
        cst_table[i][0] = local_cst / (local_scale**2)
        scale_table[i][0] = local_scale
        # Updating constant value
        tmp_cst = tmp_cst - local_cst

    # Computing which part of the constant we do not need to multiply
    # In the following comments, vi represents the bit of frac_pi of weight 2**-i

    # Bits vi so that i <= (vx_exp - p + 1 -k)  are not needed, because they result
    # in a multiple of 2pi and do not contribute to trig functions.

    vx_exp = ExponentExtraction(
        vx, precision=vx.get_precision().get_integer_format())
    vx_exp = Conversion(vx_exp, precision=ML_Int32)

    msb_exp = -(vx_exp - p + 1 - k)
    msb_exp.set_attributes(tag="msb_exp", debug=debug_multi)
    msb_exp = Conversion(msb_exp, precision=ML_Int32)

    # Select the highest index where the reduction should start
    msb_index = Select(cst_msb_node < msb_exp, 0,
                       (cst_msb_node - msb_exp) / chunk_size_cst)
    msb_index.set_attributes(tag="msb_index", debug=debug_multi)

    # For a desired accuracy of 2**-n, bits vi so that i >= (vx_exp + n + 4)  are not needed, because they contribute less than
    # 2**-n to the result

    lsb_exp = -(vx_exp + n + 4)
    lsb_exp.set_attributes(tag="lsb_exp", debug=debug_multi)
    lsb_exp = Conversion(lsb_exp, precision=ML_Int32)

    # Index of the corresponding chunk
    lsb_index = (cst_msb_node - lsb_exp) / chunk_size_cst
    lsb_index.set_attributes(tag="lsb_index", debug=debug_multi)

    # Splitting vx
    half_size = precision.get_field_size() / 2 + 1

    # hi part (most significant digit) of vx input
    # (the half_size low bits of the encoding are masked out)
    vx_hi = TypeCast(BitLogicAnd(
        TypeCast(vx, precision=int_precision),
        Constant(~int(2**half_size - 1), precision=int_precision)),
                     precision=precision)
    vx_hi.set_attributes(tag="vx_hi_ph")  #, debug = debug_multi)

    vx_lo = vx - vx_hi
    vx_lo.set_attributes(tag="vx_lo_ph")  #, debug = debug_multi)

    # loop iterator variable
    vi = Variable("i", precision=ML_Int32, var_type=Variable.Local)
    # step scaling factor
    # NOTE(review): half_scaling is not referenced again in this function
    half_scaling = Constant(S2**(-chunk_size / 2), precision=precision)

    i1 = Constant(1, precision=ML_Int32)

    # accumulator to the output precision
    acc = Variable("acc", precision=precision, var_type=Variable.Local)
    # integer accumulator
    acc_int = Variable("acc_int",
                       precision=int_precision,
                       var_type=Variable.Local)

    # loop initialization: the iterator starts at msb_index and both
    # accumulators are cleared
    init_loop = Statement(
        vx_hi,
        vx_lo,
        ReferenceAssign(vi, msb_index),
        ReferenceAssign(acc, Constant(0, precision=precision)),
        ReferenceAssign(acc_int, Constant(0, precision=int_precision)),
    )

    # constant chunk and scaling factor of the current iteration
    cst_load = TableLoad(cst_table,
                         vi,
                         0,
                         tag="cst_load",
                         debug=debug_precision)
    sca_load = TableLoad(scale_table,
                         vi,
                         0,
                         tag="sca_load",
                         debug=debug_precision)
    # loop body
    # hi_mult = vx_hi * <scale_factor> * <cst>
    hi_mult = (vx_hi * sca_load) * (cst_load * sca_load)
    hi_mult.set_attributes(tag="hi_mult", debug=debug_precision)
    pre_hi_mult_int = NearestInteger(hi_mult,
                                     precision=int_precision,
                                     tag="hi_mult_int",
                                     debug=(debuglld if debug else None))
    hi_mult_int_f = Conversion(pre_hi_mult_int,
                               precision=precision,
                               tag="hi_mult_int_f",
                               debug=debug_precision)
    pre_hi_mult_red = (hi_mult - hi_mult_int_f).modify_attributes(
        tag="hi_mult_red", debug=debug_precision)

    # for the first chunks (vx_hi * <constant chunk>) exceeds 2**k+1 and may be
    # discarded (whereas it may lead to overflow during integer conversion)
    pre_exclude_hi = ((cst_msb_node - (vi + i1) * chunk_size + i1) +
                      (vx_exp + Constant(-half_size + 1, precision=ML_Int32))
                      ).modify_attributes(tag="pre_exclude_hi",
                                          debug=(debugd if debug else None))
    pre_exclude_hi.propagate_precision(ML_Int32,
                                       [cst_msb_node, vi, vx_exp, i1])
    Ck = Constant(k, precision=ML_Int32)
    exclude_hi = pre_exclude_hi <= Ck
    exclude_hi.set_attributes(tag="exclude_hi", debug=debug_multi)

    # the hi contributions are kept when exclude_hi holds and replaced
    # by 0 otherwise
    hi_mult_red = Select(exclude_hi, pre_hi_mult_red,
                         Constant(0, precision=precision))
    hi_mult_int = Select(exclude_hi, pre_hi_mult_int,
                         Constant(0, precision=int_precision))

    # lo part of the chunk reduction
    lo_mult = (vx_lo * sca_load) * (cst_load * sca_load)
    lo_mult.set_attributes(tag="lo_mult")  #, debug = debug_multi)
    lo_mult_int = NearestInteger(lo_mult,
                                 precision=int_precision,
                                 tag="lo_mult_int")  #, debug = debug_multi
    lo_mult_int_f = Conversion(lo_mult_int,
                               precision=precision,
                               tag="lo_mult_int_f")  #, debug = debug_multi)
    lo_mult_red = (lo_mult - lo_mult_int_f).modify_attributes(
        tag="lo_mult_red")  #, debug = debug_multi)

    # accumulating fractional part
    acc_expr = (acc + hi_mult_red) + lo_mult_red
    # accumulating integer part
    int_expr = ((acc_int + hi_mult_int) + lo_mult_int) % 2**(k + 1)

    # NOTE(review): CF1 and CI1 are not referenced again in this function
    CF1 = Constant(1, precision=precision)
    CI1 = Constant(1, precision=int_precision)

    # extracting exceeding integer part in fractional accumulator
    acc_expr_int = NearestInteger(acc_expr, precision=int_precision)
    # normalizing integer and fractional accumulator by subtracting then
    # adding exceeding integer part
    normalization = Statement(
        ReferenceAssign(
            acc, acc_expr - Conversion(acc_expr_int, precision=precision)),
        ReferenceAssign(acc_int, int_expr + acc_expr_int),
    )

    acc_expr.set_attributes(tag="acc_expr")  #, debug = debug_multi)
    int_expr.set_attributes(tag="int_expr")  #, debug = debug_multi)

    # reduction loop: chunks msb_index to lsb_index (included) are accumulated
    red_loop = Loop(
        init_loop, vi <= lsb_index,
        Statement(acc_expr, int_expr, normalization,
                  ReferenceAssign(vi, vi + 1)))

    result = Statement(lsb_index, msb_index, red_loop)

    # restoring sollya's global precision
    sollya.settings.prec = old_global_prec

    return result, acc, acc_int

Example #12

Show file

def generate_payne_hanek(vx,
                         frac_pi,
                         precision,
                         n=100,
                         k=4,
                         chunk_num=None,
                         debug=False):
    """ generate payne and hanek argument reduction for frac_pi * variable

    The constant frac_pi is cut into chunks of chunk_size bits which are
    stored (scaled) in a table. A loop then multiplies the high and low
    halves of vx by every selected chunk and accumulates the integer and
    the fractional parts of the products in two separate accumulators.

    Args:
        vx: input operation node to be reduced
        frac_pi: constant multiplicand
        precision: floating-point format of vx, either ML_Binary32 or
            ML_Binary64 (any other value raises KeyError)
        n: offset subtracted from the exponent of the last constant chunk
            taken into account (lsb_exp = -vx_exp + p - 1 - n)
        k: the integer accumulator is reduced modulo 2**(k + 1)
        chunk_num: unused, kept for interface compatibility
        debug: when True, debug attributes are attached to the
            intermediate nodes

    Returns:
        a tuple (result, acc, acc_int) where result is the Statement
        evaluating the chunk indexes and the reduction loop, acc is the
        fractional accumulator variable and acc_int the integer
        accumulator variable
    """
    # determining integer format corresponding to
    # floating point precision argument
    int_precision = {ML_Binary64: ML_Int64, ML_Binary32: ML_Int32}[precision]

    cst_msb = floor(log2(abs(frac_pi)))
    cst_exp_range = cst_msb - precision.get_emin_subnormal() + 1

    # chunk size has to be so than multiplication by a splitted <v> (vx_hi or vx_lo)
    # is exact
    chunk_size = 20  # precision.get_field_size() / 2 - 2
    chunk_number = int(ceil((cst_exp_range + chunk_size - 1) / chunk_size))
    # integer halving: keeps the exponent an integer under Python 3 as well
    scaling_factor = S2**-(chunk_size // 2)

    chunk_size_cst = Constant(chunk_size, precision=ML_Int32)
    cst_msb_node = Constant(cst_msb, precision=ML_Int32)

    p = precision.get_field_size()

    # adapting debug format to precision argument
    debug_precision = {
        ML_Binary32: debug_ftox,
        ML_Binary64: debug_lftolx
    }[precision] if debug else None

    # saving sollya's global precision and raising it so that every bit of
    # the constant can be represented while it is split into chunks
    old_global_prec = get_prec()
    prec(cst_exp_range + 100)

    try:
        # table to store chunk of constant multiplicand
        cst_table = ML_Table(dimensions=[chunk_number, 1],
                             storage_precision=precision,
                             tag="PH_cst_table")
        # table to store sqrt(scaling_factor) corresponding to the cst multiplicand chunks
        scale_table = ML_Table(dimensions=[chunk_number, 1],
                               storage_precision=precision,
                               tag="PH_scale_table")
        tmp_cst = frac_pi

        # this loop divide the digits of frac_pi into chunks
        # the chunk lsb weight is given by a shift from
        # cst_msb, multiple of the chunk index
        for i in range(chunk_number):
            value_div_factor = S2**(chunk_size * (i + 1) - cst_msb)
            local_cst = int(tmp_cst * value_div_factor) / value_div_factor
            local_scale = (scaling_factor**i)
            # storing scaled constant chunks
            cst_table[i][0] = local_cst / (local_scale**2)
            scale_table[i][0] = local_scale
            tmp_cst = tmp_cst - local_cst

        vx_exp = ExponentExtraction(vx)
        msb_exp = -vx_exp + p - 1 + k
        msb_exp.set_attributes(tag="msb_exp",
                               debug=(debugd if debug else None))

        # index of the first constant chunk visited by the loop
        msb_index = Select(cst_msb_node < msb_exp, 0,
                           (cst_msb_node - msb_exp) / chunk_size_cst)
        msb_index.set_attributes(tag="msb_index",
                                 debug=(debugd if debug else None))

        lsb_exp = -vx_exp + p - 1 - n
        lsb_exp.set_attributes(tag="lsb_exp",
                               debug=(debugd if debug else None))

        # index of the last constant chunk visited by the loop
        lsb_index = (cst_msb_node - lsb_exp) / chunk_size_cst
        lsb_index.set_attributes(tag="lsb_index",
                                 debug=(debugd if debug else None))

        # number of low-order bits cleared to build vx_hi; integer division
        # is required because the value is used as a bit count
        half_size = precision.get_field_size() // 2 + 1

        # the bit mask is applied in the integer format whose size matches
        # <precision> (int_precision) rather than in a fixed 64-bit format
        vx_hi = TypeCast(BitLogicAnd(
            TypeCast(vx, precision=int_precision),
            Constant(~(2**half_size - 1), precision=int_precision)),
                         precision=precision)
        vx_hi.set_attributes(tag="vx_hi", debug=debug_precision)

        vx_lo = vx - vx_hi
        vx_lo.set_attributes(tag="vx_lo", debug=debug_precision)

        vi = Variable("i", precision=ML_Int32, var_type=Variable.Local)

        i1 = Constant(1, precision=ML_Int32)

        acc = Variable("acc", precision=precision, var_type=Variable.Local)
        acc_int = Variable("acc_int",
                           precision=int_precision,
                           var_type=Variable.Local)

        # the integer accumulator is reset with a constant of its own
        # (integer) format
        init_loop = Statement(
            vx_hi,
            vx_lo,
            ReferenceAssign(vi, msb_index),
            ReferenceAssign(acc, Constant(0, precision=precision)),
            ReferenceAssign(acc_int, Constant(0, precision=int_precision)),
        )

        cst_load = TableLoad(cst_table,
                             vi,
                             0,
                             tag="cst_load",
                             debug=debug_precision)
        sca_load = TableLoad(scale_table,
                             vi,
                             0,
                             tag="sca_load",
                             debug=debug_precision)

        # hi part of the chunk reduction
        hi_mult = (vx_hi * sca_load) * (cst_load * sca_load)
        hi_mult.set_attributes(tag="hi_mult", debug=debug_precision)
        pre_hi_mult_int = NearestInteger(hi_mult,
                                         precision=int_precision,
                                         tag="hi_mult_int",
                                         debug=(debuglld if debug else None))
        hi_mult_int_f = Conversion(pre_hi_mult_int,
                                   precision=precision,
                                   tag="hi_mult_int_f",
                                   debug=debug_precision)
        pre_hi_mult_red = (hi_mult - hi_mult_int_f).modify_attributes(
            tag="hi_mult_red", debug=debug_precision)

        # for the first chunks (vx_hi * <constant chunk>) exceeds 2**k+1 and may be
        # discard (whereas it may lead to overflow during integer conversion
        pre_exclude_hi = ((cst_msb_node - (vi + i1) * chunk_size + i1) +
                          (vx_exp + Constant(-half_size + 1, precision=ML_Int32))
                          ).modify_attributes(tag="pre_exclude_hi",
                                              debug=(debugd if debug else None))
        pre_exclude_hi.propagate_precision(ML_Int32,
                                           [cst_msb_node, vi, vx_exp, i1])
        Ck = Constant(k, precision=ML_Int32)
        exclude_hi = pre_exclude_hi <= Ck
        exclude_hi.set_attributes(tag="exclude_hi",
                                  debug=(debugd if debug else None))

        hi_mult_red = Select(exclude_hi, pre_hi_mult_red,
                             Constant(0, precision=precision))
        hi_mult_int = Select(exclude_hi, pre_hi_mult_int,
                             Constant(0, precision=int_precision))

        # lo part of the chunk reduction
        lo_mult = (vx_lo * sca_load) * (cst_load * sca_load)
        lo_mult.set_attributes(tag="lo_mult", debug=debug_precision)
        lo_mult_int = NearestInteger(lo_mult,
                                     precision=int_precision,
                                     tag="lo_mult_int",
                                     debug=(debuglld if debug else None))
        lo_mult_int_f = Conversion(lo_mult_int,
                                   precision=precision,
                                   tag="lo_mult_int_f",
                                   debug=debug_precision)
        lo_mult_red = (lo_mult - lo_mult_int_f).modify_attributes(
            tag="lo_mult_red", debug=debug_precision)

        # accumulating fractional part
        acc_expr = (acc + hi_mult_red) + lo_mult_red
        # accumulating integer part
        int_expr = ((acc_int + hi_mult_int) + lo_mult_int) % 2**(k + 1)

        # extracting exceeding integer part in fractional accumulator
        acc_expr_int = NearestInteger(acc_expr, precision=int_precision)

        # normalizing integer and fractional accumulator by subtracting then
        # adding exceeding integer part
        normalization = Statement(
            ReferenceAssign(
                acc, acc_expr - Conversion(acc_expr_int, precision=precision)),
            ReferenceAssign(acc_int, int_expr + acc_expr_int),
        )

        acc_expr.set_attributes(tag="acc_expr", debug=debug_precision)
        int_expr.set_attributes(tag="int_expr",
                                debug=(debuglld if debug else None))

        red_loop = Loop(
            init_loop,
            vi <= lsb_index,
            Statement(
                acc_expr,
                int_expr,
                normalization,
                ReferenceAssign(vi, vi + 1)))
        result = Statement(lsb_index, msb_index, red_loop)

        return result, acc, acc_int
    finally:
        # restoring sollya's global precision, even if the generation failed
        prec(old_global_prec)

Example #13

Show file

File: ml_fast_sincos.py Project: mezzarobba/metalibm

    def generate_scheme(self):
        """ build the evaluation scheme of a table-based fixed-point
            sine / cosine approximation

            The input is converted to a 32-bit fixed-point format, split
            into a high part (used as index into a fused cosine / sine
            table) and a low part (fed to degree-2 polynomial
            approximations). The sine or cosine combination scheme is
            selected by self.cos_output.

            Returns:
                a Statement wrapping the Return of the final result
                converted to self.io_precisions[0]
        """
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "target: %s " % self.processor.target_name)

        # display parameter information
        Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
        Log.report(Log.Info, "input interval: %s " % self.input_interval)

        accuracy_goal = self.accuracy.get_goal()
        Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

        table_size_log = self.table_size_log
        integer_size = 31
        # NOTE(review): integer_precision is not used in this method
        integer_precision = ML_Int32

        # smallest power of 2 bounding the absolute value of the input
        max_bound = sup(abs(self.input_interval))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        # number of fractional bits left once max_bound_log integer bits
        # have been reserved out of integer_size
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table: entry i holds cos and sin of
        # i * 2**(max_bound_log - table_size_log)
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(
                local_x
            )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

            sin_local = sin(
                local_x
            )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

        # fixed-point format of the reduced argument:
        # (31 - scaling_power) integer bits and scaling_power fractional bits
        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.io_precisions[0]
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        # mask keeping the (table_size_log + 1) most significant bits of a
        # 32-bit word
        # NOTE(review): the mask value does not fit in a signed 32-bit
        # range although it is declared as ML_Int32 - confirm that the
        # constant is emitted as the expected bit pattern
        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        # high part of the reduced argument, obtained by masking its raw bits
        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        # low part of the reduced argument (input of the polynomials)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        # table index extracted by right-shifting the raw bits of red_vx
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        # approximation error is measured as the infinite norm of f - p over
        # the approximation interval; the other arguments are ignored
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        # NOTE(review): cos_poly_degree is not used below
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        # NOTE(review): this call uses a bare `x` whereas the sine
        # approximation below uses `sollya.x` - confirm that `x` is bound
        # to the sollya free variable in this module
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(x), [0, 2], [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        # NOTE(review): coeff_C0 is not used below
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        # NOTE(review): the precision list built here has
        # sin_poly_degree + 2 entries whereas the cosine call passes 2
        # entries for the same [0, 2] monomial list - confirm which is
        # intended
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        # NOTE(review): coeff_S0 is not used below
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        # converting the fixed-point result to the output format
        result = Conversion(scheme, precision=self.io_precisions[0])

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme

Example #14

Show file

File: range_eval.py Project: metalibm/metalibm

    def generate_scheme(self):
        """ build a set of operation nodes with known ranges and check
            that evaluate_range returns the expected interval for each """
        integer_bits = 3
        fraction_bits = self.width - integer_bits

        in_format = fixed_point(integer_bits, fraction_bits)
        # output format, built like the input one
        out_format = fixed_point(integer_bits, fraction_bits)

        # reference interval of every node under test
        reference = {}

        def declare_input(name, lo_bound, hi_bound):
            """ declare input signal <name>, annotate it with the interval
                [lo_bound, hi_bound] and register this interval as reference """
            signal = self.implementation.add_input_signal(name, in_format)
            signal_range = Interval(lo_bound, hi_bound)
            signal.set_interval(signal_range)
            reference[signal] = signal_range
            return signal, signal_range

        # main input signals
        var_x, x_range = declare_input("x", -10.3, 10.7)
        var_y, y_range = declare_input("y", -17.9, 17.2)
        var_z, z_range = declare_input("z", -7.3, 7.7)

        # constant node
        cst = Constant(42.5, tag = "cst")
        reference[cst] = Interval(42.5)

        # rounding to integer operations
        conv_ceil = Ceil(var_x, tag = "ceil")
        reference[conv_ceil] = sollya.ceil(x_range)

        conv_floor = Floor(var_y, tag = "floor")
        reference[conv_floor] = sollya.floor(y_range)

        # arithmetic operations
        mult = var_z * var_x
        mult.set_tag("mult")
        mult_range = z_range * x_range
        reference[mult] = mult_range

        large_add = (var_x + var_y) - mult
        large_add.set_attributes(tag = "large_add")
        large_add_range = (x_range + y_range) - mult_range
        reference[large_add] = large_add_range

        # leading zero count is bounded by the bit size of its operand
        var_x_lzc = CountLeadingZeros(var_x, tag="var_x_lzc")
        reference[var_x_lzc] = Interval(0, in_format.get_bit_size())

        # clamping of large_add between 0 and 13
        reduced_result = Max(0, Min(large_add, 13))
        reduced_result.set_tag("reduced_result")
        clamped_above = interval_min(large_add_range, Interval(13))
        reduced_range = interval_max(Interval(0), clamped_above)
        reference[reduced_result] = reduced_range

        # selection: result range is the union of both branches
        select_result = Select(
            var_x > var_y,
            reduced_result,
            var_z,
            tag = "select_result"
        )
        reference[select_result] = interval_union(reduced_range, z_range)

        # floating-point operation on mantissa and exponents
        fp_x_range = Interval(-0.01, 100)

        unbound_fp_var = Variable("fp_x", precision=ML_Binary32, interval=fp_x_range)
        mant_fp_x = MantissaExtraction(unbound_fp_var, tag="mant_fp_x", precision=ML_Binary32)
        exp_fp_x = ExponentExtraction(unbound_fp_var, tag="exp_fp_x", precision=ML_Int32)
        ins_exp_fp_x = ExponentInsertion(exp_fp_x, tag="ins_exp_fp_x", precision=ML_Binary32)

        abs_fp_range = abs(fp_x_range)
        exp_range = Interval(
            sollya.floor(sollya.log2(sollya.inf(abs_fp_range))),
            sollya.floor(sollya.log2(sollya.sup(abs_fp_range)))
        )
        reference[unbound_fp_var] = fp_x_range
        reference[exp_fp_x] = exp_range
        reference[mant_fp_x] = Interval(1, 2)
        reference[ins_exp_fp_x] = Interval(
            S2**sollya.inf(exp_range),
            S2**sollya.sup(exp_range)
        )

        # checking interval evaluation
        checked_nodes = [
            var_x_lzc, exp_fp_x, unbound_fp_var, mant_fp_x, ins_exp_fp_x,
            cst, var_x, var_y, mult, large_add, reduced_result,
            select_result, conv_ceil, conv_floor,
        ]
        for node in checked_nodes:
            computed = evaluate_range(node)
            wanted = reference[node]
            print("{}: {}".format(node.get_tag(), computed))
            print("  vs expected {}".format(wanted))
            assert computed is not None
            assert computed == wanted


        return [self.implementation]

Example #15

Show file

File: random_gen.py Project: metalibm/metalibm

def get_value_exp(value):
    """ binary exponent of value, computed as ceil(log2(|value|)) """
    magnitude = abs(value)
    log2_magnitude = sollya.log2(magnitude)
    return sollya.ceil(log2_magnitude)

Example #16

Show file

def ulp(v, format_):
    """ 'unit in last place' of <v> for the precision given by <format_>,
        evaluated as 2**(ceil(log2(|v|)) - (format_.get_precision() + 1)) """
    radix = sollya.S2
    # exponent of the smallest power of 2 greater than or equal to |v|
    magnitude_exp = sollya.ceil(sollya.log2(sollya.abs(v)))
    mantissa_shift = format_.get_precision() + 1
    return radix**(magnitude_exp - mantissa_shift)

Example #17

Show file

    def generate_scheme(self):
        """ build a table-based approximation of self.function over
            self.interval

            The input is reduced to an unsigned fixed-point index of
            alpha + beta + gamma bits. The result is the sum of an entry
            from a table of initial values (indexed by the alpha and beta
            fields) and an entry from a table of offsets (indexed by the
            alpha and gamma fields), the offsets being derived from one
            slope per alpha sub-interval.

            The approximation error over every table point and the total
            table size are printed for information.

            Returns:
                a list containing self.implementation
        """
        io_precision = self.precision

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input (declared as an input signal, not used by
        # the datapath built below)
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        # size of most significant table index (for linear slope tabulation)
        alpha = self.alpha  # 6
        # size of medium significant table index (for initial value table index LSB)
        beta = self.beta  # 5
        # size of least significant table index (for linear offset tabulation)
        gamma = self.gamma  # 5

        guard_bits = self.guard_bits  # 3

        vx.set_interval(self.interval)

        range_hi = sollya.sup(self.interval)
        range_lo = sollya.inf(self.interval)
        f_hi = self.function(range_hi)
        f_lo = self.function(range_lo)
        # fixed by format used for reduced_x
        range_size = range_hi - range_lo
        range_size_log2 = int(sollya.log2(range_size))
        # the reduction below is a plain shift: the interval width must be
        # a power of 2
        assert 2**range_size_log2 == range_size

        print("range_size_log2={}".format(range_size_log2))

        reduced_x = Conversion(BitLogicRightShift(vx - range_lo,
                                                  range_size_log2),
                               precision=fixed_point(0,
                                                     alpha + beta + gamma,
                                                     signed=False),
                               tag="reduced_x",
                               debug=debug_fixed)

        # alpha most significant bits of reduced_x
        alpha_index = get_fixed_slice(reduced_x,
                                      0,
                                      alpha - 1,
                                      align_hi=FixedPointPosition.FromMSBToLSB,
                                      align_lo=FixedPointPosition.FromMSBToLSB,
                                      tag="alpha_index",
                                      debug=debug_std)
        # gamma least significant bits of reduced_x
        gamma_index = get_fixed_slice(reduced_x,
                                      gamma - 1,
                                      0,
                                      align_hi=FixedPointPosition.FromLSBToLSB,
                                      align_lo=FixedPointPosition.FromLSBToLSB,
                                      tag="gamma_index",
                                      debug=debug_std)

        # bits of reduced_x located between the alpha and gamma fields
        beta_index = get_fixed_slice(reduced_x,
                                     alpha,
                                     gamma,
                                     align_hi=FixedPointPosition.FromMSBToLSB,
                                     align_lo=FixedPointPosition.FromLSBToLSB,
                                     tag="beta_index",
                                     debug=debug_std)

        # Assuming monotonic function
        f_absmax = max(abs(f_hi), abs(f_lo))
        f_absmin = min(abs(f_hi), abs(f_lo))

        # storage format is sized from the extremal magnitudes of the
        # function at the interval bounds
        f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
        f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
        storage_lsb = f_lsb - io_precision.get_bit_size() - guard_bits

        f_int_size = f_msb
        f_frac_size = -storage_lsb

        storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
        Log.report(Log.Info, "storage_format is {}".format(storage_format))

        # table of initial value index
        tiv_index = Concatenation(alpha_index,
                                  beta_index,
                                  tag="tiv_index",
                                  debug=debug_std)
        # table of offset value index
        to_index = Concatenation(alpha_index,
                                 gamma_index,
                                 tag="to_index",
                                 debug=debug_std)

        tiv_index_size = alpha + beta
        to_index_size = alpha + gamma

        Log.report(Log.Info, "initial table structures")
        table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                               storage_precision=storage_format,
                               tag="tiv")
        table_offset = ML_NewTable(dimensions=[2**to_index_size],
                                   storage_precision=storage_format,
                                   tag="to")

        slope_table = [None] * (2**alpha)
        slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
        delta_u = range_size * slope_delta * 2**-15
        Log.report(Log.Info, "computing slope value")
        for i in range(2**alpha):
            # slope is computed at the middle of range_size interval
            slope_x = range_lo + (i + 0.5) * range_size * slope_delta
            # TODO: gross approximation of derivatives
            # (centered finite difference of width delta_u)
            f_xpu = self.function(slope_x + delta_u / 2)
            f_xmu = self.function(slope_x - delta_u / 2)
            slope = (f_xpu - f_xmu) / delta_u
            slope_table[i] = slope

        range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
        Log.report(Log.Info, "computing value for initial-value table")
        for i in range(2**tiv_index_size):
            # the alpha field of the initial-value index selects the slope
            slope_index = i / 2**beta
            iv_x = range_lo + i * range_rcp_steps * range_size
            offset_x = 0.5 * range_rcp_steps * range_size
            # initial value is computed so that the piecewise linear
            # approximation intersects the function at iv_x + offset_x
            iv_y = self.function(
                iv_x + offset_x) - offset_x * slope_table[int(slope_index)]
            initial_value = storage_format.round_sollya_object(iv_y)
            table_iv[i] = initial_value

        # determining table of initial value interval
        tiv_min = table_iv[0]
        tiv_max = table_iv[0]
        for i in range(1, 2**tiv_index_size):
            tiv_min = min(tiv_min, table_iv[i])
            tiv_max = max(tiv_max, table_iv[i])
        table_iv.set_interval(Interval(tiv_min, tiv_max))

        offset_step = range_size / S2**(alpha + beta + gamma)
        for i in range(2**alpha):
            Log.report(Log.Info,
                       "computing offset value for sub-table {}".format(i))
            for j in range(2**gamma):
                to_i = i * 2**gamma + j
                offset = slope_table[i] * j * offset_step
                table_offset[to_i] = offset

        # determining table of offset interval
        to_min = table_offset[0]
        to_max = table_offset[0]
        for i in range(1, 2**(alpha + gamma)):
            to_min = min(to_min, table_offset[i])
            to_max = max(to_max, table_offset[i])
        offset_interval = Interval(to_min, to_max)
        table_offset.set_interval(offset_interval)

        initial_value = TableLoad(table_iv,
                                  tiv_index,
                                  precision=storage_format,
                                  tag="initial_value",
                                  debug=debug_fixed)

        # the offset table gets a dedicated (narrower) storage format
        # derived from the interval of its values
        offset_precision = get_fixed_type_from_interval(offset_interval, 16)
        print("offset_precision is {} ({} bits)".format(
            offset_precision, offset_precision.get_bit_size()))
        table_offset.get_precision().storage_precision = offset_precision

        # rounding table value (entry 0 was stored as slope * 0 * step and
        # is left untouched)
        for i in range(1, 2**(alpha + gamma)):
            table_offset[i] = offset_precision.round_sollya_object(
                table_offset[i])

        offset_value = TableLoad(table_offset,
                                 to_index,
                                 precision=offset_precision,
                                 tag="offset_value",
                                 debug=debug_fixed)

        Log.report(
            Log.Verbose,
            "initial_value's interval: {}, offset_value's interval: {}".format(
                evaluate_range(initial_value), evaluate_range(offset_value)))

        final_add = initial_value + offset_value
        round_bit = final_add  # + FixedPointPosition(final_add, io_precision.get_bit_size(), align=FixedPointPosition.FromMSBToLSB)

        vr_out = Conversion(initial_value + offset_value,
                            precision=io_precision,
                            tag="vr_out",
                            debug=debug_fixed)

        self.implementation.add_output_signal("vr_out", vr_out)

        # Approximation error evaluation: every (alpha, beta, gamma) index
        # is visited and the tabulated sum is compared to the function
        approx_error = 0.0
        for i in range(2**alpha):
            for j in range(2**beta):
                tiv_i = (i * 2**beta + j)
                # = range_lo + tiv_i * range_rcp_steps * range_size
                iv = table_iv[tiv_i]
                for k in range(2**gamma):
                    to_i = i * 2**gamma + k
                    offset = table_offset[to_i]
                    approx_value = offset + iv
                    table_x = range_lo + range_size * (
                        (i * 2**beta + j) * 2**gamma + k) / S2**(alpha + beta +
                                                                 gamma)
                    # the reference is the approximated function itself
                    # (the tables above are built from self.function), not
                    # a hard-coded reciprocal
                    local_error = abs(self.function(table_x) - approx_value)
                    approx_error = max(approx_error, local_error)
        error_log2 = float(sollya.log2(approx_error))
        print("approx_error is {}, error_log2 is {}".format(
            float(approx_error), error_log2))

        # table size
        table_iv_size = 2**(alpha + beta)
        table_offset_size = 2**(alpha + gamma)
        print("tables' size are {} entries".format(table_iv_size +
                                                   table_offset_size))

        return [self.implementation]

Example #18

Show file

File: ml_exp_adaptative.py Project: templeblock/metalibm

    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        #try:
        if 1:
            #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            Log.report(Log.Info, "eval error: %s" % eval_error)
        #except:
        #    Log.report(Log.Info, "gappa error evaluation failed")
        print r.get_str(depth = None, display_precision = True, display_attribute = True)
        print opt_r.get_str(depth = None, display_precision = True, display_attribute = True)

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print "ulp: ", local_ulp 
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree

        return


        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            #print "poly: ", poly.get_str(depth = None, display_precision = True)
            #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print "global_poly_error: ", global_poly_error, global_rel_poly_error 
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print "test: ", flag
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        #print scheme.get_str(depth = None, display_precision = True)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        #self.result.add_header("support_lib/ml_types.h")
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()

Example #19

Show file

  def generate_scheme(self):
    memory_limit = 2500

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = input_var
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    ### Constants computations ###

    v_log2_hi = nearestint(log(2) * 2**-52) * 2**52
    v_log2_lo = round(log(2) - v_log2_hi, 64+53, sollya.RN)
    log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi")
    log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo")
   
    print "\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)"
    arg_reduc = self.generate_argument_reduction(memory_limit)

    print "\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory".format(arg_reduc['size1'],arg_reduc['prec1'],arg_reduc['size2'],arg_reduc['prec2'],arg_reduc['degree_poly1'],arg_reduc['degree_poly2'],arg_reduc['sizeof_tables']) 
    
    print "\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table1'], arg_reduc['sizeof_table1'])
    inv_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False),
                           tag = self.uniquify_name("inv_table_1"))
    log_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_1"))
    for i in xrange(0, arg_reduc['length_table1']-1):
      x1 = 1 + i/S2*arg_reduc['size1']
      inv_x1 = ceil(S2**arg_reduc['prec1']/x1)*S2**arg_reduc['prec1']
      log_x1 = floor(log(x1) * S2**(128-11))*S2**(11-128)
      inv_table_1[i] = inv_x1 #Constant(inv_x1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False))
      log_table_1[i] = log_x1 #Constant(log_x1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))

    print "\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table2'], arg_reduc['sizeof_table2'])
    inv_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False),
                           tag = self.uniquify_name("inv_table_2"))
    log_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_2"))
    for i in xrange(0, arg_reduc['length_table2']-1):
      y1 = 1 + i/S2**arg_reduc['size2']
      inv_y1 = ceil(S2**arg_reduc['prec2']/x1) * S2**arg_reduc['prec2']
      log_y1 = floor(log(inv_y1) * S2**(128-11))*S2**(11-128)
      inv_table_2[i] = inv_y1 #Constant(inv_y1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False))
      log_table_2[i] = log_y1 #Constant(log_y1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))
    
    ### Evaluation Scheme ###
    
    print "\n\033[1mGenerate the evaluation scheme:\033[0m"
    input_var = self.implementation.add_input_variable("input_var", self.precision) 
    ve = ExponentExtraction(input_var, tag = "x_exponent", debug = debugd)
    vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = ML_Custom_FixedPoint_Format(0,52,False), debug = debug_lftolx)
    #vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = self.precision, debug = debug_lftolx)

    print "filtering and handling special cases"
    test_is_special_cases = LogicalNot(Test(input_var, specifier = Test.IsIEEENormalPositive, likely = True, debug = debugd, tag = "is_special_cases"))
    handling_special_cases = Statement(
      ConditionBlock(
        Test(input_var, specifier = Test.IsSignalingNaN, debug = True),
        ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision))
      ),
      ConditionBlock(
        Test(input_var, specifier = Test.IsNaN, debug = True),
        Return(input_var)
      )#,
      # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN)
      # all that remains is x is a subnormal positive
      #Statement(
      #  ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))),
      #  ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision()))))
      #)
    )
    
    print "doing the argument reduction"
    v_dx = vx
    v_x1 = Conversion(v_dx, tag = 'x1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size1'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_x = TypeCast(v_x1, tag = 'index_x',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_x1.get_precision().get_c_bit_size(), 0, False))
    v_inv_x = TableLoad(inv_table_1, v_index_x, tag = 'inv_x')
    v_x = Addition(v_dx, 1, tag = 'x',
                   precision = ML_Custom_FixedPoint_Format(1,52,False))
    v_dy = Multiplication(v_x, v_inv_x, tag = 'dy',
                          precision = ML_Custom_FixedPoint_Format(0,52+arg_reduc['prec1'],False))
    v_y1 = Conversion(v_dy, tag = 'y1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size2'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_y = TypeCast(v_y1, tag = 'index_y',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_y1.get_precision().get_c_bit_size(), 0, False))
    v_inv_y = TableLoad(inv_table_2, v_index_y, tag = 'inv_y')
    v_y = Addition(v_dy, 1, tag = 'y',
                   precision = ML_Custom_FixedPoint_Format(1,52+arg_reduc['prec2'],False))
    # note that we limit the number of bits used to represent dz to 64.
    # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2))
    v_dz = Multiplication(v_y, v_inv_y, tag = 'z',
                          precision = ML_Custom_FixedPoint_Format(64-52-arg_reduc['prec1']-arg_reduc['prec2'],52+arg_reduc['prec1']+arg_reduc['prec2'],False))
    # reduce the number of bits used to represent dz. we can do that
    
    print "doing the first polynomial evaluation"
    global_poly1_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, arg_reduc['degree_poly1']-1, [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'], fixed, sollya.absolute)
    poly1_object = global_poly1_object.sub_poly(start_index = 1)
    print global_poly1_object
    print poly1_object
    poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(poly1_object, v_dz, unified_precision = v_dz.get_precision())
    return ConditionBlock(test_is_special_cases, handling_special_cases, Return(poly1))

    #approx_interval = Interval(0, 27021597764222975*S2**-61)
    
    #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size())))
    #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute)
    #poly_object = global_poly_object.sub_poly(start_index = 1)
    #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
    #_poly.set_attributes(tag = "poly", debug = debug_lftolx)

    """

Example #20

Show file

File: ml_fixed_mpfma.py Project: templeblock/metalibm

    def generate_scheme(self):
        ## Generate a fused multiply-add computing <x> . <y> + <z>
        Log.report(
            Log.Info,
            "generating fixed MPFMA with {ed} extra digit(s) and sign-magnitude accumulator: {sm}"
            .format(ed=self.extra_digit, sm=self.sign_magnitude))

        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = HdlVirtualFormat(self.precision)
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable

        # maximum weight for a mantissa product digit
        max_prod_exp = self.precision.get_emax() * 2 + 1
        # minimum weight for a mantissa product digit
        min_prod_exp = self.precision.get_emin_subnormal() * 2

        ## Most and least significant digit index for the
        #  accumulator
        acc_msb_index = max_prod_exp + self.extra_digit
        acc_lsb_index = min_prod_exp

        acc_width = acc_msb_index - min_prod_exp + 1
        # precision of the accumulator
        acc_prec = ML_StdLogicVectorFormat(acc_width)

        reset = self.implementation.add_input_signal("reset", ML_StdLogic)

        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        acc = self.implementation.add_input_signal("acc", acc_prec)
        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation
            sign_acc = self.implementation.add_input_signal(
                "sign_acc", ML_StdLogic)
        else:
            sign_acc = CopySign(acc,
                                precision=ML_StdLogic,
                                tag="sign_acc",
                                debug=debug_std)

        vx_precision = self.precision
        vy_precision = self.precision
        result_precision = acc_prec

        # precision for first operand vx which is to be statically
        # positionned
        p = vx_precision.get_mantissa_size()
        # precision for second operand vy which is to be dynamically shifted
        q = vy_precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shift amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())

        mant_vx_precision = ML_StdLogicVectorFormat(p - 1)
        mant_vy_precision = ML_StdLogicVectorFormat(q - 1)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)

        exp_vx = ExponentExtraction(vx,
                                    precision=exp_vx_precision,
                                    tag="exp_vx",
                                    debug=debug_dec)
        exp_vy = ExponentExtraction(vy,
                                    precision=exp_vy_precision,
                                    tag="exp_vy",
                                    debug=debug_dec)

        # Maximum number of leading zero for normalized <vx> mantissa
        L_x = 0
        # Maximum number of leading zero for normalized <vy> mantissa
        L_y = 0
        # Maximum number of leading zero for the product of <x>.<y>
        # mantissa.
        L_xy = L_x + L_y + 1

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        sign_xy = BitLogicXor(sign_vx,
                              sign_vy,
                              precision=ML_StdLogic,
                              tag="sign_xy",
                              debug=ML_Debug(display_format="-radix 2"))
        effective_op = BitLogicXor(sign_xy,
                                   sign_acc,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()

        # <acc> is statically positionned in the datapath,
        # it may even constitute the whole datapath
        #
        # the product is shifted with respect to the fix accumulator

        exp_bias = (exp_vx_bias + exp_vy_bias)

        # because of the mantissa range [1, 2[, the product exponent
        # is located one bit to the right (lower) of the product MSB
        prod_exp_offset = 1

        # Determine a working precision to accomodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(
            vx_precision.get_exponent_size(),
            vy_precision.get_exponent_size(),
            abs(ceil(log2(abs(acc_msb_index)))),
            abs(ceil(log2(abs(acc_lsb_index)))),
            abs(ceil(log2(abs(exp_bias + prod_exp_offset)))),
        ) + 2
        Log.report(Log.Info,
                   "exp_precision_ext_size={}".format(exp_precision_ext_size))
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)

        # static accumulator exponent
        exp_acc = Constant(acc_msb_index,
                           precision=exp_precision_ext,
                           tag="exp_acc",
                           debug=debug_cst_dec)

        # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + offset
        # exp_vx in [emin, emax]
        # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(
            exp_acc,
            Addition(Addition(zext(
                exp_vy,
                exp_precision_ext_size - vy_precision.get_exponent_size()),
                              zext(
                                  exp_vx, exp_precision_ext_size -
                                  vx_precision.get_exponent_size()),
                              precision=exp_precision_ext),
                     Constant(exp_bias + prod_exp_offset,
                              precision=exp_precision_ext,
                              tag="diff_bias",
                              debug=debug_cst_dec),
                     precision=exp_precision_ext,
                     tag="pre_exp_diff",
                     debug=debug_dec),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_dec)
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        datapath_full_width = acc_width
        # the maximum exp diff is the size of the datapath
        # minus the bit size of the product
        max_exp_diff = datapath_full_width - (p + q)
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(signed_exp_diff,
                                          Constant(
                                              max_exp_diff,
                                              precision=exp_precision_ext),
                                          specifier=Comparison.Greater,
                                          precision=ML_Bool)

        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        prod_prec = ML_StdLogicVectorFormat(p + q)
        prod = Multiplication(mant_vx,
                              mant_vy,
                              precision=prod_prec,
                              tag="prod",
                              debug=debug_std)

        # attempt at pipelining the operator
        # self.implementation.start_new_stage()

        mant_ext_size = datapath_full_width - (p + q)
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        shifted_prod = BitLogicRightShift(rzext(prod, mant_ext_size),
                                          mant_shift,
                                          precision=shift_prec,
                                          tag="shifted_prod",
                                          debug=debug_std)

        ## Inserting a pipeline stage after the product shifting
        if self.pipelined: self.implementation.start_new_stage()

        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation

            acc_negated = Select(Comparison(sign_xy,
                                            sign_acc,
                                            specifier=Comparison.Equal,
                                            precision=ML_Bool),
                                 acc,
                                 BitLogicNegate(acc, precision=acc_prec),
                                 precision=acc_prec)

            # one extra MSB bit is added to the final addition
            # to detect overflows
            add_width = acc_width + 1
            add_prec = ML_StdLogicVectorFormat(add_width)

            # FIXME: implement with a proper compound adder
            mant_add_p0_ext = Addition(zext(shifted_prod, 1),
                                       zext(acc_negated, 1),
                                       precision=add_prec)
            mant_add_p1_ext = Addition(
                mant_add_p0_ext,
                Constant(1, precision=ML_StdLogic),
                precision=add_prec,
                tag="mant_add",
                debug=ML_Debug(display_format=" -radix 2"))
            # discarding carry overflow bit
            mant_add_p0 = SubSignalSelection(mant_add_p0_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)
            mant_add_p1 = SubSignalSelection(mant_add_p1_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)

            mant_add_pre_sign = CopySign(mant_add_p1_ext,
                                         precision=ML_StdLogic,
                                         tag="mant_add_pre_sign",
                                         debug=debug_std)
            mant_add = Select(Comparison(sign_xy,
                                         sign_acc,
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              mant_add_p0,
                              Select(
                                  Comparison(mant_add_pre_sign,
                                             Constant(1,
                                                      precision=ML_StdLogic),
                                             specifier=Comparison.Equal,
                                             precision=ML_Bool),
                                  mant_add_p1,
                                  BitLogicNegate(mant_add_p0,
                                                 precision=acc_prec),
                                  precision=acc_prec,
                              ),
                              precision=acc_prec,
                              tag="mant_add")

            # if both operands had the same sign, then
            # mant_add is necessarily positive and the result
            # sign matches the input sign
            # if both operands had opposite signs, then
            # the result sign matches the product sign
            # if mant_add is positive, else the accumulator sign
            output_sign = Select(
                Comparison(effective_op,
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool),
                # if the effective op is a subtraction (prod - acc)
                BitLogicXor(sign_acc, mant_add_pre_sign,
                            precision=ML_StdLogic),
                # the effective op is an addition, thus result and
                # acc share sign
                sign_acc,
                precision=ML_StdLogic,
                tag="output_sign")

            if self.pipelined: self.implementation.start_new_stage()

            # adding output
            self.implementation.add_output_signal("vr_sign", output_sign)
            self.implementation.add_output_signal("vr_acc", mant_add)

        else:
            # 2s complement encoding of the accumulator,
            # the accumulator is never negated, only the producted
            # is negated if negative

            # negate shifted prod when required
            shifted_prod_op = Select(Comparison(sign_xy,
                                                Constant(
                                                    1, precision=ML_StdLogic),
                                                specifier=Comparison.Equal,
                                                precision=ML_Bool),
                                     Negation(shifted_prod,
                                              precision=shift_prec),
                                     shifted_prod,
                                     precision=shift_prec)

            add_prec = shift_prec  # ML_StdLogicVectorFormat(datapath_full_width + 1)

            mant_add = Addition(shifted_prod_op,
                                acc,
                                precision=acc_prec,
                                tag="mant_add",
                                debug=ML_Debug(display_format=" -radix 2"))

            if self.pipelined: self.implementation.start_new_stage()

            self.implementation.add_output_signal("vr_acc", mant_add)

        return [self.implementation]

Example #21

Show file

    def generate_scheme(self):
        """Build the operation graph of an exponential implementation.

        The generated scheme:
          1. filters special inputs (NaN, signaling NaN, +/- infinity),
          2. filters inputs which overflow / underflow for sure,
          3. reduces the argument as r = x - k * log(2), with log(2) split
             into a high part (few mantissa bits, so that k * log2_hi can
             be exact) and a low part,
          4. searches for the smallest polynomial degree (approximating
             expm1 over [-log(2)/2, log(2)/2]) whose approximation plus
             evaluation error (the latter measured with gappa, when
             available) meets the error goal derived from self.accuracy,
          5. reconstructs the result as 2^k * poly, with dedicated paths
             for late overflow and late underflow.

        Returns:
            The top-level ConditionBlock describing the whole scheme.
        """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation:
        # when libm compliance is not required, exception flags are dropped
        # and a plain Return of the requested value is generated instead
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input:
        # +infinity for a positive input, +0 for a negative one
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        # range of x once early overflow / underflow have been excluded,
        # and the matching range of k = nearest_integer(x / log(2))
        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log2_hi is rounded on fewer bits than the format mantissa:
        # the bits removed account for the bit size of k (plus 2 extra bits)
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        # k is the floating-point version of the nearest integer,
        # ik is the same value in the matching integer format
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        # approx_interval is split in 3 sub-intervals, used below as the
        # dichotomy cases of the polynomial evaluation error computation
        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        # evaluation error of the argument reduction
        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        # half of the error budget is allocated to the approximation error
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        # initial guess for the degree (at least 2); it is incremented by
        # the loop below until the error goal is met
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            # sub_poly only contains the monomials of degree >= 2
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            # exp(r) ~ 1 + r + sub_poly(r), with r = exact_hi + exact_lo
            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            # NOTE(review): r_gappa_var, vx_gappa_var and k_gappa_var are
            # only referenced by the commented-out map entries below
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                # one case per sub-interval of approx_interval_split
                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            # worst relative error (approximation + evaluation) over the
            # three dichotomy cases
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                # identity test: "== None" would be dispatched to the
                # __eq__ of the stored error value
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # floor division: the offset is used as an integer-format constant
        # and as an exponent, "/" would produce a float with true division
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        # 2^k is applied in two steps (2^diff_k then 2^overflow_exp_offset)
        # since k itself is greater than emax on this path
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        # same two-step scaling for the underflow path: the exponent is
        # first offset upward, then the offset is removed by a final
        # multiplication
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        # standard path: result = 2^ik * poly
        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme

Example #22

Show file

    def generate_scheme(self):
        """Build the operation graph of a hyperbolic cosine implementation.

        The scheme relies on cosh(x) = 1/2 * (exp(|x|) + exp(-|x|)):
          - |x| is reduced as r = |x| - k * log(2) / 2^index_size,
          - k is split into a high part (used as a power-of-two exponent)
            and a low part (used as an index into a table of
            2^(+/- l * 2^-index_size) values stored as hi/lo pairs),
          - exp(r) - 1 and exp(-r) - 1 are evaluated with a polynomial,
          - both exponentials are reconstructed, scaled and added,
          - inputs whose magnitude exceeds acosh(max_value) return
            +infinity.

        Returns:
            A Statement wrapping the Return of the selected result.
        """
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        # NOTE(review): this helper is not called anywhere in this method
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # number of bits of k used to index the table
        index_size = 3

        # cosh is even: the whole scheme works on |x|
        vx = Abs(vx)
        # only ML_Binary32 and ML_Binary64 are supported here, any other
        # precision raises a KeyError
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # argument reduction
        arg_reg_value = log(2) / 2**index_size
        inv_log2_value = round(1 / arg_reg_value,
                               self.precision.get_sollya_object(), RN)
        inv_log2_cst = Constant(inv_log2_value,
                                precision=self.precision,
                                tag="inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2  for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision(
        ) - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value,
                              self.precision.get_sollya_object(), RN)
        log2_hi_value_cst = Constant(log2_hi_value,
                                     tag="log2_hi_value",
                                     precision=self.precision)
        log2_lo_value_cst = Constant(log2_lo_value,
                                     tag="log2_lo_value",
                                     precision=self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
        k_log2 = Multiplication(k,
                                log2_hi_value_cst,
                                precision=self.precision,
                                exact=True,
                                tag="k_log2",
                                unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        r_eval_error = self.get_eval_error(
            r_hi,
            variable_copy_map={
                vx:
                Variable("vx",
                         interval=Interval(0, 715),
                         precision=self.precision),
                k:
                Variable("k",
                         interval=Interval(0, 1024),
                         precision=int_precision)
            })
        # reported through Log (as the other messages of this method)
        # rather than with a Python-2-only print statement
        Log.report(Log.Info, "r_eval_error: {}".format(r_eval_error))

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k,
                               precision=int_precision,
                               tag="k_integer",
                               debug=debug_multi)
        # k = k_hi * 2^index_size + k_lo
        k_hi = BitLogicRightShift(k_integer,
                                  Constant(index_size),
                                  tag="k_int_hi",
                                  precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer,
                      2**index_size,
                      tag="k_int_lo",
                      precision=int_precision,
                      debug=debug_multi)
        # NOTE(review): pow_exp is not used by the rest of this method
        pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # table layout, for each row i:
        #   [0], [1]: hi and lo parts of 2^(-input_value * 2^-index_size)
        #   [2], [3]: hi and lo parts of 2^(+input_value * 2^-index_size)
        # rows i >= 2^index_size hold input_value = i - 2^index_size
        exp_table = ML_Table(dimensions=[2 * 2**index_size, 4],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**((input_value) *
                                                 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value) *
                                                  2**-index_size)
            pos_value_hi = round(exp_value, self.precision.get_sollya_object(),
                                 RN)
            pos_value_lo = round(exp_value - pos_value_hi,
                                 self.precision.get_sollya_object(), RN)
            neg_value_hi = round(mexp_value,
                                 self.precision.get_sollya_object(), RN)
            neg_value_lo = round(mexp_value - neg_value_hi,
                                 self.precision.get_sollya_object(), RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # cosh(x) = 1/2 * (exp(x) + exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        #
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        Log.report(
            Log.Info, "poly_approx_error: {} {}".format(
                poly_approx_error, float(log2(poly_approx_error))))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        # the constant monomial is dropped (start_index=1): poly_pos and
        # poly_neg approximate exp(r) - 1 and exp(-r) - 1
        poly_pos = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            r,
            unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            -r,
            unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        # k_lo is offset by 2^index_size to address the second half of
        # the table
        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        # exponents h - 1 and -h - 1 (the -1 implements the 1/2 factor),
        # clamped from below to the minimal normal exponent
        k_plus = Max(
            Subtraction(k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
        pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

        # (hi + lo) * (1 + poly), expanded so that the smallest terms are
        # added first, then scaled by the power of two
        pos_exp = (
            pos_value_load_hi +
            (pos_value_load_hi * poly_pos +
             (pos_value_load_lo + pos_value_load_lo * poly_pos))) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        neg_exp = (
            neg_value_load_hi +
            (neg_value_load_hi * poly_neg +
             (neg_value_load_lo + neg_value_load_lo * poly_neg))) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        result = Addition(pos_exp,
                          neg_exp,
                          precision=self.precision,
                          tag="result",
                          debug=debug_multi)

        # ov_value: acosh of the format maximum value, rounded down;
        # any input whose magnitude is greater returns +infinity
        ov_value = round(acosh(self.precision.get_max_value()),
                         self.precision.get_sollya_object(), RD)
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme