Exemple #1
0
 def __init__(self, inf_bound, sup_bound):
     self.inf_bound = inf_bound
     self.sup_bound = sup_bound
     self.zero_in_interval = 0 in sollya.Interval(
         inf_bound, sup_bound)
     self.min_exp = None if self.zero_in_interval else min(
         sollya.ceil(sollya.log2(abs(inf_bound))),
         sollya.ceil(sollya.log2(abs(sup_bound))))
     self.max_exp = max(sollya.ceil(sollya.log2(abs(inf_bound))),
                        sollya.ceil(sollya.log2(abs(sup_bound))))
Exemple #2
0
def solve_format_Constant(optree):
    """ Legalize Constant node """
    assert isinstance(optree, Constant)
    value = optree.get_value()
    if FP_SpecialValue.is_special_value(value):
        return optree.get_precision()
    elif not optree.get_precision() is None:
        # if precision is already set (manually forced), returns it
        return optree.get_precision()
    else:
        # fixed-point format solving
        frac_size = -1
        FRAC_THRESHOLD = 100 # maximum number of frac bit to be tested
        # TODO: fix
        for i in range(FRAC_THRESHOLD):
            if int(value*2**i) == value * 2**i:
                frac_size = i
                break
        if frac_size < 0:
            Log.report(Log.Error, "value {} is not an integer, from node:\n{}", value, optree)
        abs_value = abs(value)
        signed = value < 0
        # int_size = max(int(sollya.ceil(sollya.log2(abs_value+2**frac_size))), 0) + (1 if signed else 0)
        int_size = max(int(sollya.ceil(sollya.log2(abs_value + 1))), 0) + (1 if signed else 0)
        if frac_size == 0 and int_size == 0:
            int_size = 1
        return fixed_point(int_size, frac_size, signed=signed)
Exemple #3
0
 def round_sollya_object(self, value, round_mode=sollya.RN):
     rnd_function = {
         sollya.RN: sollya.nearestint,
         sollya.RD: sollya.floor,
         sollya.RU: sollya.ceil,
         sollya.RZ: lambda x: sollya.floor(x) if x > 0 \
                    else sollya.ceil(x)
     }[round_mode]
     scale_factor = S2**self.get_frac_size()
     return rnd_function(scale_factor * value) / scale_factor
Exemple #4
0
 def __init__(self, low_exp_value, max_exp_value, field_bits, precision):
     self.field_bits = field_bits
     self.low_exp_value = low_exp_value
     self.max_exp_value = max_exp_value
     exp_bits = int(
         sollya.ceil(sollya.log2(max_exp_value - low_exp_value + 1)))
     assert exp_bits >= 0 and field_bits >= 0 and (exp_bits +
                                                   field_bits) > 0
     self.exp_bits = exp_bits
     self.split_num = (self.max_exp_value - self.low_exp_value +
                       1) * 2**(self.field_bits)
     Log.report(Log.Debug, "split_num={}", self.split_num)
     self.precision = precision
Exemple #5
0
def solve_format_Constant(optree):
    """ Legalize Constant node """
    assert isinstance(optree, Constant)
    value = optree.get_value()
    if FP_SpecialValue.is_special_value(value):
        return optree.get_precision()
    elif not optree.get_precision() is None:
        # if precision is already set (manually forced), returns it
        return optree.get_precision()
    else:
        # fixed-point format solving
        assert int(value) == value
        abs_value = abs(value)
        signed = value < 0
        int_size = max(int(sollya.ceil(sollya.log2(abs_value + 1))),
                       0) + (1 if signed else 0)
        frac_size = 0
        if frac_size == 0 and int_size == 0:
            int_size = 1
        return fixed_point(int_size, frac_size, signed=signed)
Exemple #6
0
 def computeNeededVariableFormat(self, I, epsTarget, variableFormat):
     if epsTarget > 0:
         # TODO: fix to support ML_Binary32
         if epsTarget >= self.MIN_LIMB_ERROR or variableFormat.mp_node.precision is self.limb_format:
             # FIXME: default to minimal precision (self.limb_format)
             return variableFormat
         else:
             target_accuracy = sollya.ceil(-sollya.log2(epsTarget))
             target_format = self.get_format_from_accuracy(
                 target_accuracy,
                 eps_target=epsTarget,
                 interval=variableFormat.mp_node.interval)
             if target_format.mp_node.precision.get_bit_size(
             ) < variableFormat.mp_node.precision.get_bit_size():
                 return target_format
             else:
                 # if variableFormat is smaller (less bits) and more accurate
                 # then we use it
                 return variableFormat
     else:
         return variableFormat
Exemple #7
0
def determine_minimal_fixed_format_cst(value):
    """ determine the minimal size format which can encode
        exactly the constant value value """
    # fixed-point format solving
    frac_size = -1
    FRAC_THRESHOLD = 100  # maximum number of frac bit to be tested
    # TODO: fix
    for i in range(FRAC_THRESHOLD):
        if int(value * 2**i) == value * 2**i:
            frac_size = i
            break
    if frac_size < 0:
        Log.report(Log.Error, "value {} is not an integer, from node:\n{}",
                   value, optree)
    abs_value = abs(value)
    signed = value < 0
    # int_size = max(int(sollya.ceil(sollya.log2(abs_value+2**frac_size))), 0) + (1 if signed else 0)
    int_size = max(int(sollya.ceil(sollya.log2(abs_value + 1))),
                   0) + (1 if signed else 0)
    if frac_size == 0 and int_size == 0:
        int_size = 1
    return fixed_point(int_size, frac_size, signed=signed)
Exemple #8
0
    def generate_scheme(self):
        """ main scheme generation """

        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        expected_interval = {}

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        x_interval = Interval(-10.3, 10.7)
        var_x.set_interval(x_interval)
        expected_interval[var_x] = x_interval

        var_y = self.implementation.add_input_signal("y", input_precision)
        y_interval = Interval(-17.9, 17.2)
        var_y.set_interval(y_interval)
        expected_interval[var_y] = y_interval

        var_z = self.implementation.add_input_signal("z", input_precision)
        z_interval = Interval(-7.3, 7.7)
        var_z.set_interval(z_interval)
        expected_interval[var_z] = z_interval

        cst = Constant(42.5, tag="cst")
        expected_interval[cst] = Interval(42.5)

        conv_ceil = Ceil(var_x, tag="ceil")
        expected_interval[conv_ceil] = sollya.ceil(x_interval)

        conv_floor = Floor(var_y, tag="floor")
        expected_interval[conv_floor] = sollya.floor(y_interval)

        mult = var_z * var_x
        mult.set_tag("mult")
        mult_interval = z_interval * x_interval
        expected_interval[mult] = mult_interval

        large_add = (var_x + var_y) - mult
        large_add.set_attributes(tag="large_add")
        large_add_interval = (x_interval + y_interval) - mult_interval
        expected_interval[large_add] = large_add_interval

        reduced_result = Max(0, Min(large_add, 13))
        reduced_result.set_tag("reduced_result")
        reduced_result_interval = interval_max(
            Interval(0), interval_min(large_add_interval, Interval(13)))
        expected_interval[reduced_result] = reduced_result_interval

        select_result = Select(var_x > var_y,
                               reduced_result,
                               var_z,
                               tag="select_result")
        select_interval = interval_union(reduced_result_interval, z_interval)
        expected_interval[select_result] = select_interval

        # checking interval evaluation
        for var in [
                cst, var_x, var_y, mult, large_add, reduced_result,
                select_result, conv_ceil, conv_floor
        ]:
            interval = evaluate_range(var)
            expected = expected_interval[var]
            print("{}: {} vs expected {}".format(var.get_tag(), interval,
                                                 expected))
            assert not interval is None
            assert interval == expected

        return [self.implementation]
Exemple #9
0
  def generate_argument_reduction(self, memory_limit):
    best_arg_reduc = None

    best_arg_reduc = self.eval_argument_reduction(6,10,12,13)
    best_arg_reduc['sizeof_tables'] = best_arg_reduc['sizeof_table1'] + best_arg_reduc['sizeof_table2']
    best_arg_reduc['degree_poly1'] = 4
    best_arg_reduc['degree_poly2'] = 8
    return best_arg_reduc
    # iterate through all possible parameters, and return the best argument reduction
    # the order of importance of the caracteristics of a good argument reduction is:
    #   1- the argument reduction is valid
    #   2- the degree of the polynomials obtains are minimals
    #   3- the memory used is minimal
    # An arument reduction is valid iff:
    #   - the memory used is less than memory_limit
    #   - y-1 and z-1  fit into a uint64_t
    #   - the second argument reduction should usefull (ie: it should add at least 1 bit to the argument reduction)
    # From thoses validity constraint we deduce some bound on the parameters to reduce the space of value searched:
    # (note that thoses bound are implied by, but not equivalents to the constraints)
    #   size1 <= log2(memory_limit/17)                                       (memory_limit on the first table)
    #   prec1 < 13 + size1                                                   (y-1 fits into a uint64_t)
    #   size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)          (memory_limit on both tables)
    #   size2 >= 1 - log2(midinterval)                                       (second arg red should be usefull)
    #   prec2 < 12 - prec1 - log2((y-y1)/y1),  for all possible y            (z-1 fits into a uint64_t)
    # note: it is hard to deduce a tight bound on prec2 from the last inequality
    # a good approximation is  size2 ~= max[for y]( - log2((y-y1)/y1)), but using it may eliminate valid arg reduc

    #self.eval_argument_reduction(12, 20, 22, 14)

    min_size1 = 1
    max_size1 = floor(log(memory_limit/17)/log(2)).getConstantAsInt()
    for size1 in xrange(max_size1, min_size1-1, -1):
      
      min_prec1 = size1
      max_prec1 = 12 + size1
      for prec1 in xrange(min_prec1,max_prec1+1):
        
        # we need sizeof_table1 and mid_interval for the bound on size2 and prec2
        first_arg_reduc = self.eval_argument_reduction(size1, prec1, prec1, prec1)
        mid_interval = first_arg_reduc['mid_interval']
        sizeof_table1 = first_arg_reduc['sizeof_table1']

        if not(0 <= inf(mid_interval) and sup(mid_interval) < S2**(64 - 52 - prec1)):
          continue
        if not(first_arg_reduc['sizeof_table1'] < memory_limit):
          continue
        
        min_size2 = 1 - ceil(log(sup(mid_interval))/log(2)).getConstantAsInt()
        max_size2 = floor(log((memory_limit - sizeof_table1)/(17 * sup(mid_interval)))/log(2)).getConstantAsInt()
        # during execution of the prec2 loop, it can reduces the interval of valid values for prec2
        # so min_prec2 and max_prec2 are setted here and not before the the prec2 loop
        # (because they are modified inside the body of the loop, for the next iteration of size2)
        min_prec2 = 0
        max_prec2 = 12 + max_size2 - prec1
        for size2 in xrange(max_size2,min_size2-1,-1):
          
          max_prec2 = min(max_prec2, 12 + size2 - prec1)
          for prec2 in xrange(max_prec2,min_prec2-1,-1):
            
            #print '=====\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{})\t====='.format(size1,min_size1,max_size1,prec1,min_prec1,max_prec1,size2,min_size2,max_size2,prec2,min_prec2,max_prec2)
            #print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss #memory used by the programm

            arg_reduc = self.eval_argument_reduction(size1, prec1, size2, prec2)
            mid_interval = arg_reduc['mid_interval']
            out_interval = arg_reduc['out_interval']
            sizeof_tables = arg_reduc['sizeof_table1'] + arg_reduc['sizeof_table2']
            if not(0 <= inf(out_interval) and sup(out_interval) < S2**(64-52-prec1-prec2)):
              max_prec2 = prec2 - 1
              continue
            if memory_limit < sizeof_tables:
              continue
            #assert(prec2 < 12 + size2 - prec1) # test the approximation size2 ~= max[for y]( - log2((y-y1)/y1))

            # guess the degree of the two polynomials (relative error <= 2^-52 and absolute error <= 2^-120)
            # note: we exclude zero from out_interval to not perturb sollya (log(1+x)/x is not well defined on 0)
            sollya_out_interval = Interval(S2**(-52-prec1-prec2), sup(out_interval))
            guess_degree_poly1 = guessdegree(log(1+sollya.x)/sollya.x, sollya_out_interval, S2**-52)
            guess_degree_poly2 = guessdegree(log(1+sollya.x), sollya_out_interval, S2**-120)
            # TODO: detect when guessdegree return multiple possible degree, and find the right one
            if False and inf(guess_degree_poly1) <> sup(guess_degree_poly1):
              print "improvable guess_degree_poly1:", guess_degree_poly1
            if False and inf(guess_degree_poly2) <> sup(guess_degree_poly2):
              print "improvable guess_degree_poly2:", guess_degree_poly2
            degree_poly1 = sup(guess_degree_poly1).getConstantAsInt() + 1
            degree_poly2 = sup(guess_degree_poly2).getConstantAsInt()
            
            if ((best_arg_reduc is not None)
            and (best_arg_reduc['degree_poly1'] < degree_poly1 or best_arg_reduc['degree_poly2'] < degree_poly2)):
              min_prec2 = prec2 + 1
              break

            if ((best_arg_reduc is None)
             or (best_arg_reduc['degree_poly1'] > degree_poly1)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] > degree_poly2)
             or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] == degree_poly2 and best_arg_reduc['sizeof_tables'] > sizeof_tables)):
              arg_reduc['degree_poly1'] = degree_poly1
              arg_reduc['degree_poly2'] = degree_poly2
              arg_reduc['sizeof_tables'] = sizeof_tables
              best_arg_reduc = arg_reduc
              #print "\n   --new best--  \n", arg_reduc, "\n"
    #print "\nBest arg reduc: \n", best_arg_reduc, "\n"
    return best_arg_reduc
Exemple #10
0
    def generate_scalar_scheme(self, vx):
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
                Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        index_size = 5

        comp_lo = (vx < 0)
        comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
        sign = Select(comp_lo, -1, 1, precision = self.precision)

        # as sinh is an odd function, we can simplify the input to its absolute
        # value once the sign has been extracted
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2)/2**index_size
        inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
        inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2    for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx    = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
        log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
        log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
        k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag = "r", debug = debug_multi)

        if is_gappa_installed():
                r_eval_error = self.get_eval_error(r_hi, variable_copy_map =
                    {
                        vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
                        k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
                    })
                Log.report(Log.Verbose, "r_eval_error: ", r_eval_error)

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi)
        k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi)

        exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value    = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
            pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
            neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # sinh(x) = 1/2 * (exp(x) - exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)    = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)    = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function)

        Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error))))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision)
        poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

        poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision)
        poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

        table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))

        # 2^(h-1)
        pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi)
        # 2^(-h-1)
        pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi)

        hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)


        pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

        neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

        result = Addition(
            Subtraction(
                pos_exp,
                neg_exp,
                precision=self.precision,
            ),
            hi_terms,
            precision=self.precision,
            tag="result",
            debug=debug_multi
        )

        # ov_value
        ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD)
        ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater)

        # main scheme
        scheme = Statement(
            Return(
                Select(
                    ov_flag,
                    sign*FP_PlusInfty(self.precision),
                    sign*result
                )))

        return scheme
Exemple #11
0
def generate_payne_hanek(vx,
                         frac_pi,
                         precision,
                         n=100,
                         k=4,
                         chunk_num=None,
                         debug=False):
    """ generate payne and hanek argument reduction for frac_pi * variable """

    sollya.roundingwarnings = sollya.off
    debug_precision = debug_multi
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[precision]

    p = precision.get_field_size()

    # weight of the most significant digit of the constant
    cst_msb = floor(log2(abs(frac_pi)))
    # length of exponent range which must be covered by the approximation
    # of the constant
    cst_exp_range = cst_msb - precision.get_emin_subnormal() + 1

    # chunk size has to be so than multiplication by a splitted <v>
    # (vx_hi or vx_lo) is exact
    chunk_size = precision.get_field_size() / 2 - 2
    chunk_number = int(ceil((cst_exp_range + chunk_size - 1) / chunk_size))
    scaling_factor = S2**-(chunk_size / 2)

    chunk_size_cst = Constant(chunk_size, precision=ML_Int32)
    cst_msb_node = Constant(cst_msb, precision=ML_Int32)

    # Saving sollya's global precision
    old_global_prec = sollya.settings.prec
    sollya.settings.prec(cst_exp_range + n)

    # table to store chunk of constant multiplicand
    cst_table = ML_NewTable(dimensions=[chunk_number, 1],
                            storage_precision=precision,
                            tag="PH_cst_table")
    # table to store sqrt(scaling_factor) corresponding to the
    # cst multiplicand chunks
    scale_table = ML_NewTable(dimensions=[chunk_number, 1],
                              storage_precision=precision,
                              tag="PH_scale_table")
    tmp_cst = frac_pi

    # cst_table stores normalized constant chunks (they have been
    # scale back to close to 1.0 interval)
    #
    # scale_table stores the scaling factors corresponding to the
    # denormalization of cst_table coefficients

    # this loop divide the digits of frac_pi into chunks
    # the chunk lsb weight is given by a shift from
    # cst_msb, multiple of the chunk index
    for i in range(chunk_number):
        value_div_factor = S2**(chunk_size * (i + 1) - cst_msb)
        local_cst = int(tmp_cst * value_div_factor) / value_div_factor
        local_scale = (scaling_factor**i)
        # storing scaled constant chunks
        cst_table[i][0] = local_cst / (local_scale**2)
        scale_table[i][0] = local_scale
        # Updating constant value
        tmp_cst = tmp_cst - local_cst

    # Computing which part of the constant we do not need to multiply
    # In the following comments, vi represents the bit of frac_pi of weight 2**-i

    # Bits vi so that i <= (vx_exp - p + 1 -k)  are not needed, because they result
    # in a multiple of 2pi and do not contribute to trig functions.

    vx_exp = ExponentExtraction(
        vx, precision=vx.get_precision().get_integer_format())
    vx_exp = Conversion(vx_exp, precision=ML_Int32)

    msb_exp = -(vx_exp - p + 1 - k)
    msb_exp.set_attributes(tag="msb_exp", debug=debug_multi)
    msb_exp = Conversion(msb_exp, precision=ML_Int32)

    # Select the highest index where the reduction should start
    msb_index = Select(cst_msb_node < msb_exp, 0,
                       (cst_msb_node - msb_exp) / chunk_size_cst)
    msb_index.set_attributes(tag="msb_index", debug=debug_multi)

    # For a desired accuracy of 2**-n, bits vi so that i >= (vx_exp + n + 4)  are not needed, because they contribute less than
    # 2**-n to the result

    lsb_exp = -(vx_exp + n + 4)
    lsb_exp.set_attributes(tag="lsb_exp", debug=debug_multi)
    lsb_exp = Conversion(lsb_exp, precision=ML_Int32)

    # Index of the corresponding chunk
    lsb_index = (cst_msb_node - lsb_exp) / chunk_size_cst
    lsb_index.set_attributes(tag="lsb_index", debug=debug_multi)

    # Splitting vx
    half_size = precision.get_field_size() / 2 + 1

    # hi part (most significant digit) of vx input
    vx_hi = TypeCast(BitLogicAnd(
        TypeCast(vx, precision=int_precision),
        Constant(~int(2**half_size - 1), precision=int_precision)),
                     precision=precision)
    vx_hi.set_attributes(tag="vx_hi_ph")  #, debug = debug_multi)

    vx_lo = vx - vx_hi
    vx_lo.set_attributes(tag="vx_lo_ph")  #, debug = debug_multi)

    # loop iterator variable
    vi = Variable("i", precision=ML_Int32, var_type=Variable.Local)
    # step scaling factor
    half_scaling = Constant(S2**(-chunk_size / 2), precision=precision)

    i1 = Constant(1, precision=ML_Int32)

    # accumulator to the output precision
    acc = Variable("acc", precision=precision, var_type=Variable.Local)
    # integer accumulator
    acc_int = Variable("acc_int",
                       precision=int_precision,
                       var_type=Variable.Local)

    init_loop = Statement(
        vx_hi,
        vx_lo,
        ReferenceAssign(vi, msb_index),
        ReferenceAssign(acc, Constant(0, precision=precision)),
        ReferenceAssign(acc_int, Constant(0, precision=int_precision)),
    )

    cst_load = TableLoad(cst_table,
                         vi,
                         0,
                         tag="cst_load",
                         debug=debug_precision)
    sca_load = TableLoad(scale_table,
                         vi,
                         0,
                         tag="sca_load",
                         debug=debug_precision)
    # loop body
    # hi_mult = vx_hi * <scale_factor> * <cst>
    hi_mult = (vx_hi * sca_load) * (cst_load * sca_load)
    hi_mult.set_attributes(tag="hi_mult", debug=debug_precision)
    pre_hi_mult_int = NearestInteger(hi_mult,
                                     precision=int_precision,
                                     tag="hi_mult_int",
                                     debug=(debuglld if debug else None))
    hi_mult_int_f = Conversion(pre_hi_mult_int,
                               precision=precision,
                               tag="hi_mult_int_f",
                               debug=debug_precision)
    pre_hi_mult_red = (hi_mult - hi_mult_int_f).modify_attributes(
        tag="hi_mult_red", debug=debug_precision)

    # for the first chunks (vx_hi * <constant chunk>) exceeds 2**k+1 and may be
    # discard (whereas it may lead to overflow during integer conversion
    pre_exclude_hi = ((cst_msb_node - (vi + i1) * chunk_size + i1) +
                      (vx_exp + Constant(-half_size + 1, precision=ML_Int32))
                      ).modify_attributes(tag="pre_exclude_hi",
                                          debug=(debugd if debug else None))
    pre_exclude_hi.propagate_precision(ML_Int32,
                                       [cst_msb_node, vi, vx_exp, i1])
    Ck = Constant(k, precision=ML_Int32)
    exclude_hi = pre_exclude_hi <= Ck
    exclude_hi.set_attributes(tag="exclude_hi", debug=debug_multi)

    hi_mult_red = Select(exclude_hi, pre_hi_mult_red,
                         Constant(0, precision=precision))
    hi_mult_int = Select(exclude_hi, pre_hi_mult_int,
                         Constant(0, precision=int_precision))

    # lo part of the chunk reduction
    lo_mult = (vx_lo * sca_load) * (cst_load * sca_load)
    lo_mult.set_attributes(tag="lo_mult")  #, debug = debug_multi)
    lo_mult_int = NearestInteger(lo_mult,
                                 precision=int_precision,
                                 tag="lo_mult_int")  #, debug = debug_multi
    lo_mult_int_f = Conversion(lo_mult_int,
                               precision=precision,
                               tag="lo_mult_int_f")  #, debug = debug_multi)
    lo_mult_red = (lo_mult - lo_mult_int_f).modify_attributes(
        tag="lo_mult_red")  #, debug = debug_multi)

    # accumulating fractional part
    acc_expr = (acc + hi_mult_red) + lo_mult_red
    # accumulating integer part
    int_expr = ((acc_int + hi_mult_int) + lo_mult_int) % 2**(k + 1)

    CF1 = Constant(1, precision=precision)
    CI1 = Constant(1, precision=int_precision)

    # extracting exceeding integer part in fractionnal accumulator
    acc_expr_int = NearestInteger(acc_expr, precision=int_precision)
    # normalizing integer and fractionnal accumulator by subtracting then
    # adding exceeding integer part
    normalization = Statement(
        ReferenceAssign(
            acc, acc_expr - Conversion(acc_expr_int, precision=precision)),
        ReferenceAssign(acc_int, int_expr + acc_expr_int),
    )

    acc_expr.set_attributes(tag="acc_expr")  #, debug = debug_multi)
    int_expr.set_attributes(tag="int_expr")  #, debug = debug_multi)

    red_loop = Loop(
        init_loop, vi <= lsb_index,
        Statement(acc_expr, int_expr, normalization,
                  ReferenceAssign(vi, vi + 1)))

    result = Statement(lsb_index, msb_index, red_loop)

    # restoring sollya's global precision
    sollya.settings.prec = old_global_prec

    return result, acc, acc_int
Exemple #12
0
def generate_payne_hanek(vx,
                         frac_pi,
                         precision,
                         n=100,
                         k=4,
                         chunk_num=None,
                         debug=False):
    """ generate payne and hanek argument reduction for frac_pi * variable """
    # determining integer format corresponding to
    # floating point precision argument
    int_precision = {ML_Binary64: ML_Int64, ML_Binary32: ML_Int32}[precision]

    cst_msb = floor(log2(abs(frac_pi)))
    cst_exp_range = cst_msb - precision.get_emin_subnormal() + 1

    # chunk size has to be so than multiplication by a splitted <v> (vx_hi or vx_lo)
    # is exact
    chunk_size = 20  # precision.get_field_size() / 2 - 2
    chunk_number = int(ceil((cst_exp_range + chunk_size - 1) / chunk_size))
    scaling_factor = S2**-(chunk_size / 2)

    chunk_size_cst = Constant(chunk_size, precision=ML_Int32)
    cst_msb_node = Constant(cst_msb, precision=ML_Int32)

    p = precision.get_field_size()

    # adapting debug format to precision argument
    debug_precision = {
        ML_Binary32: debug_ftox,
        ML_Binary64: debug_lftolx
    }[precision] if debug else None

    # saving sollya's global precision
    old_global_prec = get_prec()
    prec(cst_exp_range + 100)

    # table to store chunk of constant multiplicand
    cst_table = ML_Table(dimensions=[chunk_number, 1],
                         storage_precision=precision,
                         tag="PH_cst_table")
    # table to store sqrt(scaling_factor) corresponding to the cst multiplicand chunks
    scale_table = ML_Table(dimensions=[chunk_number, 1],
                           storage_precision=precision,
                           tag="PH_scale_table")
    tmp_cst = frac_pi

    # this loop divide the digits of frac_pi into chunks
    # the chunk lsb weight is given by a shift from
    # cst_msb, multiple of the chunk index
    for i in xrange(chunk_number):
        value_div_factor = S2**(chunk_size * (i + 1) - cst_msb)
        local_cst = int(tmp_cst * value_div_factor) / value_div_factor
        local_scale = (scaling_factor**i)
        # storing scaled constant chunks
        cst_table[i][0] = local_cst / (local_scale**2)
        scale_table[i][0] = local_scale
        tmp_cst = tmp_cst - local_cst

    vx_exp = ExponentExtraction(vx)
    msb_exp = -vx_exp + p - 1 + k
    msb_exp.set_attributes(tag="msb_exp", debug=(debugd if debug else None))

    msb_index = Select(cst_msb_node < msb_exp, 0,
                       (cst_msb_node - msb_exp) / chunk_size_cst)
    msb_index.set_attributes(tag="msb_index",
                             debug=(debugd if debug else None))

    lsb_exp = -vx_exp + p - 1 - n
    lsb_exp.set_attributes(tag="lsb_exp", debug=(debugd if debug else None))

    lsb_index = (cst_msb_node - lsb_exp) / chunk_size_cst
    lsb_index.set_attributes(tag="lsb_index",
                             debug=(debugd if debug else None))

    half_size = precision.get_field_size() / 2 + 1

    vx_hi = TypeCast(BitLogicAnd(
        TypeCast(vx, precision=ML_Int64),
        Constant(~(2**half_size - 1), precision=ML_Int64)),
                     precision=precision)
    vx_hi.set_attributes(tag="vx_hi", debug=debug_precision)

    vx_lo = vx - vx_hi
    vx_lo.set_attributes(tag="vx_lo", debug=debug_precision)

    vi = Variable("i", precision=ML_Int32, var_type=Variable.Local)

    half_scaling = Constant(S2**(-chunk_size / 2), precision=precision)

    i1 = Constant(1, precision=ML_Int32)

    acc = Variable("acc", precision=precision, var_type=Variable.Local)
    acc_int = Variable("acc_int",
                       precision=int_precision,
                       var_type=Variable.Local)

    init_loop = Statement(
        vx_hi,
        vx_lo,
        ReferenceAssign(vi, msb_index),
        ReferenceAssign(acc, Constant(0, precision=precision)),
        ReferenceAssign(acc_int, Constant(0, precision=precision)),
    )

    cst_load = TableLoad(cst_table,
                         vi,
                         0,
                         tag="cst_load",
                         debug=debug_precision)
    sca_load = TableLoad(scale_table,
                         vi,
                         0,
                         tag="sca_load",
                         debug=debug_precision)

    hi_mult = (vx_hi * sca_load) * (cst_load * sca_load)
    hi_mult.set_attributes(tag="hi_mult", debug=debug_precision)
    pre_hi_mult_int = NearestInteger(hi_mult,
                                     precision=int_precision,
                                     tag="hi_mult_int",
                                     debug=(debuglld if debug else None))
    hi_mult_int_f = Conversion(pre_hi_mult_int,
                               precision=precision,
                               tag="hi_mult_int_f",
                               debug=debug_precision)
    pre_hi_mult_red = (hi_mult - hi_mult_int_f).modify_attributes(
        tag="hi_mult_red", debug=debug_precision)

    # for the first chunks (vx_hi * <constant chunk>) exceeds 2**k+1 and may be
    # discard (whereas it may lead to overflow during integer conversion
    pre_exclude_hi = ((cst_msb_node - (vi + i1) * chunk_size + i1) +
                      (vx_exp + Constant(-half_size + 1, precision=ML_Int32))
                      ).modify_attributes(tag="pre_exclude_hi",
                                          debug=(debugd if debug else None))
    pre_exclude_hi.propagate_precision(ML_Int32,
                                       [cst_msb_node, vi, vx_exp, i1])
    Ck = Constant(k, precision=ML_Int32)
    exclude_hi = pre_exclude_hi <= Ck
    exclude_hi.set_attributes(tag="exclude_hi",
                              debug=(debugd if debug else None))

    hi_mult_red = Select(exclude_hi, pre_hi_mult_red,
                         Constant(0, precision=precision))
    hi_mult_int = Select(exclude_hi, pre_hi_mult_int,
                         Constant(0, precision=int_precision))

    lo_mult = (vx_lo * sca_load) * (cst_load * sca_load)
    lo_mult.set_attributes(tag="lo_mult", debug=debug_precision)
    lo_mult_int = NearestInteger(lo_mult,
                                 precision=int_precision,
                                 tag="lo_mult_int",
                                 debug=(debuglld if debug else None))
    lo_mult_int_f = Conversion(lo_mult_int,
                               precision=precision,
                               tag="lo_mult_int_f",
                               debug=debug_precision)
    lo_mult_red = (lo_mult - lo_mult_int_f).modify_attributes(
        tag="lo_mult_red", debug=debug_precision)

    acc_expr = (acc + hi_mult_red) + lo_mult_red
    int_expr = ((acc_int + hi_mult_int) + lo_mult_int) % 2**(k + 1)

    CF1 = Constant(1, precision=precision)
    CI1 = Constant(1, precision=int_precision)

    acc_expr_int = NearestInteger(acc_expr, precision=int_precision)

    normalization = Statement(
        ReferenceAssign(
            acc, acc_expr - Conversion(acc_expr_int, precision=precision)),
        ReferenceAssign(acc_int, int_expr + acc_expr_int),
    )

    acc_expr.set_attributes(tag="acc_expr", debug=debug_precision)
    int_expr.set_attributes(tag="int_expr",
                            debug=(debuglld if debug else None))

    red_loop = Loop(
        init_loop,
        vi <= lsb_index,
        Statement(
            acc_expr,
            int_expr,
            normalization,
            #ReferenceAssign(acc, acc_expr),
            #ReferenceAssign(acc_int, int_expr),
            ReferenceAssign(vi, vi + 1)))
    result = Statement(lsb_index, msb_index, red_loop)

    # restoring sollya's global precision
    prec(old_global_prec)

    return result, acc, acc_int
Exemple #13
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "target: %s " % self.processor.target_name)

        # display parameter information
        Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
        Log.report(Log.Info, "input interval: %s " % self.input_interval)

        accuracy_goal = self.accuracy.get_goal()
        Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

        table_size_log = self.table_size_log
        integer_size = 31
        integer_precision = ML_Int32

        max_bound = sup(abs(self.input_interval))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(
                local_x
            )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

            sin_local = sin(
                local_x
            )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.io_precisions[0]
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(x), [0, 2], [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=self.io_precisions[0])

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme
Exemple #14
0
    def generate_scheme(self):
        """ main scheme generation """

        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        expected_interval = {}

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        x_interval = Interval(-10.3,10.7)
        var_x.set_interval(x_interval)
        expected_interval[var_x] = x_interval

        var_y = self.implementation.add_input_signal("y", input_precision)
        y_interval = Interval(-17.9,17.2)
        var_y.set_interval(y_interval)
        expected_interval[var_y] = y_interval

        var_z = self.implementation.add_input_signal("z", input_precision)
        z_interval = Interval(-7.3,7.7)
        var_z.set_interval(z_interval)
        expected_interval[var_z] = z_interval

        cst = Constant(42.5, tag = "cst")
        expected_interval[cst] = Interval(42.5)

        conv_ceil = Ceil(var_x, tag = "ceil")
        expected_interval[conv_ceil] = sollya.ceil(x_interval)

        conv_floor = Floor(var_y, tag = "floor")
        expected_interval[conv_floor] = sollya.floor(y_interval)

        mult = var_z * var_x
        mult.set_tag("mult")
        mult_interval = z_interval * x_interval
        expected_interval[mult] = mult_interval

        large_add = (var_x + var_y) - mult
        large_add.set_attributes(tag = "large_add")
        large_add_interval = (x_interval + y_interval) - mult_interval
        expected_interval[large_add] = large_add_interval

        var_x_lzc = CountLeadingZeros(var_x, tag="var_x_lzc")
        expected_interval[var_x_lzc] = Interval(0, input_precision.get_bit_size())

        reduced_result = Max(0, Min(large_add, 13))
        reduced_result.set_tag("reduced_result")
        reduced_result_interval = interval_max(
            Interval(0),
            interval_min(
                large_add_interval,
                Interval(13)
            )
        )
        expected_interval[reduced_result] = reduced_result_interval

        select_result = Select(
            var_x > var_y,
            reduced_result,
            var_z,
            tag = "select_result"
        )
        select_interval = interval_union(reduced_result_interval, z_interval)
        expected_interval[select_result] = select_interval

        # floating-point operation on mantissa and exponents
        fp_x_range = Interval(-0.01, 100)

        unbound_fp_var = Variable("fp_x", precision=ML_Binary32, interval=fp_x_range)
        mant_fp_x = MantissaExtraction(unbound_fp_var, tag="mant_fp_x", precision=ML_Binary32)
        exp_fp_x = ExponentExtraction(unbound_fp_var, tag="exp_fp_x", precision=ML_Int32)
        ins_exp_fp_x = ExponentInsertion(exp_fp_x, tag="ins_exp_fp_x", precision=ML_Binary32)

        expected_interval[unbound_fp_var] = fp_x_range
        expected_interval[exp_fp_x] = Interval(
            sollya.floor(sollya.log2(sollya.inf(abs(fp_x_range)))),
            sollya.floor(sollya.log2(sollya.sup(abs(fp_x_range))))
        )
        expected_interval[mant_fp_x] = Interval(1, 2)
        expected_interval[ins_exp_fp_x] = Interval(
            S2**sollya.inf(expected_interval[exp_fp_x]),
            S2**sollya.sup(expected_interval[exp_fp_x])
        )


        # checking interval evaluation
        for var in [var_x_lzc, exp_fp_x, unbound_fp_var, mant_fp_x, ins_exp_fp_x, cst, var_x, var_y, mult, large_add, reduced_result, select_result, conv_ceil, conv_floor]:
            interval = evaluate_range(var)
            expected = expected_interval[var]
            print("{}: {}".format(var.get_tag(), interval))
            print("  vs expected {}".format(expected))
            assert not interval is None
            assert interval == expected


        return [self.implementation]
Exemple #15
0
def get_value_exp(value):
    """ return the binary exponent of value """
    return sollya.ceil(sollya.log2(abs(value)))
Exemple #16
0
def ulp(v, format_):
    """ return a 'unit in last place' value for <v> assuming precision is defined by format _ """
    return sollya.S2**(sollya.ceil(sollya.log2(sollya.abs(v))) -
                       (format_.get_precision() + 1))
Exemple #17
0
    def generate_scheme(self):
        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = self.precision

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        # size of most significant table index (for linear slope tabulation)
        alpha = self.alpha  # 6
        # size of medium significant table index (for initial value table index LSB)
        beta = self.beta  # 5
        # size of least significant table index (for linear offset tabulation)
        gamma = self.gamma  # 5

        guard_bits = self.guard_bits  # 3

        vx.set_interval(self.interval)

        range_hi = sollya.sup(self.interval)
        range_lo = sollya.inf(self.interval)
        f_hi = self.function(range_hi)
        f_lo = self.function(range_lo)
        # fixed by format used for reduced_x
        range_size = range_hi - range_lo
        range_size_log2 = int(sollya.log2(range_size))
        assert 2**range_size_log2 == range_size

        print("range_size_log2={}".format(range_size_log2))

        reduced_x = Conversion(BitLogicRightShift(vx - range_lo,
                                                  range_size_log2),
                               precision=fixed_point(0,
                                                     alpha + beta + gamma,
                                                     signed=False),
                               tag="reduced_x",
                               debug=debug_fixed)

        alpha_index = get_fixed_slice(reduced_x,
                                      0,
                                      alpha - 1,
                                      align_hi=FixedPointPosition.FromMSBToLSB,
                                      align_lo=FixedPointPosition.FromMSBToLSB,
                                      tag="alpha_index",
                                      debug=debug_std)
        gamma_index = get_fixed_slice(reduced_x,
                                      gamma - 1,
                                      0,
                                      align_hi=FixedPointPosition.FromLSBToLSB,
                                      align_lo=FixedPointPosition.FromLSBToLSB,
                                      tag="gamma_index",
                                      debug=debug_std)

        beta_index = get_fixed_slice(reduced_x,
                                     alpha,
                                     gamma,
                                     align_hi=FixedPointPosition.FromMSBToLSB,
                                     align_lo=FixedPointPosition.FromLSBToLSB,
                                     tag="beta_index",
                                     debug=debug_std)

        # Assuming monotonic function
        f_absmax = max(abs(f_hi), abs(f_lo))
        f_absmin = min(abs(f_hi), abs(f_lo))

        f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
        f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
        storage_lsb = f_lsb - io_precision.get_bit_size() - guard_bits

        f_int_size = f_msb
        f_frac_size = -storage_lsb

        storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
        Log.report(Log.Info, "storage_format is {}".format(storage_format))

        # table of initial value index
        tiv_index = Concatenation(alpha_index,
                                  beta_index,
                                  tag="tiv_index",
                                  debug=debug_std)
        # table of offset value index
        to_index = Concatenation(alpha_index,
                                 gamma_index,
                                 tag="to_index",
                                 debug=debug_std)

        tiv_index_size = alpha + beta
        to_index_size = alpha + gamma

        Log.report(Log.Info, "initial table structures")
        table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                               storage_precision=storage_format,
                               tag="tiv")
        table_offset = ML_NewTable(dimensions=[2**to_index_size],
                                   storage_precision=storage_format,
                                   tag="to")

        slope_table = [None] * (2**alpha)
        slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
        delta_u = range_size * slope_delta * 2**-15
        Log.report(Log.Info, "computing slope value")
        for i in range(2**alpha):
            # slope is computed at the middle of range_size interval
            slope_x = range_lo + (i + 0.5) * range_size * slope_delta
            # TODO: gross approximation of derivatives
            f_xpu = self.function(slope_x + delta_u / 2)
            f_xmu = self.function(slope_x - delta_u / 2)
            slope = (f_xpu - f_xmu) / delta_u
            slope_table[i] = slope

        range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
        Log.report(Log.Info, "computing value for initial-value table")
        for i in range(2**tiv_index_size):
            slope_index = i / 2**beta
            iv_x = range_lo + i * range_rcp_steps * range_size
            offset_x = 0.5 * range_rcp_steps * range_size
            # initial value is computed so that the piecewise linear
            # approximation intersects the function at iv_x + offset_x
            iv_y = self.function(
                iv_x + offset_x) - offset_x * slope_table[int(slope_index)]
            initial_value = storage_format.round_sollya_object(iv_y)
            table_iv[i] = initial_value

        # determining table of initial value interval
        tiv_min = table_iv[0]
        tiv_max = table_iv[0]
        for i in range(1, 2**tiv_index_size):
            tiv_min = min(tiv_min, table_iv[i])
            tiv_max = max(tiv_max, table_iv[i])
        table_iv.set_interval(Interval(tiv_min, tiv_max))

        offset_step = range_size / S2**(alpha + beta + gamma)
        for i in range(2**alpha):
            Log.report(Log.Info,
                       "computing offset value for sub-table {}".format(i))
            for j in range(2**gamma):
                to_i = i * 2**gamma + j
                offset = slope_table[i] * j * offset_step
                table_offset[to_i] = offset

        # determining table of offset interval
        to_min = table_offset[0]
        to_max = table_offset[0]
        for i in range(1, 2**(alpha + gamma)):
            to_min = min(to_min, table_offset[i])
            to_max = max(to_max, table_offset[i])
        offset_interval = Interval(to_min, to_max)
        table_offset.set_interval(offset_interval)

        initial_value = TableLoad(table_iv,
                                  tiv_index,
                                  precision=storage_format,
                                  tag="initial_value",
                                  debug=debug_fixed)

        offset_precision = get_fixed_type_from_interval(offset_interval, 16)
        print("offset_precision is {} ({} bits)".format(
            offset_precision, offset_precision.get_bit_size()))
        table_offset.get_precision().storage_precision = offset_precision

        # rounding table value
        for i in range(1, 2**(alpha + gamma)):
            table_offset[i] = offset_precision.round_sollya_object(
                table_offset[i])

        offset_value = TableLoad(table_offset,
                                 to_index,
                                 precision=offset_precision,
                                 tag="offset_value",
                                 debug=debug_fixed)

        Log.report(
            Log.Verbose,
            "initial_value's interval: {}, offset_value's interval: {}".format(
                evaluate_range(initial_value), evaluate_range(offset_value)))

        final_add = initial_value + offset_value
        round_bit = final_add  # + FixedPointPosition(final_add, io_precision.get_bit_size(), align=FixedPointPosition.FromMSBToLSB)

        vr_out = Conversion(initial_value + offset_value,
                            precision=io_precision,
                            tag="vr_out",
                            debug=debug_fixed)

        self.implementation.add_output_signal("vr_out", vr_out)

        # Approximation error evaluation
        approx_error = 0.0
        for i in range(2**alpha):
            for j in range(2**beta):
                tiv_i = (i * 2**beta + j)
                # = range_lo + tiv_i * range_rcp_steps * range_size
                iv = table_iv[tiv_i]
                for k in range(2**gamma):
                    to_i = i * 2**gamma + k
                    offset = table_offset[to_i]
                    approx_value = offset + iv
                    table_x = range_lo + range_size * (
                        (i * 2**beta + j) * 2**gamma + k) / S2**(alpha + beta +
                                                                 gamma)
                    local_error = abs(1 / (table_x) - approx_value)
                    approx_error = max(approx_error, local_error)
        error_log2 = float(sollya.log2(approx_error))
        print("approx_error is {}, error_log2 is {}".format(
            float(approx_error), error_log2))

        # table size
        table_iv_size = 2**(alpha + beta)
        table_offset_size = 2**(alpha + gamma)
        print("tables' size are {} entries".format(table_iv_size +
                                                   table_offset_size))

        return [self.implementation]
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        #try:
        if 1:
            #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            Log.report(Log.Info, "eval error: %s" % eval_error)
        #except:
        #    Log.report(Log.Info, "gappa error evaluation failed")
        print r.get_str(depth = None, display_precision = True, display_attribute = True)
        print opt_r.get_str(depth = None, display_precision = True, display_attribute = True)

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print "ulp: ", local_ulp 
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree

        return


        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            #print "poly: ", poly.get_str(depth = None, display_precision = True)
            #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print "global_poly_error: ", global_poly_error, global_rel_poly_error 
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print "test: ", flag
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        #print scheme.get_str(depth = None, display_precision = True)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        #self.result.add_header("support_lib/ml_types.h")
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
Exemple #19
0
  def generate_scheme(self):
    memory_limit = 2500

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = input_var
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    ### Constants computations ###

    v_log2_hi = nearestint(log(2) * 2**-52) * 2**52
    v_log2_lo = round(log(2) - v_log2_hi, 64+53, sollya.RN)
    log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi")
    log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo")
   
    print "\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)"
    arg_reduc = self.generate_argument_reduction(memory_limit)

    print "\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory".format(arg_reduc['size1'],arg_reduc['prec1'],arg_reduc['size2'],arg_reduc['prec2'],arg_reduc['degree_poly1'],arg_reduc['degree_poly2'],arg_reduc['sizeof_tables']) 
    
    print "\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table1'], arg_reduc['sizeof_table1'])
    inv_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False),
                           tag = self.uniquify_name("inv_table_1"))
    log_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_1"))
    for i in xrange(0, arg_reduc['length_table1']-1):
      x1 = 1 + i/S2*arg_reduc['size1']
      inv_x1 = ceil(S2**arg_reduc['prec1']/x1)*S2**arg_reduc['prec1']
      log_x1 = floor(log(x1) * S2**(128-11))*S2**(11-128)
      inv_table_1[i] = inv_x1 #Constant(inv_x1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False))
      log_table_1[i] = log_x1 #Constant(log_x1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))

    print "\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table2'], arg_reduc['sizeof_table2'])
    inv_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False),
                           tag = self.uniquify_name("inv_table_2"))
    log_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']],
                           storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False),
                           tag = self.uniquify_name("log_table_2"))
    for i in xrange(0, arg_reduc['length_table2']-1):
      y1 = 1 + i/S2**arg_reduc['size2']
      inv_y1 = ceil(S2**arg_reduc['prec2']/x1) * S2**arg_reduc['prec2']
      log_y1 = floor(log(inv_y1) * S2**(128-11))*S2**(11-128)
      inv_table_2[i] = inv_y1 #Constant(inv_y1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False))
      log_table_2[i] = log_y1 #Constant(log_y1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False))
    
    ### Evaluation Scheme ###
    
    print "\n\033[1mGenerate the evaluation scheme:\033[0m"
    input_var = self.implementation.add_input_variable("input_var", self.precision) 
    ve = ExponentExtraction(input_var, tag = "x_exponent", debug = debugd)
    vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = ML_Custom_FixedPoint_Format(0,52,False), debug = debug_lftolx)
    #vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = self.precision, debug = debug_lftolx)

    print "filtering and handling special cases"
    test_is_special_cases = LogicalNot(Test(input_var, specifier = Test.IsIEEENormalPositive, likely = True, debug = debugd, tag = "is_special_cases"))
    handling_special_cases = Statement(
      ConditionBlock(
        Test(input_var, specifier = Test.IsSignalingNaN, debug = True),
        ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision))
      ),
      ConditionBlock(
        Test(input_var, specifier = Test.IsNaN, debug = True),
        Return(input_var)
      )#,
      # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN)
      # all that remains is x is a subnormal positive
      #Statement(
      #  ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))),
      #  ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision()))))
      #)
    )
    
    print "doing the argument reduction"
    v_dx = vx
    v_x1 = Conversion(v_dx, tag = 'x1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size1'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_x = TypeCast(v_x1, tag = 'index_x',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_x1.get_precision().get_c_bit_size(), 0, False))
    v_inv_x = TableLoad(inv_table_1, v_index_x, tag = 'inv_x')
    v_x = Addition(v_dx, 1, tag = 'x',
                   precision = ML_Custom_FixedPoint_Format(1,52,False))
    v_dy = Multiplication(v_x, v_inv_x, tag = 'dy',
                          precision = ML_Custom_FixedPoint_Format(0,52+arg_reduc['prec1'],False))
    v_y1 = Conversion(v_dy, tag = 'y1',
                      precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size2'],False),
                      rounding_mode = ML_RoundTowardMinusInfty)
    v_index_y = TypeCast(v_y1, tag = 'index_y',
                        precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_y1.get_precision().get_c_bit_size(), 0, False))
    v_inv_y = TableLoad(inv_table_2, v_index_y, tag = 'inv_y')
    v_y = Addition(v_dy, 1, tag = 'y',
                   precision = ML_Custom_FixedPoint_Format(1,52+arg_reduc['prec2'],False))
    # note that we limit the number of bits used to represent dz to 64.
    # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2))
    v_dz = Multiplication(v_y, v_inv_y, tag = 'z',
                          precision = ML_Custom_FixedPoint_Format(64-52-arg_reduc['prec1']-arg_reduc['prec2'],52+arg_reduc['prec1']+arg_reduc['prec2'],False))
    # reduce the number of bits used to represent dz. we can do that
    
    print "doing the first polynomial evaluation"
    global_poly1_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, arg_reduc['degree_poly1']-1, [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'], fixed, sollya.absolute)
    poly1_object = global_poly1_object.sub_poly(start_index = 1)
    print global_poly1_object
    print poly1_object
    poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(poly1_object, v_dz, unified_precision = v_dz.get_precision())
    return ConditionBlock(test_is_special_cases, handling_special_cases, Return(poly1))

    #approx_interval = Interval(0, 27021597764222975*S2**-61)
    
    #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size())))
    #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute)
    #poly_object = global_poly_object.sub_poly(start_index = 1)
    #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
    #_poly.set_attributes(tag = "poly", debug = debug_lftolx)

    """
    def generate_scheme(self):
        ## Generate Fused multiply and add comput <x> . <y> + <z>
        Log.report(
            Log.Info,
            "generating fixed MPFMA with {ed} extra digit(s) and sign-magnitude accumulator: {sm}"
            .format(ed=self.extra_digit, sm=self.sign_magnitude))

        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = HdlVirtualFormat(self.precision)
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        # reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable

        # maximum weigth for a mantissa product digit
        max_prod_exp = self.precision.get_emax() * 2 + 1
        # minimum wieght for a mantissa product digit
        min_prod_exp = self.precision.get_emin_subnormal() * 2

        ## Most and least significant digit index for the
        #  accumulator
        acc_msb_index = max_prod_exp + self.extra_digit
        acc_lsb_index = min_prod_exp

        acc_width = acc_msb_index - min_prod_exp + 1
        # precision of the accumulator
        acc_prec = ML_StdLogicVectorFormat(acc_width)

        reset = self.implementation.add_input_signal("reset", ML_StdLogic)

        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        acc = self.implementation.add_input_signal("acc", acc_prec)
        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation
            sign_acc = self.implementation.add_input_signal(
                "sign_acc", ML_StdLogic)
        else:
            sign_acc = CopySign(acc,
                                precision=ML_StdLogic,
                                tag="sign_acc",
                                debug=debug_std)

        vx_precision = self.precision
        vy_precision = self.precision
        result_precision = acc_prec

        # precision for first operand vx which is to be statically
        # positionned
        p = vx_precision.get_mantissa_size()
        # precision for second operand vy which is to be dynamically shifted
        q = vy_precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shit amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)
        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())

        mant_vx_precision = ML_StdLogicVectorFormat(p - 1)
        mant_vy_precision = ML_StdLogicVectorFormat(q - 1)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)

        exp_vx = ExponentExtraction(vx,
                                    precision=exp_vx_precision,
                                    tag="exp_vx",
                                    debug=debug_dec)
        exp_vy = ExponentExtraction(vy,
                                    precision=exp_vy_precision,
                                    tag="exp_vy",
                                    debug=debug_dec)

        # Maximum number of leading zero for normalized <vx> mantissa
        L_x = 0
        # Maximum number of leading zero for normalized <vy> mantissa
        L_y = 0
        # Maximum number of leading zero for the product of <x>.<y>
        # mantissa.
        L_xy = L_x + L_y + 1

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        sign_xy = BitLogicXor(sign_vx,
                              sign_vy,
                              precision=ML_StdLogic,
                              tag="sign_xy",
                              debug=ML_Debug(display_format="-radix 2"))
        effective_op = BitLogicXor(sign_xy,
                                   sign_acc,
                                   precision=ML_StdLogic,
                                   tag="effective_op",
                                   debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()

        # <acc> is statically positionned in the datapath,
        # it may even constitute the whole datapath
        #
        # the product is shifted with respect to the fix accumulator

        exp_bias = (exp_vx_bias + exp_vy_bias)

        # because of the mantissa range [1, 2[, the product exponent
        # is located one bit to the right (lower) of the product MSB
        prod_exp_offset = 1

        # Determine a working precision to accomodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(
            vx_precision.get_exponent_size(),
            vy_precision.get_exponent_size(),
            abs(ceil(log2(abs(acc_msb_index)))),
            abs(ceil(log2(abs(acc_lsb_index)))),
            abs(ceil(log2(abs(exp_bias + prod_exp_offset)))),
        ) + 2
        Log.report(Log.Info,
                   "exp_precision_ext_size={}".format(exp_precision_ext_size))
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)

        # static accumulator exponent
        exp_acc = Constant(acc_msb_index,
                           precision=exp_precision_ext,
                           tag="exp_acc",
                           debug=debug_cst_dec)

        # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + offset
        # exp_vx in [emin, emax]
        # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(
            exp_acc,
            Addition(Addition(zext(
                exp_vy,
                exp_precision_ext_size - vy_precision.get_exponent_size()),
                              zext(
                                  exp_vx, exp_precision_ext_size -
                                  vx_precision.get_exponent_size()),
                              precision=exp_precision_ext),
                     Constant(exp_bias + prod_exp_offset,
                              precision=exp_precision_ext,
                              tag="diff_bias",
                              debug=debug_cst_dec),
                     precision=exp_precision_ext,
                     tag="pre_exp_diff",
                     debug=debug_dec),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_dec)
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        datapath_full_width = acc_width
        # the maximum exp diff is the size of the datapath
        # minus the bit size of the product
        max_exp_diff = datapath_full_width - (p + q)
        exp_diff_lt_0 = Comparison(signed_exp_diff,
                                   Constant(0, precision=exp_precision_ext),
                                   specifier=Comparison.Less,
                                   precision=ML_Bool,
                                   tag="exp_diff_lt_0",
                                   debug=debug_std)
        exp_diff_gt_max_diff = Comparison(signed_exp_diff,
                                          Constant(
                                              max_exp_diff,
                                              precision=exp_precision_ext),
                                          specifier=Comparison.Greater,
                                          precision=ML_Bool)

        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        mant_shift = Select(exp_diff_lt_0,
                            Constant(0, precision=shift_amount_prec),
                            Select(exp_diff_gt_max_diff,
                                   Constant(max_exp_diff,
                                            precision=shift_amount_prec),
                                   Truncate(exp_diff,
                                            precision=shift_amount_prec),
                                   precision=shift_amount_prec),
                            precision=shift_amount_prec,
                            tag="mant_shift",
                            debug=ML_Debug(display_format="-radix 10"))

        prod_prec = ML_StdLogicVectorFormat(p + q)
        prod = Multiplication(mant_vx,
                              mant_vy,
                              precision=prod_prec,
                              tag="prod",
                              debug=debug_std)

        # attempt at pipelining the operator
        # self.implementation.start_new_stage()

        mant_ext_size = datapath_full_width - (p + q)
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        shifted_prod = BitLogicRightShift(rzext(prod, mant_ext_size),
                                          mant_shift,
                                          precision=shift_prec,
                                          tag="shifted_prod",
                                          debug=debug_std)

        ## Inserting a pipeline stage after the product shifting
        if self.pipelined: self.implementation.start_new_stage()

        if self.sign_magnitude:
            # the accumulator is in sign-magnitude representation

            acc_negated = Select(Comparison(sign_xy,
                                            sign_acc,
                                            specifier=Comparison.Equal,
                                            precision=ML_Bool),
                                 acc,
                                 BitLogicNegate(acc, precision=acc_prec),
                                 precision=acc_prec)

            # one extra MSB bit is added to the final addition
            # to detect overflows
            add_width = acc_width + 1
            add_prec = ML_StdLogicVectorFormat(add_width)

            # FIXME: implement with a proper compound adder
            mant_add_p0_ext = Addition(zext(shifted_prod, 1),
                                       zext(acc_negated, 1),
                                       precision=add_prec)
            mant_add_p1_ext = Addition(
                mant_add_p0_ext,
                Constant(1, precision=ML_StdLogic),
                precision=add_prec,
                tag="mant_add",
                debug=ML_Debug(display_format=" -radix 2"))
            # discarding carry overflow bit
            mant_add_p0 = SubSignalSelection(mant_add_p0_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)
            mant_add_p1 = SubSignalSelection(mant_add_p1_ext,
                                             0,
                                             acc_width - 1,
                                             precision=acc_prec)

            mant_add_pre_sign = CopySign(mant_add_p1_ext,
                                         precision=ML_StdLogic,
                                         tag="mant_add_pre_sign",
                                         debug=debug_std)
            mant_add = Select(Comparison(sign_xy,
                                         sign_acc,
                                         specifier=Comparison.Equal,
                                         precision=ML_Bool),
                              mant_add_p0,
                              Select(
                                  Comparison(mant_add_pre_sign,
                                             Constant(1,
                                                      precision=ML_StdLogic),
                                             specifier=Comparison.Equal,
                                             precision=ML_Bool),
                                  mant_add_p1,
                                  BitLogicNegate(mant_add_p0,
                                                 precision=acc_prec),
                                  precision=acc_prec,
                              ),
                              precision=acc_prec,
                              tag="mant_add")

            # if both operands had the same sign, then
            # mant_add is necessarily positive and the result
            # sign matches the input sign
            # if both operands had opposite signs, then
            # the result sign matches the product sign
            # if mant_add is positive, else the accumulator sign
            output_sign = Select(
                Comparison(effective_op,
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool),
                # if the effective op is a subtraction (prod - acc)
                BitLogicXor(sign_acc, mant_add_pre_sign,
                            precision=ML_StdLogic),
                # the effective op is an addition, thus result and
                # acc share sign
                sign_acc,
                precision=ML_StdLogic,
                tag="output_sign")

            if self.pipelined: self.implementation.start_new_stage()

            # adding output
            self.implementation.add_output_signal("vr_sign", output_sign)
            self.implementation.add_output_signal("vr_acc", mant_add)

        else:
            # 2s complement encoding of the accumulator,
            # the accumulator is never negated, only the producted
            # is negated if negative

            # negate shifted prod when required
            shifted_prod_op = Select(Comparison(sign_xy,
                                                Constant(
                                                    1, precision=ML_StdLogic),
                                                specifier=Comparison.Equal,
                                                precision=ML_Bool),
                                     Negation(shifted_prod,
                                              precision=shift_prec),
                                     shifted_prod,
                                     precision=shift_prec)

            add_prec = shift_prec  # ML_StdLogicVectorFormat(datapath_full_width + 1)

            mant_add = Addition(shifted_prod_op,
                                acc,
                                precision=acc_prec,
                                tag="mant_add",
                                debug=ML_Debug(display_format=" -radix 2"))

            if self.pipelined: self.implementation.start_new_stage()

            self.implementation.add_output_signal("vr_acc", mant_add)

        return [self.implementation]
Exemple #21
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() / 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
Exemple #22
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        index_size = 3

        vx = Abs(vx)
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # argument reduction
        arg_reg_value = log(2) / 2**index_size
        inv_log2_value = round(1 / arg_reg_value,
                               self.precision.get_sollya_object(), RN)
        inv_log2_cst = Constant(inv_log2_value,
                                precision=self.precision,
                                tag="inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2  for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^21024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision(
        ) - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value,
                              self.precision.get_sollya_object(), RN)
        log2_hi_value_cst = Constant(log2_hi_value,
                                     tag="log2_hi_value",
                                     precision=self.precision)
        log2_lo_value_cst = Constant(log2_lo_value,
                                     tag="log2_lo_value",
                                     precision=self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
        k_log2 = Multiplication(k,
                                log2_hi_value_cst,
                                precision=self.precision,
                                exact=True,
                                tag="k_log2",
                                unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        r_eval_error = self.get_eval_error(
            r_hi,
            variable_copy_map={
                vx:
                Variable("vx",
                         interval=Interval(0, 715),
                         precision=self.precision),
                k:
                Variable("k",
                         interval=Interval(0, 1024),
                         precision=int_precision)
            })
        print "r_eval_error: ", r_eval_error

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k,
                               precision=int_precision,
                               tag="k_integer",
                               debug=debug_multi)
        k_hi = BitLogicRightShift(k_integer,
                                  Constant(index_size),
                                  tag="k_int_hi",
                                  precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer,
                      2**index_size,
                      tag="k_int_lo",
                      precision=int_precision,
                      debug=debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        exp_table = ML_Table(dimensions=[2 * 2**index_size, 4],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**((input_value) *
                                                 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value) *
                                                  2**-index_size)
            pos_value_hi = round(exp_value, self.precision.get_sollya_object(),
                                 RN)
            pos_value_lo = round(exp_value - pos_value_hi,
                                 self.precision.get_sollya_object(), RN)
            neg_value_hi = round(mexp_value,
                                 self.precision.get_sollya_object(), RN)
            neg_value_lo = round(mexp_value - neg_value_hi,
                                 self.precision.get_sollya_object(), RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # cosh(x) = 1/2 * (exp(x) + exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        #
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        print "poly_approx_error: ", poly_approx_error, float(
            log2(poly_approx_error))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            r,
            unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            -r,
            unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        k_plus = Max(
            Subtraction(k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
        pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

        pos_exp = (
            pos_value_load_hi +
            (pos_value_load_hi * poly_pos +
             (pos_value_load_lo + pos_value_load_lo * poly_pos))) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        neg_exp = (
            neg_value_load_hi +
            (neg_value_load_hi * poly_neg +
             (neg_value_load_lo + neg_value_load_lo * poly_neg))) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        result = Addition(pos_exp,
                          neg_exp,
                          precision=self.precision,
                          tag="result",
                          debug=debug_multi)

        # ov_value
        ov_value = round(acosh(self.precision.get_max_value()),
                         self.precision.get_sollya_object(), RD)
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme