Esempio n. 1
0
def Add211(x, y):
    """ Multi-precision Addition (2sum) HI, LO = x + y 
        TODO: missing assumption on input order """
    zh = Addition(x, y)
    t1 = Subtraction(zh, x)
    zl = Subtraction(y, t1)
    return zh, zl
Esempio n. 2
0
def Split(a, precision=None):
    """... splitting algorithm for Dekker TwoMul"""
    cst_value = {ML_Binary32: 4097, ML_Binary64: 134217729}[a.precision]
    s = Constant(cst_value, precision=a.get_precision(), tag='fp_split')
    c = Multiplication(s, a, precision=precision)
    tmp = Subtraction(a, c, precision=precision)
    ah = Addition(tmp, c, precision=precision)
    al = Subtraction(a, ah, precision=precision)
    return ah, al
Esempio n. 3
0
def Add212(xh, yh, yl):
    """ Multi-precision Addition:
        HI, LO = xh + [yh:yl] """
    r = Addition(xh, yh)
    s1 = Subtraction(xh, r)
    s2 = Addition(s1, yh)
    s = Addition(s2, yl)
    zh = Addition(r, s)
    zl = Addition(Subtraction(r, zh), s)
    return zh, zl
Esempio n. 4
0
def generate_fasttwosum(vx, vy):
    """Return two optrees for a FastTwoSum operation.
 
    Precondition: |vx| >= |vy|.
    The return value is a tuple (sum, error).
    """
    s = Addition(vx, vy)
    b = Subtraction(z, vx)
    e = Subtraction(vy, b)
    return s, e
Esempio n. 5
0
def subnormalize_multi(x_list, factor, precision=None, fma=True):
    """ x_list is a multi-component number with components ordered from the
        most to the least siginificant.
        x_list[0] must be the rounded evaluation of (x_list[0] + x_list[1] + ...)
        @return the field of x as a floating-point number assuming
        the exponent of the result is exponent(x) + factor
        and managing field subnormalization if required """
    x_hi = x_list[0]
    int_precision = precision.get_integer_format()
    ex = ExponentExtraction(x_hi, precision=int_precision)
    scaled_ex = Addition(ex, factor, precision=int_precision)
    CI0 = Constant(0, precision=int_precision)
    CI1 = Constant(1, precision=int_precision)

    # difference betwen x's real exponent and the minimal exponent
    # for a floating of format precision
    delta = Max(Min(Subtraction(Constant(precision.get_emin_normal(),
                                         precision=int_precision),
                                scaled_ex,
                                precision=int_precision),
                    CI0,
                    precision=int_precision),
                Constant(precision.get_field_size(), precision=int_precision),
                precision=int_precision)

    round_factor_exp = Addition(delta, ex, precision=int_precision)
    round_factor = ExponentInsertion(round_factor_exp, precision=precision)

    # to force a rounding as if x_hi was of precision p - delta
    # we use round_factor as follows:
    # o(o(round_factor + x_hi) - round_factor)
    if len(x_list) == 2:
        rounded_x_hi = Subtraction(Add112(round_factor,
                                          x_list[0],
                                          x_list[1],
                                          precision=precision)[0],
                                   round_factor,
                                   precision=precision)
    elif len(x_list) == 3:
        rounded_x_hi = Subtraction(Add113(round_factor,
                                          x_list[0],
                                          x_list[1],
                                          x_list[2],
                                          precision=precision)[0],
                                   round_factor,
                                   precision=precision)
    else:
        Log.report(Log.Error,
                   "len of x_list: {} is not supported in subnormalize_multi",
                   len(x_list))
        raise NotImplementedError

    return [rounded_x_hi] + [
        Constant(0, precision=precision) for i in range(len(x_list) - 1)
    ]
Esempio n. 6
0
def Split(a):
    """... splitting algorithm for Dekker TwoMul"""
    # if a.get_precision() == ML_Binary32:
    s = Constant(4097, precision=a.get_precision(), tag='fp_split')
    # elif a.get_precision() == ML_Binary64:
    #    s = Constant(134217729, precision = a.get_precision(), tag = 'fp_split')
    c = Multiplication(s, a)
    tmp = Subtraction(a, c)
    ah = Addition(tmp, c)
    al = Subtraction(a, ah)
    return ah, al
Esempio n. 7
0
def Add222(xh, xl, yh, yl):
    """ Multi-precision Addition:
        HI, LO = [xh:xl] + [yh:yl] """
    r = Addition(xh, yh)
    s1 = Subtraction(xh, r)
    s2 = Addition(s1, yh)
    s3 = Addition(s2, yl)
    s = Addition(s3, xl)
    zh = Addition(r, s)
    zl = Addition(Subtraction(r, zh), s)
    return zh, zl
Esempio n. 8
0
def generate_twosum(vx, vy):
    """Return two optrees for a TwoSum operation.
 
    The return value is a tuple (sum, error).
    """
    s = Addition(vx, vy)
    _x = Subtraction(s, vy)
    _y = Subtraction(s, _x)
    dx = Subtraction(vx, _x)
    dy = Subtraction(vy, _y)
    e = Addition(dx, dy)
    return s, e
Esempio n. 9
0
def generate_twosum(vx, vy, precision=None):
    """Return two optrees for a TwoSum operation.
 
    The return value is a tuple (sum, error).
    """
    s = Addition(vx, vy, precision=precision)
    _x = Subtraction(s, vy, precision=precision)
    _y = Subtraction(s, _x, precision=precision)
    dx = Subtraction(vx, _x, precision=precision)
    dy = Subtraction(vy, _y, precision=precision)
    e = Addition(dx, dy, precision=precision)
    return s, e
Esempio n. 10
0
def Add222(xh, xl, yh, yl, precision=None):
    """ Multi-precision Addition:
        HI, LO = [xh:xl] + [yh:yl] """
    r = Addition(xh, yh, precision=precision)
    s1 = Subtraction(xh, r, precision=precision)
    s2 = Addition(s1, yh, precision=precision)
    s3 = Addition(s2, yl, precision=precision)
    s = Addition(s3, xl, precision=precision)
    zh = Addition(r, s, precision=precision)
    zl = Addition(Subtraction(r, zh, precision=precision),
                  s,
                  precision=precision)
    return zh, zl
Esempio n. 11
0
def generate_count_leading_zeros(vx):
    """Generate a vectorizable LZCNT optree."""

    y = -BitLogicRightShift(vx, 16)  # If left half of x is 0,
    m = BitLogicAnd(BitArithmeticRightShift(y, 16), 16)
    # set n = 16.  If left half
    n = Subtraction(16, m)  # is nonzero, set n = 0 and
    vx_2 = BitLogicRightShift(vx, m)  # shift x right 16.
    # Now x is of the form 0000xxxx.
    y = vx_2 - 0x100  # If positions 8-15 are 0,
    m = BitLogicAnd(BitLogicRightShift(y, 16), 8)
    # add 8 to n and shift x left 8.
    n = n + m
    vx_3 = BitLogicLeftShift(vx_2, m)

    y = vx_3 - 0x1000  # If positions 12-15 are 0,
    m = BitLogicAnd(BitLogicRightShift(y, 16), 4)
    # add 4 to n and shift x left 4.
    n = n + m
    vx_4 = BitLogicLeftShift(vx_3, m)

    y = vx_4 - 0x4000  # If positions 14-15 are 0,
    m = BitLogicAnd(BitLogicRightShift(y, 16), 2)
    # add 2 to n and shift x left 2.
    n = n + m
    vx_5 = BitLogicLeftShift(vx_4, m)

    y = BitLogicRightShift(vx_5, 14)  # Set y = 0, 1, 2, or 3.
    m = BitLogicAnd(y, BitLogicNegate(BitLogicRightShift(
        y, 1)))  # Set m = 0, 1, 2, or 2 resp.
    return n + 2 - m
Esempio n. 12
0
def generate_node_eval_error(optree, input_mapping, node_error_map, node_value_map):
    if optree in node_error_map or optree in input_mapping:
        return
    # placeholder to avoid diplicate complication
    node_error_map[optree] = None

    # recursive on node inputs
    if not is_leaf_node(optree):
        for op in optree.get_inputs():
            generate_node_eval_error(op, input_mapping, node_error_map, node_value_map)

    expected_value = node_value_map[optree]
    assert expected_value != None
    expected_node = Constant(expected_value, precision=optree.get_precision())
    precision = optree.get_precision()
    #error_node = Abs(
    #    # FIXME/ may need to insert signed type if optree/expected_node are
    #    # unsigned
    #    Subtraction(
    #        optree,
    #        expected_node,
    #        precision=precision),
    #    precision=precision)
    error_node = Subtraction(
            optree,
            expected_node,
            precision=precision)
    error_display_statement = get_printf_value(optree, error_node, expected_node)
    node_error_map[optree] = error_display_statement
Esempio n. 13
0
def Add212(xh, yh, yl, precision=None):
    """ Multi-precision Addition:
        HI, LO = xh + [yh:yl] """
    # r = xh + yh
    # s1 = xh - r
    # s2 = s1 + yh
    # s = s2 + yl
    # zh = r + s
    # zl = (r - zh) + s
    r = Addition(xh, yh, precision=precision)
    s1 = Subtraction(xh, r, precision=precision)
    s2 = Addition(s1, yh, precision=precision)
    s = Addition(s2, yl, precision=precision)
    zh = Addition(r, s, precision=precision)
    zl = Addition(Subtraction(r, zh, precision=precision),
                  s,
                  precision=precision)
    return zh, zl
Esempio n. 14
0
def Mul222(xh, xl, yh, yl):
    """ Multi-precision Multiplication:
        HI, LO = [xh:xl] * [yh:yl] """
    ph = Multiplication(xh, yh)
    pl = FMS(xh, yh, ph)
    pl = FMA(xh, yl, pl)
    pl = FMA(xl, yh, pl)
    zh = Addition(ph, pl)
    zl = Subtraction(ph, zh)
    zl = Addition(zl, pl)
    return zh, zl
Esempio n. 15
0
def Mul211(x, y, fma=True):
    """ Multi-precision Multiplication HI, LO = x * y """
    zh = Multiplication(x, y)
    if fma == True:
        zl = FMS(x, y, zh)
    else:
        xh, xl = Split(x)
        yh, yl = Split(y)
        r1 = Multiplication(xh, yh)
        r2 = Subtraction(r1, zh)
        r3 = Multiplication(xh, yl)
        r4 = Multiplication(xl, yh)
        r5 = Multiplication(xl, yl)
        r6 = Addition(r2, r3)
        r7 = Addition(r6, r4)
        zl = Addition(r7, r5)
    return zh, zl
Esempio n. 16
0
def Mul211(x, y, precision=None, fma=True):
    """ Multi-precision Multiplication HI, LO = x * y """
    zh = Multiplication(x, y, precision=precision)
    if fma == True:
        zl = FMS(x, y, zh, precision=precision)
    else:
        xh, xl = Split(x, precision=precision)
        yh, yl = Split(y, precision=precision)
        r1 = Multiplication(xh, yh, precision=precision)
        r2 = Subtraction(r1, zh, precision=precision)
        r3 = Multiplication(xh, yl, precision=precision)
        r4 = Multiplication(xl, yh, precision=precision)
        r5 = Multiplication(xl, yl, precision=precision)
        r6 = Addition(r2, r3, precision=precision)
        r7 = Addition(r6, r4, precision=precision)
        zl = Addition(r7, r5, precision=precision)
    return zh, zl
Esempio n. 17
0
def Mul222(xh, xl, yh, yl, fma=True):
    """ Multi-precision Multiplication:
        HI, LO = [xh:xl] * [yh:yl] """
    if fma == True:
        ph = Multiplication(xh, yh)
        pl = FMS(xh, yh, ph)
        pl = FMA(xh, yl, pl)
        pl = FMA(xl, yh, pl)
        zh = Addition(ph, pl)
        zl = Subtraction(ph, zh)
        zl = Addition(zl, pl)
    else:
        t1, t2 = Mul211(xh, yh, fma)
        t3 = Multiplication(xh, yl)
        t4 = Multiplication(xl, yh)
        t5 = Addition(t3, t4)
        t6 = Addition(t2, t5)
        zh, zl = Add211(t1, t6)
    return zh, zl
Esempio n. 18
0
 def get_index_node(self, vx):
     assert vx.precision is self.precision
     int_precision = vx.precision.get_integer_format()
     index_size = self.exp_bits + self.field_bits
     # building an index mask from the index_size
     index_mask = Constant(2**index_size - 1, precision=int_precision)
     shift_amount = Constant(vx.get_precision().get_field_size() -
                             self.field_bits,
                             precision=int_precision)
     exp_offset = Constant(self.precision.get_integer_coding(
         S2**self.low_exp_value),
                           precision=int_precision)
     return BitLogicAnd(BitLogicRightShift(Subtraction(
         TypeCast(vx, precision=int_precision),
         exp_offset,
         precision=int_precision),
                                           shift_amount,
                                           precision=int_precision),
                        index_mask,
                        precision=int_precision)
Esempio n. 19
0
def generic_poly_split(offset_fct, indexing, target_eps, coeff_precision, vx):
    """ generate the meta approximation for @p offset_fct over several
        intervals defined by @p indexing object
        For each sub-interval, a polynomial approximation with
        maximal_error @p target_eps is tabulated, and evaluated using format
        @p coeff_precision.
        The input variable is @p vx """
    # computing degree for a different polynomial approximation on each
    # sub-interval
    poly_degree_list = [
        int(sup(guessdegree(offset_fct(offset), sub_interval, target_eps)))
        for offset, sub_interval in indexing.get_offseted_sub_list()
    ]
    poly_max_degree = max(poly_degree_list)

    # tabulating polynomial coefficients on split_num sub-interval of interval
    poly_table = ML_NewTable(
        dimensions=[indexing.split_num, poly_max_degree + 1],
        storage_precision=coeff_precision,
        const=True)
    offset_table = ML_NewTable(dimensions=[indexing.split_num],
                               storage_precision=coeff_precision,
                               const=True)
    max_error = 0.0

    for sub_index in range(indexing.split_num):
        poly_degree = poly_degree_list[sub_index]
        offset, approx_interval = indexing.get_offseted_sub_interval(sub_index)
        offset_table[sub_index] = offset
        if poly_degree == 0:
            # managing constant approximation separately since it seems
            # to break sollya
            local_approx = coeff_precision.round_sollya_object(
                offset_fct(offset)(inf(approx_interval)))
            poly_table[sub_index][0] = local_approx
            for monomial_index in range(1, poly_max_degree + 1):
                poly_table[sub_index][monomial_index] = 0
            approx_error = sollya.infnorm(
                offset_fct(offset) - local_approx, approx_interval)

        else:
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                offset_fct(offset), poly_degree,
                [coeff_precision] * (poly_degree + 1), approx_interval,
                sollya.relative)

            for monomial_index in range(poly_max_degree + 1):
                if monomial_index <= poly_degree:
                    poly_table[sub_index][
                        monomial_index] = poly_object.coeff_map[monomial_index]
                else:
                    poly_table[sub_index][monomial_index] = 0
        max_error = max(approx_error, max_error)

    Log.report(Log.Debug, "max approx error is {}", max_error)

    # indexing function: derive index from input @p vx value
    poly_index = indexing.get_index_node(vx)
    poly_index.set_attributes(tag="poly_index", debug=debug_multi)

    ext_precision = get_extended_fp_precision(coeff_precision)

    # building polynomial evaluation scheme
    offset = TableLoad(offset_table,
                       poly_index,
                       precision=coeff_precision,
                       tag="offset",
                       debug=debug_multi)
    poly = TableLoad(poly_table,
                     poly_index,
                     poly_max_degree,
                     precision=coeff_precision,
                     tag="poly_init",
                     debug=debug_multi)
    red_vx = Subtraction(vx,
                         offset,
                         precision=vx.precision,
                         tag="red_vx",
                         debug=debug_multi)
    for monomial_index in range(poly_max_degree, -1, -1):
        coeff = TableLoad(poly_table,
                          poly_index,
                          monomial_index,
                          precision=coeff_precision,
                          tag="poly_%d" % monomial_index,
                          debug=debug_multi)
        #fma_precision = coeff_precision if monomial_index > 1 else ext_precision
        fma_precision = coeff_precision
        poly = FMA(red_vx, poly, coeff, precision=fma_precision)

    #return Conversion(poly, precision=coeff_precision)
    #return poly.hi
    return poly
Esempio n. 20
0
    def generate_bench_wrapper(self,
                               test_num=1,
                               loop_num=100000,
                               test_ranges=[Interval(-1.0, 1.0)],
                               debug=False):
        # interval where the array lenght is chosen from (randomly)
        index_range = self.test_index_range

        auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        failure_report_op = FunctionOperator("report_failure")
        failure_report_function = FunctionObject("report_failure", [], ML_Void,
                                                 failure_report_op)

        printf_success_op = FunctionOperator(
            "printf",
            arg_map={0: "\"test successful %s\\n\"" % function_name},
            void_function=True)
        printf_success_function = FunctionObject("printf", [], ML_Void,
                                                 printf_success_op)

        output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

        test_total = test_num

        # number of arrays expected as inputs for tested_function
        NUM_INPUT_ARRAY = 1
        # position of the input array in tested_function operands (generally
        # equals to 1 as to 0-th input is often the destination array)
        INPUT_INDEX_OFFSET = 1

        # concatenating standard test array at the beginning of randomly
        # generated array
        TABLE_SIZE_VALUES = [
            len(std_table) for std_table in self.standard_test_cases
        ] + [
            random.randrange(index_range[0], index_range[1] + 1)
            for i in range(test_num)
        ]
        OFFSET_VALUES = [sum(TABLE_SIZE_VALUES[:i]) for i in range(test_total)]

        table_size_offset_array = generate_2d_table(
            test_total,
            2,
            ML_UInt32,
            self.uniquify_name("table_size_array"),
            value_gen=(lambda row_id:
                       (TABLE_SIZE_VALUES[row_id], OFFSET_VALUES[row_id])))

        INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [self.get_input_precision(1).get_data_precision()]
        rng_map = [
            get_precision_rng(precision, inf(test_range), sup(test_range))
            for precision, test_range in zip(input_precisions, test_ranges)
        ]

        # generated table of inputs
        input_tables = [
            generate_1d_table(
                INPUT_ARRAY_SIZE,
                self.get_input_precision(INPUT_INDEX_OFFSET +
                                         table_id).get_data_precision(),
                self.uniquify_name("input_table_arg%d" % table_id),
                value_gen=(
                    lambda _: input_precisions[table_id].round_sollya_object(
                        rng_map[table_id].get_new_value(), sollya.RN)))
            for table_id in range(NUM_INPUT_ARRAY)
        ]

        # generate output_array
        output_array = generate_1d_table(
            INPUT_ARRAY_SIZE,
            output_precision,
            self.uniquify_name("output_array"),
            #value_gen=(lambda _: FP_QNaN(self.precision))
            value_gen=(lambda _: None),
            const=False,
            empty=True)

        # accumulate element number
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        def empty_post_statement_gen(input_tables, output_array,
                                     table_size_offset_array, array_offset,
                                     array_len, test_id):
            return Statement()

        test_loop = self.get_array_test_wrapper(test_total, tested_function,
                                                table_size_offset_array,
                                                input_tables, output_array,
                                                acc_num,
                                                empty_post_statement_gen)

        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0:
                "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\""
                % function_name,
                1:
                FO_Arg(0),
                2:
                FO_Arg(1),
                3:
                FO_Arg(2)
            },
            void_function=True)
        printf_timing_function = FunctionObject(
            "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void,
            printf_timing_op)

        vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
        loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")
        loop_increment = 1

        # bench measure of clock per element
        cpe_measure = Division(
            Conversion(timer, precision=ML_Binary64),
            Conversion(acc_num, precision=ML_Binary64),
            precision=ML_Binary64,
            tag="cpe_measure",
        )

        # common test scheme between scalar and vector functions
        test_scheme = Statement(
            self.processor.get_init_timestamp(),
            ReferenceAssign(timer, self.processor.get_current_timestamp()),
            ReferenceAssign(acc_num, 0),
            Loop(
                ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
                vj < loop_num_cst,
                Statement(test_loop, ReferenceAssign(vj,
                                                     vj + loop_increment))),
            ReferenceAssign(
                timer,
                Subtraction(self.processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            printf_timing_function(
                Conversion(acc_num, precision=ML_Int64),
                timer,
                cpe_measure,
            ),
            Return(cpe_measure),
            # Return(Constant(0, precision = ML_Int32))
        )
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])
Esempio n. 21
0
    def generate_bench(self, processor, test_num=1000, unroll_factor=10):
        """ generate performance bench for self.op_class """
        initial_inputs = [
            Constant(random.uniform(inf(self.init_interval),
                                    sup(self.init_interval)),
                     precision=precision)
            for i, precision in enumerate(self.input_precisions)
        ]

        var_inputs = [
            Variable("var_%d" % i,
                     precision=FormatAttributeWrapper(precision, ["volatile"]),
                     var_type=Variable.Local)
            for i, precision in enumerate(self.input_precisions)
        ]

        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0: "\"%s[%s] %%lld elts computed "\
                   "in %%lld cycles =>\\n     %%.3f CPE \\n\"" %
                (
                    self.bench_name,
                    self.output_precision.get_display_format()
                ),
                1: FO_Arg(0),
                2: FO_Arg(1),
                3: FO_Arg(2),
                4: FO_Arg(3)
            }, void_function=True
        )
        printf_timing_function = FunctionObject(
            "printf", [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
            ML_Void, printf_timing_op)
        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

        void_function_op = FunctionOperator("(void)",
                                            arity=1,
                                            void_function=True)
        void_function = FunctionObject("(void)", [self.output_precision],
                                       ML_Void, void_function_op)

        # initialization of operation inputs
        init_assign = metaop.Statement()
        for var_input, init_value in zip(var_inputs, initial_inputs):
            init_assign.push(ReferenceAssign(var_input, init_value))

        # test loop
        loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
        test_num_cst = Constant(test_num / unroll_factor,
                                precision=ML_Int64,
                                tag="test_num")

        # Goal build a chain of dependant operation to measure
        # elementary operation latency
        local_inputs = tuple(var_inputs)
        local_result = self.op_class(*local_inputs,
                                     precision=self.output_precision,
                                     unbreakable=True)
        for i in range(unroll_factor - 1):
            local_inputs = tuple([local_result] + var_inputs[1:])
            local_result = self.op_class(*local_inputs,
                                         precision=self.output_precision,
                                         unbreakable=True)
        # renormalisation
        local_result = self.renorm_function(local_result)

        # variable assignation to build dependency chain
        var_assign = Statement()
        var_assign.push(ReferenceAssign(var_inputs[0], local_result))
        final_value = var_inputs[0]

        # loop increment value
        loop_increment = 1

        test_loop = Loop(
            ReferenceAssign(loop_i, Constant(0, precision=ML_Int32)),
            loop_i < test_num_cst,
            Statement(var_assign,
                      ReferenceAssign(loop_i, loop_i + loop_increment)),
        )

        # bench scheme
        test_scheme = Statement(
            ReferenceAssign(timer, processor.get_current_timestamp()),
            init_assign,
            test_loop,
            ReferenceAssign(
                timer,
                Subtraction(processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            # prevent intermediary variable simplification
            void_function(final_value),
            printf_timing_function(
                final_value, Constant(test_num, precision=ML_Int64), timer,
                Division(Conversion(timer, precision=ML_Binary64),
                         Constant(test_num, precision=ML_Binary64),
                         precision=ML_Binary64))
            # ,Return(Constant(0, precision = ML_Int32))
        )

        return test_scheme
Esempio n. 22
0
    def generate_cos_scheme(self, computation_precision, tabulated_cos,
                            tabulated_sin, sin_C2, cos_C2, red_vx_lo):
        cos_C2 = Multiplication(tabulated_cos,
                                cos_C2,
                                precision=ML_Custom_FixedPoint_Format(
                                    -1, 32, signed=True),
                                tag="cos_C2")
        u2 = Multiplication(
            red_vx_lo,
            red_vx_lo,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
            tag="u2")
        sin_u = Multiplication(
            tabulated_sin,
            red_vx_lo,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(1, 30, signed = True)
            tag="sin_u")

        cos_C2_u2 = Multiplication(
            cos_C2,
            u2,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(1, 30,signed = True)
            tag="cos_C2_u2")

        S2_u2 = Multiplication(sin_C2,
                               u2,
                               precision=ML_Custom_FixedPoint_Format(
                                   -1, 32, signed=True),
                               tag="S2_u2")

        S2_u3_sin = Multiplication(
            S2_u2,
            sin_u,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5,26, signed = True)
            tag="S2_u3_sin")

        cos_C2_u2_P_cos = Addition(
            tabulated_cos,
            cos_C2_u2,
            precision=
            computation_precision,  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
            tag="cos_C2_u2_P_cos")

        cos_C2_u2_P_cos_M_sin_u = Subtraction(
            cos_C2_u2_P_cos,
            sin_u,
            precision=
            computation_precision  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
        )

        scheme = Subtraction(
            cos_C2_u2_P_cos_M_sin_u,
            S2_u3_sin,
            precision=
            computation_precision  # ML_Custom_FixedPoint_Format(5, 26, signed = True)
        )

        return scheme
Esempio n. 23
0
    def generate_scheme(self):
        # declaring function input variable
        v_x = [
            self.implementation.add_input_variable(
                "x%d" % index, self.get_input_precision(index))
            for index in range(self.arity)
        ]

        double_format = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble
        }[self.precision]

        # testing Add211
        exact_add = Addition(v_x[0],
                             v_x[1],
                             precision=double_format,
                             tag="exact_add")
        # testing Mul211
        exact_mul = Multiplication(v_x[0],
                                   v_x[1],
                                   precision=double_format,
                                   tag="exact_mul")
        # testing Sub211
        exact_sub = Subtraction(v_x[1],
                                v_x[0],
                                precision=double_format,
                                tag="exact_sub")
        # testing Add222
        multi_add = Addition(exact_add,
                             exact_sub,
                             precision=double_format,
                             tag="multi_add")
        # testing Mul222
        multi_mul = Multiplication(multi_add,
                                   exact_mul,
                                   precision=double_format,
                                   tag="multi_mul")
        # testing Add221 and Add212 and Sub222
        multi_sub = Subtraction(Addition(exact_sub,
                                         v_x[1],
                                         precision=double_format,
                                         tag="add221"),
                                Addition(v_x[0],
                                         multi_mul,
                                         precision=double_format,
                                         tag="add212"),
                                precision=double_format,
                                tag="sub222")
        # testing Mul212 and Mul221
        mul212 = Multiplication(multi_sub,
                                v_x[0],
                                precision=double_format,
                                tag="mul212")
        mul221 = Multiplication(exact_mul,
                                v_x[1],
                                precision=double_format,
                                tag="mul221")
        # testing Sub221 and Sub212
        sub221 = Subtraction(mul212,
                             mul221.hi,
                             precision=double_format,
                             tag="sub221")
        sub212 = Subtraction(sub221,
                             mul212.lo,
                             precision=double_format,
                             tag="sub212")
        # testing FMA2111
        fma2111 = FMA(sub221.lo,
                      sub212.hi,
                      mul221.hi,
                      precision=double_format,
                      tag="fma2111")
        # testing FMA2112
        fma2112 = FMA(fma2111.lo,
                      fma2111.hi,
                      fma2111,
                      precision=double_format,
                      tag="fma2112")
        # testing FMA2212
        fma2212 = FMA(fma2112,
                      fma2112.hi,
                      fma2112,
                      precision=double_format,
                      tag="fma2212")
        # testing FMA2122
        fma2122 = FMA(fma2212.lo,
                      fma2212,
                      fma2212,
                      precision=double_format,
                      tag="fma2122")
        # testing FMA22222
        fma2222 = FMA(fma2122,
                      fma2212,
                      fma2111,
                      precision=double_format,
                      tag="fma2222")
        # testing Add122
        add122 = Addition(fma2222,
                          fma2222,
                          precision=self.precision,
                          tag="add122")
        # testing Add112
        add112 = Addition(add122,
                          fma2222,
                          precision=self.precision,
                          tag="add112")
        # testing Add121
        add121 = Addition(fma2222,
                          add112,
                          precision=self.precision,
                          tag="add121")
        # testing subnormalization
        multi_subnormalize = SpecificOperation(
            Addition(add121, add112, precision=double_format),
            Constant(3, precision=self.precision.get_integer_format()),
            specifier=SpecificOperation.Subnormalize,
            precision=double_format,
            tag="multi_subnormalize")
        result = Conversion(multi_subnormalize, precision=self.precision)

        scheme = Statement(Return(result))

        return scheme
Esempio n. 24
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise approximation

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the main interval
        :param max_degree: maximum degree for an approximation on any sub-interval
        :param error_threshold: error bound for an approximation on any sub-interval

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and error
            is the maximum approximation error encountered
        :rtype tuple(ML_Operation, SollyaObject): """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            local_poly_degree_list = list(
                range(1 if even else 0, degree + 1, 2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(cst_value, local_function,
                                                     local_interval,
                                                     sollya.absolute, accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    Log.report(
                        Log.error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio, precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
Esempio n. 25
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() / 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
Esempio n. 26
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=sollya.S2**-24):
    """ To be documented """
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(dimensions=[num_intervals, max_degree + 1],
                              storage_precision=precision,
                              tag="coeff_table")

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        #local_function = function(sollya.x)
        #local_interval = Interval(subint_low, subint_high)
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = sollya.guessdegree(local_function, local_interval,
                                          error_threshold)
        degree = min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            degree_list = range(1, degree + 1)
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x),
                degree_list, [precision] * len(degree_list),
                Interval(-subint_high, subint_high),
                sollya.absolute,
                error_function=error_function)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                print("degree: {}".format(degree))
                raise err
        for ci in range(degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       precision=precision)
    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    index = Max(0,
                Min(
                    NearestInteger(Multiplication(diff,
                                                  delta_ratio,
                                                  precision=precision),
                                   precision=ML_Int32), num_intervals - 1),
                tag="index",
                debug=True,
                precision=ML_Int32)
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=True)
    # generating indexed polynomial
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error