Esempio n. 1
0
def legalize_invsqrt_seed(optree):
    """Legalize a ReciprocalSquareRootSeed node into elementary operations.

    With the input written as 1.m * 2^e, the returned graph multiplies:
      - a value loaded from ``invsqrt_approx_table`` (indexed from the input),
      - ExponentInsertion(-e / 2), where the division is an ML_Int32 Division,
      - a correction selected on ``e % 2``: 1.0 when it is 0, 2^0.5 when it
        is -1 and 2^-0.5 otherwise.

    The approximation is evaluated in ML_Binary32: the input is converted to
    that format first and the result is converted back when the precision of
    the node differs.

    Args:
        optree: the ReciprocalSquareRootSeed node to legalize.

    Returns:
        The operation graph computing the seed, in ``optree``'s precision.
    """
    assert isinstance(optree, ReciprocalSquareRootSeed)
    op_prec = optree.get_precision()
    # input = 1.m_hi-m_lo * 2^e
    # approx = 2^(-int(e/2)) * approx_insqrt(1.m_hi) * (e % 2 ? ~2**(+/-0.5) : 1.0)
    op_input = optree.get_input(0)
    # format in which the whole approximation is evaluated
    approx_prec = ML_Binary32

    if op_prec != approx_prec:
        op_input = Conversion(op_input, precision=approx_prec)

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    op_exp = ExponentExtraction(op_input,
                                tag="op_exp",
                                debug=debug_multi,
                                precision=ML_Int32)
    neg_half_exp = Division(Negation(op_exp, precision=ML_Int32),
                            Constant(2, precision=ML_Int32),
                            precision=ML_Int32)
    approx_exp = ExponentInsertion(neg_half_exp,
                                   tag="approx_exp",
                                   debug=debug_multi,
                                   precision=approx_prec)
    # the integer halving above drops half an exponent when e is odd; the
    # parity (0, -1 or anything else) selects the factor compensating for it
    op_exp_parity = Modulo(op_exp,
                           Constant(2, precision=ML_Int32),
                           precision=ML_Int32)
    approx_exp_correction = Select(Equal(op_exp_parity,
                                         Constant(0, precision=ML_Int32)),
                                   Constant(1.0, precision=approx_prec),
                                   Select(Equal(
                                       op_exp_parity,
                                       Constant(-1, precision=ML_Int32)),
                                          Constant(S2**0.5,
                                                   precision=approx_prec),
                                          Constant(S2**-0.5,
                                                   precision=approx_prec),
                                          precision=approx_prec),
                                   precision=approx_prec,
                                   tag="approx_exp_correction",
                                   debug=debug_multi)
    table_index = invsqrt_approx_table.get_index_function()(op_input)
    table_index.set_attributes(tag="invsqrt_index", debug=debug_multi)
    approx = Multiplication(TableLoad(invsqrt_approx_table,
                                      table_index,
                                      precision=approx_prec),
                            Multiplication(approx_exp_correction,
                                           approx_exp,
                                           precision=approx_prec),
                            tag="invsqrt_approx",
                            debug=debug_multi,
                            precision=approx_prec)
    if approx_prec != op_prec:
        return Conversion(approx, precision=op_prec)
    else:
        return approx
Esempio n. 2
0
def legalize_reciprocal_seed(optree):
    """Expand a ReciprocalSeed node into table-based elementary operations.

    The returned graph is the product of three terms:
      - a value loaded from ``generic_inv_approx_table``, indexed from the
        input,
      - ExponentInsertion applied to the negated exponent of the input,
      - CopySign(input, 1.0).

    Everything is evaluated in the storage precision of the table: when the
    node precision differs, the input is converted to the table format first
    and the result is converted back at the end.

    Args:
        optree: the ReciprocalSeed node to legalize.

    Returns:
        The operation graph computing the seed, in ``optree``'s precision.
    """
    assert isinstance(optree, ReciprocalSeed)
    result_prec = optree.get_precision()
    seed_input = optree.get_input(0)

    table_prec = generic_inv_approx_table.get_storage_precision()

    needs_conversion = result_prec != table_prec
    if needs_conversion:
        seed_input = Conversion(seed_input, precision=table_prec)
    # precision used for every floating-point node built below
    work_prec = table_prec if needs_conversion else result_prec

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    exp_prec = work_prec.get_integer_format()

    unit = Constant(1.0, precision=work_prec)
    sign_term = CopySign(seed_input, unit, precision=work_prec)

    exponent = ExponentExtraction(seed_input,
                                  tag="op_exp",
                                  debug=debug_multi,
                                  precision=exp_prec)
    scale_term = ExponentInsertion(Negation(exponent, precision=exp_prec),
                                   tag="approx_exp",
                                   debug=debug_multi,
                                   precision=work_prec)

    index_function = generic_inv_approx_table.get_index_function()
    lookup_index = index_function(seed_input)
    lookup_index.set_attributes(tag="inv_index", debug=debug_multi)

    table_term = TableLoad(generic_inv_approx_table,
                           lookup_index,
                           precision=work_prec)
    signed_scale = Multiplication(scale_term, sign_term, precision=work_prec)
    seed = Multiplication(table_term,
                          signed_scale,
                          tag="inv_approx",
                          debug=debug_multi,
                          precision=work_prec)

    if not needs_conversion:
        return seed
    return Conversion(seed, precision=result_prec)
Esempio n. 3
0
    def generate_approx_poly_near_zero(self, function, high_bound, error_bound,
                                       variable):
        """Build an evaluation scheme approximating <function> near zero.

        The polynomial approximates ``function / x`` with even-degree
        monomials only, and the returned scheme multiplies its Horner
        evaluation by <variable>.

        Args:
            function: sollya expression to approximate.
            high_bound: upper bound of the approximation interval.
            error_bound: target error given to sollya.guessdegree.
            variable: node on which the polynomial is evaluated.

        Returns:
            A (scheme, approximation error) pair.
        """
        def error_function(p, f, ai, mod, t):
            # absolute error measured as a (dirty) infinite norm
            return sollya.dirtyinfnorm(p - f, ai)

        # 0 as an interval bound has been causing issues, so a tiny positive
        # value is used as lower bound instead
        approx_interval = Interval(2**-100, high_bound)
        local_function = function / sollya.x

        guessed_degree = sollya.guessdegree(local_function, approx_interval,
                                            error_bound)
        degree = sollya.sup(guessed_degree)
        # even monomials only, with some margin over the guessed degree
        degree_list = range(0, int(degree) + 4, 2)
        # NOTE(review): the first format is the literal 1, every other
        # coefficient uses self.precision -- confirm the meaning of 1 here
        format_list = [1] + [self.precision] * (len(degree_list) - 1)

        poly_object, approx_error = Polynomial.build_from_approximation_with_error(
            local_function,
            degree_list,
            format_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(
            Log.Info, "approximation poly: {}\n  with error {}".format(
                poly_object, approx_error))

        horner_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, variable, self.precision)
        return Multiplication(variable, horner_scheme), approx_error
Esempio n. 4
0
def Mul212(x, yh, yl, fma=True):
    """ Multi-precision Multiplication:
        HI, LO = x * [yh:yl]

        The product x * yh is expanded by Mul211 into a (t1, t2) pair, the
        cross term x * yl is added to t2 and the pair is finally combined
        by Add211.

        Args:
            x: single-word operand.
            yh: high word of the second operand.
            yl: low word of the second operand.
            fma: flag forwarded to Mul211 to select its FMA-based variant.

        Returns:
            The result of Add211 on the accumulated terms. """
    # fma is forwarded by keyword so that it cannot be bound to another
    # positional parameter of Mul211 (e.g. an optional precision)
    t1, t2 = Mul211(x, yh, fma=fma)
    t3 = Multiplication(x, yl)
    t4 = Addition(t2, t3)
    return Add211(t1, t4)
Esempio n. 5
0
    def generate_scheme(self):
        """Build a scheme computing 2 + x * x with runtime error evaluation.

        The input "x" is declared in FIXED_FORMAT with interval [-1, 1]; the
        operations use a custom fixed-point accumulator format. The nodes
        produced by runtime_error_eval.generate_error_eval_graph are added to
        the scheme before the final Return.

        Returns:
            The Statement holding the error evaluation nodes and the Return.
        """
        vx = self.implementation.add_input_variable("x", FIXED_FORMAT)
        # restrict the input variable <x> to [-1, 1]
        vx.set_interval(Interval(-1, 1))

        acc_format = ML_Custom_FixedPoint_Format(6, 58, False)

        two = Constant(2, precision=acc_format, tag="C2")
        square = Multiplication(vx, vx, precision=acc_format, tag="mul")
        result = Addition(two, square, precision=acc_format, tag="add")

        # value associated with the input for the error evaluation
        sample_value = vx.get_precision().round_sollya_object(0.125)
        error_eval_map = runtime_error_eval.generate_error_eval_graph(
            result, {vx: sample_value})

        # dummy scheme to make code generation functional
        scheme = Statement()
        for key in error_eval_map:
            eval_node = error_eval_map[key]
            scheme.add(eval_node)
        scheme.add(Return(result))
        return scheme
Esempio n. 6
0
def Mul211(x, y, precision=None, fma=True):
    """ Multi-precision Multiplication HI, LO = x * y

        Args:
            x: first operand.
            y: second operand.
            precision: precision set on every generated node.
            fma: when true, the low part is a single FMS(x, y, zh) node;
                otherwise it is rebuilt from the partial products of the
                Split halves of x and y.

        Returns:
            The (zh, zl) pair, zh being Multiplication(x, y). """
    zh = Multiplication(x, y, precision=precision)
    if fma:
        zl = FMS(x, y, zh, precision=precision)
    else:
        # Dekker-style product: zl = (xh*yh - zh) + xh*yl + xl*yh + xl*yl
        xh, xl = Split(x, precision=precision)
        yh, yl = Split(y, precision=precision)
        r1 = Multiplication(xh, yh, precision=precision)
        r2 = Subtraction(r1, zh, precision=precision)
        r3 = Multiplication(xh, yl, precision=precision)
        r4 = Multiplication(xl, yh, precision=precision)
        r5 = Multiplication(xl, yl, precision=precision)
        r6 = Addition(r2, r3, precision=precision)
        r7 = Addition(r6, r4, precision=precision)
        zl = Addition(r7, r5, precision=precision)
    return zh, zl
Esempio n. 7
0
def Mul211(x, y, fma=True):
    """ Multi-precision Multiplication HI, LO = x * y

        Args:
            x: first operand.
            y: second operand.
            fma: when true, the low part is a single FMS(x, y, zh) node;
                otherwise it is rebuilt from the partial products of the
                Split halves of x and y.

        Returns:
            The (zh, zl) pair, zh being Multiplication(x, y). """
    zh = Multiplication(x, y)
    if fma:
        zl = FMS(x, y, zh)
    else:
        # Dekker-style product: zl = (xh*yh - zh) + xh*yl + xl*yh + xl*yl
        xh, xl = Split(x)
        yh, yl = Split(y)
        r1 = Multiplication(xh, yh)
        r2 = Subtraction(r1, zh)
        r3 = Multiplication(xh, yl)
        r4 = Multiplication(xl, yh)
        r5 = Multiplication(xl, yl)
        r6 = Addition(r2, r3)
        r7 = Addition(r6, r4)
        zl = Addition(r7, r5)
    return zh, zl
Esempio n. 8
0
    def generate_scheme(self):
        """Build a 2D sliding-window sum-of-products scheme.

        Three pointer inputs (input, kernel and output buffers) and two
        ML_Int32 sizes (w, h) are declared and described as tensors. The
        NDRange writes, for every (i, j), the double Sum over (k_w, k_h) of
        in[i + k_w, j - k_h] * kernel[k_w, k_h], and is then expanded by
        expand_ndrange.

        Returns:
            A Statement made of the expanded NDRange and an empty Return.
        """
        size_format = ML_Int32

        def declare_buffer(name):
            # every buffer is a pointer to elements of self.precision
            return self.implementation.add_input_variable(
                name, ML_Pointer_Format(self.precision))

        # buffers
        in_storage = declare_buffer("buffer_in")
        kernel_storage = declare_buffer("buffer_kernel")
        out_storage = declare_buffer("buffer_out")

        # input / output sizes
        w = self.implementation.add_input_variable("w", size_format)
        h = self.implementation.add_input_variable("h", size_format)

        # input tensor: (w, h) with strides (1, w)
        tIn = Tensor(in_storage,
                     TensorDescriptor([w, h], [1, w], self.precision))

        # kernel tensor: each stride is the product of the previous dimensions
        stride = 1
        kernel_strides = [stride]
        for dim in self.kernel_size[:-1]:
            stride = dim * stride
            kernel_strides.append(stride)
        print("kernel_strides: {}".format(kernel_strides))
        tKernel = Tensor(
            kernel_storage,
            TensorDescriptor(self.kernel_size, kernel_strides, self.precision))

        # output tensor: same layout as the input tensor
        tOut = Tensor(out_storage,
                      TensorDescriptor([w, h], [1, w], self.precision))

        index_format = ML_Int32

        # loop indices
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        j = Variable("j", precision=index_format, var_type=Variable.Local)
        k_w = Variable("k_w", precision=index_format, var_type=Variable.Local)
        k_h = Variable("k_h", precision=index_format, var_type=Variable.Local)

        # main NDRange description, built from the outside ranges inwards
        output_ranges = [IterRange(i, 0, w - 1), IterRange(j, 0, h - 1)]

        in_value = ReadAccessor(tIn, [i + k_w, j - k_h], self.precision)
        kernel_value = ReadAccessor(tKernel, [k_w, k_h], self.precision)
        product = Multiplication(in_value, kernel_value)

        k_w_range = IterRange(k_w,
                              -(self.kernel_size[0] - 1) // 2,
                              (self.kernel_size[0] - 1) // 2)
        sum_over_k_w = Sum(product, k_w_range, precision=self.precision)

        k_h_range = IterRange(k_h,
                              -(self.kernel_size[1] - 1) // 2,
                              (self.kernel_size[1] - 1) // 2)
        sum_over_k_h = Sum(sum_over_k_w, k_h_range, precision=self.precision)

        result = NDRange(output_ranges,
                         WriteAccessor(tOut, [i, j], sum_over_k_h))

        mdl_scheme = expand_ndrange(result)
        print("mdl_scheme:\n{}".format(mdl_scheme.get_str(depth=None)))
        return Statement(mdl_scheme, Return())
Esempio n. 9
0
def Split(a, precision=None):
    """Split <a> into a (high, low) pair for Dekker's TwoMul.

    The splitting constant depends on the precision of <a>: 4097 for
    ML_Binary32 and 134217729 for ML_Binary64; any other precision raises
    a KeyError.

    Args:
        a: node to split.
        precision: precision set on every generated arithmetic node.

    Returns:
        The (ah, al) pair, with al = a - ah.
    """
    split_constants = {ML_Binary32: 4097, ML_Binary64: 134217729}
    splitter = Constant(split_constants[a.precision],
                        precision=a.get_precision(),
                        tag='fp_split')
    scaled = Multiplication(splitter, a, precision=precision)
    delta = Subtraction(a, scaled, precision=precision)
    high = Addition(delta, scaled, precision=precision)
    low = Subtraction(a, high, precision=precision)
    return high, low
Esempio n. 10
0
def Mul222(xh, xl, yh, yl, fma=True):
    """ Multi-precision Multiplication:
        HI, LO = [xh:xl] * [yh:yl]

        Args:
            xh, xl: high and low words of the first operand.
            yh, yl: high and low words of the second operand.
            fma: when true, the cross terms are accumulated with FMS / FMA
                nodes; otherwise the result is built from Mul211 (in its
                non-FMA variant) and Add211.

        Returns:
            The (zh, zl) pair. The xl * yl term is not part of either
            variant. """
    if fma:
        ph = Multiplication(xh, yh)
        # pl accumulates the FMS(xh, yh, ph) term and both cross products
        pl = FMS(xh, yh, ph)
        pl = FMA(xh, yl, pl)
        pl = FMA(xl, yh, pl)
        zh = Addition(ph, pl)
        zl = Subtraction(ph, zh)
        zl = Addition(zl, pl)
    else:
        # fma is forwarded by keyword so that it cannot be bound to another
        # positional parameter of Mul211 (e.g. an optional precision)
        t1, t2 = Mul211(xh, yh, fma=fma)
        t3 = Multiplication(xh, yl)
        t4 = Multiplication(xl, yh)
        t5 = Addition(t3, t4)
        t6 = Addition(t2, t5)
        zh, zl = Add211(t1, t6)
    return zh, zl
Esempio n. 11
0
def Mul222(xh, xl, yh, yl):
    """ Multi-precision Multiplication:
        HI, LO = [xh:xl] * [yh:yl]

        The head product xh * yh is completed by a tail accumulating
        FMS(xh, yh, head) and the two cross products through FMA nodes;
        the xl * yl term is not computed.

        Returns:
            The (zh, zl) pair. """
    head = Multiplication(xh, yh)
    # innermost node first: FMS(xh, yh, head), then xh * yl, then xl * yh
    tail = FMA(xl, yh, FMA(xh, yl, FMS(xh, yh, head)))
    zh = Addition(head, tail)
    zl = Addition(Subtraction(head, zh), tail)
    return zh, zl
Esempio n. 12
0
def Split(a):
    """Split <a> into a (high, low) pair for Dekker's TwoMul.

    The splitting constant is 2^s + 1 with s about half the significand
    width: 134217729 (2^27 + 1) when <a> is in ML_Binary64, and 4097
    (2^12 + 1, suited to binary32) for any other precision.

    Args:
        a: node to split.

    Returns:
        The (ah, al) pair, with al = a - ah.
    """
    op_precision = a.get_precision()
    # a binary32 constant applied to a binary64 operand leaves a high part
    # too wide for the partial products of TwoMul to be exact
    split_value = 134217729 if op_precision == ML_Binary64 else 4097
    s = Constant(split_value, precision=op_precision, tag='fp_split')
    c = Multiplication(s, a)
    tmp = Subtraction(a, c)
    ah = Addition(tmp, c)
    al = Subtraction(a, ah)
    return ah, al
Esempio n. 13
0
    def generate_scheme(self):
        """Build a scheme returning y + (x * z).

        The three inputs "x", "y" and "z" are declared in self.precision,
        which is also the precision of both operations.

        Returns:
            A Statement holding the addition node and the Return of it.
        """
        x, y, z = [
            self.implementation.add_input_variable(name, self.precision)
            for name in ("x", "y", "z")
        ]
        product = Multiplication(x, z, precision=self.precision)
        total = Addition(y, product, precision=self.precision)
        return Statement(total, Return(total))
Esempio n. 14
0
    def generate_sin_scheme(self, computation_precision, tabulated_cos,
                            tabulated_sin, coeff_S2, coeff_C2, red_vx_lo):
        """Build the sine evaluation graph from tabulated values.

        With u = red_vx_lo, the returned node is the sum
            tabulated_sin + tabulated_cos * u
            + (tabulated_sin * coeff_C2) * u^2
            + (coeff_S2 * u^2) * (tabulated_cos * u)

        The two products involving a coefficient use a dedicated
        ML_Custom_FixedPoint_Format(-1, 32, signed=True) format; every other
        node uses <computation_precision>.

        Returns:
            The Addition node at the root of the evaluation graph.
        """
        # products
        sin_by_c2 = Multiplication(
            tabulated_sin,
            coeff_C2,
            precision=ML_Custom_FixedPoint_Format(-1, 32, signed=True),
            tag="sin_C2")
        u_squared = Multiplication(red_vx_lo,
                                   red_vx_lo,
                                   precision=computation_precision,
                                   tag="u2")
        cos_by_u = Multiplication(tabulated_cos,
                                  red_vx_lo,
                                  precision=computation_precision,
                                  tag="cos_u")
        s2_by_u2 = Multiplication(
            coeff_S2,
            u_squared,
            precision=ML_Custom_FixedPoint_Format(-1, 32, signed=True),
            tag="S2_u2")
        sin_c2_u2 = Multiplication(sin_by_c2,
                                   u_squared,
                                   precision=computation_precision,
                                   tag="sin_C2_u2")
        s2_u3_cos = Multiplication(s2_by_u2,
                                   cos_by_u,
                                   precision=computation_precision,
                                   tag="S2_u3_cos")

        # accumulation, starting from the tabulated sine
        first_order = Addition(tabulated_sin,
                               cos_by_u,
                               precision=computation_precision,
                               tag="sin_P_cos_u")
        second_order = Addition(first_order,
                                sin_c2_u2,
                                precision=computation_precision,
                                tag="sin_P_cos_u_P_C2_u2_sin")
        return Addition(second_order,
                        s2_u3_cos,
                        precision=computation_precision)
Esempio n. 15
0
    def generate_scheme(self):
        """Build a matrix multiplication scheme C = A x B.

        Three pointer inputs (buffers a, b, c) and three ML_Int32 sizes
        (n, m, p) are declared. The NDRange writes, for every (j, i), the Sum
        over k of A[k, i] * B[j, k] into C[j, i]. It is vectorized along j
        when self.vectorize is set, tiled with its loops exchanged otherwise,
        and then expanded by expand_ndrange.

        Returns:
            A Statement made of the expanded NDRange and an empty Return.
        """
        size_format = ML_Int32

        def declare_buffer(name):
            # every buffer is a pointer to elements of self.precision
            return self.implementation.add_input_variable(
                name, ML_Pointer_Format(self.precision))

        # matrix storage
        A_storage = declare_buffer("buffer_a")
        B_storage = declare_buffer("buffer_b")
        C_storage = declare_buffer("buffer_c")

        # matrix sizes
        n = self.implementation.add_input_variable("n", size_format)
        m = self.implementation.add_input_variable("m", size_format)
        p = self.implementation.add_input_variable("p", size_format)

        # A is a (n x p) matrix in row-major
        tA = Tensor(A_storage,
                    TensorDescriptor([p, n], [1, p], self.precision))
        # B is a (p x m) matrix in row-major
        tB = Tensor(B_storage,
                    TensorDescriptor([m, p], [1, m], self.precision))
        # C is a (n x m) matrix in row-major
        tC = Tensor(C_storage,
                    TensorDescriptor([m, n], [1, m], self.precision))

        index_format = ML_Int32

        # loop indices
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        j = Variable("j", precision=index_format, var_type=Variable.Local)
        k = Variable("k", precision=index_format, var_type=Variable.Local)

        # NDRange description, built from the outside ranges inwards
        output_ranges = [IterRange(j, 0, m-1), IterRange(i, 0, n -1)]
        product = Multiplication(
            ReadAccessor(tA, [k, i], self.precision),
            ReadAccessor(tB, [j, k], self.precision),
            precision=self.precision)
        dot_product = Sum(product,
                          IterRange(k, 0, p - 1),
                          precision=self.precision)
        result = NDRange(output_ranges,
                         WriteAccessor(tC, [j, i], dot_product))

        if self.vectorize:
            transformed = vectorize_ndrange(result, j, 4)
        else:
            tiled = tile_ndrange(result, {j: 2, i: 2})
            transformed = exchange_loop_order(tiled, [1, 0])
        mdl_scheme = expand_ndrange(transformed)
        print("mdl_scheme:\n{}".format(mdl_scheme.get_str(depth=None, display_precision=True)))
        return Statement(mdl_scheme, Return())
Esempio n. 16
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=sollya.S2**-24):
    """ Build a table-driven piecewise polynomial approximation of <function>.

        [bound_low, bound_high] is cut into <num_intervals> sub-intervals of
        equal size. One polynomial of degree at most <max_degree> is built
        per sub-interval and its coefficients are stored in a row of a
        (num_intervals x (max_degree + 1)) table; coefficients a polynomial
        does not define are stored as 0.0. The returned scheme computes the
        row index from <variable>, subtracts the matching offset and
        evaluates the indexed polynomial with a Horner scheme.

        Args:
            function: callable building the sollya expression to approximate.
            variable: input node of the generated scheme.
            precision: format of the coefficients and of the operations.
            bound_low: lower bound of the approximation range.
            bound_high: upper bound of the approximation range.
            num_intervals: number of sub-intervals (rows of the table).
            max_degree: maximal degree of each polynomial.
            error_threshold: target error given to sollya.guessdegree.

        Returns:
            A (scheme, maximal approximation error over the sub-intervals)
            pair. """
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(dimensions=[num_intervals, max_degree + 1],
                              storage_precision=precision,
                              tag="coeff_table")

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # the function is shifted so that each polynomial is expressed in
        # the offset from the lower bound of its sub-interval
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = sollya.guessdegree(local_function, local_interval,
                                          error_threshold)
        degree = min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            # NOTE(review): this branch approximates the un-shifted function,
            # which only matches poly_var when subint_low == 0 -- confirm
            degree_list = range(1, degree + 1)
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x),
                degree_list, [precision] * len(degree_list),
                Interval(-subint_high, subint_high),
                sollya.absolute,
                error_function=error_function)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                print("degree: {}".format(degree))
                raise err
        # every column of the row is written: the local degree may be lower
        # than max_degree, and the evaluation below reads all the columns
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       precision=precision)
    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index, clamped to [0, num_intervals - 1]
    # index = nearestint(diff / delta * <num_intervals>)
    index = Max(0,
                Min(
                    NearestInteger(Multiplication(diff,
                                                  delta_ratio,
                                                  precision=precision),
                                   precision=ML_Int32), num_intervals - 1),
                tag="index",
                debug=True,
                precision=ML_Int32)
    # offset of the input from the lower bound of the selected sub-interval
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=True)
    # generating indexed polynomial: all the table columns are evaluated,
    # independently of the degree selected for the last sub-interval
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
Esempio n. 17
0
    def generate_scheme(self):
        """Build a chain of multi-precision operations used as a test scheme.

        Every intermediate node is tagged with the name of the
        multi-precision primitive it is meant to exercise. Most nodes use the
        two-word format matching self.precision (ML_SingleSingle for
        ML_Binary32, ML_DoubleDouble for ML_Binary64); the last additions use
        self.precision directly.

        Returns:
            A Statement returning the final value converted to self.precision.
        """
        # declaring function input variables
        v_x = []
        for index in range(self.arity):
            v_x.append(
                self.implementation.add_input_variable(
                    "x%d" % index, self.get_input_precision(index)))

        double_format = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble
        }[self.precision]

        # shorthands building nodes in the two-word format
        def dd_add(lhs, rhs, tag):
            return Addition(lhs, rhs, precision=double_format, tag=tag)

        def dd_sub(lhs, rhs, tag):
            return Subtraction(lhs, rhs, precision=double_format, tag=tag)

        def dd_mul(lhs, rhs, tag):
            return Multiplication(lhs, rhs, precision=double_format, tag=tag)

        def dd_fma(op0, op1, op2, tag):
            return FMA(op0, op1, op2, precision=double_format, tag=tag)

        x0, x1 = v_x[0], v_x[1]

        # single-word operands, two-word results: Add211, Mul211, Sub211
        exact_add = dd_add(x0, x1, "exact_add")
        exact_mul = dd_mul(x0, x1, "exact_mul")
        exact_sub = dd_sub(x1, x0, "exact_sub")
        # two-word operands: Add222 and Mul222
        multi_add = dd_add(exact_add, exact_sub, "multi_add")
        multi_mul = dd_mul(multi_add, exact_mul, "multi_mul")
        # Add221 and Add212 feeding Sub222
        multi_sub = dd_sub(dd_add(exact_sub, x1, "add221"),
                           dd_add(x0, multi_mul, "add212"),
                           "sub222")
        # mixed products (tags mul212 / mul221)
        mul212 = dd_mul(multi_sub, x0, "mul212")
        mul221 = dd_mul(exact_mul, x1, "mul221")
        # mixed subtractions (tags sub221 / sub212)
        sub221 = dd_sub(mul212, mul221.hi, "sub221")
        sub212 = dd_sub(sub221, mul212.lo, "sub212")
        # fused multiply-add variants, from single-word to two-word operands
        fma2111 = dd_fma(sub221.lo, sub212.hi, mul221.hi, "fma2111")
        fma2112 = dd_fma(fma2111.lo, fma2111.hi, fma2111, "fma2112")
        fma2212 = dd_fma(fma2112, fma2112.hi, fma2112, "fma2212")
        fma2122 = dd_fma(fma2212.lo, fma2212, fma2212, "fma2122")
        fma2222 = dd_fma(fma2122, fma2212, fma2111, "fma2222")
        # additions with a single-word result: Add122, Add112, Add121
        add122 = Addition(fma2222,
                          fma2222,
                          precision=self.precision,
                          tag="add122")
        add112 = Addition(add122,
                          fma2222,
                          precision=self.precision,
                          tag="add112")
        add121 = Addition(fma2222,
                          add112,
                          precision=self.precision,
                          tag="add121")
        # subnormalization
        multi_subnormalize = SpecificOperation(
            Addition(add121, add112, precision=double_format),
            Constant(3, precision=self.precision.get_integer_format()),
            specifier=SpecificOperation.Subnormalize,
            precision=double_format,
            tag="multi_subnormalize")
        result = Conversion(multi_subnormalize, precision=self.precision)

        return Statement(Return(result))
Esempio n. 18
0
    def generate_scalar_scheme(self, vx, vy):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        vy.set_attributes(tag="y")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          n, positive or null integers
        #
        # pow(x, n) = x^(y)
        #             = exp(y * log(x))
        #             = 2^(y * log2(x))
        #             = 2^(y * (log2(m) + e))
        #
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        m = MantissaExtraction(vx, tag="m", precision=self.precision)

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed, language=None,
            table_getter= lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x) # /sollya.log(self.basis)



        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)


        # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
        #
        # log_approx = log2(Abs(m))
        # r = y * log_approx ~ y * log2(m)
        #
        # NOTES: manage cases where e is negative and
        # (y * log2(m)) AND (y * e) could cancel out
        # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT
        # be of opposite signs

        # log2(m) in [0, 1[ so cancellation can occur only if e == -1
        # we split 2^x in 2^x = 2^t0 * 2^t1
        # if e < 0: t0 = y * (log2(m) + e), t1=0
        # else:     t0 = y * log2(m), t1 = y * e

        t_cond = e < 0

        # e_y ~ e * y
        e_f = Conversion(e, precision=self.precision)
        #t0 = Select(t_cond, (e_f + log_approx) * vy, Multiplication(e_f, vy), tag="t0")
        #NearestInteger(t0, precision=self.precision, tag="t0_int")

        EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
        LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
        t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
        t0_frac = Select(t_cond, FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY) ,EY - t0_int, tag="t0_frac")
        #t0_frac.set_attributes(tag="t0_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)

        exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
        exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

        exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision), precision=self.precision, tag="exp2_t0_int")

        t1 = Select(t_cond, Constant(0, precision=self.precision), r)
        exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
        exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

        result_sign = Constant(1.0, precision=self.precision) # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        y_int = NearestInteger(vy, precision=self.precision)
        y_is_integer = Equal(y_int, vy)
        y_is_even = LogicalOr(
            # if y is a number (exc. inf) greater than 2**mantissa_size * 2,
            # then it is an integer multiple of 2 => even
            Abs(vy) >= 2**(self.precision.get_mantissa_size()+1),
            LogicalAnd(
                y_is_integer and Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                # we want to limit the modulo computation to an integer input
                Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
            )
        )
        y_is_odd = LogicalAnd(
            LogicalAnd(
                Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                y_is_integer
            ),
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 1)
        )


        # special cases management
        special_case_results = Statement(
            # x is sNaN OR y is sNaN
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsSignalingNaN), Test(vy, specifier=Test.IsSignalingNaN)),
                Return(FP_QNaN(self.precision))
            ),
            # pow(x, ±0) is 1 if x is not a signaling NaN
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Return(Constant(1.0, precision=self.precision))
            ),
            # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)),
                Return(Select(Test(vx, specifier=Test.IsPositiveZero), FP_PlusInfty(self.precision), FP_MinusInfty(self.precision))),
            ),
            # pow(±0, −∞) is +∞ with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(±0, +∞) is +0 with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is ±0 for finite y>0 an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)),
                Return(vx),
            ),
            # pow(−1, ±∞) is 1 with no exception
            ConditionBlock(
                LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(+1, y) is 1 for any y (even a quiet NaN)
            ConditionBlock(
                vx == 1,
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(x, +∞) is +0 for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +∞ for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +0 for a number y < 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +∞ for a number y > 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(−∞, y) is −0 for finite y < 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy < 0)),
                Return(FP_MinusZero(self.precision)),
            ),
            # pow(−∞, y) is −∞ for finite y > 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy > 0)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusZero(self.precision)),
            ),
            # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
            # TODO: signal divideByZero exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +0 for finite y>0 and not an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusZero(self.precision)),
            ),
        )

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = Statement(
            special_case_results,
            # fallback default cases
            Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
        return result
Esempio n. 19
0
def Mul211(x, y):
    """ Multi-precision multiplication: build the (HI, LO) node pair for x * y

        :param x: left-hand operand node
        :param y: right-hand operand node
        :return: tuple (hi, lo) where hi is the Multiplication node x * y and
            lo is a FusedMultiplyAdd node on (x, y, hi) using the Subtract
            specifier (presumably the rounding residual x * y - hi;
            confirm against the FusedMultiplyAdd specifier semantics)
    """
    product_hi = Multiplication(x, y)
    product_lo = FusedMultiplyAdd(
        x, y, product_hi, specifier=FusedMultiplyAdd.Subtract)
    return product_hi, product_lo
Esempio n. 20
0
    def generate_scalar_scheme(self, vx, n):
        """ Build the operation graph evaluating rootn(x, n) = x^(1/n)

            With x = m.2^e, the generic path evaluates
                2^(log2(m) / n) * 2^nearestint(e / n) * 2^frac(e / n)
            where the integer part of e / n is inserted directly as an
            exponent and the fractional part goes through an exp2 scheme.
            Special cases (NaN, n == 0, n == -1, infinities, zeros, n == 1)
            are filtered by explicit condition blocks before the generic path.

            :param vx: floating-point input node (re-tagged "x")
            :param n: integer root index node (re-tagged "n")
            :return: ConditionBlock node whose branches Return the result
        """
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        n.set_attributes(tag="n")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          n, positive or null integers
        #
        # rootn(x, n) = x^(1/n)
        #             = exp(1/n * log(x))
        #             = 2^(1/n * log2(x))
        #             = 2^(1/n * (log2(m) + e))
        #

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        # when True, divisions by n are replaced by multiplications by 1/n
        use_reciprocal = False

        # non-scaled vx used to compute vx^1 and 1/vx
        unmodified_vx = vx

        # subnormal inputs are scaled up by 2^mantissa_size before the
        # mantissa / exponent split; the exponent is corrected afterwards
        is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal")
        exp_correction_factor = self.precision.get_mantissa_size()
        mantissa_factor = Constant(2**exp_correction_factor,
                                   tag="mantissa_factor")
        vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx")

        m = MantissaExtraction(vx, tag="m", precision=self.precision)
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        e = Select(is_subnormal,
                   e - exp_correction_factor,
                   e,
                   tag="corrected_e")

        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision,
                                                     basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(
            log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(
            Abs(m, precision=self.precision), log_f, inv_approx_table,
            log_table)
        # floating-point version of n
        n_f = Conversion(n, precision=self.precision, tag="n_f")
        # 1/n is always needed: it scales the e/n division error below
        inv_n = Division(Constant(1, precision=self.precision), n_f)

        # force log2(0) = -inf
        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision),
                            log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        # r ~ log2(m) / n
        if use_reciprocal:
            r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi)
        else:
            r = Division(log_approx, n_f, tag="r", debug=debug_multi)

        # e_n ~ e / n
        e_f = Conversion(e, precision=self.precision, tag="e_f")
        if use_reciprocal:
            e_n = Multiplication(e_f, inv_n, tag="e_n")
        else:
            e_n = Division(e_f, n_f, tag="e_n")
        # error_e_n = e - e_n * n, re-injected (scaled by 1/n) into the
        # fractional part of e / n
        error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n")
        e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int")
        pre_e_n_frac = e_n - e_n_int
        pre_e_n_frac.set_attributes(tag="pre_e_n_frac")
        e_n_frac = pre_e_n_frac + error_e_n * inv_n
        e_n_frac.set_attributes(tag="e_n_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)
        exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True)
        exp2_r.set_attributes(tag="exp2_r", debug=debug_multi)

        exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac,
                                                       inline_select=True)
        exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi)

        exp2_e_n_int = ExponentInsertion(Conversion(e_n_int,
                                                    precision=int_precision),
                                         precision=self.precision,
                                         tag="exp2_e_n_int")

        n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi)
        n_is_odd = LogicalNot(n_is_even, tag="n_is_odd")
        result_sign = Select(
            n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        # managing n == -1: 1 / x is computed either by an expanded division
        # meta-function or by a plain Division node
        if self.expand_div:
            ml_division_args = ML_Division.get_default_args(
                precision=self.precision, input_formats=[self.precision] * 2)
            ml_division = ML_Division(ml_division_args)
            self.division_implementation = ml_division.implementation
            self.division_implementation.set_scheme(
                ml_division.generate_scheme())
            ml_division_fct = self.division_implementation.get_function_object(
            )
        else:
            ml_division_fct = Division

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = ConditionBlock(
            # NaN input, n == 0, or even root of a negative value => qNaN
            LogicalOr(LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)),
                      LogicalAnd(n_is_even, vx < 0)),
            Return(FP_QNaN(self.precision)),
            Statement(
                ConditionBlock(
                    # rootn(x, -1) = 1 / x
                    Equal(n, -1, tag="n_is_mone"),
                    Return(
                        ml_division_fct(Constant(1, precision=self.precision),
                                        unmodified_vx,
                                        tag="div_res",
                                        precision=self.precision)),
                ),
                ConditionBlock(
                    # x is ±inf (even n with -inf was already mapped to NaN):
                    #  - n < 0 returns a zero carrying the sign of x
                    #  - otherwise x itself is returned
                    Test(vx, specifier=Test.IsInfty),
                    Statement(
                        ConditionBlock(
                            n < 0,
                            Return(
                                Select(Test(vx,
                                            specifier=Test.IsPositiveInfty),
                                       Constant(FP_PlusZero(self.precision),
                                                precision=self.precision),
                                       Constant(FP_MinusZero(self.precision),
                                                precision=self.precision),
                                       precision=self.precision)),
                            Return(vx),
                        ), ),
                ),
                ConditionBlock(
                    # rootn(±0, n) is ±∞ for odd n < 0.
                    LogicalAnd(LogicalAnd(n_is_odd, n < 0),
                               Equal(vx, 0),
                               tag="n_is_odd_and_neg"),
                    Return(
                        Select(Test(vx, specifier=Test.IsPositiveZero),
                               Constant(FP_PlusInfty(self.precision),
                                        precision=self.precision),
                               Constant(FP_MinusInfty(self.precision),
                                        precision=self.precision),
                               precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn( ±0, n) is +∞ for even n< 0.
                    LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)),
                    Return(FP_PlusInfty(self.precision))),
                ConditionBlock(
                    # rootn(±0, n) is +0 for even n > 0.
                    # The sign of the input zero must NOT be propagated:
                    # -0 reaches this branch (-0 < 0 is false) and returning
                    # vx would yield -0.
                    LogicalAnd(n_is_even, Equal(vx, 0)),
                    Return(FP_PlusZero(self.precision))),
                ConditionBlock(
                    # rootn(x, 1) = x, else generic path
                    Equal(n, 1), Return(unmodified_vx),
                    Return(result_sign * exp2_r * exp2_e_n_int *
                           exp2_e_n_frac))))
        return result
    ra_1 = ReadAccessor(tB, [j, k], precision)

    # ra_0 is dependent from <i>, so vectorization should lead to a VectorLoad/Gather
    vectorized_ra_0 = vectorize_read_accessor(ra_0, i, 4)
    print("{} vectorized into {}".format(ra_0, vectorized_ra_0))
    print("\n----\n")
    # ra_1 is independent from <i>, so vectorization should lead to a broadcast
    vectorized_ra_1 = vectorize_read_accessor(ra_1, i, 4)
    print("{} vectorized into {}".format(ra_1, vectorized_ra_1))
    print("\n----\n")
    vectorized_ra_2 = vectorize_read_accessor(ra_0, k, 4)
    print("{} vectorized into {}".format(ra_0, vectorized_ra_2))
    print("\n----\n")

    offseted_ra_0 = offset_read_accessor(ra_0, {k: 3, i: 7})
    print("{} offset k->3 into {}".format(ra_0, offseted_ra_0))
    print("\n----\n")

    kernel = WriteAccessor(
        tC, [j, i],
        Sum(Multiplication(ReadAccessor(tA, [k, i], precision),
                           ReadAccessor(tB, [j, k], precision),
                           precision=precision),
            IterRange(k, 0, p - 1),
            precision=precision))

    print("kernel is {}".format(kernel))
    vectorized_kernel = vectorize_kernel_value(kernel, j, 4)
    print("vectorized kernel is {}".format(vectorized_kernel))
    print("vectorized kernel expr is {}".format(vectorized_kernel.value_expr))
Esempio n. 22
0
def mll_implementpoly_horner(ctx, poly_object, eps, variable):
    """ generate an implementation of polynomial @p poly_object of @p variable
        whose evaluation error is bounded by @p eps. @p variable must have an
        interval and a precision set

        The polynomial is evaluated with Horner's scheme: a degree-n
        polynomial is built as cst0 + variable * <degree n-1 polynomial>,
        recursively, with the error budget @p eps split between the
        constant, the variable, the multiplication and the addition.

        :param ctx: multi-word precision context to use
        :type ctx: MLL_Context
        :param poly_object: polynomial object to implement
        :type poly_object:
        :param eps: target relative error bound
        :param variable: polynomial input variable
        :type variable: ML_Operation

        :return: <implementation node>, <real relative error>
            (nothing is returned explicitly if the degree is negative and
            the error report does not interrupt execution)
        :rtype: tuple(ML_Operation, SollyaObject)
    """
    if poly_object.degree == 0:
        # constant only
        cst = poly_object.coeff_map[0]
        rounded_cst = ctx.roundConstant(cst, eps)
        cst_format = ctx.computeConstantFormat(rounded_cst)
        # the node must carry the rounded value: cst_format was derived from
        # it, and the other branches also emit their rounded constants
        return Constant(rounded_cst, precision=cst_format), cst_format.epsilon

    elif poly_object.degree == 1:
        # cst0 + cst1 * var
        # final relative error is
        # (cst0 (1 + e0) + cst1 * var (1 + e1) (1 + ev) (1 + em))(1 + ea)
        # (cst0  + e0 * cst0  + cst1 * var (1 + e1 + ev + e1 * ev) (1 + em))(1 + ea)
        # (cst0  + e0 * cst0  + cst1 * var (1 + e1 + ev + e1 * ev + em + e1 * em + ev * em + e1 * ev * em) )(1 + ea)
        # (cst0 + cst1 * var) (1 + ea) (1 + e0 * cst0 + + e1 + ev + e1 * ev + em + e1 * em + ev * em + e1 * ev * em)
        # em is epsilon for the multiplication
        # ea is epsilon for the addition
        cst0 = poly_object.coeff_map[0]
        cst1 = poly_object.coeff_map[1]
        eps_mul = eps / 4
        eps_add = eps / 2

        cst1_rounded = ctx.roundConstant(cst1, eps / 4)
        cst1_format = ctx.computeConstantFormat(cst1_rounded)
        cst0_rounded = ctx.roundConstant(cst0, eps / 4)
        cst0_format = ctx.computeConstantFormat(cst0_rounded)

        eps_var = eps / 4
        var_format = ctx.computeNeededVariableFormat(variable.interval,
                                                     eps_var,
                                                     variable.precision)
        var_node = legalize_node_format(variable, var_format)
        mul_format = ctx.computeOutputFormatMultiplication(
            eps_mul, cst1_format, var_format)
        add_format = ctx.computeOutputFormatAddition(eps_add, cst0_format,
                                                     mul_format)

        return Addition(
            Constant(cst0_rounded, precision=cst0_format),
            Multiplication(Constant(cst1_rounded, precision=cst1_format),
                           var_node,
                           precision=mul_format),
            precision=add_format), add_format.epsilon  # TODO: local error only

    elif poly_object.degree > 1:
        # cst0 + var * poly
        cst0 = poly_object.coeff_map[0]
        cst0_rounded = ctx.roundConstant(cst0, eps / 4)
        cst0_format = ctx.computeConstantFormat(cst0_rounded)

        eps_var = eps / 4
        var_format = ctx.computeNeededVariableFormat(variable.interval,
                                                     eps_var,
                                                     variable.precision)
        var_node = legalize_node_format(variable, var_format)

        # remaining polynomial: coefficients of index >= 1, shifted down by one
        sub_poly = poly_object.sub_poly(start_index=1, offset=1)
        eps_poly = eps / 4
        # the accuracy reported by the recursive call is currently unused
        poly_node, poly_accuracy = mll_implementpoly_horner(
            ctx, sub_poly, eps_poly, variable)

        eps_mul = eps / 4
        mul_format = ctx.computeOutputFormatMultiplication(
            eps_mul, var_format, poly_node.precision)

        eps_add = eps / 4
        add_format = ctx.computeOutputFormatAddition(eps_add, cst0_format,
                                                     mul_format)

        return Addition(
            Constant(cst0_rounded, precision=cst0_format),
            Multiplication(var_node, poly_node, precision=mul_format),
            precision=add_format), add_format.epsilon  # TODO: local error only
    else:
        Log.report(Log.Error, "poly degree must be positive or null. {}, {}",
                   poly_object.degree, poly_object)
Esempio n. 23
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise approximation

        The interval [bound_low, bound_high] is divided into num_intervals
        sub-intervals of equal size; one polynomial is computed per
        sub-interval and its coefficients are stored in a constant table
        indexed at evaluation time.

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the main interval
        :param max_degree: maximum degree for an approximation on any
            sub-interval (if None, the maximum of the degrees yielded by
            piecewise_approximation_degree_generator is used)
        :param error_threshold: error bound for an approximation on any sub-interval
        :param odd: only used on a sub-interval whose lower bound is a zero of
            the function: restricts the monomials used for function(x) / x
            to every other degree, starting from degree 0
        :param even: same as odd, but starting from degree 1

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and error
            is the maximum approximation error encountered
        :rtype tuple(ML_Operation, SollyaObject): """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # function translated so that the sub-interval lower bound maps to 0
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            # function(x) / x is approximated, then multiplied back by x:
            # its degree must not exceed (degree - 1), otherwise the leading
            # coefficient of the final polynomial would not fit in the
            # (max_degree + 1)-wide table row and would be silently dropped
            local_poly_degree_list = list(
                range(1 if even else 0, degree, 2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(cst_value, local_function,
                                                     local_interval,
                                                     sollya.absolute, accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    # Log.Error is the level name used by the other reports
                    # (Log.Warning / Log.Info / Log.Error)
                    Log.report(
                        Log.Error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)
        # filling the table row, padding missing coefficients with zeros
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    # clamped to [0, num_intervals - 1]
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio, precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    # polynomial input: offset of the variable from the start of the
    # selected sub-interval
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial (coefficients listed by decreasing degree)
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
Esempio n. 24
0
    def generate_scheme(self):
        """ Build the evaluation scheme of the meta-function

            The generated code takes three inputs ("dst", "src", "len") and
            runs two loops over the len elements:
             1. dst[i] = exp(src[i]), accumulating the sum of all exponentials
             2. dst[i] = dst[i] * (1 / sum)
            (a softmax-style normalisation)

            :return: Statement node chaining the accumulator initialisation,
                the exponential loop, the reciprocal of the sum and the
                scaling loop
        """
        # formats of the array pointers (input 0) and of the length (input 2)
        ptr_format = self.get_input_precision(0)
        idx_format = self.get_input_precision(2)

        # function arguments, declared in signature order
        dst_ptr = self.implementation.add_input_variable("dst", ptr_format)
        src_ptr = self.implementation.add_input_variable("src", ptr_format)
        length = self.implementation.add_input_variable("len", idx_format)

        # loop counter and associated constants
        idx = Variable("i", precision=idx_format, var_type=Variable.Local)
        one = Constant(1, precision=idx_format)
        zero = Constant(0, precision=idx_format)
        next_idx = idx + one

        src_elt = TableLoad(src_ptr, idx, precision=self.precision)

        # local variable receiving exp(src[i])
        exp_value = Variable("local_exp",
                             precision=self.precision,
                             var_type=Variable.Local)

        if not self.use_libm_function:
            # inlining the scheme of a generated exponential
            exp_args = ML_Exponential.get_default_args(
                precision=self.precision,
                libm_compliant=False,
                debug=False,
            )
            exp_meta_function = ML_Exponential(exp_args)
            exp_scheme = exp_meta_function.generate_scheme()

            exp_stmt = inline_function(
                exp_scheme,
                exp_value,
                {exp_meta_function.implementation.arg_list[0]: src_elt},
            )
        else:
            # calling libm's expf
            # NOTE(review): operator is hard-wired to ML_Binary32 whatever
            # self.precision is -- confirm this is intended
            expf_operator = FunctionOperator("expf", arity=1)
            expf_function = FunctionObject("expf", [ML_Binary32], ML_Binary32,
                                           expf_operator)
            exp_stmt = ReferenceAssign(exp_value, expf_function(src_elt))

        # running sum of the exponentials
        exp_sum = Variable("elt_acc",
                           precision=self.precision,
                           var_type=Variable.Local)

        # first loop: dst[i] = exp(src[i]); elt_acc += exp(src[i])
        fill_init = ReferenceAssign(idx, zero)
        fill_cond = idx < length
        fill_body = Statement(
            ReferenceAssign(exp_value, 0),
            exp_stmt,
            TableStore(exp_value, dst_ptr, idx, precision=ML_Void),
            ReferenceAssign(exp_sum, exp_sum + exp_value),
            ReferenceAssign(idx, idx + one),
        )
        fill_loop = Loop(fill_init, fill_cond, fill_body)

        # reciprocal of the accumulated sum
        inv_sum = Division(1,
                           exp_sum,
                           precision=self.precision,
                           tag="sum_rcp",
                           debug=debug_multi)

        # second loop: dst[i] = dst[i] * (1 / sum)
        scale_init = ReferenceAssign(idx, zero)
        scale_cond = idx < length
        scaled_elt = Multiplication(
            TableLoad(dst_ptr, idx, precision=self.precision), inv_sum)
        scale_body = Statement(
            TableStore(scaled_elt, dst_ptr, idx, precision=ML_Void),
            ReferenceAssign(idx, next_idx),
        )
        scale_loop = Loop(scale_init, scale_cond, scale_body)

        return Statement(ReferenceAssign(exp_sum, 0), fill_loop, inv_sum,
                         scale_loop)