Beispiel #1
0
    def generate_scheme(self):
        """Build a scheme exercising table loads and a table store.

        A 16x2 table is filled so that row ``i`` holds ``(i, i + 1)``. Both
        cells of the row selected by ``Modulo(x, 16)`` are loaded and added;
        the sum is stored into cell 13 of an empty 16-entry output table and
        the returned value is loaded back from that same cell.
        """
        vx = self.implementation.add_input_variable("x", self.precision)

        table_size = 16
        row_size = 2

        def int32_cst(value):
            # shorthand for a 32-bit integer constant node
            return Constant(value, precision=ML_Int32)

        new_table = ML_NewTable(
            dimensions=[table_size, row_size],
            storage_precision=self.precision)
        for row_id in range(table_size):
            for col_id in range(row_size):
                # column 0 holds row_id, column 1 holds row_id + 1
                new_table[row_id][col_id] = row_id + col_id

        index = Modulo(vx, int32_cst(table_size), precision=ML_Int32)
        # the generator is consumed in order: column 0 is built before column 1
        load_value_lo, load_value_hi = (
            TableLoad(new_table,
                      index,
                      int32_cst(col_id),
                      precision=self.precision)
            for col_id in (0, 1))

        interval_message = "table interval: {}".format(
            new_table.get_interval())
        Log.report(Log.Info, interval_message)

        out_table = ML_NewTable(
            dimensions=[table_size],
            storage_precision=self.precision,
            empty=True)

        result = Addition(
            load_value_lo, load_value_hi, precision=self.precision)

        # the same output cell is written, then read back for the return
        out_slot = 13
        store_op = TableStore(
            result, out_table, int32_cst(out_slot), precision=ML_Void)
        read_back = TableLoad(
            out_table, int32_cst(out_slot), precision=self.precision)
        return Statement(store_op, Return(read_back, precision=self.precision))
Beispiel #2
0
    def generate_scheme(self):
        """Build a scheme exercising pointer dereference and pointer casts.

        Three assignments are generated, in this order:
          - the square of ``x`` is assigned through the dereferenced ``px``;
          - ``px`` is type-cast to ``ML_Binary64_p`` and assigned to the local
            variable ``py``;
          - a 16x2 table (row ``i`` holds ``(i, i + 1)``) is type-cast to
            ``ML_Binary64_p`` and assigned to the local variable ``pz``.
        """
        vx = self.implementation.add_input_variable("x", ML_Binary32)
        px = self.implementation.add_input_variable("px", ML_Binary32_p)

        # *px = x * x
        square = vx * vx
        px_target = Dereference(px, precision=ML_Binary32)
        px_assign = ReferenceAssign(px_target, square)

        # py = (binary64 pointer) px
        py = Variable("py", precision=ML_Binary64_p, vartype=Variable.Local)
        px_as_binary64_p = TypeCast(px, precision=ML_Binary64_p)
        py_assign = ReferenceAssign(py, px_as_binary64_p)

        table_size, row_size = 16, 2
        new_table = ML_NewTable(
            dimensions=[table_size, row_size],
            storage_precision=self.precision)
        for row_id in range(table_size):
            for col_id in range(row_size):
                new_table[row_id][col_id] = row_id + col_id

        # pz = (binary64 pointer) new_table
        pz = Variable(
            "pz",
            precision=ML_Pointer_Format(self.precision),
            vartype=Variable.Local)
        table_as_pointer = TypeCast(new_table, precision=ML_Binary64_p)
        pz_assign = ReferenceAssign(pz, table_as_pointer)

        return Statement(px_assign, py_assign, pz_assign)
Beispiel #3
0
    def generate_scheme(self):
        """Build a scheme mixing integer index arithmetic, a table load and
        floating-point operations.

        The table index is ``NearestInteger(x)`` masked to ``index_size`` bits
        and then shifted right by one. The returned value is the loaded table
        entry multiplied by a fused multiply-add (``Subtract`` specifier)
        whose operands are ``1.1 + Conversion(index * (index + 7))``,
        ``(x + x) * x`` and ``x + x``.
        """
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        # floating-point sub-expressions reused by the final result
        add_xx = Addition(vx, vx, precision=self.precision)
        mult = Multiplication(add_xx, vx, precision=self.precision)
        cst = Constant(1.1, precision=self.precision)

        index_size = 4
        table_size = 2**index_size

        # identity table: entry i holds the value i
        table = ML_NewTable(
            dimensions=[table_size], storage_precision=self.precision)
        for entry in range(table_size):
            table[entry] = entry

        rounded_vx = NearestInteger(vx, precision=ML_Int32)
        # masking with (2**index_size - 1) keeps the index_size low bits
        index_mask = Constant(2**index_size - 1, precision=ML_Int32)
        masked_index = BitLogicAnd(rounded_vx, index_mask, precision=ML_Int32)
        shift_amount = Constant(1, precision=ML_Int32)
        index = BitLogicRightShift(
            masked_index, shift_amount, precision=ML_Int32)

        table_value = TableLoad(table, index, precision=self.precision)

        # integer sub-tree: index * (index + 7)
        index_plus_7 = Addition(
            index, Constant(7, precision=ML_Int32), precision=ML_Int32)
        int_tree = Multiplication(index, index_plus_7, precision=ML_Int32)

        fma_first_operand = Addition(
            cst,
            Conversion(int_tree, precision=self.precision),
            precision=self.precision)
        fma = FusedMultiplyAdd(
            fma_first_operand,
            mult,
            add_xx,
            specifier=FusedMultiplyAdd.Subtract,
            precision=self.precision)
        result = Multiplication(
            table_value, fma, precision=self.precision, tag="result")

        return Return(result, precision=self.precision, debug=debug_multi)
Beispiel #4
0
    def generate_log_table(self, log_f, inv_approx_table):
        """Generate two constant 2-column tables of hi/lo split log_f values.

        For every index ``i`` in ``range(1, 2**index_size)``:
            log_table[i]     = 2-word unevaluated sum approximating
                               log_f(inv_approx_table[i])
            log_table_tho[i] = 2-word unevaluated sum approximating
                               log_f(2 * inv_approx_table[i])
        Row 0 of both tables is set to (0.0, 0.0).

        The high word is rounded to ``field_size - (exponent_size + 1)`` bits
        of ``self.precision``; the low word is the remainder rounded to the
        sollya object of the input precision.

        :param log_f: callable applied to each table value before splitting
        :param inv_approx_table: indexable table exposing ``index_size``
        :return: tuple ``(log_table, log_table_tho, table_index_range)``
        """
        sollya_precision = self.get_input_precision().get_sollya_object()

        # table creation
        table_index_size = inv_approx_table.index_size
        table_length = 2**table_index_size
        table_index_range = range(1, table_length)
        log_table = ML_NewTable(dimensions=[table_length, 2],
                                storage_precision=self.precision,
                                const=True)
        log_table_tho = ML_NewTable(dimensions=[table_length, 2],
                                    storage_precision=self.precision,
                                    const=True)
        # index 0 is not covered by table_index_range: both words are zero
        for table in (log_table, log_table_tho):
            table[0][0] = 0.0
            table[0][1] = 0.0

        hi_size = self.precision.get_field_size() - (
            self.precision.get_exponent_size() + 1)

        def split_hi_lo(value):
            # evaluate log_f once, then split it into a hi + lo pair
            log_value = log_f(value)
            high = round(log_value, hi_size, sollya.RN)
            low = round(log_value - high, sollya_precision, sollya.RN)
            return high, low

        for i in table_index_range:
            inv_value = inv_approx_table[i]

            value_high, value_low = split_hi_lo(inv_value)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

            value_high_tho, value_low_tho = split_hi_lo(S2 * inv_value)
            log_table_tho[i][0] = value_high_tho
            log_table_tho[i][1] = value_low_tho

        return log_table, log_table_tho, table_index_range
Beispiel #5
0
    def generate_scheme(self):
        """Build a scheme loading a BFloat16 table entry and converting it.

        The table has ``self.table_size`` entries, entry ``i`` being
        ``1.1**i``. The entry selected by the input ``x`` is converted to
        ``ML_Binary32`` and returned.
        """
        # declaring function input variable
        input_format = self.get_input_precision(0)
        vx = self.implementation.add_input_variable("x", input_format)

        bf16_params = ML_NewTable(
            dimensions=[self.table_size],
            storage_precision=BFloat16)
        for exponent in range(self.table_size):
            bf16_params[exponent] = 1.1**exponent

        loaded_param = TableLoad(bf16_params, vx)
        conv_vx = Conversion(
            loaded_param,
            precision=ML_Binary32,
            tag="conv_vx",
            debug=debug_multi)

        return Return(conv_vx, precision=self.precision, debug=debug_multi)
Beispiel #6
0
def generate_1d_table(dim,
                      storage_precision,
                      tag,
                      value_gen=lambda index: None,
                      empty=False,
                      const=True):
    """Create a 1D ML_NewTable of @p dim entries filled by @p value_gen.

    Entry ``i`` is set to ``value_gen(i)`` for every ``i`` in ``range(dim)``;
    @p storage_precision, @p tag, @p const and @p empty are forwarded to the
    ML_NewTable constructor. The filled table is returned.
    """
    table_options = {
        "storage_precision": storage_precision,
        "tag": tag,
        "const": const,
        "empty": empty,
    }
    table = ML_NewTable(dimensions=[dim], **table_options)
    # map() is lazy: each value is generated right before it is stored
    for position, value in enumerate(map(value_gen, range(dim))):
        table[position] = value
    return table
Beispiel #7
0
def generate_2d_table(dim0,
                      dim1,
                      storage_precision,
                      tag,
                      value_gen=(lambda index0: None),
                      const=True):
    """Create a @p dim0 x @p dim1 ML_NewTable filled row by row.

    @p value_gen is called once per row index and must return an object
    indexable from 0 to ``dim1 - 1``; its items become the cells of that
    row. The filled table is returned.
    """
    table = ML_NewTable(
        dimensions=[dim0, dim1],
        storage_precision=storage_precision,
        const=const,
        tag=tag,
    )
    column_ids = range(dim1)
    for row_id in range(dim0):
        # one generator call per row, then a cell-by-cell copy
        generated_row = value_gen(row_id)
        for col_id in column_ids:
            table[row_id][col_id] = generated_row[col_id]
    return table
Beispiel #8
0
def generate_2d_multi_table(size_offset_list,
                            dim1,
                            storage_precision,
                            tag,
                            value_gen=lambda table_index, sub_row_index: None):
    """Create a 2D ML_NewTable packing several sub-arrays along dimension 0.

    @p size_offset_list describes the sub-arrays as (size, offset) pairs; it
    must expose ``dimensions``, support integer indexing and be iterable.
    The first dimension of the result is the sum of all sub-array sizes and
    the second dimension is @p dim1.

    Rows are generated one at a time: ``value_gen(table_index, i0)`` returns
    the row stored at ``offset + i0`` for sub-array number ``table_index``.
    The filled table is returned.
    """
    # table first dimension is the sum of each sub-array size
    sub_array_count = size_offset_list.dimensions[0]
    dim0 = 0
    for sub_id in range(sub_array_count):
        dim0 += size_offset_list[sub_id][0]

    table = ML_NewTable(
        dimensions=[dim0, dim1],
        storage_precision=storage_precision,
        tag=tag,
    )
    column_ids = range(dim1)
    for table_index, (size, offset) in enumerate(size_offset_list):
        for sub_row_index in range(size):
            generated_row = value_gen(table_index, sub_row_index)
            for col_id in column_ids:
                table[offset + sub_row_index][col_id] = generated_row[col_id]
    return table
Beispiel #9
0
    def generate_scalar_scheme(self, vx, inline_select=False):
        """Build the scalar implementation scheme of 2**x for input @p vx.

        The input is split as ``x = floor(x) + r_hi * 2**-index_size + r_lo``:
          - ``2**floor(x)`` is built by exponent insertion,
          - ``2**(r_hi * 2**-index_size)`` is read from a constant table,
          - ``2**r_lo - 1`` is approximated by a polynomial evaluated with a
            Horner scheme.
        Overflow, underflow and subnormal results are handled separately.

        :param vx: input operand node
        :param inline_select: when True the scheme is returned as a nested
            ``Select`` expression; otherwise as a ``Statement`` made of
            ``ConditionBlock`` and ``Return`` nodes
        :return: the generated scheme
        """
        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # r_interval = Interval(0, 1.0)
        index_size = 3
        # hi_part_table holds one entry per value of r_hi in
        # [0, 2**index_size], hence the extra entry
        hi_table_size = 2**index_size + 1
        r_interval = Interval(-2**(-index_size), 2**-index_size)

        local_ulp = sup(ulp(2**r_interval, self.precision))
        Log.report(Log.Info, "ulp: ", local_ulp)
        error_goal = S2**-1 * local_ulp
        Log.report(Log.Info, "error goal: ", error_goal)

        # only ML_Binary32 and ML_Binary64 are supported (KeyError otherwise)
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # Argument Reduction
        # r = x - floor(x), r >= 0
        vx_floor = Floor(vx,
                         precision=self.precision,
                         tag='vx_floor',
                         debug=debug_multi)
        vx_int = Conversion(vx_floor,
                            precision=int_precision,
                            tag="vx_int",
                            debug=debug_multi)
        vx_intf = vx_floor  # Conversion(vx_int, precision = self.precision)
        vx_r = vx - vx_intf
        r_hi = NearestInteger(vx_r * 2**index_size,
                              precision=self.precision,
                              tag="r_hi",
                              debug=debug_multi)
        # clamping r_hi_int within the valid index range of hi_part_table,
        # i.e. [0, hi_table_size - 1], to make sure it never exceeds the
        # table bounds when used to index it
        r_hi_int = Max(
            Min(
                Conversion(r_hi,
                           precision=int_precision,
                           tag="r_hi_int",
                           debug=debug_multi), hi_table_size - 1), 0)
        r_lo = vx_r - r_hi * 2**-index_size
        r_lo.set_attributes(tag="r_lo", debug=debug_multi)
        vx_r.set_attributes(tag="vx_r", debug=debug_multi)
        degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2
        precision_list = [1] + [self.precision] * degree

        exp_X = ExponentInsertion(vx_int,
                                  tag="exp_X",
                                  debug=debug_multi,
                                  precision=self.precision)

        #Polynomial Approx
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x) - 1, degree, precision_list, r_interval,
            sollya.absolute)
        Log.report(Log.Info, "Poly : %s" % poly_object)
        Log.report(Log.Info, "poly_error : ", poly_error)
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         r_lo,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        # hi_part_table[i] = 2**(i * 2**-index_size) rounded to self.precision
        hi_part_table = ML_NewTable(dimensions=[hi_table_size],
                                    storage_precision=self.precision,
                                    tag=self.uniquify_name("exp2_table"),
                                    const=True)
        for i in range(hi_table_size):
            input_value = i * 2**-index_size
            tab_value = self.precision.round_sollya_object(
                sollya.SollyaObject(2)**(input_value))
            hi_part_table[i] = tab_value

        hi_part_value = TableLoad(hi_part_table,
                                  r_hi_int,
                                  precision=self.precision,
                                  tag="hi_part_value",
                                  debug=debug_multi)

        #Handling special cases
        oflow_bound = Constant(self.precision.get_emax() + 1,
                               precision=self.precision)
        subnormal_bound = self.precision.get_emin_subnormal()
        uflow_bound = self.precision.get_emin_normal()
        Log.report(Log.Info, "oflow : ", oflow_bound)
        #print "uflow : ", uflow_bound
        #print "sub : ", subnormal_bound
        test_overflow = Comparison(vx,
                                   oflow_bound,
                                   specifier=Comparison.GreaterOrEqual)
        test_overflow.set_attributes(tag="oflow_test",
                                     debug=debug_multi,
                                     likely=False,
                                     precision=ML_Bool)

        test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less)
        test_underflow.set_attributes(tag="uflow_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        test_subnormal = Comparison(vx,
                                    subnormal_bound,
                                    specifier=Comparison.Greater)
        test_subnormal.set_attributes(tag="sub_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        # subnormal results are rebuilt from two exponent insertions:
        # 2**(vx_int - uflow_bound) and 2**uflow_bound
        subnormal_offset = -(uflow_bound - vx_int)
        subnormal_offset.set_attributes(tag="offset", debug=debug_multi)
        exp_offset = ExponentInsertion(subnormal_offset,
                                       precision=self.precision,
                                       debug=debug_multi,
                                       tag="exp_offset")
        exp_min = ExponentInsertion(uflow_bound,
                                    precision=self.precision,
                                    debug=debug_multi,
                                    tag="exp_min")
        subnormal_result = hi_part_value * exp_offset * exp_min * poly + hi_part_value * exp_offset * exp_min

        # non-standard path: input is in the overflow or underflow range
        test_std = LogicalOr(test_overflow,
                             test_underflow,
                             precision=ML_Bool,
                             tag="std_test",
                             likely=False,
                             debug=debug_multi)

        #Reconstruction
        result = hi_part_value * exp_X * poly + hi_part_value * exp_X
        result.set_attributes(tag="result", debug=debug_multi)

        C0 = Constant(0, precision=self.precision)

        if inline_select:
            scheme = Select(
                test_std,
                Select(test_overflow, FP_PlusInfty(self.precision),
                       Select(
                           test_subnormal,
                           subnormal_result,
                           C0,
                       )),
                result,
            )
            return scheme

        else:
            return_inf = Return(FP_PlusInfty(self.precision))
            return_C0 = Return(C0)
            return_sub = Return(subnormal_result)
            return_std = Return(result)

            non_std_statement = Statement(
                ConditionBlock(
                    test_overflow, return_inf,
                    ConditionBlock(test_subnormal, return_sub, return_C0)))

            scheme = Statement(
                ConditionBlock(test_std, non_std_statement, return_std))

            return scheme
Beispiel #10
0
    def generate_scheme(self):
        """Build the implementation scheme of a table-based logarithm.

        The scheme is built from sollya `log` values: a 2-column (hi, lo)
        table of log(inv_approx_table[i]) is generated, the input mantissa is
        reduced with a reciprocal seed, and a polynomial approximating
        log(1 + x) / x is evaluated on the reduced argument.

        The returned scheme is a nest of ConditionBlock nodes testing, in
        order: negative input, infinity or NaN, subnormal (with a nested
        zero test), input equal to 1.0, and exponent equal to -1; the
        default branch returns the generic result.
        """
        vx = self.implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # constant computation
        invlog2 = round(1 / log(2), sollya_precision, sollya.RN)
        invlog2_cst = Constant(invlog2, precision=self.precision)

        #v_log2_hi = round(log(2), 16, sollya.RN)
        #v_log2_lo = round(log(2) - v_log2_hi, sollya_precision, sollya.RN)

        #log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi")
        #log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # NOTE(review): the four tests below and return_snan are built here
        # but are not referenced by the scheme returned at the end of this
        # function
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # log(2) split into a high word rounded to
        # field_size - (exponent_size + 1) bits and a low word holding the
        # remainder rounded to the working precision
        v_log2_hi = round(
            log(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), sollya.RN)
        v_log2_lo = round(
            log(2) - v_log2_hi, self.precision.sollya_object, sollya.RN)
        log2_hi = Constant(v_log2_hi, precision=self.precision, tag="log2_hi")
        log2_lo = Constant(v_log2_lo, precision=self.precision, tag="log2_lo")

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debug_multi)

        int_precision = self.precision.get_integer_format()

        # table creation
        table_index_size = 7
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("inv_table"))
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # only ML_Binary32 and ML_Binary64 are supported (KeyError otherwise)
        integer_precision = {
            ML_Binary32: ML_UInt32,
            ML_Binary64: ML_UInt64
        }[self.precision]

        # log_table[i] = (hi, lo) split of log(inv_approx_table[i]),
        # using the same hi/lo word sizes as for log(2) above
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            inv_value = inv_approx_table[
                i]  # (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            value_high = round(
                log(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        # Build the logarithm evaluation graph for operand _vx.
        # exp_corr_factor, when given, is added to the extracted exponent
        # before it is converted to floating-point.
        # Returns the tuple
        # (result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one).
        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_multi,
                                          precision=self.precision)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            # table index: mantissa bits shifted right by (field_size - 7)
            # and masked with 0x7f
            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debug_multi),
                self.precision.get_field_size() - 7,
                debug=debug_multi),
                                      0x7f,
                                      tag="table_index",
                                      debug=debug_multi)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            # the reciprocal seed is reinterpreted as an integer, ANDed with
            # the constant -2 and cast back to the floating-point format
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=integer_precision),
                Constant(-2, precision=integer_precision),
                precision=integer_precision),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            # table index 0 uses 1.0 as reduction factor (log_table[0] is 0)
            arg_red_index = Select(Equal(table_index, 0), 1.0,
                                   pre_arg_red_index)

            #_red_vx        = arg_red_index * _vx_mant - 1.0
            _red_vx = FusedMultiplyAdd(arg_red_index,
                                       _vx_mant,
                                       1.0,
                                       specifier=FusedMultiplyAdd.Subtract)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_multi)

            inv_err = S2**-7
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_multi)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_multi)

            Log.report(Log.Verbose, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + sollya.x) / sollya.x, poly_degree,
                [1] + [self.precision] * (poly_degree), approx_interval,
                sollya.absolute)
            # the degree-0 coefficient is dropped: _red_vx is added
            # separately in the reconstruction below
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
            _poly = PolynomialSchemeEvaluator.generate_estrin_scheme(
                poly_object, _red_vx, unified_precision=self.precision)

            _poly.set_attributes(tag="poly", debug=debug_multi)

            corr_exp = Conversion(
                _vx_exp if exp_corr_factor == None else _vx_exp +
                exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_multi)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + (_red_vx +
                                         (_red_vx * _poly +
                                          (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_multi)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                             debug=debug_multi)
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part", debug=debug_multi)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_multi)
            #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part = ((red_vx_lo +
                                    (red_vx_lo * _poly +
                                     (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_multi)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            return exact_log2_hi_exp + pre_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one

        result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one = compute_log(
            vx)
        result.set_attributes(tag="result", debug=debug_multi)
        new_result_one.set_attributes(tag="new_result_one", debug=debug_multi)

        # special-case predicates used by the main scheme
        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debug_multi,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debug_multi)

        # exp=-1 case
        Log.report(Log.Verbose, "managing exp=-1 case")

        result2 = (-log_inv_hi - log2_hi) + (
            (red_vx + poly * red_vx) - log2_lo - log_inv_lo)
        result2.set_attributes(tag="result2", debug=debug_multi)

        # subnormal inputs: the operand is multiplied by 2**100 and the
        # extracted exponent is corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _, _ = compute_log(vx * S2100,
                                                      exp_corr_factor=m100)

        # NOTE(review): result_one and cond_one are built below but only
        # appear in the commented-out part of the main scheme
        Log.report(Log.Verbose, "managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_multi)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one",
                                debug=debug_multi,
                                likely=False)

        # main scheme
        # NOTE(review): the zero test is nested inside the subnormal test, so
        # the divide-by-zero branch is reached only if Test.IsSubnormal also
        # holds for a zero input -- confirm against the Test semantics
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))
                        #ConditionBlock(cond_one,
                        #Return(new_result_one),
                        #ConditionBlock(exp_mone,
                        #Return(result2),
                        #Return(result)
                        #)
                        #)
                    ))))
        scheme = pre_scheme

        return scheme
Beispiel #11
0
    def generate_scheme(self):
        """Build the two-table approximation of self.function on self.interval.

        The input signal "x" is rebased to the lower bound of the interval
        and converted to an unsigned fixed-point value with
        alpha + beta + gamma fractional bits.  That value is sliced into
        three indexes (alpha: most significant, beta: middle, gamma: least
        significant) which address two tables:
          - the initial-value table, indexed by (alpha, beta);
          - the offset table, indexed by (alpha, gamma).
        The output signal "vr_out" is the sum of both table loads converted
        to the I/O precision.

        The maximal approximation error over every table point is printed
        for information, together with the total number of table entries.

        Returns:
            a single-element list containing self.implementation
        """
        io_precision = self.precision

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input (declared as an entity input, not read below)
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        # size of most significant table index (for linear slope tabulation)
        alpha = self.alpha  # 6
        # size of medium significant table index (for initial value table index LSB)
        beta = self.beta  # 5
        # size of least significant table index (for linear offset tabulation)
        gamma = self.gamma  # 5

        guard_bits = self.guard_bits  # 3

        vx.set_interval(self.interval)

        range_hi = sollya.sup(self.interval)
        range_lo = sollya.inf(self.interval)
        f_hi = self.function(range_hi)
        f_lo = self.function(range_lo)
        # fixed by format used for reduced_x: the interval width must be an
        # exact power of two so that it can be removed by a plain shift
        range_size = range_hi - range_lo
        range_size_log2 = int(sollya.log2(range_size))
        assert 2**range_size_log2 == range_size

        print("range_size_log2={}".format(range_size_log2))

        reduced_x = Conversion(BitLogicRightShift(vx - range_lo,
                                                  range_size_log2),
                               precision=fixed_point(0,
                                                     alpha + beta + gamma,
                                                     signed=False),
                               tag="reduced_x",
                               debug=debug_fixed)

        alpha_index = get_fixed_slice(reduced_x,
                                      0,
                                      alpha - 1,
                                      align_hi=FixedPointPosition.FromMSBToLSB,
                                      align_lo=FixedPointPosition.FromMSBToLSB,
                                      tag="alpha_index",
                                      debug=debug_std)
        gamma_index = get_fixed_slice(reduced_x,
                                      gamma - 1,
                                      0,
                                      align_hi=FixedPointPosition.FromLSBToLSB,
                                      align_lo=FixedPointPosition.FromLSBToLSB,
                                      tag="gamma_index",
                                      debug=debug_std)

        beta_index = get_fixed_slice(reduced_x,
                                     alpha,
                                     gamma,
                                     align_hi=FixedPointPosition.FromMSBToLSB,
                                     align_lo=FixedPointPosition.FromLSBToLSB,
                                     tag="beta_index",
                                     debug=debug_std)

        # Assuming monotonic function: extrema are reached on the bounds
        f_absmax = max(abs(f_hi), abs(f_lo))
        f_absmin = min(abs(f_hi), abs(f_lo))

        f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
        f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
        storage_lsb = f_lsb - io_precision.get_bit_size() - guard_bits

        f_int_size = f_msb
        f_frac_size = -storage_lsb

        storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
        Log.report(Log.Info, "storage_format is {}".format(storage_format))

        # table of initial value index
        tiv_index = Concatenation(alpha_index,
                                  beta_index,
                                  tag="tiv_index",
                                  debug=debug_std)
        # table of offset value index
        to_index = Concatenation(alpha_index,
                                 gamma_index,
                                 tag="to_index",
                                 debug=debug_std)

        tiv_index_size = alpha + beta
        to_index_size = alpha + gamma

        Log.report(Log.Info, "initial table structures")
        table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                               storage_precision=storage_format,
                               tag="tiv")
        table_offset = ML_NewTable(dimensions=[2**to_index_size],
                                   storage_precision=storage_format,
                                   tag="to")

        slope_table = [None] * (2**alpha)
        slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
        delta_u = range_size * slope_delta * 2**-15
        Log.report(Log.Info, "computing slope value")
        for i in range(2**alpha):
            # slope is computed at the middle of range_size interval
            slope_x = range_lo + (i + 0.5) * range_size * slope_delta
            # TODO: gross approximation of derivatives (central difference)
            f_xpu = self.function(slope_x + delta_u / 2)
            f_xmu = self.function(slope_x - delta_u / 2)
            slope_table[i] = (f_xpu - f_xmu) / delta_u

        range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
        Log.report(Log.Info, "computing value for initial-value table")
        for i in range(2**tiv_index_size):
            # the alpha MSBs of the initial-value index select the slope
            slope_index = i // 2**beta
            iv_x = range_lo + i * range_rcp_steps * range_size
            offset_x = 0.5 * range_rcp_steps * range_size
            # initial value is computed so that the piecewise linear
            # approximation intersects the function at iv_x + offset_x
            iv_y = self.function(
                iv_x + offset_x) - offset_x * slope_table[slope_index]
            table_iv[i] = storage_format.round_sollya_object(iv_y)

        # determining table of initial value interval
        tiv_min = table_iv[0]
        tiv_max = table_iv[0]
        for i in range(1, 2**tiv_index_size):
            tiv_min = min(tiv_min, table_iv[i])
            tiv_max = max(tiv_max, table_iv[i])
        table_iv.set_interval(Interval(tiv_min, tiv_max))

        offset_step = range_size / S2**(alpha + beta + gamma)
        for i in range(2**alpha):
            Log.report(Log.Info,
                       "computing offset value for sub-table {}".format(i))
            for j in range(2**gamma):
                to_i = i * 2**gamma + j
                table_offset[to_i] = slope_table[i] * j * offset_step

        # determining table of offset interval
        to_min = table_offset[0]
        to_max = table_offset[0]
        for i in range(1, 2**(alpha + gamma)):
            to_min = min(to_min, table_offset[i])
            to_max = max(to_max, table_offset[i])
        offset_interval = Interval(to_min, to_max)
        table_offset.set_interval(offset_interval)

        initial_value = TableLoad(table_iv,
                                  tiv_index,
                                  precision=storage_format,
                                  tag="initial_value",
                                  debug=debug_fixed)

        # the offset table gets its own (narrower) format, derived from the
        # interval actually covered by the offsets
        offset_precision = get_fixed_type_from_interval(offset_interval, 16)
        print("offset_precision is {} ({} bits)".format(
            offset_precision, offset_precision.get_bit_size()))
        table_offset.get_precision().storage_precision = offset_precision

        # rounding table value: every entry (index 0 included) is rounded so
        # that the whole table is stored in offset_precision
        for i in range(2**(alpha + gamma)):
            table_offset[i] = offset_precision.round_sollya_object(
                table_offset[i])

        offset_value = TableLoad(table_offset,
                                 to_index,
                                 precision=offset_precision,
                                 tag="offset_value",
                                 debug=debug_fixed)

        Log.report(
            Log.Verbose,
            "initial_value's interval: {}, offset_value's interval: {}".format(
                evaluate_range(initial_value), evaluate_range(offset_value)))

        # TODO: no rounding bit is added before the final conversion
        final_add = initial_value + offset_value

        vr_out = Conversion(final_add,
                            precision=io_precision,
                            tag="vr_out",
                            debug=debug_fixed)

        self.implementation.add_output_signal("vr_out", vr_out)

        # Approximation error evaluation: compare the tabulated
        # approximation with the target function on every table point
        approx_error = 0.0
        for i in range(2**alpha):
            for j in range(2**beta):
                tiv_i = i * 2**beta + j
                iv = table_iv[tiv_i]
                for k in range(2**gamma):
                    to_i = i * 2**gamma + k
                    approx_value = table_offset[to_i] + iv
                    table_x = range_lo + range_size * (
                        tiv_i * 2**gamma + k) / S2**(alpha + beta + gamma)
                    # the reference is the approximated function itself (the
                    # tables above are built from self.function)
                    local_error = abs(self.function(table_x) - approx_value)
                    approx_error = max(approx_error, local_error)
        error_log2 = float(sollya.log2(approx_error))
        print("approx_error is {}, error_log2 is {}".format(
            float(approx_error), error_log2))

        # table size
        table_iv_size = 2**(alpha + beta)
        table_offset_size = 2**(alpha + gamma)
        print("tables' size are {} entries".format(table_iv_size +
                                                   table_offset_size))

        return [self.implementation]
Beispiel #12
0
    def generate_scheme(self):
        """Build the (work in progress) fixed-point logarithm scheme.

        Steps performed:
          1. search for argument reduction parameters within a memory budget
             (self.generate_argument_reduction);
          2. fill two pairs of tables (inverse and logarithm) from those
             parameters;
          3. build the two-step argument reduction of the input mantissa;
          4. build a Horner scheme for log(1 + x) / x on the reduced
             argument.

        The returned scheme only filters special cases and otherwise returns
        the first polynomial: the logarithm tables, the extracted exponent
        and the log(2) constants are built but not used yet.

        Returns:
            a ConditionBlock: special-case handling if the input is not a
            normal positive number, Return(poly1) otherwise
        """
        memory_limit = 2500

        # local overloading of RaiseReturn operation
        # (input_var is bound later in this function, before any call)
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = input_var
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        ### Constants computations ###

        # NOTE(review): the scaling exponents look swapped (2**-52 inside
        # nearestint, 2**52 outside) -- confirm; both constants are unused
        # by the scheme returned below
        v_log2_hi = nearestint(log(2) * 2**-52) * 2**52
        v_log2_lo = round(log(2) - v_log2_hi, 64 + 53, sollya.RN)
        log2_hi = Constant(v_log2_hi, precision=self.precision, tag="log2_hi")
        log2_lo = Constant(v_log2_lo, precision=self.precision, tag="log2_lo")

        print("\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)")
        arg_reduc = self.generate_argument_reduction(memory_limit)

        print("\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory".format(
            arg_reduc['size1'], arg_reduc['prec1'], arg_reduc['size2'],
            arg_reduc['prec2'], arg_reduc['degree_poly1'],
            arg_reduc['degree_poly2'], arg_reduc['sizeof_tables']))

        print("\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(
            arg_reduc['length_table1'], arg_reduc['sizeof_table1']))
        inv_table_1 = ML_NewTable(
            dimensions=[arg_reduc['length_table1']],
            storage_precision=ML_Custom_FixedPoint_Format(
                1, arg_reduc['prec1'], False),
            tag=self.uniquify_name("inv_table_1"))
        log_table_1 = ML_NewTable(
            dimensions=[arg_reduc['length_table1']],
            storage_precision=ML_Custom_FixedPoint_Format(11, 128 - 11, False),
            tag=self.uniquify_name("log_table_1"))
        # NOTE(review): the last table entry is never filled (range stops at
        # length - 1) -- confirm this is intended
        for i in range(0, arg_reduc['length_table1'] - 1):
            # i-th sample point: 1 + i * 2**-size1 (same form as the second
            # table below)
            x1 = 1 + i / S2**arg_reduc['size1']
            # NOTE(review): the trailing factor is S2**prec1, a rescaling by
            # S2**-prec1 may have been intended -- confirm
            inv_x1 = ceil(S2**arg_reduc['prec1'] / x1) * S2**arg_reduc['prec1']
            log_x1 = floor(log(x1) * S2**(128 - 11)) * S2**(11 - 128)
            inv_table_1[i] = inv_x1
            log_table_1[i] = log_x1

        print("\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(
            arg_reduc['length_table2'], arg_reduc['sizeof_table2']))
        inv_table_2 = ML_NewTable(
            dimensions=[arg_reduc['length_table2']],
            storage_precision=ML_Custom_FixedPoint_Format(
                1, arg_reduc['prec2'], False),
            tag=self.uniquify_name("inv_table_2"))
        log_table_2 = ML_NewTable(
            dimensions=[arg_reduc['length_table2']],
            storage_precision=ML_Custom_FixedPoint_Format(11, 128 - 11, False),
            tag=self.uniquify_name("log_table_2"))
        # NOTE(review): same remark as above on the "- 1" bound
        for i in range(0, arg_reduc['length_table2'] - 1):
            y1 = 1 + i / S2**arg_reduc['size2']
            # the inverse is taken at this table's own sample point y1 (not
            # at the last x1 left over from the previous loop)
            # NOTE(review): same remark as above on the S2**prec2 factor
            inv_y1 = ceil(S2**arg_reduc['prec2'] / y1) * S2**arg_reduc['prec2']
            # NOTE(review): takes log(inv_y1) whereas the first table stores
            # log(x1) -- confirm which one is intended
            log_y1 = floor(log(inv_y1) * S2**(128 - 11)) * S2**(11 - 128)
            inv_table_2[i] = inv_y1
            log_table_2[i] = log_y1

        ### Evaluation Scheme ###

        print("\n\033[1mGenerate the evaluation scheme:\033[0m")
        input_var = self.implementation.add_input_variable(
            "input_var", self.precision)
        ve = ExponentExtraction(input_var, tag="x_exponent", debug=debugd)
        vx = MantissaExtraction(input_var,
                                tag="x_mantissa",
                                precision=ML_Custom_FixedPoint_Format(
                                    0, 52, False),
                                debug=debug_lftolx)
        #vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = self.precision, debug = debug_lftolx)

        print("filtering and handling special cases")
        test_is_special_cases = LogicalNot(
            Test(input_var,
                 specifier=Test.IsIEEENormalPositive,
                 likely=True,
                 debug=debugd,
                 tag="is_special_cases"))
        handling_special_cases = Statement(
            ConditionBlock(
                Test(input_var, specifier=Test.IsSignalingNaN, debug=True),
                ExpRaiseReturn(ML_FPE_Invalid,
                               return_value=FP_QNaN(self.precision))),
            ConditionBlock(Test(input_var, specifier=Test.IsNaN, debug=True),
                           Return(input_var))  #,
            # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN)
            # all that remains is x is a subnormal positive
            #Statement(
            #  ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))),
            #  ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision()))))
            #)
        )

        print("doing the argument reduction")
        # first reduction step: table lookup indexed by the leading bits of
        # the mantissa, then multiplication by the tabulated inverse
        v_dx = vx
        v_x1 = Conversion(v_dx,
                          tag='x1',
                          precision=ML_Custom_FixedPoint_Format(
                              0, arg_reduc['size1'], False),
                          rounding_mode=ML_RoundTowardMinusInfty)
        v_index_x = TypeCast(
            v_x1, tag='index_x', precision=ML_Int32
        )  #ML_Custom_FixedPoint_Format(v_x1.get_precision().get_c_bit_size(), 0, False))
        v_inv_x = TableLoad(inv_table_1, v_index_x, tag='inv_x')
        v_x = Addition(v_dx,
                       1,
                       tag='x',
                       precision=ML_Custom_FixedPoint_Format(1, 52, False))
        v_dy = Multiplication(v_x,
                              v_inv_x,
                              tag='dy',
                              precision=ML_Custom_FixedPoint_Format(
                                  0, 52 + arg_reduc['prec1'], False))
        # second reduction step, same structure on the first step's result
        v_y1 = Conversion(v_dy,
                          tag='y1',
                          precision=ML_Custom_FixedPoint_Format(
                              0, arg_reduc['size2'], False),
                          rounding_mode=ML_RoundTowardMinusInfty)
        v_index_y = TypeCast(
            v_y1, tag='index_y', precision=ML_Int32
        )  #ML_Custom_FixedPoint_Format(v_y1.get_precision().get_c_bit_size(), 0, False))
        v_inv_y = TableLoad(inv_table_2, v_index_y, tag='inv_y')
        v_y = Addition(v_dy,
                       1,
                       tag='y',
                       precision=ML_Custom_FixedPoint_Format(
                           1, 52 + arg_reduc['prec2'], False))
        # note that we limit the number of bits used to represent dz to 64.
        # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2))
        v_dz = Multiplication(
            v_y,
            v_inv_y,
            tag='z',
            precision=ML_Custom_FixedPoint_Format(
                64 - 52 - arg_reduc['prec1'] - arg_reduc['prec2'],
                52 + arg_reduc['prec1'] + arg_reduc['prec2'], False))
        # reduce the number of bits used to represent dz. we can do that

        print("doing the first polynomial evaluation")
        global_poly1_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, arg_reduc['degree_poly1'] - 1,
            [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'],
            fixed, sollya.absolute)
        # the constant coefficient is dropped from the evaluated polynomial
        poly1_object = global_poly1_object.sub_poly(start_index=1)
        print(global_poly1_object)
        print(poly1_object)
        poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly1_object, v_dz, unified_precision=v_dz.get_precision())
        return ConditionBlock(test_is_special_cases, handling_special_cases,
                              Return(poly1))

        #approx_interval = Interval(0, 27021597764222975*S2**-61)

        #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size())))
        #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute)
        #poly_object = global_poly_object.sub_poly(start_index = 1)
        #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
        #_poly.set_attributes(tag = "poly", debug = debug_lftolx)
        """
Beispiel #13
0
    def generate_scheme(self):
        """Generate the (placeholder) scheme and its coefficient table.

        A table of local polynomial approximations of acos(1 - x) is built,
        one row per sub-interval.  Sub-intervals are indexed by
        table_index_size bits: the field_index_size low bits subdivide a
        binade, the remaining bits select the binade.

        The table and the guessed polynomial degree are only computed (and
        the degree printed) for now: the returned scheme returns the input
        unchanged.

        Returns:
            Statement(Return(vx))
        """
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        # retrieving processor inverse approximation table
        lo_bound_global = SollyaObject(0.0)
        hi_bound_global = SollyaObject(0.75)
        approx_interval = Interval(lo_bound_global, hi_bound_global)
        approx_interval_size = hi_bound_global - lo_bound_global

        # table creation
        table_index_size = 7
        field_index_size = 2
        exp_index_size = table_index_size - field_index_size

        table_size = 2**table_index_size
        table_index_range = range(table_size)

        local_degree = 9
        coeff_table = ML_NewTable(dimensions=[table_size, local_degree],
                                  storage_precision=self.precision)

        exp_lo = 2**exp_index_size
        for i in table_index_range:
            # low bits of i: position inside the binade,
            # high bits of i: binade exponent (biased by exp_lo);
            # integer division keeps the exponent an exact integer
            field_index = i % 2**field_index_size
            exp_scale = S2**(i // 2**field_index_size - exp_lo)
            lo_bound = (1.0 + field_index * S2**-field_index_size) * exp_scale
            hi_bound = (1.0 +
                        (field_index + 1) * S2**-field_index_size) * exp_scale
            local_approx_interval = Interval(lo_bound, hi_bound)
            local_poly_object, local_error = Polynomial.build_from_approximation_with_error(
                acos(1 - sollya.x), local_degree,
                [self.precision] * (local_degree + 1), local_approx_interval,
                sollya.absolute)
            # log2 of the error relative to the function over the
            # sub-interval (currently computed only, not used afterwards)
            local_error = int(
                log2(sup(abs(local_error / acos(1 - local_approx_interval)))))
            # NOTE(review): only coefficients 0 .. local_degree - 1 are
            # stored; the coefficient of degree local_degree is dropped
            for d in range(local_degree):
                coeff_table[i][d] = sollya.coeff(
                    local_poly_object.get_sollya_object(), d)

        table_index = BitLogicRightShift(
            vx,
            vx.get_precision().get_field_size() -
            field_index_size) - (exp_lo << field_index_size)

        print("building mathematical polynomial")
        poly_degree = sup(
            sollya.guessdegree(acos(sollya.x), approx_interval,
                               S2**-(self.precision.get_field_size())))
        # single pre-formatted string: prints the same text whether print is
        # a statement or a function
        print("guessed polynomial degree:  {}".format(int(poly_degree)))
        #global_poly_object = Polynomial.build_from_approximation(log10(1+x)/x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

        print("generating polynomial evaluation scheme")
        #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)

        # building eval error map
        #eval_error_map = {
        #  red_vx: Variable("red_vx", precision = self.precision, interval = red_vx.get_interval()),
        #  log_inv_hi: Variable("log_inv_hi", precision = self.precision, interval = table_high_interval),
        #  log_inv_lo: Variable("log_inv_lo", precision = self.precision, interval = table_low_interval),
        #}
        # computing gappa error
        #poly_eval_error = self.get_eval_error(result, eval_error_map)

        # main scheme
        print("MDL scheme")
        scheme = Statement(Return(vx))
        return scheme
Beispiel #14
0
def generate_payne_hanek(vx,
                         frac_pi,
                         precision,
                         n=100,
                         k=4,
                         chunk_num=None,
                         debug=False):
    """Generate Payne and Hanek argument reduction for frac_pi * variable.

    The constant frac_pi is split into chunks small enough for the product
    of a chunk with each half of vx (vx_hi, vx_lo) to be exact.  A loop
    walks over the chunks which matter for the requested accuracy and
    accumulates the fractional part and the integer part (modulo 2**(k+1))
    of the product.

    Args:
        vx: input operation node to be reduced
        frac_pi: constant multiplicand
        precision: floating-point format, ML_Binary32 or ML_Binary64
        n: target accuracy, chunks contributing less than 2**-n are skipped
        k: the integer part is accumulated modulo 2**(k+1)
        chunk_num: unused, kept for interface compatibility
        debug: when set, attaches debug attributes to a few integer nodes

    Returns:
        a tuple (result, acc, acc_int): the statement implementing the
        reduction loop, the fractional accumulator variable and the integer
        accumulator variable
    """

    sollya.roundingwarnings = sollya.off
    debug_precision = debug_multi
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[precision]

    p = precision.get_field_size()

    # weight of the most significant digit of the constant
    cst_msb = floor(log2(abs(frac_pi)))
    # length of exponent range which must be covered by the approximation
    # of the constant
    cst_exp_range = cst_msb - precision.get_emin_subnormal() + 1

    # chunk size has to be such that multiplication by a split <v>
    # (vx_hi or vx_lo) is exact; sizes are bit counts, hence the integer
    # divisions
    chunk_size = precision.get_field_size() // 2 - 2
    chunk_number = int(ceil((cst_exp_range + chunk_size - 1) / chunk_size))
    scaling_factor = S2**-(chunk_size // 2)

    chunk_size_cst = Constant(chunk_size, precision=ML_Int32)
    cst_msb_node = Constant(cst_msb, precision=ML_Int32)

    # Saving sollya's global precision, then raising it so that every
    # digit of the constant needed below is available
    old_global_prec = sollya.settings.prec
    sollya.settings.prec = cst_exp_range + n
    try:
        # table to store chunk of constant multiplicand
        cst_table = ML_NewTable(dimensions=[chunk_number, 1],
                                storage_precision=precision,
                                tag="PH_cst_table")
        # table to store sqrt(scaling_factor) corresponding to the
        # cst multiplicand chunks
        scale_table = ML_NewTable(dimensions=[chunk_number, 1],
                                  storage_precision=precision,
                                  tag="PH_scale_table")
        tmp_cst = frac_pi

        # cst_table stores normalized constant chunks (they have been
        # scaled back to close to 1.0 interval)
        #
        # scale_table stores the scaling factors corresponding to the
        # denormalization of cst_table coefficients

        # this loop divides the digits of frac_pi into chunks
        # the chunk lsb weight is given by a shift from
        # cst_msb, multiple of the chunk index
        for i in range(chunk_number):
            value_div_factor = S2**(chunk_size * (i + 1) - cst_msb)
            local_cst = int(tmp_cst * value_div_factor) / value_div_factor
            local_scale = (scaling_factor**i)
            # storing scaled constant chunks
            cst_table[i][0] = local_cst / (local_scale**2)
            scale_table[i][0] = local_scale
            # Updating constant value
            tmp_cst = tmp_cst - local_cst

        # Computing which part of the constant we do not need to multiply
        # In the following comments, vi represents the bit of frac_pi of
        # weight 2**-i

        # Bits vi so that i <= (vx_exp - p + 1 -k)  are not needed, because
        # they result in a multiple of 2pi and do not contribute to trig
        # functions.

        vx_exp = ExponentExtraction(
            vx, precision=vx.get_precision().get_integer_format())
        vx_exp = Conversion(vx_exp, precision=ML_Int32)

        msb_exp = -(vx_exp - p + 1 - k)
        msb_exp.set_attributes(tag="msb_exp", debug=debug_multi)
        msb_exp = Conversion(msb_exp, precision=ML_Int32)

        # Select the highest index where the reduction should start
        msb_index = Select(cst_msb_node < msb_exp, 0,
                           (cst_msb_node - msb_exp) / chunk_size_cst)
        msb_index.set_attributes(tag="msb_index", debug=debug_multi)

        # For a desired accuracy of 2**-n, bits vi so that
        # i >= (vx_exp + n + 4)  are not needed, because they contribute
        # less than 2**-n to the result

        lsb_exp = -(vx_exp + n + 4)
        lsb_exp.set_attributes(tag="lsb_exp", debug=debug_multi)
        lsb_exp = Conversion(lsb_exp, precision=ML_Int32)

        # Index of the corresponding chunk
        lsb_index = (cst_msb_node - lsb_exp) / chunk_size_cst
        lsb_index.set_attributes(tag="lsb_index", debug=debug_multi)

        # Splitting vx (half_size is a bit count, hence integer division)
        half_size = precision.get_field_size() // 2 + 1

        # hi part (most significant digit) of vx input: the half_size low
        # bits of the encoding are masked out
        vx_hi = TypeCast(BitLogicAnd(
            TypeCast(vx, precision=int_precision),
            Constant(~int(2**half_size - 1), precision=int_precision)),
                         precision=precision)
        vx_hi.set_attributes(tag="vx_hi_ph")  #, debug = debug_multi)

        vx_lo = vx - vx_hi
        vx_lo.set_attributes(tag="vx_lo_ph")  #, debug = debug_multi)

        # loop iterator variable
        vi = Variable("i", precision=ML_Int32, var_type=Variable.Local)

        i1 = Constant(1, precision=ML_Int32)

        # accumulator to the output precision
        acc = Variable("acc", precision=precision, var_type=Variable.Local)
        # integer accumulator
        acc_int = Variable("acc_int",
                           precision=int_precision,
                           var_type=Variable.Local)

        init_loop = Statement(
            vx_hi,
            vx_lo,
            ReferenceAssign(vi, msb_index),
            ReferenceAssign(acc, Constant(0, precision=precision)),
            ReferenceAssign(acc_int, Constant(0, precision=int_precision)),
        )

        cst_load = TableLoad(cst_table,
                             vi,
                             0,
                             tag="cst_load",
                             debug=debug_precision)
        sca_load = TableLoad(scale_table,
                             vi,
                             0,
                             tag="sca_load",
                             debug=debug_precision)
        # loop body
        # hi_mult = vx_hi * <scale_factor> * <cst>
        # (the scale is applied once on each operand, which undoes the
        # division by local_scale**2 done when filling cst_table)
        hi_mult = (vx_hi * sca_load) * (cst_load * sca_load)
        hi_mult.set_attributes(tag="hi_mult", debug=debug_precision)
        pre_hi_mult_int = NearestInteger(hi_mult,
                                         precision=int_precision,
                                         tag="hi_mult_int",
                                         debug=(debuglld if debug else None))
        hi_mult_int_f = Conversion(pre_hi_mult_int,
                                   precision=precision,
                                   tag="hi_mult_int_f",
                                   debug=debug_precision)
        pre_hi_mult_red = (hi_mult - hi_mult_int_f).modify_attributes(
            tag="hi_mult_red", debug=debug_precision)

        # for the first chunks (vx_hi * <constant chunk>) exceeds 2**k+1 and
        # may be discarded (whereas it may lead to overflow during integer
        # conversion)
        pre_exclude_hi = (
            (cst_msb_node - (vi + i1) * chunk_size + i1) +
            (vx_exp + Constant(-half_size + 1, precision=ML_Int32))
        ).modify_attributes(tag="pre_exclude_hi",
                            debug=(debugd if debug else None))
        pre_exclude_hi.propagate_precision(ML_Int32,
                                           [cst_msb_node, vi, vx_exp, i1])
        Ck = Constant(k, precision=ML_Int32)
        exclude_hi = pre_exclude_hi <= Ck
        exclude_hi.set_attributes(tag="exclude_hi", debug=debug_multi)

        hi_mult_red = Select(exclude_hi, pre_hi_mult_red,
                             Constant(0, precision=precision))
        hi_mult_int = Select(exclude_hi, pre_hi_mult_int,
                             Constant(0, precision=int_precision))

        # lo part of the chunk reduction
        lo_mult = (vx_lo * sca_load) * (cst_load * sca_load)
        lo_mult.set_attributes(tag="lo_mult")  #, debug = debug_multi)
        lo_mult_int = NearestInteger(lo_mult,
                                     precision=int_precision,
                                     tag="lo_mult_int")  #, debug = debug_multi
        lo_mult_int_f = Conversion(lo_mult_int,
                                   precision=precision,
                                   tag="lo_mult_int_f")  #, debug = debug_multi)
        lo_mult_red = (lo_mult - lo_mult_int_f).modify_attributes(
            tag="lo_mult_red")  #, debug = debug_multi)

        # accumulating fractional part
        acc_expr = (acc + hi_mult_red) + lo_mult_red
        # accumulating integer part
        int_expr = ((acc_int + hi_mult_int) + lo_mult_int) % 2**(k + 1)

        # extracting exceeding integer part in fractional accumulator
        acc_expr_int = NearestInteger(acc_expr, precision=int_precision)
        # normalizing integer and fractional accumulator by subtracting then
        # adding exceeding integer part
        normalization = Statement(
            ReferenceAssign(
                acc, acc_expr - Conversion(acc_expr_int, precision=precision)),
            ReferenceAssign(acc_int, int_expr + acc_expr_int),
        )

        acc_expr.set_attributes(tag="acc_expr")  #, debug = debug_multi)
        int_expr.set_attributes(tag="int_expr")  #, debug = debug_multi)

        red_loop = Loop(
            init_loop, vi <= lsb_index,
            Statement(acc_expr, int_expr, normalization,
                      ReferenceAssign(vi, vi + 1)))

        result = Statement(lsb_index, msb_index, red_loop)
    finally:
        # restoring sollya's global precision, even if generation failed
        sollya.settings.prec = old_global_prec

    return result, acc, acc_int
Beispiel #15
0
    def generate_scheme(self):
        """Build the operation graph evaluating sin(x) or cos(x).

        The input is reduced to ``k * pi / 2**m + r`` with ``m`` equal to
        ``frac_pi_index``.  A cosine table indexed by ``k`` (modulo
        ``2**(m + 1)``) provides the cosine and, through an index offset and
        a negation, the sine of the tabulated point.  These are combined
        with two polynomials in ``r``: the cosine approximation without its
        degree-0 term and the sine approximation without its terms below
        degree 2 (the ``r`` term is added separately).

        When ``self.sin_output`` is set, ``k`` is offset by
        ``3 * 2**(m - 1)`` so that the same cosine evaluation yields a sine.

        Two argument reductions are generated: a Cody-Waite style reduction,
        and a Payne-Hanek reduction (built by ``generate_payne_hanek``) used
        when ``x >= ph_bound``.

        :return: a ConditionBlock which returns ``FP_QNaN(self.precision)``
            for Inf/NaN inputs and the evaluated result otherwise
        """
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        # NOTE(review): this helper is not referenced anywhere else in this
        # method.
        def SincosRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        sollya_precision = self.precision.get_sollya_object()
        # NOTE(review): hi_precision is not used below.
        hi_precision = self.precision.get_field_size() - 8
        # number of bits kept in the high part of the Cody-Waite constants
        cw_hi_precision = self.precision.get_field_size() - 4

        # NOTE(review): ext_precision is not used below.
        ext_precision = {
            ML_Binary32: ML_Binary64,
            ML_Binary64: ML_Binary64
        }[self.precision]

        # integer format used for k and the table indexes; raises KeyError
        # for any precision other than ML_Binary32 / ML_Binary64
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # threshold from which the large-argument (Payne-Hanek) path is taken
        if self.precision is ML_Binary32:
            ph_bound = S2**10
        else:
            ph_bound = S2**33

        # NOTE(review): the comparison is on vx itself, not on its absolute
        # value, so negative inputs never select the large-argument path --
        # confirm this is intended.
        test_ph_bound = Comparison(vx,
                                   ph_bound,
                                   specifier=Comparison.GreaterOrEqual,
                                   precision=ML_Bool,
                                   likely=False)

        # argument reduction
        # m
        frac_pi_index = {ML_Binary32: 10, ML_Binary64: 14}[self.precision]

        C0 = Constant(0, precision=int_precision)
        # NOTE(review): C1 is not used below.
        C1 = Constant(1, precision=int_precision)
        # 3 * 2^(m-1): offset added to k when a sine is requested
        C_offset = Constant(3 * S2**(frac_pi_index - 1),
                            precision=int_precision)

        # 2^m / pi, high part then low part
        frac_pi = round(S2**frac_pi_index / pi, cw_hi_precision, sollya.RN)
        frac_pi_lo = round(S2**frac_pi_index / pi - frac_pi, sollya_precision,
                           sollya.RN)
        # pi / 2^m, high part
        inv_frac_pi = round(pi / S2**frac_pi_index, cw_hi_precision, sollya.RN)
        # pi / 2^m, low part
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)

        # computing k
        vx.set_attributes(tag="vx", debug=debug_multi)

        # vx_pi = vx * (2^m / pi), using the high and low parts of the constant
        vx_pi = Addition(Multiplication(vx,
                                        Constant(frac_pi,
                                                 precision=self.precision),
                                        precision=self.precision),
                         Multiplication(vx,
                                        Constant(frac_pi_lo,
                                                 precision=self.precision),
                                        precision=self.precision),
                         precision=self.precision,
                         tag="vx_pi",
                         debug=debug_multi)

        k = NearestInteger(vx_pi,
                           precision=int_precision,
                           tag="k",
                           debug=debug_multi)
        # k in floating-point precision
        fk = Conversion(k,
                        precision=self.precision,
                        tag="fk",
                        debug=debug_multi)

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision,
                                   debug=debug_multi)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision,
                                      debug=debug_multi)

        # Cody-Waite reduction
        # red_coeff1 = fk * hi(pi / 2^m), red_coeff2 = -fk * lo(pi / 2^m)
        red_coeff1 = Multiplication(fk,
                                    inv_frac_pi_cst,
                                    precision=self.precision,
                                    exact=True)
        red_coeff2 = Multiplication(Negation(fk, precision=self.precision),
                                    inv_frac_pi_lo_cst,
                                    precision=self.precision,
                                    exact=True)

        # Should be exact / Sterbenz' Lemma
        pre_sub_mul = Subtraction(vx,
                                  red_coeff1,
                                  precision=self.precision,
                                  exact=True)

        # Fast2Sum of pre_sub_mul and red_coeff2: s is the rounded sum and
        # t the recovered error term
        s = Addition(pre_sub_mul,
                     red_coeff2,
                     precision=self.precision,
                     unbreakable=True,
                     tag="s",
                     debug=debug_multi)
        z = Subtraction(s,
                        pre_sub_mul,
                        precision=self.precision,
                        unbreakable=True,
                        tag="z",
                        debug=debug_multi)
        t = Subtraction(red_coeff2,
                        z,
                        precision=self.precision,
                        unbreakable=True,
                        tag="t",
                        debug=debug_multi)

        # reduced argument for the standard (non large-argument) path
        red_vx_std = Addition(s, t, precision=self.precision)
        red_vx_std.set_attributes(tag="red_vx_std", debug=debug_multi)

        # To compute sine we offset x by 3pi/2
        # which means add 3  * S2^(frac_pi_index-1) to k
        if self.sin_output:
            Log.report(Log.Info, "Computing Sin")
            offset_k = Addition(k,
                                C_offset,
                                precision=int_precision,
                                tag="offset_k")
        else:
            Log.report(Log.Info, "Computing Cos")
            offset_k = k

        # local variables assigned by whichever reduction path is executed
        modk = Variable("modk",
                        precision=int_precision,
                        var_type=Variable.Local)
        red_vx = Variable("red_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        # Faster modulo using bitwise logic
        # (keeps the low m + 1 bits of offset_k)
        modk_std = BitLogicAnd(offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="modk",
                               debug=debug_multi)

        # interval declared for the reduced argument, also used as the
        # polynomial approximation interval
        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        red_vx.set_interval(approx_interval)

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        Log.report(Log.Info,
                   "building tabulated approximation for sin and cos")

        # NOTE(review): this lambda is rebound to an identical one further
        # down, before its first use.
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        # polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        # cos_table[i][0] = cos(i * pi / 2^m), for i in [0, 2^(m+1))
        table_index_size = frac_pi_index + 1
        cos_table = ML_NewTable(dimensions=[2**table_index_size, 1],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("cos_table"))

        for i in range(2**(frac_pi_index + 1)):
            local_x = i * pi / S2**frac_pi_index
            cos_local = round(cos(local_x), self.precision.get_sollya_object(),
                              sollya.RN)
            cos_table[i][0] = cos_local

        # the sine of the tabulated point is read from the cosine table: the
        # index is shifted by 2^(m-1) (i.e. pi/2) and the loaded value negated
        sin_index = Modulo(modk + 2**(frac_pi_index - 1),
                           2**(frac_pi_index + 1),
                           precision=int_precision,
                           tag="sin_index")  #, debug = debug_multi)
        tabulated_cos = TableLoad(cos_table,
                                  modk,
                                  C0,
                                  precision=self.precision,
                                  tag="tab_cos",
                                  debug=debug_multi)
        tabulated_sin = -TableLoad(cos_table,
                                   sin_index,
                                   C0,
                                   precision=self.precision,
                                   tag="tab_sin",
                                   debug=debug_multi)

        # polynomial degrees: sollya's guessdegree estimate plus a margin of 2
        poly_degree_cos = sup(
            guessdegree(cos(sollya.x), approx_interval, S2**
                        -self.precision.get_precision()) + 2)
        poly_degree_sin = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -self.precision.get_precision()) + 2)

        # every monomial degree from 0 up to the estimated degree + 2
        poly_degree_cos_list = range(0, int(poly_degree_cos) + 3)
        poly_degree_sin_list = range(0, int(poly_degree_sin) + 3)

        # cosine polynomial: every coefficient uses self.precision
        # (no coefficient is restricted to a smaller precision here)
        poly_cos_prec_list = [self.precision] * len(poly_degree_cos_list)
        # sine polynomial: every coefficient uses self.precision
        poly_sin_prec_list = [self.precision] * len(poly_degree_sin_list)

        # approximation error measured as the (dirty) infinity norm of f - p
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
        Log.report(Log.Info,
                   "building mathematical polynomials for sin and cos")
        # Polynomial approximations
        Log.report(Log.Info, "cos")
        poly_object_cos, poly_error_cos = Polynomial.build_from_approximation_with_error(
            cos(sollya.x),
            poly_degree_cos_list,
            poly_cos_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "sin")
        poly_object_sin, poly_error_sin = Polynomial.build_from_approximation_with_error(
            sin(sollya.x),
            poly_degree_sin_list,
            poly_sin_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        Log.report(
            Log.Info, "poly error cos: {} / {:d}".format(
                poly_error_cos, int(sollya.log2(poly_error_cos))))
        Log.report(
            Log.Info, "poly error sin: {0} / {1:d}".format(
                poly_error_sin, int(sollya.log2(poly_error_sin))))
        Log.report(Log.Info, "poly cos : %s" % poly_object_cos)
        Log.report(Log.Info, "poly sin : %s" % poly_object_sin)

        # Polynomial evaluation scheme
        # the cosine polynomial is evaluated without its degree-0 term and the
        # sine polynomial without its terms below degree 2; the missing
        # contributions are added explicitly in the final sum
        poly_cos = polynomial_scheme_builder(
            poly_object_cos.sub_poly(start_index=1),
            red_vx,
            unified_precision=self.precision)
        poly_sin = polynomial_scheme_builder(
            poly_object_sin.sub_poly(start_index=2),
            red_vx,
            unified_precision=self.precision)
        poly_cos.set_attributes(tag="poly_cos", debug=debug_multi)
        poly_sin.set_attributes(tag="poly_sin",
                                debug=debug_multi,
                                unbreakable=True)

        # TwoProductFMA
        # each product is computed as a rounded value (*_x / *_hi) plus the
        # FMA-recovered rounding error (*_y / *_lo)
        mul_cos_x = tabulated_cos * poly_cos
        mul_cos_y = FusedMultiplyAdd(tabulated_cos,
                                     poly_cos,
                                     -mul_cos_x,
                                     precision=self.precision)

        mul_sin_x = tabulated_sin * poly_sin
        mul_sin_y = FusedMultiplyAdd(tabulated_sin,
                                     poly_sin,
                                     -mul_sin_x,
                                     precision=self.precision)

        # NOTE(review): unlike the products above, these two nodes are never
        # given an explicit precision.
        mul_coeff_sin_hi = tabulated_sin * red_vx
        mul_coeff_sin_lo = FusedMultiplyAdd(tabulated_sin, red_vx,
                                            -mul_coeff_sin_hi)

        # mul_cos       =   tabulated_cos * poly_cos
        # mul_sin       = -(tabulated_sin * poly_sin)
        # mul_coeff_sin = -(tabulated_sin * red_vx)
        mul_cos = Addition(mul_cos_x,
                           mul_cos_y,
                           precision=self.precision,
                           tag="mul_cos")  #, debug = debug_multi)
        mul_sin = Negation(Addition(mul_sin_x,
                                    mul_sin_y,
                                    precision=self.precision),
                           precision=self.precision,
                           tag="mul_sin")  #, debug = debug_multi)
        mul_coeff_sin = Negation(Addition(mul_coeff_sin_hi,
                                          mul_coeff_sin_lo,
                                          precision=self.precision),
                                 precision=self.precision,
                                 tag="mul_coeff_sin")  #, debug = debug_multi)

        # tags and precisions are attached after construction because the
        # nodes above were built with overloaded operators
        mul_cos_x.set_attributes(
            tag="mul_cos_x", precision=self.precision)  #, debug = debug_multi)
        mul_cos_y.set_attributes(
            tag="mul_cos_y", precision=self.precision)  #, debug = debug_multi)
        mul_sin_x.set_attributes(
            tag="mul_sin_x", precision=self.precision)  #, debug = debug_multi)
        mul_sin_y.set_attributes(
            tag="mul_sin_y", precision=self.precision)  #, debug = debug_multi)

        # final sum; tabulated_cos, the largest-looking term, is added last
        cos_eval_d_1 = (((mul_cos + mul_sin) + mul_coeff_sin) + tabulated_cos)

        cos_eval_d_1.set_attributes(tag="cos_eval_d_1",
                                    precision=self.precision,
                                    debug=debug_multi)

        result_1 = Statement(Return(cos_eval_d_1))

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################
        # payne and hanek argument reduction for large arguments
        ph_k = frac_pi_index
        # 2^m / pi rounded to 1500 bits
        ph_frac_pi = round(S2**ph_k / pi, 1500, sollya.RN)
        # NOTE(review): ph_inv_frac_pi is not used below.
        ph_inv_frac_pi = pi / S2**ph_k

        # ph_statement performs the reduction; ph_acc / ph_acc_int are the
        # fractional and integer accumulators it returns
        ph_statement, ph_acc, ph_acc_int = generate_payne_hanek(vx,
                                                                ph_frac_pi,
                                                                self.precision,
                                                                n=100,
                                                                k=ph_k)

        # assigning Large Argument Reduction reduced variable
        lar_vx = Variable("lar_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        # lar_red_vx = lar_vx * (pi / 2^m), using the high and low parts of
        # the constant (passed here as raw sollya values, not Constant nodes)
        lar_red_vx = Addition(Multiplication(lar_vx,
                                             inv_frac_pi,
                                             precision=self.precision),
                              Multiplication(lar_vx,
                                             inv_frac_pi_lo,
                                             precision=self.precision),
                              precision=self.precision,
                              tag="lar_red_vx",
                              debug=debug_multi)

        # bring a negative integer accumulator back to a non-negative value
        # by adding 2^(m+1)
        C32 = Constant(2**(ph_k + 1), precision=int_precision, tag="C32")
        ph_acc_int_red = Select(ph_acc_int < C0,
                                C32 + ph_acc_int,
                                ph_acc_int,
                                precision=int_precision,
                                tag="ph_acc_int_red")
        # same sine offset as on the standard path
        if self.sin_output:
            lar_offset_k = Addition(ph_acc_int_red,
                                    C_offset,
                                    precision=int_precision,
                                    tag="lar_offset_k")
        else:
            lar_offset_k = ph_acc_int_red

        ph_acc_int_red.set_attributes(tag="ph_acc_int_red", debug=debug_multi)
        lar_modk = BitLogicAnd(lar_offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="lar_modk",
                               debug=debug_multi)

        # large-argument path: run the Payne-Hanek reduction, then assign
        # red_vx and modk from its results
        lar_statement = Statement(ph_statement,
                                  ReferenceAssign(lar_vx,
                                                  ph_acc,
                                                  debug=debug_multi),
                                  ReferenceAssign(red_vx,
                                                  lar_red_vx,
                                                  debug=debug_multi),
                                  ReferenceAssign(modk, lar_modk),
                                  prevent_optimization=True)

        test_NaN_or_Inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               tag="NaN_or_Inf",
                               debug=debug_multi)
        return_NaN_or_Inf = Statement(Return(FP_QNaN(self.precision)))

        # top-level scheme:
        #   Inf/NaN input -> clear exceptions and return FP_QNaN
        #   otherwise     -> assign modk / red_vx through the large-argument
        #                    or the standard reduction, then return result_1
        # NOTE(review): modk and red_vx are listed as bare statements,
        # presumably to declare the local variables -- confirm.
        scheme = ConditionBlock(
            test_NaN_or_Inf, Statement(ClearException(), return_NaN_or_Inf),
            Statement(
                modk, red_vx,
                ConditionBlock(
                    test_ph_bound, lar_statement,
                    Statement(
                        ReferenceAssign(modk, modk_std),
                        ReferenceAssign(red_vx, red_vx_std),
                    )), result_1))

        return scheme
Beispiel #16
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise approximation

        The interval [bound_low, bound_high] is split into num_intervals
        sub-intervals of equal size. One polynomial of degree max_degree is
        built per sub-interval and its coefficients are stored in one row
        of a constant table; the returned scheme computes the row index
        from variable and evaluates the selected polynomial.

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the main interval
        :param max_degree: maximum degree for an approximation on any sub-interval
            (if None, the largest degree suggested by the degree generator
            is used)
        :param error_threshold: error bound for an approximation on any sub-interval
        :param odd: only used on a sub-interval whose lower bound is a zero
            of function; when set, function(x)/x is approximated with the
            monomial degrees 0, 2, 4, ...
        :param even: only used on a sub-interval whose lower bound is a zero
            of function; when set, function(x)/x is approximated with the
            monomial degrees 1, 3, 5, ...

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and error
            is the maximum approximation error encountered
        :rtype tuple(ML_Operation, SollyaObject): """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # the polynomial variable is the offset from the sub-interval lower
        # bound, hence the translated function
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            local_poly_degree_list = list(
                range(1 if even else 0, degree + 1, 2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(cst_value, local_function,
                                                     local_interval,
                                                     sollya.absolute, accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    # the level is spelled Log.Error, like every other log
                    # level used in this file (was the lowercase Log.error)
                    Log.report(
                        Log.Error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)
                    # no polynomial is available for this sub-interval: if
                    # the report above returned, do not carry on with
                    # poly_object / approx_error undefined or left over from
                    # the previous sub-interval
                    raise
        # missing coefficients (e.g. skipped odd/even degrees) are stored as 0
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    # clamped to [0, num_intervals - 1]
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio, precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    # poly_var = diff - index * interval_size: offset of the input from the
    # lower bound of the selected sub-interval
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial
    # (coefficients listed from the highest degree down to degree 0)
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
  def generate_scheme(self):
    """Produce an abstract scheme for the logarithm.

    This abstract scheme will be used by the code generation backend.
    """
    if self.precision not in [ML_Binary32, ML_Binary64]:
        Log.report(Log.Error, "The demanded precision is not supported")

    vx = self.implementation.add_input_variable("x", self.precision)


    def default_bool_convert(optree, precision=None, **kw):
        return bool_convert(optree, precision, -1, 0, **kw) \
                if isinstance(self.processor, VectorBackend) \
                else bool_convert(optree, precision, 1, 0, **kw)

    precision = self.precision.sollya_object
    int_prec = self.precision.get_integer_format()
    Log.report(Log.Info, "int_prec is %s" % int_prec)
    uint_prec = self.precision.get_unsigned_integer_format()


    Log.report(Log.Info, "MDL constants")
    cgpe_scheme_idx = int(self.cgpe_index)
    table_index_size = int(self.tbl_index_size)
    #
    table_nb_elements = 2**(table_index_size)
    table_dimensions = [2*table_nb_elements]  # two values are stored for each element
    field_size = Constant(self.precision.get_field_size(),
                          precision = int_prec,
                          tag = 'field_size')
    if self.log_radix == EXP_1:
      log2_hi = Constant(
        round(log(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log(2) - round(log(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    elif self.log_radix == 10:
      log2_hi = Constant(
        round(log10(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log10(2) - round(log10(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    # ... if log_radix == '2' then log2(2) == 1

    # subnormal_mask aims at trapping positive subnormals except zero.
    # That's why we will subtract 1 to the integer bitstring of the input, and
    # then compare for Less (strict) the resulting integer bitstring to this
    # mask, e.g.  0x7fffff for binary32.
    if self.no_subnormal == False:
      subnormal_mask = Constant((1 << self.precision.get_field_size()) - 1,
                                precision = int_prec, tag = 'subnormal_mask')
    fp_one = Constant(1.0, precision = self.precision, tag = 'fp_one')
    fp_one_as_uint = TypeCast(fp_one, precision = uint_prec,
                              tag = 'fp_one_as_uint')
    int_zero = Constant(0, precision = int_prec, tag = 'int_zero')
    int_one  = Constant(1, precision = int_prec, tag = 'int_one')
    table_mantissa_half_ulp = Constant(
            1 << (self.precision.field_size - table_index_size - 1),
            precision = int_prec
            )
    table_s_exp_index_mask = Constant(
            ~((table_mantissa_half_ulp.get_value() << 1) - 1),
            precision = uint_prec
            )

    Log.report(Log.Info, "MDL table")
    # The table holds approximations of -log(2^tau * r_i) so we first compute
    # the index value for which tau changes from 1 to 0.
    cut = sqrt(2.)
    tau_index_limit = floor(table_nb_elements * (2./cut - 1))
    sollya_logtbl = [
      (-log1p(float(i) / table_nb_elements)
      + (0 if i <= tau_index_limit else log(2.))) / log(self.log_radix)
      for i in range(table_nb_elements)
    ]
    # ...
    init_logtbl_hi = [
            round(sollya_logtbl[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl_lo = [
            round(sollya_logtbl[i] - init_logtbl_hi[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl = [tmp[i] for i in range(len(init_logtbl_hi)) for tmp in [init_logtbl_hi, init_logtbl_lo]]
    log1p_table = ML_NewTable(dimensions = table_dimensions,
                              storage_precision = self.precision,
                              init_data = init_logtbl,
                              tag = 'ml_log1p_table')
    # ...
    if self.no_rcp:
      sollya_rcptbl = [
        (1/((1+float(i)/table_nb_elements)+2**(-1-int(self.tbl_index_size))))
        for i in range(table_nb_elements)
      ]
      init_rcptbl = [
            round(sollya_rcptbl[i],
                  int(self.tbl_index_size)+1, # self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
      ]
      rcp_table = ML_NewTable(dimensions = [table_nb_elements],
                              storage_precision = self.precision,
                              init_data = init_rcptbl,
                              tag = 'ml_rcp_table')
    # ...

    Log.report(Log.Info, 'MDL unified subnormal handling')
    vx_as_int = TypeCast(vx, precision = int_prec, tag = 'vx_as_int')
    if self.no_subnormal == False:
      vx_as_uint = TypeCast(vx, precision = uint_prec, tag = 'vx_as_uint')
      # Avoid the 0.0 case by subtracting 1 from vx_as_int
      tmp = Comparison(vx_as_int - 1, subnormal_mask,
                       specifier = Comparison.Less)
      is_subnormal = default_bool_convert(
        tmp, # Will catch negative values as well as NaNs with sign bit set
        precision = int_prec)
      is_subnormal.set_attributes(tag = "is_subnormal")
      if not(isinstance(self.processor, VectorBackend)):
        is_subnormal = Subtraction(Constant(0, precision = int_prec),
                                   is_subnormal,
                                   precision = int_prec)

      #################################################
      # Vectorizable integer based subnormal handling #
      #################################################
      # 1. lzcnt
      # custom lzcount-like for subnormal numbers using FPU (see draft article)
      Zi = BitLogicOr(vx_as_uint, fp_one_as_uint, precision = uint_prec, tag="Zi")
      Zf = Subtraction(
        TypeCast(Zi, precision = self.precision),
        fp_one,
        precision = self.precision,
        tag="Zf")
      # Zf exponent is -(nlz(x) - exponent_size).
      # 2. compute shift value
      # Vectorial comparison on x86+sse/avx is going to look like
      # '|0x00|0xff|0x00|0x00|' and that's why we use Negate.
      # But for scalar code generation, comparison will rather be either 0 or 1
      # in C. Thus mask below won't be correct for a scalar implementation.
      # FIXME: Can we know the backend that will be called and choose in
      # consequence? Should we make something arch-agnostic instead?
      #
      n_value = BitLogicAnd(
        Addition(
          DirtyExponentExtraction(Zf, self.precision),
          Constant(
            self.precision.get_bias(),
            precision = int_prec),
          precision = int_prec),
        is_subnormal,
        precision = int_prec,
        tag = "n_value")
      alpha = Negation(n_value, tag="alpha")
      #
      # 3. shift left
      # renormalized_mantissa = BitLogicLeftShift(vx_as_int, value)
      normal_vx_as_int = BitLogicLeftShift(vx_as_int, alpha)
      # 4. set exponent to the right value
      # Compute the exponent to add : (p-1)-(value) + 1 = p-1-value
      # The final "+ 1" comes from the fact that once renormalized, the
      # floating-point datum has a biased exponent of 1
      #tmp0 = Subtraction(
      #        field_size,
      #        value,
      #        precision = int_prec,
      #        tag="tmp0")
      # Set the value to 0 if the number is not subnormal
      #tmp1 = BitLogicAnd(tmp0, is_subnormal)
      #renormalized_exponent = BitLogicLeftShift(
      #        tmp1,
      #        field_size
      #        )
    else: # no_subnormal == True
      normal_vx_as_int = vx_as_int
      
    #normal_vx_as_int = renormalized_mantissa + renormalized_exponent
    normal_vx = TypeCast(normal_vx_as_int, precision = self.precision,
                         tag = 'normal_vx')

    # alpha = BitLogicAnd(field_size, is_subnormal, tag = 'alpha')
    # XXX Extract the mantissa, see if this is supported in the x86 vector
    # backend or if it still uses the support_lib.
    vx_mantissa = MantissaExtraction(normal_vx, precision = self.precision)

    Log.report(Log.Info, "MDL scheme")
    if self.force_division == True:
      rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    elif self.no_rcp == False:
      rcp_m = ReciprocalSeed(vx_mantissa, precision = self.precision)
      if not self.processor.is_supported_operation(rcp_m):
        if self.precision == ML_Binary64:
          # Try using a binary32 FastReciprocal
          binary32_m = Conversion(vx_mantissa, precision = ML_Binary32)
          rcp_m = ReciprocalSeed(binary32_m, precision = ML_Binary32)
          rcp_m = Conversion(rcp_m, precision = ML_Binary64)
        if not self.processor.is_supported_operation(rcp_m):
          # FIXME An approximation table could be used instead but for vector
          # implementations another GATHER would be required.
          # However this may well be better than a division...
          rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    else: # ... use a look-up table
      rcp_shift = BitLogicLeftShift(normal_vx_as_int, self.precision.get_exponent_size() + 1)
      rcp_idx = BitLogicRightShift(rcp_shift, self.precision.get_exponent_size() + 1 + self.precision.get_field_size() - int(self.tbl_index_size))
      rcp_m = TableLoad(rcp_table, rcp_idx, tag = 'rcp_idx',
                        debug = debug_multi)
    #  
    rcp_m.set_attributes(tag = 'rcp_m')

    # exponent is normally either 0 or -1, since m is in [1, 2). Possible
    # optimization?
    # exponent = ExponentExtraction(rcp_m, precision = self.precision,
    #         tag = 'exponent')

    ri_round = TypeCast(
            Addition(
                TypeCast(rcp_m, precision = int_prec),
                table_mantissa_half_ulp,
                precision = int_prec
                ),
            precision = uint_prec
            )
    ri_fast_rndn = BitLogicAnd(
            ri_round,
            table_s_exp_index_mask,
            tag = 'ri_fast_rndn',
            precision = uint_prec
            )
    # u = m * ri - 1
    ul = None
    if self.no_rcp == True: # ... u does not fit on a single word
      tmp_u, tmp_ul = Mul211(vx_mantissa,         
                             TypeCast(ri_fast_rndn, precision = self.precision), 
                             fma = (self.no_fma == False))
      fp_minus_one = Constant(-1.0, precision = self.precision, tag = 'fp_minus_one')
      u, ul = Add212(fp_minus_one, tmp_u, tmp_ul)      
      u.set_attributes(tag='uh')
      ul.set_attributes(tag='ul')
    elif self.no_fma == False:
      u = FusedMultiplyAdd(
        vx_mantissa,
        TypeCast(ri_fast_rndn, precision = self.precision),
        fp_one,
        specifier = FusedMultiplyAdd.Subtract,
        tag = 'u')
    else: # disable FMA
      # tmph + tmpl = m * ri, where tmph ~ 1
      tmph, tmpl = Mul211(vx_mantissa,         
                          TypeCast(ri_fast_rndn, precision = self.precision), 
                          fma = False)
      # u_tmp = tmph - 1 ... exact due to Sterbenz
      u_tmp = Subtraction(tmph, fp_one, precision = self.precision)
      # u = u_tmp + tmpl ... exact since the result u is representable as a single word
      u = Addition(u_tmp, tmpl, precision = self.precision, tag = 'u')
    
    unneeded_bits = Constant(
            self.precision.field_size - table_index_size,
            precision=uint_prec,
            tag="unneeded_bits"
            )
    assert self.precision.field_size - table_index_size >= 0
    ri_bits = BitLogicRightShift(
            ri_fast_rndn,
            unneeded_bits,
            precision = uint_prec,
            tag = "ri_bits"
            )
    # Retrieve mantissa's MSBs + first bit of exponent, for tau computation in case
    # exponent is 0 (i.e. biased 127, i.e. first bit of exponent is set.).
    # In this particular case, i = 0 but tau is 1
    # table_index does not need to be as long as uint_prec might be,
    # try and keep it the size of size_t.
    size_t_prec = ML_UInt32
    signed_size_t_prec = ML_Int32
    table_index_mask = Constant(
            (1 << (table_index_size + 1)) - 1,
            precision = size_t_prec
            )
    table_index = BitLogicAnd(
            Conversion(ri_bits, precision = size_t_prec),
            table_index_mask,
            tag = 'table_index',
            precision = size_t_prec
            )
    # Compute tau using the tau_index_limit value.
    tmp = default_bool_convert(
            Comparison(
                TypeCast(table_index, precision = signed_size_t_prec),
                Constant(tau_index_limit, precision = signed_size_t_prec),
                specifier = Comparison.Greater
                if isinstance(self.processor, VectorBackend)
                else Comparison.LessOrEqual
                ),
            precision = signed_size_t_prec,
            tag="tmp"
            )
    # A true tmp will typically be -1 for VectorBackends, but 1 for standard C.
    tau = Conversion(
        Addition(tmp, Constant(1, precision=signed_size_t_prec), precision = signed_size_t_prec, tag="pre_add")
            if isinstance(self.processor, VectorBackend)
            else tmp,
            precision=int_prec,
            tag="pre_tau"
        )
    tau.set_attributes(tag = 'tau')
    # Update table_index: keep only table_index_size bits
    table_index_hi = BitLogicAnd(
            table_index,
            Constant((1 << table_index_size) - 1, precision = size_t_prec),
            precision = size_t_prec
            )
    # table_index_hi = table_index_hi << 1
    table_index_hi = BitLogicLeftShift(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_hi"
            )
    # table_index_lo = table_index_hi + 1
    table_index_lo = Addition(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_lo"
            )

    tbl_hi = TableLoad(log1p_table, table_index_hi, tag = 'tbl_hi',
                       debug = debug_multi)
    tbl_lo = TableLoad(log1p_table, table_index_lo, tag = 'tbl_lo',
                       debug = debug_multi)
    # Compute exponent e + tau - alpha, but first subtract the bias.
    if self.no_subnormal == False:
      tmp_eptau = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
      exponent = Subtraction(tmp_eptau, alpha, precision = int_prec)
    else:
      exponent = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
    #
    fp_exponent = Conversion(exponent, precision = self.precision,
                             tag = 'fp_exponent')

    Log.report(Log.Info, 'MDL polynomial approximation')
    if self.log_radix == EXP_1:
      sollya_function = log(1 + sollya.x)
    elif self.log_radix == 2:
      sollya_function = log2(1 + sollya.x)
    elif self.log_radix == 10:
      sollya_function = log10(1 + sollya.x)
    # ...
    if self.force_division == True: # rcp accuracy is 2^(-p)
      boundrcp = 2**(-self.precision.get_precision())
    else:
      boundrcp = 1.5 * 2**(-12)           # ... see Intel intrinsics guide
      if self.precision in [ML_Binary64]:
        if not self.processor.is_supported_operation(rcp_m):
          boundrcp = (1+boundrcp)*(1+2**(-24)) - 1
        else:
          boundrcp = 2**(-14)             # ... see Intel intrinsics guide
    arg_red_mag = boundrcp + 2**(-table_index_size-1) + boundrcp * 2**(-table_index_size-1)
    if self.no_rcp == False:
      approx_interval = Interval(-arg_red_mag, arg_red_mag)
    else:
      approx_interval = Interval(-2**(-int(self.tbl_index_size)+1),2**(-int(self.tbl_index_size)+1))
    max_eps = 2**-(2*(self.precision.get_field_size()))
    Log.report(Log.Info, "max acceptable error for polynomial = {}".format(float.hex(max_eps)))
    poly_degree = sup(
            guessdegree(
                sollya_function,
                approx_interval,
                max_eps,
                )
            )
    Log.report(Log.Info, "poly degree is ", poly_degree)
    if self.log_radix == EXP_1:
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(2, int(poly_degree) + 1), # Force 1st 2 coeffs to 0 and 1, resp.
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree - 1),
        approx_interval,
        sollya.absolute,
        0 + sollya._x_) # Force the first 2 coefficients to 0 and 1, resp.
    else: # ... == '2' or '10'
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(1, int(poly_degree) + 1), # Force 1st coeff to 0
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree),
        approx_interval,
        sollya.absolute,
        0) # Force the first coefficients to 0

    Log.report(Log.Info, str(poly_object))

    constant_precision = ML_SingleSingle if self.precision == ML_Binary32 \
            else ML_DoubleDouble if self.precision == ML_Binary64 \
            else None
    if is_cgpe_available():
        log1pu_poly = PolynomialSchemeEvaluator.generate_cgpe_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision, scheme_id = cgpe_scheme_idx
                )
    else:
        Log.report(Log.Warning,
                "CGPE not available, falling back to std poly evaluator")
        log1pu_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision
                )

    # XXX Dirty implementation of double-(self.precision) poly
    def dirty_poly_node_conversion(node, variable_h, variable_l, use_fma):
        return dirty_multi_node_expand(
          node, self.precision, mem_map={variable_h: (variable_h, variable_l)}, fma=use_fma)
    log1pu_poly_hi, log1pu_poly_lo = dirty_poly_node_conversion(log1pu_poly, u, ul,
                                                                use_fma=(self.no_fma == False))

    log1pu_poly_hi.set_attributes(tag = 'log1pu_poly_hi')
    log1pu_poly_lo.set_attributes(tag = 'log1pu_poly_lo')

    # Compute log(2) * (e + tau - alpha)
    if self.log_radix != 2: # 'e' or '10'
      log2e_hi, log2e_lo = Mul212(fp_exponent, log2_hi, log2_lo, 
                                  fma = (self.no_fma == False))
   
    # Add log1p(u)
    if self.log_radix != 2: # 'e' or '10'
      tmp_res_hi, tmp_res_lo = Add222(log2e_hi, log2e_lo,
                                      log1pu_poly_hi, log1pu_poly_lo)
    else:
      tmp_res_hi, tmp_res_lo = Add212(fp_exponent,
                                      log1pu_poly_hi, log1pu_poly_lo)

    # Add -log(2^(tau)/m) approximation retrieved by two table lookups
    logx_hi = Add122(tmp_res_hi, tmp_res_lo, tbl_hi, tbl_lo)[0]
    logx_hi.set_attributes(tag = 'logx_hi')

    scheme = Return(logx_hi, precision = self.precision)

    return scheme
Beispiel #18
0
    def generate_scheme(self):
        """Build and return the operation graph for a base-10 logarithm.

        The method declares the input variable ``x``, builds a two-column
        (high / low) table of ``log10`` of the processor's division-seed
        approximations, and assembles a tree of ``ConditionBlock`` nodes that
        dispatches between the special-input branches (negative, infinity,
        NaN, zero, subnormal, exactly 1.0, exponent equal to -1) and the
        generic table + polynomial evaluation.

        Returns:
            The root ``ConditionBlock`` of the generated scheme.

        NOTE(review): the argument reduction hard-codes ``ML_UInt64`` and
        ``ML_DoubleDouble``, so this presumably targets binary64 only --
        confirm before using it with another precision.
        """
        #func_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation: always forwards the
        # input variable and the function name
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # NOTE(review): the five nodes below (test_nan_or_inf, test_nan,
        # test_positive, test_signaling_nan, return_snan) are never referenced
        # by the returned scheme; equivalent predicates are rebuilt further
        # down (vx_nan_or_inf, vx_snan, ...). They are kept because node
        # construction side effects cannot be ruled out from this method alone.
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # log10(2) split into a high part, rounded to
        # field_size - (exponent_size + 1) bits, and a low part holding the
        # rounding residue. Presumably the shortened high part keeps
        # exponent * log2_hi exact -- TODO confirm.
        log2_hi_value = round(
            log10(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log10(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table: a dummy
        # DivisionSeed node is only used as a lookup key for the processor
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: row i holds the (high, low) split of
        # log10(inv_approx_table[i][0]); row 0 is forced to (0.0, 0.0)
        table_index_size = 7
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in table_index_range:
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            inv_value = inv_approx_table[i][0]
            value_high = round(
                log10(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log10(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        # determining log_table range (row 0 excluded), used below to bound
        # the table operands in the evaluation-error map
        high_index_function = lambda table, i: table[i][0]
        low_index_function = lambda table, i: table[i][1]
        table_high_interval = log_table.get_subset_interval(
            high_index_function, table_index_range)
        table_low_interval = log_table.get_subset_interval(
            low_index_function, table_index_range)

        def compute_log(_vx, exp_corr_factor=None):
            """Build the generic log10 evaluation graph for ``_vx``.

            ``exp_corr_factor``, when given, is added to the extracted
            exponent (used by the caller to undo a pre-scaling of the input).

            Returns the tuple ``(_result, _poly, _log_inv_lo, _log_inv_hi,
            _red_vx, _result_one, corr_exp)``.
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # the table index is made of the table_index_size leading bits of
            # the mantissa field; shift and mask are derived from
            # table_index_size so they cannot drift from the table dimensions
            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debuglx),
                self.precision.get_field_size() - table_index_size,
                debug=debuglx),
                                      2**table_index_size - 1,
                                      tag="table_index",
                                      debug=debuglld)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            # The seed is reinterpreted as a 64-bit unsigned integer, AND-ed
            # with -2 (i.e. its least significant bit is cleared), and
            # reinterpreted back as a floating-point value.
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            # index 0 uses 1.0 as reciprocal, matching the zeroed table row 0
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            #if not processor.is_supported_operation(arg_red_index):
            #    if self.precision != ML_Binary32:
            #        arg_red_index = DivisionSeed(Conversion(_vx_mant, precision = ML_Binary32), precision = ML_Binary32,
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-7
            # NOTE(review): this interval is centred on 1 although _red_vx is
            # (seed * mantissa) - 1, while the polynomial below is fitted on
            # [-inv_err, inv_err]. Looks like it should be centred on 0 --
            # confirm before relying on the evaluation-error report.
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_lftolx,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            print("building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log10(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() + 1))) + 1
            # the approximated function is spelled with sollya.x, like in the
            # guessdegree call above, so both calls use the same free variable
            global_poly_object = Polynomial.build_from_approximation(
                log10(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object  #.sub_poly(start_index = 1)

            print("generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            print(global_poly_object.get_sollya_object())

            # exponent (optionally corrected) converted to floating-point
            corr_exp = Conversion(
                _vx_exp if exp_corr_factor is None else _vx_exp +
                exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_ddtolx)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + ((_red_vx * _poly +
                                          (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            # alternative evaluation (_result_one) built from the hi/lo split
            # of the reduced argument; the caller currently does not return it
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part", debug=debug_lftolx)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_lftolx)
            #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part = ((red_vx_lo +
                                    (red_vx_lo * _poly +
                                     (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_lftolx)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            _result = exact_log2_hi_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one, corr_exp

        result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one, corr_exp = compute_log(
            vx)
        result.set_attributes(tag="result", debug=debug_lftolx)
        new_result_one.set_attributes(tag="new_result_one", debug=debug_lftolx)

        # building eval error map: each listed node is replaced by a free
        # variable carrying the given interval
        eval_error_map = {
            red_vx:
            Variable("red_vx",
                     precision=self.precision,
                     interval=red_vx.get_interval()),
            log_inv_hi:
            Variable("log_inv_hi",
                     precision=self.precision,
                     interval=table_high_interval),
            log_inv_lo:
            Variable("log_inv_lo",
                     precision=self.precision,
                     interval=table_low_interval),
            corr_exp:
            Variable("corr_exp_g",
                     precision=self.precision,
                     interval=self.precision.get_exponent_interval()),
        }
        # computing gappa error (informational only: the value is printed and
        # not used to build the scheme)
        if is_gappa_installed():
            poly_eval_error = self.get_eval_error(result, eval_error_map)
            print("poly_eval_error: ", poly_eval_error)

        # predicates selecting the special-input branches of the scheme
        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # exp=-1 case
        print("managing exp=-1 case")
        #red_vx_2 = arg_red_index * vx_mant * 0.5
        #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err)
        #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1
        #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute)
        #print "poly_object2: ", poly_object2.get_sollya_object()
        #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision)
        #poly2.set_attributes(tag = "poly2", debug = debug_lftolx)
        #result2 = (poly2 - log_inv_hi - log_inv_lo)

        # same terms as `result` with the exponent fixed to -1, regrouped so
        # that the two high parts are summed together first
        log_subtract = -log_inv_hi - log2_hi
        log_subtract.set_attributes(tag="log_subtract", debug=debug_lftolx)
        result2 = (log_subtract) + ((poly * red_vx) - (log_inv_lo + log2_lo))
        result2.set_attributes(tag="result2", debug=debug_lftolx)

        # subnormal inputs: the input is scaled by 2**100 and the extracted
        # exponent is corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _, _, _ = compute_log(vx * S2100,
                                                         exp_corr_factor=m100)

        # NOTE(review): the "close to 1.0" path below (result_one, cond_one)
        # is built but not referenced by the scheme; the ConditionBlock that
        # used cond_one is commented out at the end of pre_scheme.
        print("managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log10(1 + sollya.x) / sollya.x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log10(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        print("MDL scheme")
        pre_scheme = ConditionBlock(
            # x < 0: raise invalid, return a quiet NaN
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                # infinity returns +inf; NaN returns a quiet NaN, raising
                # invalid only when the input is a signaling NaN
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    # the zero test is nested under the subnormal test
                    # (presumably IsSubnormal also matches zero -- confirm):
                    # zero raises divide-by-zero and returns -inf, other
                    # subnormals use the pre-scaled evaluation
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)),
                    ConditionBlock(
                        # x == 1.0 returns +0
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        # exponent == -1 uses the regrouped sum, any other
                        # input uses the generic evaluation
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))
                        #ConditionBlock(cond_one,
                        #Return(new_result_one),
                        #ConditionBlock(exp_mone,
                        #Return(result2),
                        #Return(result)
                        #)
                        #)
                    ))))
        return pre_scheme
Beispiel #19
0
  def generate_scheme(self):
    
    def compute_reciprocal(vx):
      inv_seed = ReciprocalSeed(vx, precision = self.precision, tag = "inv_seed", debug = debug_multi)
      nr_1 = 2*inv_seed - vx*inv_seed*inv_seed
      nr_2 = 2*nr_1 - vx*nr_1*nr_1
      nr_3 =2*nr_2 - vx*nr_2*nr_2
      inv_vx = 2*nr_3 - vx*nr_3*nr_3
      
      return inv_vx
      
    vx = self.implementation.add_input_variable("x", self.get_input_precision()) 

    sollya_precision = self.precision.get_sollya_object()
    
    int_precision = {
        ML_Binary32 : ML_Int32,
        ML_Binary64 : ML_Int64
      }[self.precision]
    
    hi_precision = self.precision.get_field_size() - 12
    
    half_pi = round(pi/2, sollya_precision, sollya.RN)
    half_pi_cst = Constant(half_pi, precision = self.precision)
    
    test_sign = Comparison(vx, 0, specifier = Comparison.Less, precision = ML_Bool, debug = debug_multi, tag = "Is_Negative")
    neg_vx = -vx
    
    sign = Variable("sign", precision = self.precision, var_type = Variable.Local)
    abs_vx_std = Variable("abs_vx", precision = self.precision, var_type = Variable.Local)
    red_vx_std = Variable("red_vx", precision = self.precision, var_type = Variable.Local)
    const_index_std = Variable("const_index", precision = int_precision, var_type = Variable.Local)
    
    set_sign = Statement(
        ConditionBlock(test_sign,
          Statement(ReferenceAssign(abs_vx_std, neg_vx), ReferenceAssign(sign, -1)),
          Statement(ReferenceAssign(abs_vx_std, vx), ReferenceAssign(sign, 1))
      ))
      
    if self.precision is ML_Binary32:
      bound = 24
    else:
      bound = 53
      
    test_bound = Comparison(abs_vx_std, S2**bound, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound1 = Comparison(abs_vx_std, 39.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound2 = Comparison(abs_vx_std, 19.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound3 = Comparison(abs_vx_std, 11.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    test_bound4 = Comparison(abs_vx_std, 7.0/16.0, specifier = Comparison.GreaterOrEqual, precision = ML_Bool)#, debug = debug_multi, tag ="bound")
    
    
    
    set_bound = Return(sign*half_pi_cst)
    
    set_bound1 = Statement(
      ReferenceAssign(red_vx_std, -compute_reciprocal(abs_vx_std)),
      ReferenceAssign(const_index_std, 3)
    )
    
    set_bound2 = Statement(
      ReferenceAssign(red_vx_std, (abs_vx_std - 1.5)*compute_reciprocal(1 + 1.5*abs_vx_std)),
      ReferenceAssign(const_index_std, 2)
    )
    
    set_bound3 = Statement(
      ReferenceAssign(red_vx_std, (abs_vx_std - 1.0)*compute_reciprocal(abs_vx_std + 1.0)),
      ReferenceAssign(const_index_std, 1)
    )
    
    set_bound4 = Statement(
      ReferenceAssign(red_vx_std, (abs_vx_std - 0.5)*compute_reciprocal(1 + abs_vx_std*0.5)),
      ReferenceAssign(const_index_std, 0)
    )
    
    set_bound5 = Statement(
      ReferenceAssign(red_vx_std, abs_vx_std),
      ReferenceAssign(const_index_std, 4)
    )
    
    
    cons_table = ML_NewTable(dimensions = [5, 2], storage_precision = self.precision, tag = self.uniquify_name("cons_table"))
    coeff_table = ML_NewTable(dimensions = [11], storage_precision = self.precision, tag = self.uniquify_name("coeff_table"))
    
    cons_hi = round(atan(0.5), hi_precision, sollya.RN)
    cons_table[0][0] = cons_hi
    cons_table[0][1] = round(atan(0.5) - cons_hi, sollya_precision, sollya.RN)
    
    cons_hi = round(atan(1.0), hi_precision, sollya.RN)
    cons_table[1][0] = cons_hi
    cons_table[1][1] = round(atan(1.0) - cons_hi, sollya_precision, sollya.RN)
    
    cons_hi = round(atan(1.5), hi_precision, sollya.RN)
    cons_table[2][0] = cons_hi
    cons_table[2][1] = round(atan(1.5) - cons_hi, sollya_precision, sollya.RN)
    
    cons_hi = round(pi/2, hi_precision, sollya.RN)
    cons_table[3][0] = cons_hi
    cons_table[3][1] = round(pi/2 - cons_hi, sollya_precision, sollya.RN)
    
    cons_table[4][0] = 0.0
    cons_table[4][1] = 0.0
    
    coeff_table[0] = round(3.33333333333329318027e-01, sollya_precision, sollya.RN)
    coeff_table[1] = round(-1.99999999998764832476e-01, sollya_precision, sollya.RN)
    coeff_table[2] = round(1.42857142725034663711e-01, sollya_precision, sollya.RN)
    coeff_table[3] = round(-1.11111104054623557880e-01, sollya_precision, sollya.RN)
    coeff_table[4] = round(9.09088713343650656196e-02, sollya_precision, sollya.RN)
    coeff_table[5] = round(-7.69187620504482999495e-02, sollya_precision, sollya.RN)
    coeff_table[6] = round(6.66107313738753120669e-02, sollya_precision, sollya.RN)
    coeff_table[7] = round(-5.83357013379057348645e-02, sollya_precision, sollya.RN)
    coeff_table[8] = round(4.97687799461593236017e-02, sollya_precision, sollya.RN)
    coeff_table[9] = round(-3.65315727442169155270e-02, sollya_precision, sollya.RN)
    coeff_table[10] = round(1.62858201153657823623e-02, sollya_precision, sollya.RN)
    
    red_vx2 = red_vx_std*red_vx_std
    red_vx4 = red_vx2*red_vx2
    a0 = TableLoad(coeff_table, 0, precision = self.precision)
    a1 = TableLoad(coeff_table, 1, precision = self.precision)
    a2 = TableLoad(coeff_table, 2, precision = self.precision)
    a3 = TableLoad(coeff_table, 3, precision = self.precision)
    a4 = TableLoad(coeff_table, 4, precision = self.precision)
    a5 = TableLoad(coeff_table, 5, precision = self.precision)
    a6 = TableLoad(coeff_table, 6, precision = self.precision)
    a7 = TableLoad(coeff_table, 7, precision = self.precision)
    a8 = TableLoad(coeff_table, 8, precision = self.precision)
    a9 = TableLoad(coeff_table, 9, precision = self.precision)
    a10 = TableLoad(coeff_table, 10, precision = self.precision)
    
    poly_even = red_vx2*(a0 + red_vx4*(a2 + red_vx4*(a4 + red_vx4*(a6 + red_vx4*(a8 + red_vx4*a10)))))
    poly_odd = red_vx4*(a1 + red_vx4*(a3 + red_vx4*(a5 + red_vx4*(a7 + red_vx4*a9))))
    
    
    poly_even.set_attributes(tag = "poly_even", debug = debug_multi)
    poly_odd.set_attributes(tag = "poly_odd", debug = debug_multi)
    
    const_load_hi = TableLoad(cons_table, const_index_std, 0, tag = "const_load_hi", debug = debug_multi)
    const_load_lo = TableLoad(cons_table, const_index_std, 1, tag = "const_load_lo", debug = debug_multi)
    
    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, tag = "nan_or_inf", likely = False)
    test_nan = Test(vx, specifier = Test.IsNaN, debug = debug_multi, tag = "is_nan_test", likely = False)
    test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = debug_multi, tag = "inf_sign", likely = False)
                
        
    result = const_load_hi - ((red_vx_std*(poly_even + poly_odd) - const_load_lo) - red_vx_std)
    result.set_attributes(tag = "result", debug = debug_multi)
    
    std_scheme = Statement(
          sign,
          abs_vx_std,
          red_vx_std,
          const_index_std,
          set_sign,
          ConditionBlock(
            test_bound,
            set_bound,
            ConditionBlock(
              test_bound1,
              set_bound1,
              ConditionBlock(
                test_bound2,
                set_bound2,
                ConditionBlock(
                  test_bound3,
                  set_bound3,
                  ConditionBlock(
                    test_bound4,
                    set_bound4,
                    set_bound5
                  )
                )
              )
            )
          ),
          Return(sign*result)
        )
    infty_return = ConditionBlock(test_positive, Return(half_pi_cst), Return(-half_pi_cst))
    non_std_return = ConditionBlock(test_nan, Return(FP_QNaN(self.precision)), infty_return)
    scheme = ConditionBlock(test_NaN_or_inf, Statement(ClearException(), non_std_return), std_scheme)
    return scheme
Beispiel #20
0
    def generate_scheme(self):
        """Build the operation graph of a table-driven logarithm.

        The value is evaluated as a natural logarithm and rescaled at the very
        end when ``sollya.log(self.basis)`` differs from 1:

            log(x)      = log(m.2^e) = log(m.2^-t) + (e + t).log(2)
            log(m.2^-t) = log(m.r) - log(r.2^t)

        where ``m`` / ``e`` are the mantissa / exponent of the input, ``t`` is
        1 when ``m > sqrt(2)`` (0 otherwise, used to avoid a cancellation when
        e = -1 and m ~ 2) and ``r`` is derived from the reciprocal seed of
        ``m`` so that ``r.m ~ 1``.  ``log(r)`` and ``log(2.r)`` are tabulated
        as hi/lo pairs and ``log(m.r)`` is evaluated with a polynomial.

        Returns:
            The ConditionBlock tree which handles negative, infinite, NaN,
            zero and subnormal inputs around the standard evaluation path.
        """
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # natural logarithm; the change of basis is applied at the end of
        # compute_log
        log_f = sollya.log(sollya.x)

        # bit count used to round the "hi" part of every hi/lo constant pair
        hi_size = self.precision.get_field_size() - (
            self.precision.get_exponent_size() + 1)

        log2_hi_value = round(log_f(2), hi_size, RN)
        log2_lo_value = round(
            log_f(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: log_table holds log(seed), log_table_tho holds
        # log(2 * seed); entry 0 of both tables is forced to zero
        table_index_size = inv_approx_table.index_size
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table_tho = ML_NewTable(dimensions=[2**table_index_size, 2],
                                    storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        log_table_tho[0][0] = 0.0
        log_table_tho[0][1] = 0.0
        for i in table_index_range:
            inv_value = inv_approx_table[i]
            value_high = round(log_f(inv_value), hi_size, sollya.RN)
            value_low = round(
                log_f(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

            inv_value_tho = S2 * inv_value
            value_high_tho = round(log_f(inv_value_tho), hi_size, sollya.RN)
            value_low_tho = round(
                log_f(inv_value_tho) - value_high_tho, sollya_precision,
                sollya.RN)
            log_table_tho[i][0] = value_high_tho
            log_table_tho[i][1] = value_low_tho

        # The polynomial only depends on the format and on the reduction
        # error bound, so it is built once and shared by every compute_log
        # call instead of being re-approximated for each of them.
        inv_err = S2**-6
        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        approx_function = log(1 + sollya.x) / sollya.x
        poly_degree = sup(
            guessdegree(approx_function, approx_interval,
                        S2**-(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            approx_function, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        # presumably drops the constant coefficient, which is re-injected
        # below through the Add212(_red_vx, ...) step -- confirm against
        # Polynomial.sub_poly
        poly_object = global_poly_object.sub_poly(start_index=1)
        Log.report(Log.Info, poly_object.get_sollya_object())

        def compute_log(_vx, exp_corr_factor=None):
            """Return the node evaluating the logarithm of ``_vx``.

            ``exp_corr_factor``, when given, is added to the extracted
            exponent; it is used to compensate a pre-scaling of ``_vx``.
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_multi)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = inv_approx_table.index_function(_vx_mant)
            table_index.set_attributes(tag="table_index", debug=debug_multi)

            tho_cond = _vx_mant > Constant(sollya.sqrt(2),
                                           precision=self.precision)
            tho = Select(tho_cond,
                         Constant(1.0, precision=self.precision),
                         Constant(0.0, precision=self.precision),
                         precision=self.precision,
                         tag="tho",
                         debug=debug_multi)

            int_format = self.precision.get_integer_format()

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            seed = ReciprocalSeed(_vx_mant,
                                  precision=self.precision,
                                  tag="seed",
                                  debug=debug_multi,
                                  silent=True)
            # the seed is reinterpreted as an integer so that its lowest bit
            # can be cleared (mask -2), then cast back to floating-point
            masked_seed = BitLogicAnd(TypeCast(seed, precision=int_format),
                                      Constant(-2, precision=int_format),
                                      precision=int_format)
            pre_arg_red_index = TypeCast(masked_seed,
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_multi)
            _red_vx = arg_red_index * _vx_mant - 1.0
            # NOTE(review): this interval is centred on 1 whereas the
            # polynomial interval is centred on 0 -- confirm it is intended
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_multi,
                                   interval=red_interval)

            # tabulated log of the reduction factor, selected according to tho
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)

            Log.report(Log.Info, "generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_multi)

            if exp_corr_factor is None:
                unbiased_exp = _vx_exp
            else:
                unbiased_exp = _vx_exp + exp_corr_factor
            corr_exp = Conversion(unbiased_exp,
                                  precision=self.precision) + tho
            corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

            # (m0h, m0l) = _red_vx + _red_vx * _poly
            m0h, m0l = Mul211(_red_vx, _poly)
            m0h, m0l = Add212(_red_vx, m0h, m0l)
            m0h.set_attributes(tag="m0h", debug=debug_multi)
            m0l.set_attributes(tag="m0l")
            # (l0_h, l0_l) = corr_exp * log(2)
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
            rh.set_attributes(tag="rh0", debug=debug_multi)
            rl.set_attributes(tag="rl0", debug=debug_multi)
            rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
            rh.set_attributes(tag="rh", debug=debug_multi)
            rl.set_attributes(tag="rl", debug=debug_multi)

            # change of basis, skipped when log(basis) is exactly 1
            if sollya.log(self.basis) != 1.0:
                lbh = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis))
                lbl = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis) - lbh)
                rh, rl = Mul222(rh, rl, lbh, lbl)
            return rh

        result = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_multi)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debug_multi,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debug_multi,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debug_multi,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debug_multi,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debug_multi,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debug_multi,
                       tag="vx_zero")

        # NOTE: no dedicated exp=-1 evaluation path is built here; the log
        # message is kept so that the generator output is unchanged.
        Log.report(Log.Info, "managing exp=-1 case")

        # subnormal inputs are scaled by 2^100 before the evaluation and the
        # extracted exponent is corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal = compute_log(vx * S2100, exp_corr_factor=m100)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        invalid_return = Statement(ClearException(), Raise(ML_FPE_Invalid),
                                   Return(FP_QNaN(self.precision)))
        inf_return = Statement(
            ClearException(),
            Return(FP_PlusInfty(self.precision)),
        )
        # invalid is only raised when the NaN input is a signaling one
        nan_return = Statement(ClearException(),
                               ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                               Return(FP_QNaN(self.precision)))
        zero_return = Statement(
            ClearException(),
            Raise(ML_FPE_DivideByZero),
            Return(FP_MinusInfty(self.precision)),
        )
        # the zero test is nested under the subnormal test
        subnormal_return = ConditionBlock(vx_zero, zero_return,
                                          Return(result_subnormal))

        return ConditionBlock(
            neg_input,
            invalid_return,
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(vx_inf, inf_return, nan_return),
                ConditionBlock(vx_subnormal, subnormal_return,
                               Return(result))))
Beispiel #21
0
    def generate_scheme(self):
        """Build a fixed-point, table-based scheme for cosine or sine.

        The input is converted to a fixed-point format, split into a high
        part (used to index a fused cosine/sine table) and a low part.
        Polynomial approximations of cos(x) and sin(x)/x over the table step
        provide the coefficients forwarded, with the tabulated values and the
        low part, to ``self.generate_cos_scheme`` or
        ``self.generate_sin_scheme`` depending on ``self.cos_output``.

        Returns:
            A Statement returning the selected scheme converted to
            ``self.io_precisions[0]``.
        """
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "target: %s " % self.processor.target_name)

        # display parameter information
        Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
        Log.report(Log.Info, "input interval: %s " % self.input_interval)

        accuracy_goal = self.accuracy.get_goal()
        Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

        table_size_log = self.table_size_log
        integer_size = 31

        # the fixed-point scaling is derived from the largest input magnitude
        max_bound = sup(abs(self.input_interval))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table: column 0 is cosine, column 1 is sine
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")
        # filling table
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log
            fused_table[i][0] = cos(local_x)
            fused_table[i][1] = sin(local_x)

        # argument reduction evaluation scheme
        red_vx_precision = ML_Custom_FixedPoint_Format(
            integer_size - scaling_power, scaling_power, signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision
        output_precision = self.io_precisions[0]
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        # mask with the (table_size_log + 1) most significant bits of a
        # 32-bit word set
        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        index_shift = scaling_power - (table_size_log - max_bound_log)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         index_shift,
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        # format shared by the degree-2 coefficients of both polynomials
        coeff_precision = ML_Custom_FixedPoint_Format(-1, 32, signed=True)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation over one table step
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(sollya.x), [0, 2],
            [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C2 = Constant(cos_coeff_list[1][1], precision=coeff_precision)

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2
        # the degree is an integer: %d avoids the "2.000000e+00" rendering
        Log.report(Log.Info, "sine poly degree: %d" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        # NOTE(review): this precision list has more entries than the cosine
        # one for the same monomial list [0, 2] -- confirm it is intended
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S2 = Constant(sin_coeff_list[1][1], precision=coeff_precision)

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme_builder = self.generate_cos_scheme
        else:
            scheme_builder = self.generate_sin_scheme
        scheme = scheme_builder(computation_precision, tabulated_cos,
                                tabulated_sin, coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=output_precision)

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        return Statement(Return(result))
Beispiel #22
0
    def generate_scheme(self):
        """Build the evaluation scheme of a table-based 2^x.

        The input is split into ``vx_int = floor(x * 2^index_size)`` and the
        remainder ``vx_frac = x - vx_int * 2^-index_size``.  ``vx_int`` is in
        turn split into a high part, inserted as the result exponent, and a
        low part indexing a hi/lo table of ``2^(k * 2^-index_size)``.
        ``2^vx_frac - 1`` is evaluated with a polynomial.

        Returns:
            A Statement returning +infinity when the overflow test on the
            high integer part holds, the reconstructed value otherwise.
        """
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        index_size = 3
        # number of table steps per unit interval
        table_step_count = 2**index_size

        approx_interval = Interval(0.0, 2**-index_size)
        error_goal_approx = 2**-(self.precision.get_precision())
        int_precision = self.precision.get_integer_format()

        vx_int = Floor(vx * table_step_count,
                       precision=self.precision,
                       tag="vx_int",
                       debug=debug_multi)
        vx_frac = vx - (vx_int * 2**-index_size)
        vx_frac.set_attributes(tag="vx_frac",
                               debug=debug_multi,
                               unbreakable=True)
        poly_degree = sup(
            guessdegree(2**(sollya.x), approx_interval, error_goal_approx)) + 1
        precision_list = [1] + [self.precision] * (poly_degree)

        vx_integer = Conversion(vx_int,
                                precision=int_precision,
                                tag="vx_integer",
                                debug=debug_multi)
        vx_int_hi = BitLogicRightShift(vx_integer,
                                       Constant(index_size),
                                       tag="vx_int_hi",
                                       debug=debug_multi)
        vx_int_lo = Modulo(vx_integer,
                           table_step_count,
                           tag="vx_int_lo",
                           debug=debug_multi)
        pow_exp = ExponentInsertion(Conversion(vx_int_hi,
                                               precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # table of 2^(k / table_step_count); column 0 holds the low part and
        # column 1 the high part of each value
        sollya_format = self.precision.get_sollya_object()
        exp2_table = ML_NewTable(dimensions=[2 * table_step_count, 2],
                                 storage_precision=self.precision,
                                 tag=self.uniquify_name("exp2_table"))
        for i in range(2 * table_step_count):
            input_value = i - table_step_count if i >= table_step_count else i
            exp2_value = SollyaObject(2)**((input_value) * 2**-index_size)
            hi_value = round(exp2_value, sollya_format, RN)
            lo_value = round(exp2_value - hi_value, sollya_format, RN)
            exp2_table[i][0] = lo_value
            exp2_table[i][1] = hi_value

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        # reported through Log (as the rest of the generator does) rather
        # than with a Python 2 only print statement
        Log.report(
            Log.Info, "poly_approx_error: %s %s" %
            (poly_approx_error, float(log2(poly_approx_error))))

        # the constant coefficient is skipped here (sub_poly with
        # start_index=1); the reconstruction below adds hi/lo back explicitly
        poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object.sub_poly(start_index=1),
            vx_frac,
            unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        # the low integer part is offset by table_step_count to index the
        # table
        table_index = Addition(vx_int_lo,
                               Constant(table_step_count,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        lo_value_load = TableLoad(exp2_table,
                                  table_index,
                                  0,
                                  tag="lo_value_load",
                                  debug=debug_multi)
        hi_value_load = TableLoad(exp2_table,
                                  table_index,
                                  1,
                                  tag="hi_value_load",
                                  debug=debug_multi)

        # (hi + lo) * (1 + poly) * 2^vx_int_hi, with the largest term added
        # last
        result = (hi_value_load +
                  (hi_value_load * poly +
                   (lo_value_load + lo_value_load * poly))) * pow_exp
        ov_flag = Comparison(vx_int_hi,
                             Constant(self.precision.get_emax(),
                                      precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        return Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))
Beispiel #23
0
    def generate_scheme(self):
        """Build the hardware scheme of a floating-point reciprocal operator.

        The input signal ``x`` is split into mantissa and raw exponent; a
        table indexed by the mantissa MSBs seeds an approximation of
        ``1 / mantissa`` which is refined by four successive calls to
        ``generate_NR_iteration``. The refined value is then normalised,
        rounded according to the ``rnd_mode`` input signal and packed with
        the input sign and the negated exponent into the ``vr_out`` output
        signal.

        When ``self.pipelined`` is set, a ``reset`` input is declared and a
        new pipeline stage is started after each refinement call. Node
        construction order relative to those ``start_new_stage()`` calls is
        therefore kept exactly as is.

        Returns:
            list: a single-element list holding ``self.implementation``.
        """
        def get_virtual_cst(prec, value, language):
            """Encode @p value through the base format's integer coding and
            emit it as a constant of the support format of @p prec."""
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        # I/O format: self.precision values carried on a std_logic_vector of
        # the same bit size, constants being encoded by get_virtual_cst
        io_precision = VirtualFormat(base_format=self.precision,
                                     support_format=ML_StdLogicVectorFormat(
                                         self.precision.get_bit_size()),
                                     get_cst=get_virtual_cst)

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        if self.pipelined:
            self.implementation.add_input_signal("reset", ML_StdLogic)

        vx_precision = self.precision

        # p: mantissa width as reported by the format (the extracted mantissa
        # below is declared on p bits)
        p = vx_precision.get_mantissa_size()
        exp_size = vx_precision.get_exponent_size()

        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        mant_vx_precision = ML_StdLogicVectorFormat(p)
        # fixed-point precision for operand's exponent
        exp_fixed_precision = fixed_point(exp_size, 0, signed=False)

        # mantissa extraction
        mant_vx = TypeCast(MantissaExtraction(vx,
                                              precision=mant_vx_precision,
                                              tag="extracted_mantissa"),
                           precision=fixed_point(1, p - 1, signed=False),
                           debug=debug_fixed,
                           tag="mant_vx")
        # exponent extraction
        exp_vx = TypeCast(RawExponentExtraction(vx,
                                                precision=exp_vx_precision,
                                                tag="exp_vx"),
                          precision=exp_fixed_precision)

        approx_index_size = 8
        approx_precision = fixed_point(
            2,
            approx_index_size,
        )

        # selecting table index from input mantissa MSBs
        # (the approx_index_size bits located just below bit p - 1)
        tab_index = SubSignalSelection(mant_vx,
                                       p - 2 - approx_index_size + 1,
                                       p - 2,
                                       tag="tab_index")

        # declaring reciprocal approximation table
        inv_approx_table = ML_NewTable(dimensions=[2**approx_index_size],
                                       storage_precision=approx_precision,
                                       tag="inv_approx_table")
        for i in range(2**approx_index_size):
            # entry i approximates 1 / (1 + i * 2^-approx_index_size)
            # (S2 is presumably the sollya constant 2 -- defined elsewhere)
            num_input = 1 + i * S2**-approx_index_size
            table_value = io_precision.get_base_format().round_sollya_object(
                1 / num_input)
            inv_approx_table[i] = table_value

        # extracting initial reciprocal approximation
        inv_approx_value = TableLoad(inv_approx_table,
                                     tab_index,
                                     precision=approx_precision,
                                     tag="inv_approx_value",
                                     debug=debug_fixed)

        # NOTE(review): pre_it0_input, it0_input and it1_precision are built
        # but never consumed below (every iteration is fed mant_vx directly);
        # they are kept so that the sequence of constructed nodes is unchanged
        #inv_approx_value = TypeCast(inv_approx_value, precision = approx_precision)
        pre_it0_input = zext(
            SubSignalSelection(mant_vx,
                               p - 1 - approx_index_size,
                               p - 1,
                               tag="it0_input"), 1)
        it0_input = TypeCast(pre_it0_input,
                             precision=approx_precision,
                             tag="it0_input",
                             debug=debug_fixed)

        it1_precision = RTL_FixedPointFormat(
            2,
            2 * approx_index_size,
            support_format=ML_StdLogicVectorFormat(2 + 2 * approx_index_size))

        # Each generate_NR_iteration call receives (integer size, fraction
        # size) pairs describing its intermediate fixed-point formats; the
        # helper itself is defined outside this method.
        final_approx = generate_NR_iteration(
            mant_vx,
            inv_approx_value,
            (2, approx_index_size * 2),  # mult precision
            (-3, 2 * approx_index_size),  # error precision
            (2, approx_index_size * 3),  # new-approx mult
            (2, approx_index_size * 2),  # new approx precision
            self.implementation,
            pipelined=0,  #1 if self.pipelined else 0,
            tag_suffix="_first")

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            mant_vx,
            final_approx,
            # mult precision
            (2, approx_index_size * 3),
            # error precision
            (-6, approx_index_size * 3),
            # approx mult precision
            (2, approx_index_size * 3),
            # new approx precision
            (2, approx_index_size * 3),
            self.implementation,
            pipelined=1 if self.pipelined else 0,
            tag_suffix="_second")

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            mant_vx,
            final_approx,
            # mult-precision
            (2, 2 * p - 1),
            # error precision
            # floor division: the integer-size field must remain an int
            # (plain `/` yields a float on Python 3); `//` gives the same
            # value the former `/` produced on Python 2 integers
            (-(3 * approx_index_size) // 2, approx_index_size * 2 + p - 1),
            # mult approx mult precision
            (2, approx_index_size * 2 + p - 1),
            # approx precision
            (2, p),
            self.implementation,
            pipelined=2 if self.pipelined else 0,
            tag_suffix="_third")

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            mant_vx,
            final_approx,
            # mult precision
            (2, 2 * p),
            # error precision (floor division, see previous iteration)
            (-(4 * p) // 5, 2 * p),
            # approx mult precision
            (2, 2 * p),
            # new approx precision
            (2, 2 * p),
            self.implementation,
            pipelined=2 if self.pipelined else 0,
            tag_suffix="_last")

        # Inserting post-input pipeline stage
        if self.pipelined: self.implementation.start_new_stage()

        final_approx.set_attributes(tag="final_approx", debug=debug_hex)

        last_approx_norm = final_approx

        # bit located at offset 0 from the fixed point of the approximation
        offset_bit = BitSelection(last_approx_norm,
                                  FixedPointPosition(
                                      last_approx_norm,
                                      0,
                                      align=FixedPointPosition.FromPointToLSB),
                                  tag="offset_bit",
                                  debug=debug_std)

        # when this bit is 1 the mantissa window is used as is and the
        # exponent is left untouched; otherwise the window is moved one bit
        # towards the LSB and 1 is subtracted from the exponent (see res_exp)
        not_decrement = offset_bit

        # p-bit window covering offsets [0 .. -(p - 1)] from the point
        final_approx_reduced = SubSignalSelection(
            final_approx,
            FixedPointPosition(final_approx,
                               -(p - 1),
                               align=FixedPointPosition.FromPointToLSB),
            FixedPointPosition(final_approx,
                               0,
                               align=FixedPointPosition.FromPointToLSB),
            precision=fixed_point(p, 0, signed=False))
        # same window moved one bit lower: offsets [-1 .. -p]
        final_approx_reduced_shifted = SubSignalSelection(
            final_approx,
            FixedPointPosition(final_approx,
                               -p,
                               align=FixedPointPosition.FromPointToLSB),
            FixedPointPosition(final_approx,
                               -1,
                               align=FixedPointPosition.FromPointToLSB),
            precision=fixed_point(p, 0, signed=False))

        # unrounded p-bit mantissa field (its top bit is dropped later by
        # the conversion building unrounded_mant_field_nomsb)
        unrounded_mant_field = Select(
            equal_to(not_decrement, 1),
            final_approx_reduced,
            final_approx_reduced_shifted,
            precision=fixed_point(p, 0, signed=False),
            tag="unrounded_mant_field",
            debug=debug_hex,
        )

        def get_bit(optree, bit_index):
            """Select the bit of @p optree located @p bit_index positions
            below its fixed point."""
            bit_sel = BitSelection(
                optree,
                FixedPointPosition(optree,
                                   -bit_index,
                                   align=FixedPointPosition.FromPointToLSB))
            return bit_sel

        # least significant bit of the selected mantissa window
        mant_lsb = Select(
            equal_to(not_decrement, 1),
            get_bit(final_approx, p - 1),
            get_bit(final_approx, p),
            precision=ML_StdLogic,
            tag="mant_lsb",
            debug=debug_std,
        )
        # first bit below the selected mantissa window
        round_bit = Select(
            equal_to(not_decrement, 1),
            get_bit(final_approx, p),
            get_bit(final_approx, p + 1),
            precision=ML_StdLogic,
            tag="round_bit",
            debug=debug_std,
        )
        # every bit below the round bit, down to index 0
        sticky_bit_input = Select(
            equal_to(not_decrement, 1),
            SubSignalSelection(final_approx,
                               0,
                               FixedPointPosition(
                                   final_approx,
                                   -(p + 1),
                                   align=FixedPointPosition.FromPointToLSB),
                               precision=None,
                               tag="sticky_bit_input"),
            SubSignalSelection(final_approx,
                               0,
                               FixedPointPosition(
                                   final_approx,
                                   -(p + 2),
                                   align=FixedPointPosition.FromPointToLSB),
                               precision=None,
                               tag="sticky_bit_input"),
        )
        # sticky bit is 0 only if all the bits below the round bit are 0
        sticky_bit = Select(Equal(sticky_bit_input, Constant(0,
                                                             precision=None)),
                            Constant(0, precision=ML_StdLogic),
                            Constant(1, precision=ML_StdLogic),
                            precision=ML_StdLogic,
                            tag="sticky_bit",
                            debug=debug_std)
        # TODO: manage leading digit (in case of subnormal result)

        # real_exp = exp_vx - bias
        # - real_exp = bias - exp_vx
        # encoded negated exp = bias - exp_vx + bias = 2 * bias - exp_vx
        fp_io_precision = io_precision.get_base_format()

        # NOTE(review): the leading minus sign presumably compensates for
        # get_bias() returning a negative value -- confirm against the
        # format definition
        neg_exp = -2 * fp_io_precision.get_bias() - exp_vx
        neg_exp.set_attributes(tag="neg_exp", debug=debug_fixed)
        # subtract 1 from the exponent when the shifted window was selected
        res_exp = Subtraction(neg_exp,
                              Select(equal_to(not_decrement, 1),
                                     Constant(0,
                                              precision=exp_fixed_precision),
                                     Constant(1,
                                              precision=exp_fixed_precision),
                                     precision=None,
                                     tag="exp_offset",
                                     debug=debug_fixed),
                              tag="res_exp",
                              debug=debug_fixed)
        # keep the exp_size bits at offsets [0 .. exp_size - 1] of res_exp
        res_exp_field = SubSignalSelection(
            res_exp,
            FixedPointPosition(res_exp,
                               0,
                               align=FixedPointPosition.FromPointToLSB,
                               tag="res_exp_field LSB"),
            FixedPointPosition(res_exp,
                               exp_size - 1,
                               align=FixedPointPosition.FromPointToLSB,
                               tag="res_exp_field MSB"),
            precision=None,
            tag="res_exp_field",
            # debug=debug_fixed
        )

        # sign of the result is derived from the input through CopySign
        result_sign = CopySign(vx, precision=ML_StdLogic)

        # NOTE(review): exp_mant_precision and rnd_mode_is_rz are not
        # consumed below (no increment term exists for rnd_rz)
        exp_mant_precision = ML_StdLogicVectorFormat(
            io_precision.get_bit_size() - 1)

        rnd_mode_is_rne = Equal(rnd_mode, rnd_rne, precision=ML_Bool)
        rnd_mode_is_ru = Equal(rnd_mode, rnd_ru, precision=ML_Bool)
        rnd_mode_is_rd = Equal(rnd_mode, rnd_rd, precision=ML_Bool)
        rnd_mode_is_rz = Equal(rnd_mode, rnd_rz, precision=ML_Bool)

        # rounding increment, one term per case:
        #  - rnd_rne: round bit and sticky bit both set
        #  - rnd_rne: round bit set, sticky clear, mantissa LSB set
        #  - rnd_ru : sign bit 0 and (round or sticky) set
        #  - rnd_rd : sign bit 1 and (round or sticky) set
        round_incr = Conversion(
            logical_or_reduce([
                logical_and_reduce([
                    rnd_mode_is_rne,
                    equal_to(round_bit, 1),
                    equal_to(sticky_bit, 1)
                ]),
                logical_and_reduce([
                    rnd_mode_is_rne,
                    equal_to(round_bit, 1),
                    equal_to(sticky_bit, 0),
                    equal_to(mant_lsb, 1)
                ]),
                logical_and_reduce([
                    rnd_mode_is_ru,
                    equal_to(result_sign, 0),
                    LogicalOr(equal_to(round_bit, 1),
                              equal_to(sticky_bit, 1),
                              precision=ML_Bool)
                ]),
                logical_and_reduce([
                    rnd_mode_is_rd,
                    equal_to(result_sign, 1),
                    LogicalOr(equal_to(round_bit, 1),
                              equal_to(sticky_bit, 1),
                              precision=ML_Bool)
                ]),
            ]),
            precision=fixed_point(1, 0, signed=False),
            tag="round_incr",
            #debug=debug_fixed
        )

        # Precision for result without sign
        unsigned_result_prec = fixed_point((p - 1) + exp_size, 0)

        # reduce the p-bit mantissa field to the (p - 1)-bit stored field
        unrounded_mant_field_nomsb = Conversion(
            unrounded_mant_field,
            precision=fixed_point(p - 1, 0, signed=False),
            tag="unrounded_mant_field_nomsb",
            debug=debug_hex)

        # the increment is added to the concatenated exponent/mantissa word,
        # so a carry out of the mantissa field lands in the exponent field
        pre_rounded_unsigned_result = Concatenation(
            res_exp_field,
            unrounded_mant_field_nomsb,
            precision=unsigned_result_prec,
            tag="pre_rounded_unsigned_result")
        unsigned_result_rounded = Addition(pre_rounded_unsigned_result,
                                           round_incr,
                                           precision=unsigned_result_prec,
                                           tag="unsigned_result")

        # final word: sign bit followed by the rounded exponent/mantissa
        vr_out = TypeCast(Concatenation(
            result_sign,
            TypeCast(unsigned_result_rounded,
                     precision=ML_StdLogicVectorFormat(p - 1 + exp_size)),
            precision=ML_StdLogicVectorFormat(io_precision.get_bit_size())),
                          precision=io_precision,
                          debug=debug_hex,
                          tag="vr_out")

        self.implementation.add_output_signal("vr_out", vr_out)

        return [self.implementation]
Beispiel #24
0
  def generate_scheme(self):
    """Build the hardware scheme of a floating-point reciprocal operator.

    The mantissa and raw exponent of input signal ``x`` are extracted; a
    table indexed by the mantissa MSBs provides a first approximation of
    ``1 / mantissa`` which is refined by four successive calls to
    ``generate_NR_iteration`` fed with increasingly wide slices of the
    mantissa. The refined value is normalised, incremented according to its
    round / sticky / least bits and packed with the input sign and the
    negated exponent into the ``vr_out`` output signal.

    When ``self.pipelined`` is set, a ``reset`` input is declared and a new
    pipeline stage is started after each refinement call.

    Returns:
        list: a single-element list holding ``self.implementation``.
    """

    # NOTE(review): this helper is not referenced anywhere in this method
    # (io_precision is built through HdlVirtualFormat below)
    def get_virtual_cst(prec, value, language):
      """Encode @p value through the base format's integer coding and emit
      it as a constant of the support format of @p prec."""
      return prec.get_support_format().get_cst(
        prec.get_base_format().get_integer_coding(value, language))

    # I/O format derived from self.precision through HdlVirtualFormat
    io_precision = HdlVirtualFormat(self.precision)

    # declaring main input variable
    vx = self.implementation.add_input_signal("x", io_precision)

    if self.pipelined:
      self.implementation.add_input_signal("reset", ML_StdLogic)

    vx_precision = self.precision

    # p: mantissa width as reported by the format (the extracted mantissa
    # below is declared on p bits)
    p = vx_precision.get_mantissa_size()

    exp_vx_precision     = ML_StdLogicVectorFormat(vx_precision.get_exponent_size())
    mant_vx_precision    = ML_StdLogicVectorFormat(p)

    # mantissa extraction
    mant_vx = MantissaExtraction(vx, precision = mant_vx_precision, tag = "mant_vx")
    # exponent extraction
    exp_vx = RawExponentExtraction(vx, precision = exp_vx_precision, tag = "exp_vx", debug = debug_dec)

    approx_index_size = 8

    # fixed-point format of the table entries, carried on a vector of
    # approx_index_size + 2 bits
    approx_precision = RTL_FixedPointFormat(
      2, approx_index_size,
      support_format = ML_StdLogicVectorFormat(approx_index_size + 2),
    )

    # selecting table index from input mantissa MSBs
    # (the approx_index_size bits located just below bit p - 1)
    tab_index = SubSignalSelection(mant_vx, p-2 - approx_index_size +1, p-2, tag = "tab_index")

    # declaring reciprocal approximation table
    inv_approx_table = ML_NewTable(dimensions = [2**approx_index_size], storage_precision = approx_precision, tag = "inv_approx_table")
    for i in range(2**approx_index_size):
      # entry i approximates 1 / (1 + i * 2^-approx_index_size)
      # (S2 is presumably the sollya constant 2 -- defined elsewhere)
      num_input = 1 + i * S2**-approx_index_size
      table_value = io_precision.get_base_format().round_sollya_object(1 / num_input)
      inv_approx_table[i] = table_value

    # extracting initial reciprocal approximation
    inv_approx_value = TableLoad(inv_approx_table, tab_index, precision = approx_precision, tag = "inv_approx_value", debug = debug_fixed)


    # first iteration input: top (approx_index_size + 1) mantissa bits,
    # extended by one bit with zext and reinterpreted as approx_precision
    #inv_approx_value = TypeCast(inv_approx_value, precision = approx_precision)
    pre_it0_input = zext(SubSignalSelection(mant_vx, p-1 - approx_index_size , p-1, tag = "it0_input"), 1)
    it0_input = TypeCast(pre_it0_input, precision = approx_precision, tag = "it0_input", debug = debug_fixed)

    it1_precision = RTL_FixedPointFormat(
      2,
      2 * approx_index_size,
      support_format = ML_StdLogicVectorFormat(2 + 2 * approx_index_size)
    )

    # second iteration input: top (2 * approx_index_size + 1) mantissa bits,
    # extended by one bit with zext and reinterpreted as it1_precision
    pre_it1_input = zext(SubSignalSelection(mant_vx, p - 1 - 2 * approx_index_size, p -1, tag = "it1_input"), 1)
    it1_input = TypeCast(pre_it1_input, precision = it1_precision, tag = "it1_input", debug = debug_fixed)

    # Each generate_NR_iteration call receives (integer size, fraction size)
    # pairs describing its intermediate fixed-point formats; the helper
    # itself is defined outside this method.
    final_approx = generate_NR_iteration(
      it0_input,
      inv_approx_value,
      (2, approx_index_size * 2), # mult precision
      (-3, 2 * approx_index_size), # error precision
      (2, approx_index_size * 3), # new-approx mult
      (2, approx_index_size * 2), # new approx precision
      self.implementation,
      pipelined = 0, #1 if self.pipelined else 0,
      tag_suffix = "_first"
    )

    # Inserting post-input pipeline stage
    if self.pipelined: self.implementation.start_new_stage()

    final_approx = generate_NR_iteration(
      it1_input,
      final_approx,
      # mult precision
      (2, approx_index_size * 3),
      # error precision
      (-6, approx_index_size * 3),
      # approx mult precision
      (2, approx_index_size * 3),
      # new approx precision
      (2, approx_index_size * 3),
      self.implementation,
      pipelined = 1 if self.pipelined else 0,
      tag_suffix = "_second"
    )

    # Inserting post-input pipeline stage
    if self.pipelined: self.implementation.start_new_stage()

    # last two iterations use the full mantissa, extended by one bit
    last_it_precision = RTL_FixedPointFormat(
      2,
      p - 1,
      support_format=ML_StdLogicVectorFormat(2 + p - 1)
    )

    pre_last_it_input = zext(mant_vx, 1)
    last_it_input = TypeCast(
        pre_last_it_input, precision=last_it_precision,
        tag="last_it_input", debug=debug_fixed
    )

    final_approx = generate_NR_iteration(
      last_it_input,
      final_approx,
      # mult-precision
      (2, 2 * p - 1),
      # error precision (int() keeps the integer-size field an int)
      (int(- (3 * approx_index_size) / 2), approx_index_size * 2 + p - 1),
      # mult approx mult precision
      (2, approx_index_size * 2 + p - 1),
      # approx precision
      (2, p),
      self.implementation,
      pipelined = 2 if self.pipelined else 0,
      tag_suffix = "_third"
    )

    # Inserting post-input pipeline stage
    if self.pipelined: self.implementation.start_new_stage()

    # argument order is the same as in the previous calls: mult precision,
    # error precision, approx mult precision, new approx precision
    final_approx = generate_NR_iteration(
      last_it_input,
      final_approx,
      (2, 2 * p),
      (int(-(4 * p)/5), 2 * p),
      (2, 2 * p),
      (2, 2 * p),
      self.implementation,
      pipelined = 2 if self.pipelined else 0,
      tag_suffix = "_last"
    )

    # Inserting post-input pipeline stage
    if self.pipelined: self.implementation.start_new_stage()


    final_approx.set_attributes(tag = "final_approx", debug = debug_fixed)

    # bit indexes to select mantissa from final_approximation
    # (pre_mant_size is capped by the number of fraction bits available)
    pre_mant_size = min(self.precision.get_field_size(), final_approx.get_precision().get_frac_size()) 
    final_approx_frac_msb_index = final_approx.get_precision().get_frac_size() - 1
    final_approx_frac_lsb_index = final_approx.get_precision().get_frac_size() - pre_mant_size

    # bit at index frac_size of the approximation: when it is 1 the mantissa
    # window is used as is and 0 is added to the exponent; otherwise the
    # window is moved one bit lower and -1 is added to the exponent
    cst_index = Constant(final_approx.get_precision().get_frac_size(), precision = ML_Integer)
    final_approx_casted = TypeCast(final_approx, precision = ML_StdLogicVectorFormat(final_approx.get_precision().get_bit_size()))
    not_decrement = final_approx_casted[cst_index] 
    not_decrement.set_attributes(precision = ML_StdLogic, tag = "not_decrement", debug = debug_std)
    logic_1 = Constant(1, precision = ML_StdLogic)

    # unrounded mantissa field: pre_mant_size fraction bits, taken either
    # from the top of the fraction or one bit lower
    result = Select(
      Comparison( not_decrement, logic_1, specifier = Comparison.Equal, precision = ML_Bool),
      SubSignalSelection(
        TypeCast(
          final_approx, 
          precision = ML_StdLogicVectorFormat(final_approx.get_precision().get_bit_size())
        ),
        final_approx_frac_lsb_index,
        final_approx_frac_msb_index,
      ),
      SubSignalSelection(
        TypeCast(
          final_approx, 
          precision = ML_StdLogicVectorFormat(final_approx.get_precision().get_bit_size())
        ),
        final_approx_frac_lsb_index - 1,
        final_approx_frac_msb_index - 1,
      ),
      precision = ML_StdLogicVectorFormat(pre_mant_size),
      tag = "result"
    )
    def get_bit(optree, bit_index):
      """Select the bit of vector @p optree located at index @p bit_index."""
      bit_index_cst = Constant(bit_index, precision = ML_Integer)
      bit_sel = VectorElementSelection(
        optree,
        bit_index_cst,
        precision = ML_StdLogic)
      return bit_sel

    # least significant bit of the selected mantissa window
    least_bit = Select(
      Comparison(not_decrement, logic_1, specifier = Comparison.Equal, precision = ML_Bool),
      get_bit(final_approx_casted, final_approx_frac_lsb_index),
      get_bit(final_approx_casted, final_approx_frac_lsb_index - 1),
      precision = ML_StdLogic,
      tag = "least_bit",
      debug = debug_std,
    )
    # first bit below the selected mantissa window
    round_bit = Select(
      Comparison(not_decrement, logic_1, specifier = Comparison.Equal, precision = ML_Bool),
      get_bit(final_approx_casted, final_approx_frac_lsb_index - 1),
      get_bit(final_approx_casted, final_approx_frac_lsb_index - 2),
      precision = ML_StdLogic,
      tag = "round_bit",
      debug = debug_std,
    )
    # every bit below the round bit; the shifted case is one bit narrower
    # and is extended with zext so that both branches have the same width
    sticky_bit_input = Select( 
      Comparison(not_decrement, logic_1, specifier = Comparison.Equal, precision = ML_Bool),
      SubSignalSelection(
        final_approx_casted, 0, 
        final_approx_frac_lsb_index - 2, 
        precision = ML_StdLogicVectorFormat(final_approx_frac_lsb_index - 1)
      ),
      zext(
        SubSignalSelection(
          final_approx_casted, 0, 
          final_approx_frac_lsb_index - 3, 
          precision = ML_StdLogicVectorFormat(final_approx_frac_lsb_index - 2)
        ),
        1
      ),
      precision = ML_StdLogicVectorFormat(final_approx_frac_lsb_index - 1)
    )
    # sticky bit is 0 only if all the bits below the round bit are 0
    sticky_bit = Select(
      Equal(
        sticky_bit_input, 
        Constant(0, precision = ML_StdLogicVectorFormat(final_approx_frac_lsb_index - 1))
      ),
      Constant(0, precision = ML_StdLogic),
      Constant(1, precision = ML_StdLogic),
      precision = ML_StdLogic,
      tag = "sticky_bit",
      debug = debug_std
    )
    # if mantissa require extension
    # (rzext presumably pads on the LSB side -- helper defined elsewhere)
    if pre_mant_size < self.precision.get_mantissa_size() - 1:
      result = rzext(result, self.precision.get_mantissa_size() - 1 - pre_mant_size) 

    res_mant_field = result

    # real_exp = exp_vx - bias
    # - real_exp = bias - exp_vx
    # encoded negated exp = bias - exp_vx + bias = 2 * bias - exp_vx
    fp_io_precision = io_precision.get_base_format()
    # exponent arithmetic is performed on two extra bits
    exp_op_precision = ML_StdLogicVectorFormat(fp_io_precision.get_exponent_size() + 2)
    # NOTE(review): the leading minus sign presumably compensates for
    # get_bias() returning a negative value -- confirm against the format
    biasX2 = Constant(- 2 * fp_io_precision.get_bias(), precision = exp_op_precision)

    neg_exp = Subtraction(
      SignCast(
        biasX2,
        specifier = SignCast.Unsigned,
        precision = get_unsigned_precision(exp_op_precision)
      ),
      SignCast(
        zext(exp_vx, 2),
        specifier = SignCast.Unsigned,
        precision = get_unsigned_precision(exp_op_precision),
      ),
      precision = exp_op_precision,
      tag = "neg_exp",
      debug = debug_dec
    )
    # drop the two extra bits: keep bits [0 .. exponent_size - 1]
    neg_exp_field = SubSignalSelection(
      neg_exp,
      0,
      fp_io_precision.get_exponent_size() - 1,
      precision = ML_StdLogicVectorFormat(fp_io_precision.get_exponent_size())
    )


    # add 0 when not_decrement is 1, -1 otherwise
    res_exp = Addition(
      SignCast(
        neg_exp_field,
        precision = get_unsigned_precision(exp_vx.get_precision()),
        specifier = SignCast.Unsigned
      ),
      SignCast(
        Select(
          Comparison(not_decrement, logic_1, specifier = Comparison.Equal, precision = ML_Bool),
          Constant(0, precision = exp_vx_precision),
          Constant(-1, precision = exp_vx_precision),
          precision = exp_vx_precision
        ),
        precision = get_unsigned_precision(exp_vx_precision),
        specifier = SignCast.Unsigned
      ),
      precision = exp_vx_precision,
      tag = "result_exp",
      debug = debug_dec
    )

    # sign of the result is derived from the input through CopySign
    res_sign = CopySign(vx, precision = ML_StdLogic)

    exp_mant_precision = ML_StdLogicVectorFormat(io_precision.get_bit_size() - 1)

    # increment when the round bit is set and either the sticky bit or the
    # least bit is set (round-to-nearest, ties-to-even condition)
    round_incr = Select(
      LogicalAnd(
        Equal(round_bit, Constant(1, precision = ML_StdLogic)),
        LogicalOr(
          Equal(sticky_bit, Constant(1, precision = ML_StdLogic)),
          Equal(least_bit, Constant(1, precision = ML_StdLogic)),
          precision = ML_Bool,
        ),
        precision = ML_Bool,
      ),
      Constant(1, precision = ML_StdLogic),
      Constant(0, precision = ML_StdLogic),
      tag = "round_incr",
      precision = ML_StdLogic,
      debug = debug_std
    )

    # exponent and mantissa fields are concatenated before the increment, so
    # a carry out of the mantissa field lands in the exponent field
    exp_mant = Concatenation(
      res_exp,
      res_mant_field,
      precision = exp_mant_precision
    )

    # NOTE(review): the specifier is passed positionally here whereas every
    # other SignCast call in this method uses the `specifier=` keyword --
    # confirm that both forms are equivalent
    exp_mant_rounded = Addition(
      SignCast(
        exp_mant,
        SignCast.Unsigned,
        precision = get_unsigned_precision(exp_mant_precision)
      ),
      round_incr,
      precision = exp_mant_precision,
      tag = "exp_mant_rounded"
    )
    # final word: sign bit followed by the rounded exponent/mantissa
    vr_out = TypeCast(
      Concatenation(
        res_sign,
        exp_mant_rounded,
        precision = ML_StdLogicVectorFormat(io_precision.get_bit_size())
      ),
      precision = io_precision,
      debug = debug_hex,
      tag = "vr_out"
    )

    self.implementation.add_output_signal("vr_out", vr_out)

    return [self.implementation]
Beispiel #25
0
    def generate_scalar_scheme(self, vx):
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
                Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        index_size = 5

        comp_lo = (vx < 0)
        comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
        sign = Select(comp_lo, -1, 1, precision = self.precision)

        # as sinh is an odd function, we can simplify the input to its absolute
        # value once the sign has been extracted
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2)/2**index_size
        inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
        inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2    for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx    = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
        log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
        log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
        k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag = "r", debug = debug_multi)

        if is_gappa_installed():
                r_eval_error = self.get_eval_error(r_hi, variable_copy_map =
                    {
                        vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
                        k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
                    })
                Log.report(Log.Verbose, "r_eval_error: ", r_eval_error)

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi)
        k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi)

        exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value    = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
            pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
            neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # sinh(x) = 1/2 * (exp(x) - exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)    = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)    = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function)

        Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error))))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision)
        poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

        poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision)
        poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

        table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))

        # 2^(h-1)
        pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi)
        # 2^(-h-1)
        pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi)

        hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)


        pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

        neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

        result = Addition(
            Subtraction(
                pos_exp,
                neg_exp,
                precision=self.precision,
            ),
            hi_terms,
            precision=self.precision,
            tag="result",
            debug=debug_multi
        )

        # ov_value
        ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD)
        ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater)

        # main scheme
        scheme = Statement(
            Return(
                Select(
                    ov_flag,
                    sign*FP_PlusInfty(self.precision),
                    sign*result
                )))

        return scheme
Beispiel #26
0
    def generate_scheme(self):
        """Build the operation graph evaluating log(1 + x) for the input x.

        The input is decomposed as x = m.2^e; t = 2^-e + m is then split
        again into m_t.2^e_t and a reciprocal seed r ~ 1 / m_t is used to
        bring the argument of the polynomial close to zero:

            log(1 + x) = (e + e_t).log(2) + P(r.t' - 1) - log(r)

        log(r) is tabulated on two limbs from the reciprocal approximation
        table exposed by the processor, and the final summation is performed
        in an extended (multi-word) precision before being converted back
        to self.precision.

        Returns:
            The ConditionBlock which filters inputs below -1, infinities
            and NaNs before returning the generic result.
        """
        vx = self.implementation.add_input_variable("x", self.precision)
        sollya_precision = self.get_input_precision().sollya_object

        # bit size of the "hi" limbs: their precision is reduced to provide
        # exact operation when multiplied by an exponent value
        hi_bitsize = self.precision.get_field_size() - (self.precision.get_exponent_size() + 1)

        # 2-limb approximation of log(2)
        log2_hi_value = round(log(2), hi_bitsize, sollya.RN)
        log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        # (the dummy seed node is only used to look the implementation up)
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_rcp_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(dummy_rcp_seed, language = None, table_getter = lambda self: self.approx_table_map)

        # table creation
        table_index_size = inv_approx_table.index_size
        log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
        # storing accurate logarithm approximation of value returned
        # by the fast reciprocal operation: column 0 is the high limb,
        # column 1 the low limb
        for i in range(0, 2**table_index_size):
            inv_value = inv_approx_table[i]
            log_inv_value = log(inv_value)
            value_high = round(log_inv_value, hi_bitsize, sollya.RN)
            value_low = round(log_inv_value - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        # special input predicates
        neg_input = Comparison(vx, -1, likely=False, precision=ML_Bool, specifier=Comparison.Less, debug=debug_multi, tag="neg_input")
        vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, precision=ML_Bool, debug=debug_multi, tag="nan_or_inf")
        vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan")
        vx_inf    = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf")

        # for x = m.2^e, such that e >= 0
        #
        # log(1+x) = log(1 + m.2^e)
        #          = log(2^e . 2^-e + m.2^e)
        #          = log(2^e . (2^-e + m))
        #          = log(2^e) + log(2^-e + m)
        #          = e . log(2) + log (2^-e + m)
        #
        # t = (2^-e + m)
        # t = m_t . 2^e_t
        # r ~ 1 / m_t   => r.m_t ~ 1, i.e. r.m_t - 1 ~ 0
        #
        # t' = t . 2^-e_t
        #    = 2^-e-e_t + m . 2^-e_t
        #
        # if e >= 0, then 2^-e <= 1, then 1 <= m + 2^-e <= 3
        # r = m_r . 2^e_r
        #
        # log(1+x) = e.log(2) + log(r . 2^e_t . 2^-e_t . (2^-e + m) / r)
        #          = e.log(2) + log(r . 2^(-e-e_t) + r.m.2^-e_t) + e_t . log(2)- log(r)
        #          = (e+e_t).log(2) + log(r . t') - log(r)
        #          = (e+e_t).log(2) + P_log1p(r . t' - 1) - log(r)
        #

        # argument reduction
        # NOTE(review): the mantissa node carries the tag "vx", which looks
        # like a copy of the input name -- confirm before renaming it
        m = MantissaExtraction(vx, tag="vx", precision=self.precision, debug=debug_multi)
        e = ExponentExtraction(vx, tag="e", precision=int_precision, debug=debug_multi)

        # 2^-e
        TwoMinusE = ExponentInsertion(-e, tag="Two_minus_e", precision=self.precision, debug=debug_multi)
        t = Addition(TwoMinusE, m, precision=self.precision, tag="t", debug=debug_multi)

        m_t = MantissaExtraction(t, tag="m_t", precision=self.precision, debug=debug_multi)
        e_t = ExponentExtraction(t, tag="e_t", precision=int_precision, debug=debug_multi)

        # 2^(-e-e_t)
        TwoMinusEEt = ExponentInsertion(-e-e_t, tag="Two_minus_e_et", precision=self.precision)
        # 2^(-e_t)
        TwoMinusEt = ExponentInsertion(-e_t, tag="Two_minus_et", precision=self.precision, debug=debug_multi)

        # r ~ 1 / m_t
        rcp_mt = ReciprocalSeed(m_t, tag="rcp_mt", precision=self.precision, debug=debug_multi)

        # log_table is indexed by the most significant bits of m_t
        table_index = generic_mantissa_msb_index_fct(table_index_size, m_t)
        table_index.set_attributes(tag="table_index", debug=debug_multi)

        log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi)
        log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi)

        inv_err = S2**-6 # TODO: link to target DivisionSeed precision

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        # the polynomial approximates log1p(x) / x; the missing factor x is
        # re-introduced below by the multiplication building ext_poly
        approx_fct = sollya.log1p(sollya.x) / (sollya.x)
        poly_degree = sup(guessdegree(approx_fct, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
        Log.report(Log.Debug, "poly_degree is {}", poly_degree)
        poly_object = Polynomial.build_from_approximation(approx_fct, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)

        # extended format used for the accurate parts of the computation
        EXT_PRECISION_MAP = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble,
            ML_SingleSingle: ML_TripleSingle,
            ML_DoubleDouble: ML_TripleDouble
        }
        if self.precision not in EXT_PRECISION_MAP:
            Log.report(Log.Error, "no extended precision available for {}", self.precision)

        ext_precision = EXT_PRECISION_MAP[self.precision]

        # pre_rtp = r . 2^(-e-e_t) + m .2^-e_t
        pre_rtp = Addition(
            rcp_mt * TwoMinusEEt,
            Multiplication(
                rcp_mt,
                Multiplication(
                    m,
                    TwoMinusEt,
                    precision=self.precision,
                    tag="pre_mult",
                    debug=debug_multi,
                ),
                precision=ext_precision,
                tag="pre_mult2",
                debug=debug_multi,
            ),
            precision=ext_precision,
            tag="pre_rtp",
            debug=debug_multi
        )
        # reduced argument: r . t' - 1, computed in extended precision
        pre_red_vx = Addition(
            pre_rtp,
            -1,
            precision=ext_precision,
        )

        red_vx = Conversion(pre_red_vx, precision=self.precision, tag="red_vx", debug=debug_multi)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vx, unified_precision=self.precision)

        poly.set_attributes(tag="poly", debug=debug_multi)
        Log.report(Log.Debug, "{}", poly_object.get_sollya_object())

        # (e + e_t) as a floating-point value
        fp_e = Conversion(e + e_t, precision=self.precision, tag="fp_e", debug=debug_multi)

        # red_vx . P(red_vx) ~ log1p(red_vx)
        ext_poly = Multiplication(red_vx, poly, precision=ext_precision)

        # (e + e_t).log(2) - log(r) + log1p(red_vx)
        pre_result = Addition(
            Addition(
                fp_e * log2_hi,
                fp_e * log2_lo,
                precision=ext_precision
            ),
            Addition(
                Addition(
                    -log_inv_hi,
                    -log_inv_lo,
                    precision=ext_precision
                ),
                ext_poly,
                precision=ext_precision
            ),
            precision=ext_precision
        )

        result = Conversion(pre_result, precision=self.precision, tag="result", debug=debug_multi)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        scheme = ConditionBlock(neg_input,
            # x < -1: invalid operation, a quiet NaN is returned
            Statement(
                ClearException(),
                Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision))
            ),
            ConditionBlock(vx_nan_or_inf,
                ConditionBlock(vx_inf,
                    # infinity reaching this branch is returned as +inf
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    # NaN input: only a signaling NaN raises invalid
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan,
                            Raise(ML_FPE_Invalid)
                        ),
                        Return(FP_QNaN(self.precision))
                    )
                ),
                Return(result)
            )
        )
        return scheme
Beispiel #27
0
def generic_poly_split(offset_fct, indexing, target_eps, coeff_precision, vx):
    """ generate the meta approximation for @p offset_fct over several
        intervals defined by @p indexing object
        For each sub-interval, a polynomial approximation with
        maximal_error @p target_eps is tabulated, and evaluated using format
        @p coeff_precision.
        The input variable is @p vx

        @param offset_fct callable which maps a sub-interval offset to the
               function to be approximated on that sub-interval
        @param indexing object providing split_num, get_offseted_sub_list(),
               get_offseted_sub_interval() and get_index_node()
        @param target_eps error target forwarded to guessdegree
        @param coeff_precision format of the tabulated coefficients and of
               the evaluation FMAs
        @param vx input operation node
        @return operation node evaluating, with Horner's rule, the
                polynomial selected by the index of @p vx at
                (vx - sub-interval offset) """
    # computing degree for a different polynomial approximation on each
    # sub-interval
    poly_degree_list = [
        int(sup(guessdegree(offset_fct(offset), sub_interval, target_eps)))
        for offset, sub_interval in indexing.get_offseted_sub_list()
    ]
    poly_max_degree = max(poly_degree_list)

    # tabulating polynomial coefficients on split_num sub-interval of interval
    # (each row is padded with zeros up to poly_max_degree)
    poly_table = ML_NewTable(
        dimensions=[indexing.split_num, poly_max_degree + 1],
        storage_precision=coeff_precision,
        const=True)
    offset_table = ML_NewTable(dimensions=[indexing.split_num],
                               storage_precision=coeff_precision,
                               const=True)
    max_error = 0.0

    for sub_index in range(indexing.split_num):
        poly_degree = poly_degree_list[sub_index]
        offset, approx_interval = indexing.get_offseted_sub_interval(sub_index)
        offset_table[sub_index] = offset
        if poly_degree == 0:
            # managing constant approximation separately since it seems
            # to break sollya
            local_approx = coeff_precision.round_sollya_object(
                offset_fct(offset)(inf(approx_interval)))
            poly_table[sub_index][0] = local_approx
            for monomial_index in range(1, poly_max_degree + 1):
                poly_table[sub_index][monomial_index] = 0
            approx_error = sollya.infnorm(
                offset_fct(offset) - local_approx, approx_interval)

        else:
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                offset_fct(offset), poly_degree,
                [coeff_precision] * (poly_degree + 1), approx_interval,
                sollya.relative)

            for monomial_index in range(poly_max_degree + 1):
                if monomial_index <= poly_degree:
                    poly_table[sub_index][
                        monomial_index] = poly_object.coeff_map[monomial_index]
                else:
                    poly_table[sub_index][monomial_index] = 0
        # max_error is only reported below, it does not alter the scheme
        max_error = max(approx_error, max_error)

    Log.report(Log.Debug, "max approx error is {}", max_error)

    # indexing function: derive index from input @p vx value
    poly_index = indexing.get_index_node(vx)
    poly_index.set_attributes(tag="poly_index", debug=debug_multi)

    # building polynomial evaluation scheme
    offset = TableLoad(offset_table,
                       poly_index,
                       precision=coeff_precision,
                       tag="offset",
                       debug=debug_multi)
    # Horner accumulator initialised with the highest degree coefficient
    poly = TableLoad(poly_table,
                     poly_index,
                     poly_max_degree,
                     precision=coeff_precision,
                     tag="poly_init",
                     debug=debug_multi)
    red_vx = Subtraction(vx,
                         offset,
                         precision=vx.precision,
                         tag="red_vx",
                         debug=debug_multi)
    # the leading coefficient is already in the accumulator, so the
    # recurrence starts at degree (poly_max_degree - 1); starting at
    # poly_max_degree would apply the leading coefficient twice
    for monomial_index in range(poly_max_degree - 1, -1, -1):
        coeff = TableLoad(poly_table,
                          poly_index,
                          monomial_index,
                          precision=coeff_precision,
                          tag="poly_%d" % monomial_index,
                          debug=debug_multi)
        poly = FMA(red_vx, poly, coeff, precision=coeff_precision)

    return poly
Beispiel #28
0
    def generate_scheme(self):
        """Build the operation graph approximating the cubic root of x.

        An initial approximation is assembled from three factors:
        a tabulated cbrt of the mantissa (indexed by its index_size leading
        bits), a tabulated cbrt of 2^(e % 3) and 2^(e / 3), where e is the
        exponent of the input. It is then refined by num_iteration
        Newton-Raphson steps which use the inverse of the input.

        Returns:
            A Statement returning the last refined approximation.
        """
        # declaring main input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        # declaring approximation parameters
        index_size = 6
        num_iteration = 8

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        def cbrt_newton_iteration(current_approx, input_inverse):
            """Return one Newton-Raphson refinement of current_approx,
            input_inverse being the inverse of the value whose cubic
            root is approximated."""
            # Cubic root of A is approximated by a Newton-Raphson iteration
            # on f(x) = 1 - A / x^3
            # x_n+1 = 4/3 * x_n - x_n^4 / (3 * A)
            #       = x_n / 3 * (1 - x_n^3 / A) + x_n

            # x_n^3
            approx_triple = Multiplication(
                current_approx, Multiplication(current_approx, current_approx))

            # 1 - x_n^3 / A
            diff = FMSN(approx_triple, input_inverse,
                        Constant(1, precision=self.precision))
            # x_n / 3 * diff + x_n
            return FMA(
                Multiplication(
                    current_approx,
                    Constant(1 / 3.0, precision=self.precision),
                ), diff, current_approx)

        reduced_vx = MantissaExtraction(vx, precision=self.precision)

        int_precision = self.precision.get_integer_format()

        # cbrt(1 + i / 2^index_size) for each possible mantissa index
        cbrt_approx_table = ML_NewTable(
            dimensions=[2**index_size, 1],
            storage_precision=self.precision,
            tag=self.uniquify_name("cbrt_approx_table"))
        for i in range(2**index_size):
            input_value = 1 + i / SollyaObject(2**index_size)

            cbrt_approx = cbrt(input_value)
            cbrt_approx_table[i][0] = round(cbrt_approx,
                                            self.precision.get_sollya_object(),
                                            RN)

        # The reduced exponent is expected within [-3, 2], so cbrt(2^k) is
        # tabulated for k in this range (entry i holds k = i - 3)
        cbrt_mod_table = ML_NewTable(dimensions=[6, 1],
                                     storage_precision=self.precision,
                                     tag=self.uniquify_name("cbrt_mod_table"))
        for i in range(6):
            input_value = SollyaObject(2)**(i - 3)
            cbrt_mod_table[i][0] = round(cbrt(input_value),
                                         self.precision.get_sollya_object(),
                                         RN)

        # table index: index_size bits extracted from the mantissa encoding
        vx_int = TypeCast(reduced_vx, precision=int_precision)
        mask = BitLogicRightShift(vx_int,
                                  self.precision.get_precision() - index_size,
                                  precision=int_precision)
        mask = BitLogicAnd(mask,
                           Constant(2**index_size - 1,
                                    precision=int_precision),
                           precision=int_precision,
                           tag="table_index",
                           debug=debug_multi)
        table_index = mask

        # exponent split into quotient and remainder of the division by 3
        exp_vx = ExponentExtraction(vx, precision=int_precision, tag="exp_vx")
        exp_vx_third = Division(exp_vx,
                                Constant(3, precision=int_precision),
                                precision=int_precision,
                                tag="exp_vx_third")
        exp_vx_mod = Modulo(exp_vx,
                            Constant(3, precision=int_precision),
                            precision=int_precision,
                            tag="exp_vx_mod",
                            debug=debug_multi)

        # offset on modulo to make sure table index is positive
        exp_vx_mod = exp_vx_mod + 3

        # NOTE(review): this column index has no explicit precision while
        # the seed load below uses ML_Int32 -- confirm this is intended
        cbrt_mod = TableLoad(cbrt_mod_table,
                             exp_vx_mod,
                             Constant(0),
                             tag="cbrt_mod")

        init_approx = Multiplication(
            Multiplication(
                # approx cbrt(mantissa)
                TableLoad(cbrt_approx_table,
                          table_index,
                          Constant(0, precision=ML_Int32),
                          tag="seed",
                          debug=debug_multi),
                # approx cbrt(2^(e%3))
                cbrt_mod,
                tag="init_mult",
                debug=debug_multi,
                precision=self.precision),
            # 2^(e/3)
            ExponentInsertion(exp_vx_third,
                              precision=self.precision,
                              tag="exp_vx_third",
                              debug=debug_multi),
            tag="init_approx",
            debug=debug_multi,
            precision=self.precision)

        # 1 / x, shared by every Newton-Raphson step
        inverse_vx = Division(Constant(1, precision=self.precision), vx)

        current_approx = init_approx

        for i in range(num_iteration):
            current_approx = cbrt_newton_iteration(current_approx, inverse_vx)
            current_approx.set_attributes(tag="approx_%d" % i,
                                          debug=debug_multi)

        # the last iterate is re-tagged as the final result
        result = current_approx
        result.set_attributes(tag="result", debug=debug_multi)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(Return(result))

        return scheme
Beispiel #29
0
    def generate_scheme(self):
        vx = self.implementation.add_input_variable("x",
                                                    self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # testing special value inputs
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")
        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        # if input is a signaling NaN, raise an invalid exception and returns
        # a quiet NaN
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = self.precision.get_integer_format()

        # log2(vx)
        # r = vx_mant
        # e = vx_exp
        # vx reduced to r in [1, 2[
        # log2(vx) = log2(r * 2^e)
        #          = log2(r) + e
        #
        ## log2(r) is approximated by
        #  log2(r) = log2(inv_seed(r) * r / inv_seed(r)
        #          = log2(inv_seed(r) * r) - log2(inv_seed(r))
        # inv_seed(r) in ]1/2, 1] => log2(inv_seed(r)) in ]-1, 0]
        #
        # inv_seed(r) * r ~ 1
        # we can easily tabulate -log2(inv_seed(r))
        #

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)
        # table creation
        table_index_size = 7
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("inv_table"))
        # value for index 0 is set to 0.0
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1
            #print inv_approx_table[i][0], inv_value
            inv_value = inv_approx_table[i][0]
            value_high_bitsize = self.precision.get_field_size() - (
                self.precision.get_exponent_size() + 1)
            value_high = round(log2(inv_value), value_high_bitsize, sollya.RN)
            value_low = round(
                log2(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # The main table is indexed by the 7 most significant bits
            # of the mantissa
            table_index = inv_approx_table.index_function(_vx_mant)
            table_index.set_attributes(tag="table_index", debug=debuglld)

            # argument reduction
            # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
            inv_err = S2**-inv_approx_table.index_size
            red_interval = Interval(1 - inv_err, 1 + inv_err)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            Log.report(Log.Verbose, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log2(1 + sollya.x) / sollya.x, approx_interval, S2**
                    -(self.precision.get_field_size() * 1.1))) + 1
            sollya.settings.display = sollya.hexadecimal
            global_poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                log2(1 + sollya.x) / sollya.x,
                poly_degree, [self.precision] * (poly_degree + 1),
                approx_interval,
                sollya.absolute,
                error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm(
                    p - f, ai))
            Log.report(
                Log.Info, "poly_degree={}, approx_error={}".format(
                    poly_degree, approx_error))
            poly_object = global_poly_object.sub_poly(start_index=1, offset=1)
            #poly_object = global_poly_object.sub_poly(start_index=0,offset=0)

            Attributes.set_default_silent(True)
            Attributes.set_default_rounding_mode(ML_RoundToNearest)

            Log.report(Log.Verbose, "generating polynomial evaluation scheme")
            pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly = FMA(pre_poly, _red_vx,
                        global_poly_object.get_cst_coeff(0, self.precision))
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            Log.report(
                Log.Verbose, "sollya global_poly_object: {}".format(
                    global_poly_object.get_sollya_object()))
            Log.report(
                Log.Verbose, "sollya poly_object: {}".format(
                    poly_object.get_sollya_object()))

            corr_exp = _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor

            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()

            pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision)
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            _result = exact_log2_hi_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx

        result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_lftolx)

        # specific input value predicate
        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="vx_snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="vx_inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")

        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # Specific handling for the case exp == -1
        # log2(x) = log2(m) - 1
        #
        # as m in [1, 2[, log2(m) in [0, 1[
        # if r is close to 2, a catastrophic cancellation can occur
        #
        # r = seed(m)
        # log2(x) = log2(seed(m) * m / seed(m)) - 1
        #         = log2(seed(m) * m) - log2(seed(m)) - 1
        #
        # for m really close to 2 => seed(m) = 0.5
        #     => log2(x) = log2(0.5 * m)
        #                =
        result_exp_m1 = (-log_inv_hi - 1.0) + FMA(poly, red_vx, -log_inv_lo)
        result_exp_m1.set_attributes(tag="result_exp_m1", debug=debug_lftolx)

        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _ = compute_log(vx * S2100,
                                                   exp_corr_factor=m100)
        result_subnormal.set_attributes(tag="result_subnormal",
                                        debug=debug_lftolx)

        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log(1 + x) / x, approx_interval_one, S2**
                -(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Statement(ClearException(), result_subnormal,
                                  Return(result_subnormal))),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result_exp_m1),
                                       Return(result))))))
        scheme = Statement(result, pre_scheme)
        return scheme
Beispiel #30
0
  def generate_scheme(self):
    """Build the operation graph for this function and return its root.

    A single input ``x`` of format ``self.precision`` is declared on
    ``self.implementation``.  The returned scheme dispatches, in order:

    * ``x < -1``: clear exceptions, raise invalid, return a quiet NaN;
    * ``x`` infinite: return +infinity;
    * ``x`` NaN: return a quiet NaN (raising invalid for a signaling NaN);
    * ``x`` subnormal: return ``x`` unchanged;
    * exponent of ``x`` below ``ctz_exp_limit`` (-7): return ``x * P(x)``
      where ``P`` approximates ``log1p(x) / x`` on ``[-2**-7, 2**-7]``;
    * otherwise: a table-driven evaluation on ``1 + x`` using the
      processor's reciprocal-seed table, a table of hi/lo ``log`` values
      and a polynomial approximating ``log(1 + r) / r``.

    The approximated functions and the ``x + 1`` argument suggest this is
    a log1p implementation; the enclosing class is not visible from here.

    NOTE(review): ``x == -1`` is not matched by ``neg_input`` (strict
    comparison) and has no dedicated branch -- confirm the generic path
    yields the intended result and exception flags for that input.

    Returns:
        The top-level ``ConditionBlock`` describing the whole scheme.
    """
    vx = self.implementation.add_input_variable("x", self.precision)
    sollya_precision = self.get_input_precision().sollya_object

    # Bit count used when rounding the "high" half of each hi/lo pair
    # below (log(2) and the log table entries); presumably chosen so the
    # product with an integer exponent stays exact -- TODO confirm.
    hi_part_size = self.precision.get_field_size() - (self.precision.get_exponent_size() + 1)

    # log(2) split into a high part and a low correction term
    log2_hi_value = round(log(2), hi_part_size, sollya.RN)
    log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

    log2_hi = Constant(log2_hi_value, precision = self.precision)
    log2_lo = Constant(log2_lo_value, precision = self.precision)

    int_precision = self.precision.get_integer_format()

    # retrieving processor inverse approximation table: a ReciprocalSeed
    # node on a placeholder variable is only used as a lookup key
    dummy_var = Variable("dummy", precision = self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    # table creation: row i holds (high, low) parts of log(inv_approx_table[i]);
    # row 0 is forced to zero
    table_index_size = 7
    log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
    log_table[0][0] = 0.0
    log_table[0][1] = 0.0
    for i in range(1, 2**table_index_size):
        inv_value = inv_approx_table[i]
        log_inv_value = log(inv_value)
        value_high = round(log_inv_value, hi_part_size, sollya.RN)
        value_low = round(log_inv_value - value_high, sollya_precision, sollya.RN)
        log_table[i][0] = value_high
        log_table[i][1] = value_low

    vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    # case close to 0: ctz
    # selected when the exponent of x is below ctz_exp_limit; the result is
    # x * P(x) with P approximating log1p(x) / x on the matching interval
    ctz_exp_limit = -7
    ctz_cond = vx_exp < ctz_exp_limit
    ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

    ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
    ctz_poly_object = Polynomial.build_from_approximation(log1p(sollya.x)/sollya.x, ctz_poly_degree, [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision = self.precision)
    ctz_poly.set_attributes(tag = "ctz_poly", debug = debug_lftolx)

    ctz_result = vx * ctz_poly

    # special input predicates (all flagged as unlikely)
    neg_input = Comparison(vx, -1, likely = False, specifier = Comparison.Less, debug = debugd, tag = "neg_input")
    vx_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debugd, tag = "nan_or_inf")
    vx_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False, debug = debugd, tag = "snan")
    vx_inf  = Test(vx, specifier = Test.IsInfty, likely = False, debug = debugd, tag = "inf")
    vx_subnormal = Test(vx, specifier = Test.IsSubnormal, likely = False, debug = debugd, tag = "vx_subnormal")

    # case away from 0.0
    pre_vxp1 = vx + 1.0
    pre_vxp1.set_attributes(tag = "pre_vxp1", debug = debug_lftolx)
    pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag = "pre_vxp1_exp", debug = debugd)

    # when the exponent of (x + 1) exceeds 2**(exponent_size - 2), an
    # exponent of -500 is inserted as scaling factor, otherwise 0
    cm500 = Constant(-500, precision = ML_Int32)
    c0 = Constant(0, precision = ML_Int32)
    cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
    scaling_factor_exp = Select(cond_scaling, cm500, c0)
    scaling_factor = ExponentInsertion(scaling_factor_exp, precision = self.precision, tag = "scaling_factor")

    vxp1 = pre_vxp1 * scaling_factor
    vxp1.set_attributes(tag = "vxp1", debug = debug_lftolx)
    vxp1_exp = ExponentExtraction(vxp1, tag = "vxp1_exp", debug = debugd)

    vxp1_inv = ReciprocalSeed(vxp1, precision = self.precision, tag = "vxp1_inv", debug = debug_lftolx, silent = True)

    vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision = self.precision, tag = "vxp1_dirty_inv", debug = debug_lftolx)

    # 7-bit table index (same width as table_index_size) taken from the
    # integer view of vxp1, shifted right by (field_size - 7)
    vxp1_as_int = TypeCast(vxp1, precision = int_precision, debug = debuglx)
    vxp1_shifted = BitLogicRightShift(vxp1_as_int, self.precision.get_field_size() - 7, debug = debuglx)
    table_index = BitLogicAnd(vxp1_shifted, 0x7f, tag = "table_index", debug = debuglx)

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    # The seed is reinterpreted as an integer, ANDed with -2, then cast back.
    # NOTE(review): ML_UInt64 is hard-coded here regardless of
    # self.precision -- confirm this is valid for non-64-bit formats.
    vxp1_inv_bits = TypeCast(vxp1_inv, precision = ML_UInt64)
    vxp1_inv_masked = BitLogicAnd(vxp1_inv_bits, Constant(-2, precision = ML_UInt64), precision = ML_UInt64)
    pre_arg_red_index = TypeCast(vxp1_inv_masked, precision = self.precision, tag = "pre_arg_red_index", debug = debug_lftolx)
    # index 0 uses the pure power-of-two inverse instead of the seed
    arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag = "arg_red_index", debug = debug_lftolx)

    # reduced argument: computed from the scaled (x + 1) when scaling is
    # active, otherwise from x directly as (inv * x - 1) + inv
    red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
    red_vxp1.set_attributes(tag = "red_vxp1", debug = debug_lftolx)

    log_inv_lo = TableLoad(log_table, table_index, 1, tag = "log_inv_lo", debug = debug_lftolx)
    log_inv_hi = TableLoad(log_table, table_index, 0, tag = "log_inv_hi", debug = debug_lftolx)

    inv_err = S2**-6 # TODO: link to target DivisionSeed precision

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
    global_poly_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
    # only the sub-polynomial starting at index 1 is evaluated; red_vxp1
    # itself is added back separately in step1 below
    poly_object = global_poly_object.sub_poly(start_index = 1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision = self.precision)
    _poly.set_attributes(tag = "poly", debug = debug_lftolx)
    Log.report(Log.Info, global_poly_object.get_sollya_object())

    corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision = self.precision)

    # exponent contribution, split over the hi/lo parts of log(2)
    exact_log2_hi_exp = - corr_exp * log2_hi
    exact_log2_hi_exp.set_attributes(tag = "exact_log2_hi_exp", debug = debug_lftolx, prevent_optimization = True)

    exact_log2_lo_exp = - corr_exp * log2_lo
    exact_log2_lo_exp.set_attributes(tag = "exact_log2_lo_exp", debug = debug_lftolx)

    # final accumulation: low-order terms first, high-order terms last
    init = exact_log2_lo_exp  - log_inv_lo
    init.set_attributes(tag = "init", debug = debug_lftolx, prevent_optimization = True)
    fma0 = (red_vxp1 * _poly + init)
    fma0.set_attributes(tag = "fma0", debug = debug_lftolx)
    # step0 aliases fma0 (same object), so both set_attributes calls
    # apply to one node
    step0 = fma0
    step0.set_attributes(tag = "step0", debug = debug_lftolx)
    step1 = step0 + red_vxp1
    step1.set_attributes(tag = "step1", debug = debug_lftolx, prevent_optimization = True)
    step2 = -log_inv_hi + step1
    step2.set_attributes(tag = "step2", debug = debug_lftolx, prevent_optimization = True)
    std_result = exact_log2_hi_exp + step2
    std_result.set_attributes(tag = "std_result", debug = debug_lftolx, prevent_optimization = True)

    # main scheme
    Log.report(Log.Info, "MDL scheme")
    scheme = ConditionBlock(neg_input,
        Statement(
            ClearException(),
            Raise(ML_FPE_Invalid),
            Return(FP_QNaN(self.precision))
        ),
        ConditionBlock(vx_nan_or_inf,
            ConditionBlock(vx_inf,
                Statement(
                    ClearException(),
                    Return(FP_PlusInfty(self.precision)),
                ),
                Statement(
                    ClearException(),
                    ConditionBlock(vx_snan,
                        Raise(ML_FPE_Invalid)
                    ),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(vx_subnormal,
                Return(vx),
                ConditionBlock(ctz_cond,
                    Statement(
                        Return(ctz_result),
                    ),
                    Statement(
                        Return(std_result)
                    )
                )
            )
        )
    )
    return scheme