コード例 #1
0
        def sollya_gamma_fct(x, diff_order, prec):
            """ Evaluate the gamma helper selected by diff_order on x.

                The helper (sollya_gamma, sollya_gamma_d0 or sollya_gamma_d1)
                is called inside a bigfloat.precision(prec) context rather
                than relying on sollya's own implementation.
                This wrapper implements sollya's function API.

                :param x: numerical input value (may be an Interval)
                :param diff_order: differential order (0, 1 or 2)
                :param prec: numerical precision expected (min)
                :raises NotImplementedError: for any other differential order
            """
            # select the evaluator matching the requested differential order
            if diff_order == 0:
                evaluator = sollya_gamma
            elif diff_order == 1:
                evaluator = sollya_gamma_d0
            elif diff_order == 2:
                evaluator = sollya_gamma_d1
            else:
                raise NotImplementedError

            with bigfloat.precision(prec):
                if not x.is_range():
                    return evaluator(x)
                # NOTE(review): only the two endpoints are evaluated, which
                # presumes the helper is monotonic over x -- confirm
                lower_bound = sollya.inf(x)
                upper_bound = sollya.sup(x)
                return sollya.Interval(evaluator(lower_bound),
                                       evaluator(upper_bound))
コード例 #2
0
 def get_value_str(self, value):
     """ Build the textual form of value.

         Gappa_Unknown is rendered as "?", a sollya range as
         "[inf, sup]" and anything else through str().
     """
     if value is Gappa_Unknown:
         return "?"
     is_sollya_range = (isinstance(value, sollya.SollyaObject)
                        and value.is_range())
     if not is_sollya_range:
         return str(value)
     bounds = (sollya.inf(value), sollya.sup(value))
     return "[%s, %s]" % bounds
コード例 #3
0
ファイル: ml_entity.py プロジェクト: hbrunie/metalibm
    def generate_test_case(self,
                           input_signals,
                           io_map,
                           index,
                           test_range=Interval(-1.0, 1.0)):
        """ Generic test case generation: draw one random value per input.

        Args:
            input_signals: iterable of input tags to generate values for
            io_map: mapping (input tag -> signal); each signal's precision
                base format selects the random generator
            index (int): integer index of the test case (not used here)
            test_range: interval whose bounds limit floating-point inputs

        Returns:
            dict: mapping (input tag -> numeric value)
        """
        # test interval boundaries, only used for floating-point inputs
        range_lo = sollya.inf(test_range)
        range_hi = sollya.sup(test_range)

        def draw_value(signal_format):
            """ draw one random value suited to signal_format """
            # FIXME: correct value generation depending on signal precision
            if isinstance(signal_format, ML_FP_Format):
                return generate_random_fp_value(signal_format,
                                                range_lo, range_hi)
            if isinstance(signal_format, ML_Fixed_Format):
                # TODO: does not depend on low and high range bounds
                return generate_random_fixed_value(signal_format)
            # any other format: random integer below 2**bit_size
            return random.randrange(2**signal_format.get_bit_size())

        return {
            tag: draw_value(io_map[tag].get_precision().get_base_format())
            for tag in input_signals
        }
コード例 #4
0
ファイル: ml2_exp.py プロジェクト: IanBriggs/OpTuner
        def generate_fptaylor(x):
            """ Bound the relative and absolute error of the polynomial on x.

            An FPTaylor query evaluating poly_expr over the bounds of x is
            run to read the "final_total" relative and absolute errors, and
            sollya.supnorm bounds the error of self.poly_object against exp
            on x. The two contributions are added.

            Args:
                x: sollya interval on which the errors are evaluated

            Returns:
                tuple: (rel_err, abs_err). An entry is float("inf") when
                    the matching FPTaylor value could not be obtained or
                    when supnorm reported "error".
            """
            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  r rnd64= x;",
                "  retval rnd64= {};".format(poly_expr), "Expressions",
                "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            # first attempt: ask for both relative and absolute errors
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                # the relative error may be missing from the result:
                # still try to salvage the absolute one
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            # second attempt: absolute error only
            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), x, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), x, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            # Same fallback as the relative error: when both FPTaylor
            # attempts failed rnd_abs_err is still None, and adding it to
            # algo_abs_err would raise instead of reporting "unknown".
            if rnd_abs_err is None or str(algo_abs_err) == "error":
                abs_err = float("inf")
            else:
                abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
コード例 #5
0
def get_integer_format(backend, optree):
    """ Return the integer format to use for optree.

    Args:
        backend: object providing default_integer_format, used when
            optree has no interval
        optree: node whose get_interval() result drives the choice

    Returns:
        backend.default_integer_format when the interval is None, else
        ML_Int32 / ML_Int64 when the lower bound is negative, or
        ML_UInt32 / ML_UInt64 otherwise.
    """
    int_range = optree.get_interval()
    # identity test: interval objects may overload ==, so comparing them
    # against None with == is not a reliable "no interval" check
    if int_range is None:
        return backend.default_integer_format

    low_bound = inf(int_range)
    high_bound = sup(int_range)
    if low_bound < 0:
        # signed
        if high_bound > 2**31 - 1 or low_bound < -2**31:
            return ML_Int64
        return ML_Int32
    # unsigned
    # NOTE(review): an upper bound of exactly 2**32 - 1 selects ML_UInt64
    # (>=) while the signed branch uses a strict comparison -- confirm
    # whether this asymmetry is intended
    if high_bound >= 2**32 - 1:
        return ML_UInt64
    return ML_UInt32
コード例 #6
0
ファイル: meta_interval.py プロジェクト: metalibm/metalibm
def inf(obj):
    """ Generic getter for the inferior bound of an interval.

    Sollya ranges are queried through sollya.inf; MetaInterval and
    MetaIntervalList expose the bound as their inf attribute.
    Any other object raises NotImplementedError.
    """
    if isinstance(obj, SollyaObject) and obj.is_range():
        return sollya.inf(obj)
    if isinstance(obj, (MetaInterval, MetaIntervalList)):
        return obj.inf
    raise NotImplementedError
コード例 #7
0
 def test_interval_out_of_bound_risk(x_range, y_range):
     """ Tell from the intervals of x and y whether |x / y| may come too
         close to the underflow or overflow limits of self.precision
         (a margin of 2 exponents is kept on each side) """
     quotient_magnitude = abs(x_range / y_range)
     low_limit = S2**(self.precision.get_emin_normal() + 2)
     high_limit = S2**(self.precision.get_emax() - 2)
     may_underflow = sollya.inf(quotient_magnitude) < low_limit
     may_overflow = sollya.sup(quotient_magnitude) > high_limit
     return may_underflow or may_overflow
コード例 #8
0
def is_simplifiable_to_cst(node):
    """ Tell whether node can be replaced by a constant.

    This is the case when node is not already a Constant and its
    interval is reduced to a single point (equal lower and upper bounds).
    """
    interval = node.get_interval()
    if interval is None or isinstance(node, Constant):
        return False
    if isinstance(interval, SollyaObject) and interval.is_range():
        return sollya.inf(interval) == sollya.sup(interval)
    if isinstance(interval, (MetaInterval, MetaIntervalList)):
        # an empty meta-interval never denotes a single value
        return not interval.is_empty and (interval.inf == interval.sup)
    return False
コード例 #9
0
ファイル: ml2_wide_sin.py プロジェクト: IanBriggs/OpTuner
        def split_domain(starting_domain, slivers):
            """ Cut starting_domain into a sorted list of sub-intervals.

            The domain is first split at zero, then cut further until
            floor(inf) and floor(sup) of each piece multiplied by n_invpi
            agree (the pair -1 / 0 is also accepted), and finally every
            piece is bisected slivers times. Duplicates and intervals
            reduced to a point are dropped.
            """
            # stage 1: separate the negative and non-negative sides
            worklist = []
            for dom in [starting_domain]:
                if sollya.inf(dom) < 0 and sollya.sup(dom) > 0:
                    worklist.append(sollya.Interval(sollya.inf(dom), 0))
                    worklist.append(sollya.Interval(0, sollya.sup(dom)))
                else:
                    worklist.append(dom)

            # stage 2: cut at multiples of n_pi (worklist is used as a stack)
            accepted = []
            while worklist:
                dom = worklist.pop()
                scaled = dom * n_invpi
                k_low = sollya.floor(sollya.inf(scaled))
                k_high = sollya.floor(sollya.sup(scaled))
                if k_low == k_high or (k_low == -1 and k_high == 0):
                    accepted.append(dom)
                    continue
                # cutting point and its neighbour scaled by (1 - 2**-53);
                # which of the two ends the lower piece depends on the
                # sign of the domain
                boundary = (k_low + 1) * n_pi
                shrunk = boundary - boundary * 2**-53
                if sollya.sup(dom) <= 0:
                    cut_low, cut_high = boundary, shrunk
                else:
                    cut_low, cut_high = shrunk, boundary
                worklist.append(sollya.Interval(sollya.inf(dom), cut_low))
                worklist.append(sollya.Interval(cut_high, sollya.sup(dom)))

            # stage 3: bisect each piece slivers times (2**slivers sections)
            pieces = accepted
            for _ in range(slivers):
                halves = []
                for dom in pieces:
                    centre = sollya.mid(dom)
                    halves.extend((
                        sollya.Interval(sollya.inf(dom), centre),
                        sollya.Interval(centre, sollya.sup(dom)),
                    ))
                pieces = halves

            # drop duplicates, order by lower bound, discard point intervals
            ordered = sorted(set(pieces),
                             key=lambda dom: float(sollya.inf(dom)))
            return [dom for dom in ordered
                    if sollya.inf(dom) != sollya.sup(dom)]
コード例 #10
0
def addsub_signed_predicate(lhs, lhs_prec, rhs, rhs_prec, op=operator.__sub__, default=True):
    """ Decide whether the result of op(lhs, rhs) needs a signed format.

    The ranges of both operands are evaluated and combined through
    safe(op). When no range is available default is returned; otherwise
    the result is signed exactly when its lower bound is negative.
    """
    operand_ranges = (evaluate_range(lhs), evaluate_range(rhs))
    result_range = safe(op)(*operand_ranges)
    if result_range is None:
        return default
    return bool(sollya.inf(result_range) < 0)
コード例 #11
0
    def simplify(self, node):
        """ Try to simplify node, after recursively simplifying its inputs.

        Results are cached in self.memoization_map, including the
        "no simplification" outcome.

        Returns:
            the replacement node, or None when node was not simplified
        """
        if node in self.memoization_map:
            return self.memoization_map[node]

        def fold_boolean(verdict):
            """ turn an always-true / always-false verdict into a uniform
                constant, or None when the verdict is undecided """
            if verdict is BooleanValue.AlwaysTrue:
                return generate_uniform_cst(True, node.get_precision())
            if verdict is BooleanValue.AlwaysFalse:
                return generate_uniform_cst(False, node.get_precision())
            return None

        # simplify operands first, replacing those which changed
        if not is_leaf_node(node):
            for index, op in enumerate(node.inputs):
                simplified_op = self.simplify(op)
                if simplified_op is not None:
                    node.set_input(index, simplified_op)

        result = None
        if is_simplifiable_to_cst(node):
            # the interval is a single point: use it as constant value
            result = Constant(
                sollya.inf(node.get_interval()),
                precision=node.get_precision()
            )
            forward_attributes(node, result)
        elif isinstance(node, Multiplication) and is_simplifiable_multiplication(node, node.get_input(0), node.get_input(1)):
            result = simplify_multiplication(node)
        elif isinstance(node, Min):
            candidate = is_simplifiable_min(node, node.get_input(0), node.get_input(1))
            if candidate:
                result = candidate
        elif isinstance(node, Max):
            candidate = is_simplifiable_max(node, node.get_input(0), node.get_input(1))
            if candidate:
                result = candidate
        elif isinstance(node, Comparison):
            result = fold_boolean(
                is_simplifiable_cmp(node, node.get_input(0), node.get_input(1)))
        elif isinstance(node, Test):
            result = fold_boolean(is_simplifiable_test(node, node.inputs))
        elif isinstance(node, ConditionBlock):
            result = simplify_condition_block(node)
        elif isinstance(node, LogicOperation):
            result = simplify_logical_op(node)

        if result is not None:
            Log.report(LOG_VERBOSE_NUMERICAL_SIMPLIFICATION, "{} has been simplified to {}", node, result)
        self.memoization_map[node] = result
        return result
コード例 #12
0
ファイル: code_object.py プロジェクト: metalibm/metalibm
 def get_value_str(self, value):
     """ Build the textual form of value.

         Gappa_Unknown is rendered as "?", a MetaInterval through its
         wrapped interval, a sollya range as "[inf, sup]" and anything
         else through str(). MetaIntervalList is not supported.
     """
     if value is Gappa_Unknown:
         return "?"
     if isinstance(value, MetaInterval):
         return self.get_value_str(value.interval)
     if isinstance(value, MetaIntervalList):
         # a MetaIntervalList should have been caught earlier and
         # should have generated a disjunction of cases
         raise NotImplementedError
     if isinstance(value, sollya.SollyaObject) and value.is_range():
         bounds = (sollya.inf(value), sollya.sup(value))
         return "[%s, %s]" % bounds
     return str(value)
コード例 #13
0
ファイル: Gabarit.py プロジェクト: fixif/fixif
def findMaxIssue(res):
	"""
	Find the issue with the maximum error
	Parameters:
	- res: (sollya object) result from the checkModulusFilterInSpecification function

	Returns the maximum value (0 if no band reports an issue)
	"""
	worst = 0
	# only the bands flagged as not okay carry issues
	failing_bands = (band for band in dict(res)["results"] if not dict(band)["okay"])
	for band in failing_bands:
		for issue in dict(band)["issue"]:
			fields = dict(issue)
			H = fields["H"]
			spec = dict(fields["specification"])
			# NOTE(review): betaInf is read but never used; the else branch
			# below measures against betaSup as well -- confirm whether it
			# was meant to use betaInf
			betaInf = spec["betaInf"]
			betaSup = spec["betaSup"]
			if sollya.inf(H) > betaSup:
				candidate = sollya.sup(H) - betaSup
			else:
				candidate = betaSup - sollya.inf(H)
			worst = sollya.max(worst, candidate)

	return worst
コード例 #14
0
        def generate_json(errors, domain):
            """ Summarise the error entries falling inside domain.

            Each entry is indexed as err[0] (tested against domain),
            err[1] and err[2]. "delta" is the largest err[1] and "epsilon"
            the smallest err[2] among the retained entries.

            Returns:
                dict: description with cname, delta, domain bounds,
                    epsilon and operation
            """
            in_domain = sorted((err for err in errors if err[0] in domain),
                               key=lambda err: err[2])
            epsilon = in_domain[0][2]
            delta = max(err[1] for err in in_domain)
            domain_bounds = [float(sollya.inf(domain)),
                             float(sollya.sup(domain))]

            return {
                "cname": self.function_name,
                "delta": float(delta),
                "domain": domain_bounds,
                "epsilon": float(epsilon),
                "operation": "log",
            }
コード例 #15
0
        def split_domain(starting_domain, slivers):
            """ Cut starting_domain into a sorted list of sub-intervals.

            The domain is cut until floor(log2) agrees on both bounds of
            every piece, then each piece is bisected slivers times.
            Duplicates and intervals reduced to a point are dropped.
            """
            # stage 1: cut where floor(log2) changes (worklist is a stack)
            worklist = [starting_domain]
            accepted = []
            while worklist:
                dom = worklist.pop()
                exponent_range = sollya.log2(dom)
                e_low = sollya.floor(sollya.inf(exponent_range))
                e_high = sollya.floor(sollya.sup(exponent_range))
                if e_low == e_high:
                    accepted.append(dom)
                    continue
                # 100 bisection steps narrowing a bracket around the point
                # where floor(log2) stops being equal to e_low
                bracket = 2**sollya.Interval(e_low, e_low + 1)
                for _ in range(100):
                    centre = sollya.mid(bracket)
                    if sollya.floor(sollya.log2(centre)) == e_low:
                        bracket = sollya.Interval(centre, sollya.sup(bracket))
                    else:
                        bracket = sollya.Interval(sollya.inf(bracket), centre)
                cut_low = sollya.inf(bracket)
                cut_high = sollya.sup(bracket)

                # the lower piece is pushed last, hence popped first
                worklist.append(sollya.Interval(cut_high, sollya.sup(dom)))
                worklist.append(sollya.Interval(sollya.inf(dom), cut_low))

            # stage 2: bisect each piece slivers times (2**slivers sections)
            pieces = accepted
            for _ in range(slivers):
                halves = []
                for dom in pieces:
                    centre = sollya.mid(dom)
                    halves.extend((
                        sollya.Interval(sollya.inf(dom), centre),
                        sollya.Interval(centre, sollya.sup(dom)),
                    ))
                pieces = halves

            # drop duplicates, order by lower bound, discard point intervals
            ordered = sorted(set(pieces),
                             key=lambda dom: float(sollya.inf(dom)))
            return [dom for dom in ordered
                    if sollya.inf(dom) != sollya.sup(dom)]
コード例 #16
0
def solve_format_shift(optree):
    """ Legalize a shift node and return its result format.

    An error is reported when the shift amount has a fixed-point format
    and its range may be negative. The shifted operand's format is
    returned when it is fixed-point, else optree's own precision.
    """
    assert isinstance(optree, (BitLogicRightShift, BitLogicLeftShift))
    operand_format = optree.get_input(0).get_precision()
    amount = optree.get_input(1)

    if is_fixed_point(amount.get_precision()):
        amount_range = evaluate_range(amount)
        if sollya.inf(amount_range) < 0:
            message = "shift amount of {} may be negative {}\n".format(
                optree, amount_range)
            Log.report(Log.Error, message)

    if not is_fixed_point(operand_format):
        return optree.get_precision()
    return operand_format
コード例 #17
0
ファイル: random_gen.py プロジェクト: metalibm/metalibm
def get_precision_rng(precision, value_range=None):
    """ Build a random generator for values of format precision.

    Args:
        precision: format of the values to generate
        value_range: optional interval bounding the generated values;
            when None the generator is chosen from the base format

    Returns:
        the random generator object; nothing is returned after an
        unsupported base format has been reported through Log.report
    """
    if value_range is not None:
        low_bound = sollya.inf(value_range)
        high_bound = sollya.sup(value_range)
        return get_precision_rng_with_defined_range(precision, low_bound,
                                                    high_bound)

    # default full-range value generation
    base_format = precision.get_base_format()
    if isinstance(base_format, ML_FP_MultiElementFormat):
        return MPFPRandomGen(precision)
    if isinstance(base_format, ML_FP_Format):
        return FPRandomGen(precision, include_snan=False)
    if isinstance(base_format, ML_Fixed_Format):
        return FixedPointRandomGen(precision)
    Log.report(Log.Error,
               "unsupported format {}/{} in get_precision_rng",
               precision, base_format)
    return None
コード例 #18
0
    def generate_scheme(self):
        """ Build a two-table approximation of self.function over self.interval.

        The input x is reduced to an unsigned fixed-point index of
        alpha + beta + gamma bits. The alpha/beta slices address a table
        of initial values ("tiv"), the alpha/gamma slices address a table
        of offsets ("to"); the output "vr_out" is the sum of both table
        reads converted to self.precision.

        The method also prints the maximal approximation error found over
        all index combinations and the total table size.

        Returns:
            list: single-element list containing self.implementation
        """
        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = self.precision

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input
        # NOTE(review): rnd_mode is declared as an input signal but is not
        # referenced again in this method
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        # size of most significant table index (for linear slope tabulation)
        alpha = self.alpha  # 6
        # size of medium significant table index (for initial value table index LSB)
        beta = self.beta  # 5
        # size of least significant table index (for linear offset tabulation)
        gamma = self.gamma  # 5

        guard_bits = self.guard_bits  # 3

        vx.set_interval(self.interval)

        range_hi = sollya.sup(self.interval)
        range_lo = sollya.inf(self.interval)
        f_hi = self.function(range_hi)
        f_lo = self.function(range_lo)
        # fixed by format used for reduced_x
        range_size = range_hi - range_lo
        range_size_log2 = int(sollya.log2(range_size))
        # the width of the input interval must be an exact power of two
        assert 2**range_size_log2 == range_size

        print("range_size_log2={}".format(range_size_log2))

        # offset of x inside the interval, shifted by range_size_log2 and
        # converted to an unsigned fixed-point value of alpha+beta+gamma
        # fractional bits
        reduced_x = Conversion(BitLogicRightShift(vx - range_lo,
                                                  range_size_log2),
                               precision=fixed_point(0,
                                                     alpha + beta + gamma,
                                                     signed=False),
                               tag="reduced_x",
                               debug=debug_fixed)

        # slicing reduced_x into the three sub-indexes
        alpha_index = get_fixed_slice(reduced_x,
                                      0,
                                      alpha - 1,
                                      align_hi=FixedPointPosition.FromMSBToLSB,
                                      align_lo=FixedPointPosition.FromMSBToLSB,
                                      tag="alpha_index",
                                      debug=debug_std)
        gamma_index = get_fixed_slice(reduced_x,
                                      gamma - 1,
                                      0,
                                      align_hi=FixedPointPosition.FromLSBToLSB,
                                      align_lo=FixedPointPosition.FromLSBToLSB,
                                      tag="gamma_index",
                                      debug=debug_std)

        beta_index = get_fixed_slice(reduced_x,
                                     alpha,
                                     gamma,
                                     align_hi=FixedPointPosition.FromMSBToLSB,
                                     align_lo=FixedPointPosition.FromLSBToLSB,
                                     tag="beta_index",
                                     debug=debug_std)

        # Assuming monotonic function
        f_absmax = max(abs(f_hi), abs(f_lo))
        f_absmin = min(abs(f_hi), abs(f_lo))

        # bit positions bounding the function magnitude at the interval ends
        f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
        f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
        storage_lsb = f_lsb - io_precision.get_bit_size() - guard_bits

        f_int_size = f_msb
        f_frac_size = -storage_lsb

        storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
        Log.report(Log.Info, "storage_format is {}".format(storage_format))

        # table of initial value index
        tiv_index = Concatenation(alpha_index,
                                  beta_index,
                                  tag="tiv_index",
                                  debug=debug_std)
        # table of offset value index
        to_index = Concatenation(alpha_index,
                                 gamma_index,
                                 tag="to_index",
                                 debug=debug_std)

        tiv_index_size = alpha + beta
        to_index_size = alpha + gamma

        Log.report(Log.Info, "initial table structures")
        table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                               storage_precision=storage_format,
                               tag="tiv")
        table_offset = ML_NewTable(dimensions=[2**to_index_size],
                                   storage_precision=storage_format,
                                   tag="to")

        # one slope per alpha index, estimated by a centered finite
        # difference of width delta_u
        slope_table = [None] * (2**alpha)
        slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
        delta_u = range_size * slope_delta * 2**-15
        Log.report(Log.Info, "computing slope value")
        for i in range(2**alpha):
            # slope is computed at the middle of range_size interval
            slope_x = range_lo + (i + 0.5) * range_size * slope_delta
            # TODO: gross approximation of derivatives
            f_xpu = self.function(slope_x + delta_u / 2)
            f_xmu = self.function(slope_x - delta_u / 2)
            slope = (f_xpu - f_xmu) / delta_u
            slope_table[i] = slope

        range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
        Log.report(Log.Info, "computing value for initial-value table")
        for i in range(2**tiv_index_size):
            # true division: the quotient is truncated with int() when
            # indexing slope_table below
            slope_index = i / 2**beta
            iv_x = range_lo + i * range_rcp_steps * range_size
            offset_x = 0.5 * range_rcp_steps * range_size
            # initial value is computed so that the piecewise linear
            # approximation intersects the function at iv_x + offset_x
            iv_y = self.function(
                iv_x + offset_x) - offset_x * slope_table[int(slope_index)]
            initial_value = storage_format.round_sollya_object(iv_y)
            table_iv[i] = initial_value

        # determining table of initial value interval
        tiv_min = table_iv[0]
        tiv_max = table_iv[0]
        for i in range(1, 2**tiv_index_size):
            tiv_min = min(tiv_min, table_iv[i])
            tiv_max = max(tiv_max, table_iv[i])
        table_iv.set_interval(Interval(tiv_min, tiv_max))

        # offset table: slope of the sub-table times the gamma-index step
        offset_step = range_size / S2**(alpha + beta + gamma)
        for i in range(2**alpha):
            Log.report(Log.Info,
                       "computing offset value for sub-table {}".format(i))
            for j in range(2**gamma):
                to_i = i * 2**gamma + j
                offset = slope_table[i] * j * offset_step
                table_offset[to_i] = offset

        # determining table of offset interval
        to_min = table_offset[0]
        to_max = table_offset[0]
        for i in range(1, 2**(alpha + gamma)):
            to_min = min(to_min, table_offset[i])
            to_max = max(to_max, table_offset[i])
        offset_interval = Interval(to_min, to_max)
        table_offset.set_interval(offset_interval)

        # NOTE: this rebinds initial_value (previously the last rounded
        # table entry) to the table read node
        initial_value = TableLoad(table_iv,
                                  tiv_index,
                                  precision=storage_format,
                                  tag="initial_value",
                                  debug=debug_fixed)

        # the offset table storage format is replaced by a format derived
        # from the interval of the offsets
        offset_precision = get_fixed_type_from_interval(offset_interval, 16)
        print("offset_precision is {} ({} bits)".format(
            offset_precision, offset_precision.get_bit_size()))
        table_offset.get_precision().storage_precision = offset_precision

        # rounding table value
        # NOTE(review): the loop starts at index 1, so entry 0 is not
        # rounded here (it was computed with j == 0) -- confirm intent
        for i in range(1, 2**(alpha + gamma)):
            table_offset[i] = offset_precision.round_sollya_object(
                table_offset[i])

        offset_value = TableLoad(table_offset,
                                 to_index,
                                 precision=offset_precision,
                                 tag="offset_value",
                                 debug=debug_fixed)

        Log.report(
            Log.Verbose,
            "initial_value's interval: {}, offset_value's interval: {}".format(
                evaluate_range(initial_value), evaluate_range(offset_value)))

        # NOTE(review): final_add and round_bit are not used below; the
        # output is built from a separate initial_value + offset_value sum
        final_add = initial_value + offset_value
        round_bit = final_add  # + FixedPointPosition(final_add, io_precision.get_bit_size(), align=FixedPointPosition.FromMSBToLSB)

        vr_out = Conversion(initial_value + offset_value,
                            precision=io_precision,
                            tag="vr_out",
                            debug=debug_fixed)

        self.implementation.add_output_signal("vr_out", vr_out)

        # Approximation error evaluation
        # exhaustive walk over every (alpha, beta, gamma) index triple
        approx_error = 0.0
        for i in range(2**alpha):
            for j in range(2**beta):
                tiv_i = (i * 2**beta + j)
                # = range_lo + tiv_i * range_rcp_steps * range_size
                iv = table_iv[tiv_i]
                for k in range(2**gamma):
                    to_i = i * 2**gamma + k
                    offset = table_offset[to_i]
                    approx_value = offset + iv
                    table_x = range_lo + range_size * (
                        (i * 2**beta + j) * 2**gamma + k) / S2**(alpha + beta +
                                                                 gamma)
                    # NOTE(review): the reference value is hard-coded as
                    # 1 / table_x whereas the tables are filled from
                    # self.function -- confirm both always match
                    local_error = abs(1 / (table_x) - approx_value)
                    approx_error = max(approx_error, local_error)
        error_log2 = float(sollya.log2(approx_error))
        print("approx_error is {}, error_log2 is {}".format(
            float(approx_error), error_log2))

        # table size
        table_iv_size = 2**(alpha + beta)
        table_offset_size = 2**(alpha + gamma)
        print("tables' size are {} entries".format(table_iv_size +
                                                   table_offset_size))

        return [self.implementation]
コード例 #19
0
ファイル: ml_entity.py プロジェクト: hbrunie/metalibm
def random_log_sample(interval):
    """ Extract the lower and upper bounds of @p interval.

    NOTE(review): only the bound extraction is visible in this excerpt; the
    bounds are neither used nor returned here, so the remainder of the
    function body appears to be missing -- confirm against the full source.

    Args:
        interval: object accepted by sollya.inf / sollya.sup
            (presumably a sollya Interval -- TODO confirm)
    """
    # lower bound of the sampling interval
    lo = sollya.inf(interval)
    # upper bound of the sampling interval
    hi = sollya.sup(interval)
コード例 #20
0
    def generate_expr(
        self,
        code_object,
        optree,
        folded=False,
        result_var=None,
        initial=False,
        language=None,
        ## force to store result in a variable, wrapping CodeExpression
        #  in CodeVariable
        force_variable_storing=False):
        """ Generate code for the operation node @p optree into
        @p code_object.

        The node is dispatched on its class: leaf nodes (CodeVariable,
        Variable, Signal, Constant) are turned into a CodeVariable or
        CodeExpression, statement-like nodes emit text into @p code_object
        and yield no value, and every other node has its inputs generated
        first (deepest first) before being delegated to
        self.processor.generate_expr.

        Args:
            code_object: code container; generated text is appended with
                the << operator and names are declared through it
            optree: operation node to generate
            folded: forwarded to sub-node generation and to the processor
            result_var: optional name of the variable which must receive
                the result
            initial: when set, a result which is not already a CodeVariable
                is stored into a freshly declared variable
            language: target language, defaults to self.language
            force_variable_storing: force the result to be stored in a
                variable (CodeExpression wrapped into a CodeVariable)

        Returns:
            CodeVariable or CodeExpression holding the result of
            @p optree, or None for nodes which only emit statements
            (Assert, Wait, SwitchBlock, ReferenceAssign, RangeLoop, Loop,
            Process, ComponentInstance, ConditionBlock, Return,
            NoResultOperation, Statement and raising/clearing
            ExceptionOperation)
        """
        language = self.language if language is None else language

        # search if <optree> has already been processed
        if self.has_memoization(optree):
            result = self.get_memoization(optree)
            if isinstance(result, CodeExpression) and force_variable_storing:
                # forcing storing and translation CodeExpression to CodeVariable
                # if force_variable_storing is set
                result_precision = result.precision
                # force_variable_storing is necessarily set in this branch
                prefix_tag = optree.get_tag(default="var_result")
                final_var = result_var if result_var else code_object.get_free_var_name(
                    result_precision, prefix=prefix_tag, declare=True)
                code_object << self.generate_code_assignation(
                    code_object, final_var, result.get())
                result = CodeVariable(final_var, result_precision)
            return result

        result = None
        # implementation generation
        if isinstance(optree, CodeVariable):
            # already a code-level variable: nothing to generate
            result = optree

        elif isinstance(optree, Variable):
            if optree.get_var_type() is Variable.Local:
                # local variables must be declared under a free name
                final_var = code_object.get_free_var_name(
                    optree.get_precision(),
                    prefix=optree.get_tag(),
                    declare=True,
                    var_ctor=Variable)
                result = CodeVariable(final_var, optree.get_precision())
            else:
                result = CodeVariable(optree.get_tag(), optree.get_precision())

        elif isinstance(optree, Signal):
            if optree.get_var_type() is Variable.Local:
                # local signals must be declared under a free name
                final_var = code_object.declare_signal(optree,
                                                       optree.get_precision(),
                                                       prefix=optree.get_tag())
                result = CodeVariable(final_var, optree.get_precision())
            else:
                result = CodeVariable(optree.get_tag(), optree.get_precision())

        elif isinstance(optree, Constant):
            precision = optree.get_precision()  # .get_base_format()
            if force_variable_storing or self.declare_cst or optree.get_precision(
            ).is_cst_decl_required():
                # constant is declared and referenced by name
                cst_prefix = "cst" if optree.get_tag(
                ) is None else optree.get_tag()
                cst_varname = code_object.declare_cst(optree,
                                                      prefix=cst_prefix)
                result = CodeVariable(cst_varname, precision)
            else:
                # constant is inlined as a literal expression
                if precision is ML_Integer:
                    result = CodeExpression("%d" % optree.get_value(),
                                            precision)
                else:
                    try:
                        result = CodeExpression(
                            precision.get_cst(optree.get_value(),
                                              language=language), precision)
                    except Exception:
                        # report which Constant failed; the original
                        # exception is re-raised in case the logger returns
                        Log.report(
                            Log.Error,
                            "Error during get_cst call for Constant: {} ",
                            optree)  # Exception print
                        raise

        elif isinstance(optree, Assert):
            cond = optree.get_input(0)
            error_msg = optree.get_error_msg()
            severity = optree.get_severity()

            cond_code = self.generate_expr(code_object,
                                           cond,
                                           folded=False,
                                           language=language)

            code_object << " assert {cond} report {error_msg} severity {severity};\n".format(
                cond=cond_code.get(),
                error_msg=error_msg,
                severity=severity.descriptor)

            return None

        elif isinstance(optree, Wait):
            time_ns = optree.get_time_ns()
            code_object << "wait for {time_ns} ns;\n".format(time_ns=time_ns)
            return None

        elif isinstance(optree, SwitchBlock):
            switch_value = optree.inputs[0]

            # generating pre_statement
            self.generate_expr(code_object,
                               optree.get_pre_statement(),
                               folded=folded,
                               language=language)

            switch_value_code = self.generate_expr(code_object,
                                                   switch_value,
                                                   folded=folded,
                                                   language=language)
            case_map = optree.get_case_map()

            code_object << "\nswitch(%s) {\n" % switch_value_code.get()
            for case in case_map:
                case_value = case
                case_statement = case_map[case]
                if isinstance(case_value, tuple):
                    # a tuple key maps several case labels to one statement
                    for sub_case in case:
                        code_object << "case %s:\n" % sub_case
                else:
                    code_object << "case %s:\n" % case
                code_object.open_level()
                self.generate_expr(code_object,
                                   case_statement,
                                   folded=folded,
                                   language=language)
                code_object.close_level()
            code_object << "}\n"

            return None

        elif isinstance(optree, ReferenceAssign):
            output_var = optree.inputs[0]
            result_value = optree.inputs[1]

            output_var_code = self.generate_expr(code_object,
                                                 output_var,
                                                 folded=False,
                                                 language=language)

            def get_assign_symbol(node):
                """ select the assignation symbol matching the class
                    of @p node ("<=" for Signal, ":=" for Variable) """
                if isinstance(node, Signal):
                    assign_sign = "<="
                elif isinstance(node, Variable):
                    assign_sign = ":="
                else:
                    # NOTE(review): relies on Log.report(Log.Error, ...)
                    # aborting, assign_sign is unbound otherwise -- confirm
                    Log.report(Log.Error,
                               "unsupported node for assign symbol:\n {}",
                               node)
                return assign_sign

            if isinstance(output_var, Signal) or isinstance(
                    output_var, Variable):
                assign_sign = get_assign_symbol(output_var)
            elif isinstance(output_var, VectorElementSelection) or isinstance(
                    output_var, SubSignalSelection):
                # the symbol is determined by the node being sub-selected
                select_input = output_var.get_input(0)
                assign_sign = get_assign_symbol(select_input)
            else:
                # reporting the offending destination node
                Log.report(Log.Error,
                           "unsupported node for assign symbol:\n {}",
                           output_var)

            if isinstance(result_value, Constant):
                # generate assignation
                result_value_code = self.generate_expr(code_object,
                                                       result_value,
                                                       folded=folded,
                                                       language=language)
                code_object << self.generate_assignation(
                    output_var_code.get(),
                    result_value_code.get(),
                    assign_sign=assign_sign)
            else:
                #result_value_code = self.generate_expr(code_object, result_value, folded = True, force_variable_storing = True, language = language)
                result_value_code = self.generate_expr(code_object,
                                                       result_value,
                                                       folded=True,
                                                       language=language)
                code_object << self.generate_assignation(
                    output_var_code.get(),
                    result_value_code.get(),
                    assign_sign=assign_sign)
            if optree.get_debug() and not self.disable_debug:
                self.generate_debug_msg(result_value,
                                        result_value_code,
                                        code_object,
                                        debug_object=optree.get_debug())

            #code_object << self.generate_assignation(output_var_code.get(), result_value_code.get())
            #code_object << output_var.get_precision().generate_c_assignation(output_var_code, result_value_code)

            return None

        elif isinstance(optree, RangeLoop):
            iterator = optree.get_input(0)
            loop_body = optree.get_input(1)
            loop_range = optree.get_loop_range()
            specifier = optree.get_specifier()

            # VHDL range direction keywords are "to" and "downto"
            range_pattern = "{lower} to {upper}" if specifier is RangeLoop.Increasing else "{upper} downto {lower}"
            range_code = range_pattern.format(lower=sollya.inf(loop_range),
                                              upper=sollya.sup(loop_range))

            iterator_code = self.generate_expr(code_object,
                                               iterator,
                                               folded=folded,
                                               language=language)

            code_object << "\n for {iterator} in {loop_range} loop\n".format(
                iterator=iterator_code.get(), loop_range=range_code)
            code_object.inc_level()
            body_code = self.generate_expr(code_object,
                                           loop_body,
                                           folded=folded,
                                           language=language)
            # the loop body must be a statement (no resulting value)
            assert body_code is None
            code_object.dec_level()
            code_object << "end loop;\n"

            return None

        elif isinstance(optree, Loop):
            init_statement = optree.inputs[0]
            exit_condition = optree.inputs[1]
            loop_body = optree.inputs[2]

            self.generate_expr(code_object,
                               init_statement,
                               folded=folded,
                               language=language)
            code_object << "\nfor (;%s;)" % self.generate_expr(
                code_object, exit_condition, folded=False,
                language=language).get()
            code_object.open_level()
            self.generate_expr(code_object,
                               loop_body,
                               folded=folded,
                               language=language)
            code_object.close_level()

            return None

        elif isinstance(optree, Process):
            # generating pre_statement for process
            pre_statement = optree.get_pre_statement()
            self.generate_expr(code_object,
                               pre_statement,
                               folded=folded,
                               language=language)

            sensibility_list = [
                self.generate_expr(code_object,
                                   op,
                                   folded=True,
                                   language=language).get()
                for op in optree.get_sensibility_list()
            ]
            # the parenthesized list is omitted when it is empty
            sensibility_list = "({})".format(", ".join(
                sensibility_list)) if len(sensibility_list) != 0 else ""
            code_object << "process{}\n".format(sensibility_list)
            # process body is generated in its own memoization
            # and code levels
            self.open_memoization_level()
            code_object.open_level(
                extra_shared_tables=[MultiSymbolTable.SignalSymbol],
                var_ctor=Variable)
            for process_stat in optree.inputs:
                self.generate_expr(code_object,
                                   process_stat,
                                   folded=folded,
                                   initial=False,
                                   language=language)

            code_object.close_level()
            self.close_memoization_level()
            code_object << "end process;\n\n"
            return None

        elif isinstance(optree, PlaceHolder):
            # all inputs are generated, only the first one provides
            # the result
            first_input = optree.get_input(0)
            first_input_code = self.generate_expr(code_object,
                                                  first_input,
                                                  folded=folded,
                                                  language=language)
            for op in optree.get_inputs()[1:]:
                _ = self.generate_expr(code_object,
                                       op,
                                       folded=folded,
                                       language=language)

            result = first_input_code

        elif isinstance(optree, ComponentInstance):
            component_object = optree.get_component_object()
            component_name = component_object.get_name()
            code_object.declare_component(component_name, component_object)
            io_map = optree.get_io_map()
            component_tag = optree.get_tag()
            if component_tag is None:
                component_tag = "{component_name}_i{instance_id}".format(
                    component_name=component_name,
                    instance_id=optree.get_instance_id())
            # component tag uniquifying
            component_tag = code_object.get_free_name(component_object,
                                                      prefix=component_tag)
            # generating the code of every node connected to the component
            mapped_io = {}
            for io_tag in io_map:
                mapped_io[io_tag] = self.generate_expr(code_object,
                                                       io_map[io_tag],
                                                       folded=True,
                                                       language=language)

            code_object << "\n{component_tag} : {component_name}\n".format(
                component_name=component_name, component_tag=component_tag)
            code_object << "  port map (\n"
            code_object << "  " + ", \n  ".join(
                "{} => {}".format(io_tag, mapped_io[io_tag].get())
                for io_tag in mapped_io)
            code_object << "\n);\n"

            return None

        elif isinstance(optree, ConditionBlock):
            condition = optree.inputs[0]
            if_branch = optree.inputs[1]
            else_branch = optree.inputs[2] if len(optree.inputs) > 2 else None

            # generating pre_statement
            self.generate_expr(code_object,
                               optree.get_pre_statement(),
                               folded=folded,
                               language=language)

            cond_code = self.generate_expr(code_object,
                                           condition,
                                           folded=False,
                                           language=language)
            try:
                # only checking that the attribute is available,
                # its value is not used by the generation
                condition.get_likely()
            except AttributeError:
                Log.report(
                    Log.Error,
                    "The following condition has no (usable) likely attribute: {}",
                    condition)
            code_object << "if %s then\n " % cond_code.get()
            code_object.inc_level()
            if_branch_code = self.generate_expr(code_object,
                                                if_branch,
                                                folded=False,
                                                language=language)
            code_object.dec_level()
            if else_branch:
                code_object << " else\n "
                code_object.inc_level()
                else_branch_code = self.generate_expr(code_object,
                                                      else_branch,
                                                      folded=True,
                                                      language=language)
                code_object.dec_level()
            else:
                #  code_object << "\n"
                pass
            code_object << "end if;\n"

            return None

        elif isinstance(optree, Select):
            # we go through all of select operands to
            # flatten the select tree
            def flatten_select(op, cond=None):
                """ Process recursively a Select operation to build a list
                   of tuple (result, condition) """
                if not isinstance(op, Select): return [(op, cond)]
                lcond = op.inputs[0] if cond is None else LogicalAnd(
                    op.inputs[0], cond, precision=cond.get_precision())
                return flatten_select(op.inputs[1], lcond) + flatten_select(
                    op.inputs[2], cond)

            def legalize_select_input(select_input):
                """ insert a Conversion when the bit size of @p select_input
                    differs from the bit size of the Select result """
                if select_input.get_precision().get_bit_size(
                ) != optree.get_precision().get_bit_size():
                    return Conversion(select_input,
                                      precision=optree.get_precision())
                else:
                    return select_input

            prefix = optree.get_tag(default="setmp")
            result_varname = result_var if result_var is not None else code_object.get_free_var_name(
                optree.get_precision(), prefix=prefix)
            result = CodeVariable(result_varname, optree.get_precision())
            select_opcond_list = flatten_select(optree)
            # the last entry is the default value and must be unconditional
            if select_opcond_list[-1][1] is not None:
                Log.report(
                    Log.Error,
                    "last condition in flatten select differs from None")

            # generating every operand and condition before emitting
            # the conditional assignation itself
            gen_list = []
            for op, cond in select_opcond_list:
                op = legalize_select_input(op)
                op_code = self.generate_expr(code_object,
                                             op,
                                             folded=folded,
                                             language=language)
                if cond is not None:
                    cond_code = self.generate_expr(code_object,
                                                   cond,
                                                   folded=True,
                                                   force_variable_storing=True,
                                                   language=language)
                    gen_list.append((op_code, cond_code))
                else:
                    gen_list.append((op_code, None))

            code_object << "{result} <= \n".format(result=result.get())
            code_object.inc_level()
            for op_code, cond_code in gen_list:
                if cond_code is not None:
                    code_object << "{op_code} when {cond_code} else\n".format(
                        op_code=op_code.get(), cond_code=cond_code.get())
                else:
                    code_object << "{op_code};\n".format(op_code=op_code.get())
            code_object.dec_level()

        elif isinstance(optree, TableLoad):
            table = optree.get_input(0)
            index = optree.get_input(1)
            index_code = self.generate_expr(code_object,
                                            index,
                                            folded=folded,
                                            language=language)
            prefix = optree.get_tag(default="table_value")
            result_varname = result_var if result_var is not None else code_object.get_free_var_name(
                optree.get_precision(), prefix=prefix)
            result = CodeVariable(result_varname, optree.get_precision())
            code_object << "with {index} select {result} <=\n".format(
                index=index_code.get(), result=result.get())

            # only mono-dimensional tables are supported
            table_dimensions = table.get_precision().get_dimensions()
            assert len(table_dimensions) == 1

            # value selected for indexes without a table entry
            default_value = 0

            # linearizing table selection
            for tabid, value in enumerate(table.get_data()):
                code_object << "\t{} when {},\n".format(
                    table.get_precision().get_storage_precision().get_cst(
                        value),
                    index.get_precision().get_cst(tabid))

            code_object << "\t{} when others;\n".format(table.get_precision(
            ).get_storage_precision().get_cst(default_value))

            # result is set

        elif isinstance(optree, Return):
            return_result = optree.inputs[0]
            return_code = self.generate_expr(code_object,
                                             return_result,
                                             folded=folded,
                                             language=language)
            code_object << "return %s;\n" % return_code.get()
            return None  #return_code

        elif isinstance(optree, ExceptionOperation):
            if optree.get_specifier() in [
                    ExceptionOperation.RaiseException,
                    ExceptionOperation.ClearException,
                    ExceptionOperation.RaiseReturn
            ]:
                result_code = self.processor.generate_expr(
                    self,
                    code_object,
                    optree,
                    optree.inputs,
                    folded=False,
                    result_var=result_var,
                    language=language)
                code_object << "%s;\n" % result_code.get()
                if optree.get_specifier() == ExceptionOperation.RaiseReturn:
                    if self.libm_compliant:
                        # libm compliant exception management
                        code_object.add_header(
                            "support_lib/ml_libm_compatibility.h")
                        return_value = self.generate_expr(
                            code_object,
                            optree.get_return_value(),
                            folded=folded,
                            language=language)
                        arg_value = self.generate_expr(code_object,
                                                       optree.get_arg_value(),
                                                       folded=folded,
                                                       language=language)
                        function_name = optree.function_name
                        exception_list = [
                            op.get_value() for op in optree.inputs
                        ]
                        # inexact is not reported on its own
                        if ML_FPE_Inexact in exception_list:
                            exception_list.remove(ML_FPE_Inexact)

                        # a single remaining exception is supported
                        if len(exception_list) > 1:
                            raise NotImplementedError
                        if ML_FPE_Overflow in exception_list:
                            code_object << "return ml_raise_libm_overflowf(%s, %s, \"%s\");\n" % (
                                return_value.get(), arg_value.get(),
                                function_name)
                        elif ML_FPE_Underflow in exception_list:
                            code_object << "return ml_raise_libm_underflowf(%s, %s, \"%s\");\n" % (
                                return_value.get(), arg_value.get(),
                                function_name)
                        elif ML_FPE_Invalid in exception_list:
                            code_object << "return %s;\n" % return_value.get()
                    else:
                        # plain return of the exceptional value
                        return_precision = optree.get_return_value(
                        ).get_precision()
                        self.generate_expr(code_object,
                                           Return(optree.get_return_value(),
                                                  precision=return_precision),
                                           folded=folded,
                                           language=language)
                return None
            else:
                result = self.processor.generate_expr(self,
                                                      code_object,
                                                      optree,
                                                      optree.inputs,
                                                      folded=folded,
                                                      result_var=result_var,
                                                      language=language)

        elif isinstance(optree, NoResultOperation):
            result_code = self.processor.generate_expr(self,
                                                       code_object,
                                                       optree,
                                                       optree.inputs,
                                                       folded=False,
                                                       result_var=result_var,
                                                       language=language)
            code_object << "%s;\n" % result_code.get()
            return None

        elif isinstance(optree, Statement):
            # sub-statements already generated are not emitted twice
            for op in optree.inputs:
                if not self.has_memoization(op):
                    self.generate_expr(code_object,
                                       op,
                                       folded=folded,
                                       initial=True,
                                       language=language)

            return None

        else:
            # building ordered list of required node by depth
            working_list = list(optree.get_inputs())
            processing_list = list(working_list)
            resolved = {}
            while working_list:
                op = working_list.pop(0)
                # node has already been processed: SKIP
                if op in resolved:
                    continue
                if isinstance(op, ML_Table):
                    # ML_Table instances are skipped (should be generated directly by TableLoad)
                    continue
                elif isinstance(op, ML_LeafNode):
                    processing_list.append(op)
                else:
                    memo = self.get_memoization(op)
                    if memo is not None:
                        # node has already been generated: STOP HERE
                        resolved[op] = memo
                    else:
                        # enqueue node to be processed
                        processing_list.append(op)
                        # enqueue node inputs
                        working_list += list(op.get_inputs())
                        resolved[op] = memo

            # processing list in reverse order (starting with deeper node to avoid too much recursion)
            for op in processing_list[::-1]:
                _ = self.generate_expr(code_object,
                                       op,
                                       folded=folded,
                                       initial=initial,
                                       language=language)

            # processing main node
            generate_pre_process = self.generate_clear_exception if optree.get_clearprevious(
            ) else None
            result = self.processor.generate_expr(
                self,
                code_object,
                optree,
                optree.inputs,
                generate_pre_process=generate_pre_process,
                folded=folded,
                result_var=result_var,
                language=language)

        # registering result into memoization table
        self.add_memoization(optree, result)

        # debug management
        if optree.get_debug() and not self.disable_debug:
            self.generate_debug_msg(optree, result, code_object)

        if (initial or force_variable_storing
                or result_too_long(result)) and not isinstance(
                    result, CodeVariable) and result is not None:
            # result could have been modified from initial optree
            result_precision = result.precision
            prefix_tag = optree.get_tag(
                default="var_result"
            ) if force_variable_storing else "tmp_result"
            final_var = result_var if result_var else code_object.get_free_var_name(
                result_precision, prefix=prefix_tag, declare=True)
            code_object << self.generate_code_assignation(
                code_object, final_var, result.get())
            return CodeVariable(final_var, result_precision)

        return result
コード例 #21
0
ファイル: ml2_wide_sin.py プロジェクト: IanBriggs/OpTuner
        def generate_reduction_fptaylor(x):
            """ Bound the error of the polynomial evaluation, including the
            argument reduction, over the input interval @p x.

            An FPTaylor query describing the reduction (abs_x, whole, r, z)
            and the polynomial evaluation is built and submitted to get the
            rounding error; sollya.supnorm provides the approximation error
            of the polynomial with respect to sin. Both are summed.

            n_pi, n_invpi, poly_expr, config and self are read from the
            enclosing scope.

            Args:
                x: input interval; it must not straddle 0, and x * n_invpi
                   must have the same floor at both endpoints

            Returns:
                tuple: (rel_err, abs_err); an entry is float("inf") when
                the matching rounding error could not be obtained

            Raises:
                AssertionError: if @p x violates one of the constraints above
            """
            # get sign and abs_x, must be the same at endpoints
            if sollya.sup(x) <= 0:
                sign_x_expr = "-1.0"
                abs_x_expr = "-x"
                abs_x = -x
            elif sollya.inf(x) >= 0:
                sign_x_expr = "1.0"
                abs_x_expr = "x"
                abs_x = x
            else:
                # explicit raise: an assert statement is stripped under -O
                raise AssertionError("Interval must not straddle 0")

            # get k, must be the same at endpoints
            unround_k = abs_x * n_invpi
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if k_low != k_high:
                raise AssertionError(
                    "Interval must not straddle multples of pi")
            k = int(k_low)
            # parity of k selects the sign applied to the polynomial
            part = k % 2

            r_expr = "abs_x - whole"
            r = abs_x - k * n_pi

            z_expr = "r"
            z = r

            if part == 1:
                flipped_poly_expr = "-poly"
            else:
                flipped_poly_expr = "poly"

            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  abs_x rnd64= {};".format(abs_x_expr),
                "  whole rnd64= {} * {};".format(k, n_pi),
                "  r rnd64= {};".format(r_expr),
                "  z rnd64= {};".format(z_expr),
                "  poly rnd64= {};".format(poly_expr),
                "  flipped_poly rnd64= {};".format(flipped_poly_expr),
                "  retval rnd64= flipped_poly*{};".format(sign_x_expr),
                "Expressions", "  retval;"
            ])

            # first attempt: relative and absolute rounding errors at once
            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                # relative error is missing, the absolute one may still
                # be available
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            # second attempt: absolute rounding error only
            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except (AssertionError, KeyError):
                    # a missing entry is tolerated as in the first attempt
                    pass

            # approximation error of the polynomial over the reduced range
            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), z, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), z, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            # same fallback as rel_err when no rounding error was obtained
            # (None + algo_abs_err would raise a TypeError)
            if rnd_abs_err is None:
                abs_err = float("inf")
            else:
                abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
コード例 #22
0
ファイル: ml2_wide_sin.py プロジェクト: IanBriggs/OpTuner
    def determine_error(self):
        """Bound the error of the sine polynomial piecewise and print specs.

        The starting domain is cut into pieces (see ``split_domain``).  For
        each piece a round-off bound is requested from FPTaylor and an
        approximation bound (polynomial versus ``sin``) from
        ``sollya.supnorm``; their sums are printed as one tab-separated line
        per piece (low, high, absolute error, relative error).  Finally one or
        more JSON specs are printed, every line prefixed with ``"spec: "``.

        Reads ``self.precision``, ``self.poly_object``, ``self.skip_reduction``,
        ``self.slivers`` and ``self.function_name``.  Sets the global sollya
        display mode to hexadecimal.  Returns ``None``.

        Raises:
            AssertionError: if a piece handed to the reduction analysis
                straddles 0 or a multiple of the rounded pi.
        """
        sollya.settings.display = sollya.hexadecimal
        n_pi = self.precision.round_sollya_object(sollya.pi, sollya.RN)
        n_invpi = self.precision.round_sollya_object(1 / sollya.pi, sollya.RN)

        # Horner form of the polynomial, rewritten so that it is expressed in
        # the FPTaylor variable ``z`` and squares are spelled as products.
        poly_expr = str(sollya.horner(self.poly_object.get_sollya_object()))
        poly_expr = poly_expr.replace("_x_", "z")
        poly_expr = poly_expr.replace("z^0x1p1", "z*z")

        config = fptaylor.CHECK_CONFIG.copy()
        # "--abs-error" / "--rel-error" are supplied per query in run_fptaylor
        del config["--abs-error"]
        config["--opt"] = "bb-eval"
        config["--rel-error-threshold"] = "0.0"
        config["--intermediate-opt"] = "false"
        config["--uncertainty"] = "false"

        def final_total(res, kind):
            """Read the ``final_total`` value of ``kind`` from an FPTaylor result."""
            return float(res.result[kind]["final_total"]["value"])

        def run_fptaylor(query):
            """Run ``query`` and return ``(rnd_rel_err, rnd_abs_err)``.

            Either entry is ``None`` when FPTaylor did not provide it.
            """
            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = final_total(res, "relative_errors")
                rnd_abs_err = final_total(res, "absolute_errors")
            except AssertionError:
                pass
            except KeyError:
                # the relative bound is missing; the absolute one may exist
                try:
                    rnd_abs_err = final_total(res, "absolute_errors")
                except KeyError:
                    pass

            if rnd_abs_err is None:
                # second attempt asking for the absolute error only
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = final_total(res, "absolute_errors")
                except AssertionError:
                    pass

            return rnd_rel_err, rnd_abs_err

        def total_error(rnd_rel_err, rnd_abs_err, domain):
            """Add the supnorm approximation error over ``domain`` to the
            round-off errors and return ``(rel_err, abs_err)``."""
            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), domain,
                                     sollya.relative, 2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.sin(sollya.x), domain,
                                     sollya.absolute, 2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            # NOTE(review): rnd_abs_err is still None when both FPTaylor
            # attempts failed; this addition is not guarded against that.
            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err

        def generate_fptaylor(x):
            """Error of the bare polynomial evaluated on the interval ``x``."""
            query = "\n".join([
                "Variables",
                "  real z in [{},{}];".format(sollya.inf(x), sollya.sup(x)),
                "Definitions", "  retval rnd64= {};".format(poly_expr),
                "Expressions", "  retval;"
            ])
            rnd_rel_err, rnd_abs_err = run_fptaylor(query)
            return total_error(rnd_rel_err, rnd_abs_err, x)

        def generate_reduction_fptaylor(x):
            """Error of sign handling + range reduction + polynomial on ``x``."""
            # get sign and abs_x, must be the same at endpoints
            if sollya.sup(x) <= 0:
                sign_x_expr = "-1.0"
                abs_x_expr = "-x"
                abs_x = -x
            elif sollya.inf(x) >= 0:
                sign_x_expr = "1.0"
                abs_x_expr = "x"
                abs_x = x
            else:
                # explicit raise: a bare ``assert`` would vanish under -O
                raise AssertionError("Interval must not straddle 0")

            # get k, must be the same at endpoints
            unround_k = abs_x * n_invpi
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if k_low != k_high:
                raise AssertionError(
                    "Interval must not straddle multples of pi")
            k = int(k_low)

            # reduced argument interval on which the polynomial is compared
            z = abs_x - k * n_pi

            # the polynomial result is negated for odd k
            flipped_poly_expr = "-poly" if k % 2 == 1 else "poly"

            query = "\n".join([
                "Variables",
                "  real x in [{},{}];".format(sollya.inf(x), sollya.sup(x)),
                "Definitions", "  abs_x rnd64= {};".format(abs_x_expr),
                "  whole rnd64= {} * {};".format(k, n_pi),
                "  r rnd64= abs_x - whole;", "  z rnd64= r;",
                "  poly rnd64= {};".format(poly_expr),
                "  flipped_poly rnd64= {};".format(flipped_poly_expr),
                "  retval rnd64= flipped_poly*{};".format(sign_x_expr),
                "Expressions", "  retval;"
            ])
            rnd_rel_err, rnd_abs_err = run_fptaylor(query)
            return total_error(rnd_rel_err, rnd_abs_err, z)

        def split_domain(starting_domain, slivers):
            """Cut ``starting_domain`` at 0 and at multiples of ``n_pi``, halve
            every piece ``slivers`` times, and return the distinct
            non-degenerate pieces sorted by lower bound."""
            in_domains = [starting_domain]

            # abs: split any interval that contains 0 in its interior
            out_domains = list()
            for I in in_domains:
                if sollya.inf(I) < 0 and sollya.sup(I) > 0:
                    out_domains.append(sollya.Interval(sollya.inf(I), 0))
                    out_domains.append(sollya.Interval(0, sollya.sup(I)))
                else:
                    out_domains.append(I)
            in_domains = out_domains

            # k: split until floor(I * n_invpi) agrees at both endpoints
            out_domains = list()
            while in_domains:
                I = in_domains.pop()
                unround_mult = I * n_invpi
                mult_low = sollya.floor(sollya.inf(unround_mult))
                mult_high = sollya.floor(sollya.sup(unround_mult))
                # the (-1, 0) pair is accepted as well
                # NOTE(review): presumably for pieces ending exactly at 0
                if mult_low == mult_high or (mult_low == -1
                                             and mult_high == 0):
                    out_domains.append(I)
                    continue
                # the two halves are separated by a relative gap of 2**-53
                # next to the multiple of n_pi
                if sollya.sup(I) <= 0:
                    divider_low = (mult_low + 1) * n_pi
                    divider_high = divider_low - divider_low * 2**-53
                else:
                    divider_high = (mult_low + 1) * n_pi
                    divider_low = divider_high - divider_high * 2**-53

                in_domains.append(sollya.Interval(sollya.inf(I), divider_low))
                in_domains.append(sollya.Interval(divider_high, sollya.sup(I)))
            in_domains = out_domains

            # bisect every piece `slivers` times
            for _ in range(slivers):
                out_domains = list()
                for I in in_domains:
                    mid = sollya.mid(I)
                    out_domains.append(sollya.Interval(sollya.inf(I), mid))
                    out_domains.append(sollya.Interval(mid, sollya.sup(I)))
                in_domains = out_domains

            in_domains = sorted(set(in_domains),
                                key=lambda x: float(sollya.inf(x)))
            # drop point intervals
            return [d for d in in_domains if sollya.inf(d) != sollya.sup(d)]

        if self.skip_reduction:
            starting_domain = sollya.Interval(-n_pi - 2**-7, n_pi + 2**-7)
        else:
            reduction_k = 20
            starting_domain = sollya.Interval(-reduction_k * n_pi,
                                              reduction_k * n_pi)

        # analyse each piece
        analyse = (generate_fptaylor
                   if self.skip_reduction else generate_reduction_fptaylor)
        errors = list()
        for I in split_domain(starting_domain, self.slivers):
            rel_err, abs_err = analyse(I)
            print("{}\t{}\t{}\t{}".format(float(sollya.inf(I)),
                                          float(sollya.sup(I)), float(abs_err),
                                          float(rel_err)))
            errors.append((I, abs_err, rel_err))

        def generate_json(errors, domain):
            """Build the spec dictionary for the pieces lying in ``domain``."""
            errors = [err for err in errors if err[0] in domain]
            errors.sort(key=lambda err: err[2])
            # epsilon is the smallest relative bound among the pieces, delta
            # the largest absolute bound
            epsilon = errors[0][2]
            delta = max(err[1] for err in errors)

            return {
                "cname": self.function_name,
                "delta": float(delta),
                "domain": [
                    float(sollya.inf(domain)),
                    float(sollya.sup(domain)),
                ],
                "epsilon": float(epsilon),
                "operation": "sin"
            }

        def print_spec(d):
            """Print ``d`` as indented JSON, each line prefixed by 'spec: '."""
            json_str = json.dumps(d, sort_keys=True, indent=4)
            print("spec: " + json_str.replace("\n", "\nspec: "))

        if self.skip_reduction:
            print_spec(generate_json(errors, starting_domain))
        else:
            specs = [
                generate_json(errors, sollya.Interval(-k * n_pi, k * n_pi))
                for k in range(1, reduction_k)
            ]
            # print a spec unless the next, wider one has identical bounds;
            # the last spec is always printed
            for i, d in enumerate(specs):
                if i < len(specs) - 1:
                    nd = specs[i + 1]
                    if (d["epsilon"] == nd["epsilon"]
                            and d["delta"] == nd["delta"]):
                        continue
                print_spec(d)
コード例 #23
0
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):
        """ Build the argument-reduction part of an exp(x) implementation
            and evaluate its error.

            NOTE: the body returns unconditionally once the initial
            polynomial degree has been guessed; the polynomial search, the
            scheme construction and the C code generation written after that
            ``return`` are currently unreachable.

            :param precision: floating-point format of the input and output
            :param abs_accuracy: not read by this constructor
            :param libm_compliant: forwarded to CCodeGenerator (unreachable part)
            :param debug_flag: enables debug output in the generated code
                (unreachable part)
            :param fuse_fma: forwarded to the optimization of the reduced
                argument; also enables FMA fusion of the final scheme
                (unreachable part)
            :param fast_path_extract: enables fast path factorization
                (unreachable part)
            :param target: processor used by the optimization engine and the
                code generators (the default instance is created once, when
                the function is defined)
            :param output_file: path of the generated C file (unreachable part)
            :param function_name: name of the generated function
        """

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        # log(2) is split into a high part rounded to log2_hi_precision bits
        # and a low part holding the remainder
        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        # fixed: the value used to sit outside the report() call and was
        # never part of the logged message
        Log.report(Log.Info, "log2_hi_precision: %s" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
        Log.report(Log.Info, "eval error: %s" % eval_error)
        # single-argument print calls: same output under Python 2 and 3
        print(r.get_str(depth = None, display_precision = True, display_attribute = True))
        print(opt_r.get_str(depth = None, display_precision = True, display_attribute = True))

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print("ulp:  %s" % (local_ulp,))
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree

        # NOTE(review): unconditional early exit, looks like a debugging
        # leftover; everything below is unreachable until it is removed.
        return


        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print("global_poly_error:  %s %s" % (global_poly_error, global_rel_poly_error))
            # stop as soon as the relative error is below one ulp
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print("test:  %s" % (flag,))
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        # the context manager closes the file even if code generation raises
        with open(output_file, "w") as output_stream:
            output_stream.write(self.result.get(cg))
コード例 #24
0
ファイル: ml_expm1.py プロジェクト: templeblock/metalibm
  def generate_scheme(self):
    """ Build and return the operation graph implementing expm1(x).

        The graph handles NaN / infinite inputs first, then early overflow
        and underflow, and finally the generic path: reduction of x by
        multiples of log(2), polynomial approximation of expm1 on the
        reduced argument, and reconstruction (with dedicated late
        overflow / underflow branches).

        Reads self.implementation, self.precision, self.debug_flag and
        self.function_name.

        :return: the root ConditionBlock of the scheme
    """
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    def ExpRaiseReturn(*args, **kwargs):
        """ RaiseReturn bound to this function's input and name """
        kwargs["arg_value"] = vx
        kwargs["function_name"] = self.function_name
        return RaiseReturn(*args, **kwargs)

    fmt = self.precision
    minus_one = Constant(-1, precision = fmt)

    # --- special inputs -----------------------------------------------------
    is_nan_or_inf = Test(
        vx, specifier = Test.IsInfOrNaN, likely = False,
        debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    is_nan = Test(
        vx, specifier = Test.IsNaN, likely = False,
        debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    is_positive = Comparison(
        vx, 0, specifier = Comparison.Greater,
        debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False)

    # infinite input: +inf for positive x, -1 otherwise
    on_infinity = Statement(
        ConditionBlock(is_positive, Return(FP_PlusInfty(fmt)), Return(minus_one)))
    # NaN input returns a quiet NaN, otherwise fall back to the infinity case
    on_special = ConditionBlock(is_nan, Return(FP_QNaN(fmt)), on_infinity)

    # --- early overflow / underflow ------------------------------------------
    emax = self.precision.get_emax()
    max_value = S2**(emax + 1)
    expm1_overflow_bound = ceil(log(max_value + 1))
    overflow_test = Comparison(
        vx, expm1_overflow_bound, likely = False,
        specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(fmt)))

    emin = self.precision.get_emin_subnormal()
    min_value = S2** emin
    expm1_underflow_bound = floor(log(min_value) + 1)
    underflow_test = Comparison(
        vx, expm1_underflow_bound, likely = False,
        specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(minus_one))

    sollya_fmt = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_fmt = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]

    # --- constants -------------------------------------------------------------
    log_2 = round(log(2), sollya_fmt, sollya.RN)
    invlog2 = round(1/log(2), sollya_fmt, sollya.RN)
    log_2_cst = Constant(log_2, precision = fmt)

    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    # log(2) split into a short high part and a low correction term
    log2_hi_bits = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_bits, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_fmt, sollya.RN)

    # --- argument reduction -----------------------------------------------------
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_fmt, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = fmt, tag = "k")

    k_log2_hi = Multiplication(k, log2_hi, precision = fmt)
    minus_k_log2_lo = Multiplication(
        Negation(k, precision = fmt), log2_lo, precision = fmt)

    hi_diff = Subtraction(vx, k_log2_hi, precision = fmt)

    s = Addition(hi_diff, minus_k_log2_lo, precision = fmt)
    z = Subtraction(s, hi_diff, precision = fmt)
    t = Subtraction(minus_k_log2_lo, z, precision = fmt)

    r = Addition(s, t, precision = fmt)
    r.set_attributes(tag = "r", debug = debug_multi)

    r_interval = Interval(-log_2/S2, log_2/S2)

    local_ulp = sup(ulp(exp(r_interval), self.precision))

    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)

    # --- polynomial approximation ---------------------------------------------
    def error_function(p, f, ai, mod, t):
        """ approximation error measured with dirtyinfnorm """
        return dirtyinfnorm(f - p, ai)

    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")

    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

    build_scheme = PolynomialSchemeEvaluator.generate_horner_scheme
    degree_indices = range(0, poly_degree)

    precision_list = [self.precision] * (len(degree_indices) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(
        expm1(sollya.x), poly_degree, precision_list, r_interval,
        sollya.absolute, error_function = error_function)
    # only the monomials from index 2 upward go through the scheme builder;
    # r itself is added back afterwards
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    tail_poly = build_scheme(sub_poly, r, unified_precision = fmt)
    poly = r + tail_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)

    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = fmt)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = fmt)

    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi)

    # --- late overflow ------------------------------------------------------------
    late_overflow_test = Comparison(
        ik, self.precision.get_emax(), specifier = Comparison.Greater,
        likely = False, debug = debug_multi, tag = "late_overflow_test")

    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
    diff_k = ik - overflow_exp_offset

    exp_diff_k = ExponentInsertion(
        diff_k, precision = fmt, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(
        overflow_exp_offset, precision = fmt, tag = "exp_offset", debug = debug_multi)

    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0

    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False),
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(fmt)),
        Return(late_overflow_result))

    # --- late underflow -----------------------------------------------------------
    late_underflow_test = Comparison(
        k, self.precision.get_emin_normal(),
        specifier = Comparison.LessOrEqual, likely = False)

    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset

    exp_corrected = ExponentInsertion(corrected_coeff, precision = fmt)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = fmt)

    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0

    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)

    raise_on_subnormal = ConditionBlock(
        test_subnormal,
        ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result))
    late_underflow_return = Statement(
        raise_on_subnormal,
        Return(late_underflow_result))

    # --- reconstruction -------------------------------------------------------------
    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)

    generic_path = ConditionBlock(
        late_underflow_test, late_underflow_return, Return(std_result))
    result_scheme = ConditionBlock(
        late_overflow_test, late_overflow_return, generic_path)

    std_return = ConditionBlock(
        overflow_test,
        overflow_return,
        ConditionBlock(underflow_test, underflow_return, result_scheme))

    return ConditionBlock(is_nan_or_inf, Statement(on_special), std_return)
コード例 #25
0
def generic_poly_split(offset_fct, indexing, target_eps, coeff_precision, vx):
    """ Generate a piecewise polynomial approximation of @p offset_fct over
        the sub-intervals defined by the @p indexing object.

        For each sub-interval, a polynomial approximation with
        maximal error @p target_eps is tabulated, and evaluated using format
        @p coeff_precision.

        :param offset_fct: callable mapping a sub-interval offset to the
            function to be approximated on that sub-interval
        :param indexing: object providing split_num,
            get_offseted_sub_list(), get_offseted_sub_interval() and
            get_index_node()
        :param target_eps: error target forwarded to guessdegree
        :param coeff_precision: format used to store the offsets and
            coefficients and to evaluate the polynomial
        :param vx: input variable node
        :return: operation node evaluating, with Horner's scheme, the
            polynomial selected by the index of @p vx at (vx - offset)
    """
    # computing degree for a different polynomial approximation on each
    # sub-interval
    poly_degree_list = [
        int(sup(guessdegree(offset_fct(offset), sub_interval, target_eps)))
        for offset, sub_interval in indexing.get_offseted_sub_list()
    ]
    poly_max_degree = max(poly_degree_list)

    # tabulating polynomial coefficients on split_num sub-interval of interval
    # (every row is padded with zeros up to poly_max_degree)
    poly_table = ML_NewTable(
        dimensions=[indexing.split_num, poly_max_degree + 1],
        storage_precision=coeff_precision,
        const=True)
    offset_table = ML_NewTable(dimensions=[indexing.split_num],
                               storage_precision=coeff_precision,
                               const=True)
    max_error = 0.0

    for sub_index in range(indexing.split_num):
        poly_degree = poly_degree_list[sub_index]
        offset, approx_interval = indexing.get_offseted_sub_interval(sub_index)
        offset_table[sub_index] = offset
        if poly_degree == 0:
            # managing constant approximation separately since it seems
            # to break sollya
            local_approx = coeff_precision.round_sollya_object(
                offset_fct(offset)(inf(approx_interval)))
            poly_table[sub_index][0] = local_approx
            for monomial_index in range(1, poly_max_degree + 1):
                poly_table[sub_index][monomial_index] = 0
            # NOTE(review): this is the infinity norm of the difference,
            # while the other branch requests sollya.relative
            approx_error = sollya.infnorm(
                offset_fct(offset) - local_approx, approx_interval)

        else:
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                offset_fct(offset), poly_degree,
                [coeff_precision] * (poly_degree + 1), approx_interval,
                sollya.relative)

            for monomial_index in range(poly_max_degree + 1):
                if monomial_index <= poly_degree:
                    poly_table[sub_index][
                        monomial_index] = poly_object.coeff_map[monomial_index]
                else:
                    poly_table[sub_index][monomial_index] = 0
        max_error = max(approx_error, max_error)

    Log.report(Log.Debug, "max approx error is {}", max_error)

    # indexing function: derive index from input @p vx value
    poly_index = indexing.get_index_node(vx)
    poly_index.set_attributes(tag="poly_index", debug=debug_multi)

    # building polynomial evaluation scheme
    offset = TableLoad(offset_table,
                       poly_index,
                       precision=coeff_precision,
                       tag="offset",
                       debug=debug_multi)
    # Horner accumulator starts with the highest-degree coefficient
    poly = TableLoad(poly_table,
                     poly_index,
                     poly_max_degree,
                     precision=coeff_precision,
                     tag="poly_init",
                     debug=debug_multi)
    red_vx = Subtraction(vx,
                         offset,
                         precision=vx.precision,
                         tag="red_vx",
                         debug=debug_multi)
    # the leading coefficient is already in the accumulator, so the
    # recurrence starts one degree below it; starting at poly_max_degree
    # would fold that coefficient in twice and raise the degree by one
    for monomial_index in range(poly_max_degree - 1, -1, -1):
        coeff = TableLoad(poly_table,
                          poly_index,
                          monomial_index,
                          precision=coeff_precision,
                          tag="poly_%d" % monomial_index,
                          debug=debug_multi)
        poly = FMA(red_vx, poly, coeff, precision=coeff_precision)

    return poly
コード例 #26
0
    def generate_test_tables(self,
                             test_num,
                             test_ranges=[Interval(-1.0, 1.0)]):
        """ Build the input and output tables shared between the auto test
            and the max_error tests.

            :param test_num: number of tests (not read by this method)
            :param test_ranges: list of intervals, the i-th one bounds the
                random values drawn for the i-th input table; it is zipped
                with the input formats, so only the shorter length is kept
            :return: tuple (tensor_descriptors, input_tables, output_tables)
                where tensor_descriptors is the pair
                (input descriptor list, output descriptor list)
        """
        sizes = self.generate_random_sizes()
        # output descriptors are requested before input descriptors
        out_descriptors = self.generate_output_tensor_descriptors(sizes)
        in_descriptors = self.generate_innput_tensor_descriptors(sizes)

        # read here but not used further in this method
        index_range = self.test_index_range

        # one bounding size and one scalar format per input array
        in_sizes = [desc.get_bounding_size() for desc in in_descriptors]
        in_formats = [desc.scalar_format for desc in in_descriptors]

        # TODO/FIXME: implement proper input range depending on input index
        # one random value source per (input format, test range) pair
        value_sources = []
        for fmt, bounds in zip(in_formats, test_ranges):
            value_sources.append(
                get_precision_rng(fmt, inf(bounds), sup(bounds)))

        # input tables, filled with random values rounded to nearest in
        # the format of the corresponding input
        input_tables = [
            generate_1d_table(
                bound_size,
                in_formats[slot],
                self.uniquify_name("input_table_arg%d" % slot),
                value_gen=(
                    lambda _: in_formats[slot].round_sollya_object(
                        value_sources[slot].get_new_value(), sollya.RN)))
            for slot, bound_size in enumerate(in_sizes)
        ]

        # one bounding size and one scalar format per output array
        out_sizes = [desc.get_bounding_size() for desc in out_descriptors]
        out_formats = [desc.scalar_format for desc in out_descriptors]

        # output tables are writable and filled with zeros
        output_tables = []
        for slot, (bound_size, fmt) in enumerate(zip(out_sizes, out_formats)):
            output_tables.append(
                generate_1d_table(
                    bound_size,
                    fmt,
                    self.uniquify_name("output_array_%d" % slot),
                    const=False,
                    value_gen=(lambda _: 0)))

        return (in_descriptors, out_descriptors), input_tables, output_tables
コード例 #27
0
ファイル: array_function.py プロジェクト: metalibm/metalibm
    def generate_bench_wrapper(self,
                               test_num=1,
                               loop_num=100000,
                               test_ranges=[Interval(-1.0, 1.0)],
                               debug=False):
        """ Build the "bench_wrapper" function which times the tested
            array function.

            The generated scheme records a start timestamp, resets the
            element accumulator, runs the array test loop @p loop_num
            times, computes the elapsed time, prints it and returns
            the ratio elapsed time / number of accumulated elements.

            :param test_num: number of randomly sized test arrays
            :param loop_num: number of repetitions of the whole test loop
            :param test_ranges: list of intervals bounding the random input
                values, zipped with the input formats
            :param debug: not read by this method
            :return: FunctionGroup containing the single bench function
        """
        # interval where the array length is chosen from (randomly)
        index_range = self.test_index_range

        auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        # NOTE(review): the failure-report and printf-success objects below
        # are built but never referenced again in this method
        failure_report_op = FunctionOperator("report_failure")
        failure_report_function = FunctionObject("report_failure", [], ML_Void,
                                                 failure_report_op)

        printf_success_op = FunctionOperator(
            "printf",
            arg_map={0: "\"test successful %s\\n\"" % function_name},
            void_function=True)
        printf_success_function = FunctionObject("printf", [], ML_Void,
                                                 printf_success_op)

        # format of the output array: self.precision wrapped with the
        # "volatile" attribute
        output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

        test_total = test_num

        # number of arrays expected as inputs for tested_function
        NUM_INPUT_ARRAY = 1
        # position of the input array in tested_function operands (generally
        # equals to 1 as to 0-th input is often the destination array)
        INPUT_INDEX_OFFSET = 1

        # concatenating standard test array at the beginning of randomly
        # generated array
        # NOTE(review): the list holds len(standard_test_cases) + test_num
        # sizes, but only the first test_total (= test_num) rows are
        # exported in the size/offset table below
        TABLE_SIZE_VALUES = [
            len(std_table) for std_table in self.standard_test_cases
        ] + [
            random.randrange(index_range[0], index_range[1] + 1)
            for i in range(test_num)
        ]
        # offset of each sub-array: sum of the sizes preceding it
        OFFSET_VALUES = [sum(TABLE_SIZE_VALUES[:i]) for i in range(test_total)]

        # table of (size, offset) rows, one per test
        table_size_offset_array = generate_2d_table(
            test_total,
            2,
            ML_UInt32,
            self.uniquify_name("table_size_array"),
            value_gen=(lambda row_id:
                       (TABLE_SIZE_VALUES[row_id], OFFSET_VALUES[row_id])))

        # the input and output tables span the sum of all listed sizes
        INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [self.get_input_precision(1).get_data_precision()]
        # one random value source per (input format, test range) pair
        rng_map = [
            get_precision_rng(precision, inf(test_range), sup(test_range))
            for precision, test_range in zip(input_precisions, test_ranges)
        ]

        # generated table of inputs
        # values are random draws rounded to nearest in the input format
        input_tables = [
            generate_1d_table(
                INPUT_ARRAY_SIZE,
                self.get_input_precision(INPUT_INDEX_OFFSET +
                                         table_id).get_data_precision(),
                self.uniquify_name("input_table_arg%d" % table_id),
                value_gen=(
                    lambda _: input_precisions[table_id].round_sollya_object(
                        rng_map[table_id].get_new_value(), sollya.RN)))
            for table_id in range(NUM_INPUT_ARRAY)
        ]

        # generate output_array
        # (non-constant table flagged empty; the value generator yields None)
        output_array = generate_1d_table(
            INPUT_ARRAY_SIZE,
            output_precision,
            self.uniquify_name("output_array"),
            #value_gen=(lambda _: FP_QNaN(self.precision))
            value_gen=(lambda _: None),
            const=False,
            empty=True)

        # accumulate element number
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        # the bench performs no check after each test: the post-statement
        # generator handed to the array test wrapper is an empty Statement
        def empty_post_statement_gen(input_tables, output_array,
                                     table_size_offset_array, array_offset,
                                     array_len, test_id):
            return Statement()

        test_loop = self.get_array_test_wrapper(test_total, tested_function,
                                                table_size_offset_array,
                                                input_tables, output_array,
                                                acc_num,
                                                empty_post_statement_gen)

        # timer first holds the start timestamp, then the elapsed time
        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
        # printf reporting element count, elapsed time and their ratio
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0:
                "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\""
                % function_name,
                1:
                FO_Arg(0),
                2:
                FO_Arg(1),
                3:
                FO_Arg(2)
            },
            void_function=True)
        printf_timing_function = FunctionObject(
            "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void,
            printf_timing_op)

        # counter and bound of the outer repetition loop
        vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
        loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")
        loop_increment = 1

        # bench measure of clock per element
        # (elapsed time divided by accumulated element count, in binary64)
        cpe_measure = Division(
            Conversion(timer, precision=ML_Binary64),
            Conversion(acc_num, precision=ML_Binary64),
            precision=ML_Binary64,
            tag="cpe_measure",
        )

        # common test scheme between scalar and vector functions
        test_scheme = Statement(
            self.processor.get_init_timestamp(),
            # timer <- start timestamp
            ReferenceAssign(timer, self.processor.get_current_timestamp()),
            ReferenceAssign(acc_num, 0),
            # repeat the whole array test loop loop_num times
            Loop(
                ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
                vj < loop_num_cst,
                Statement(test_loop, ReferenceAssign(vj,
                                                     vj + loop_increment))),
            # timer <- current timestamp - start timestamp
            ReferenceAssign(
                timer,
                Subtraction(self.processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            printf_timing_function(
                Conversion(acc_num, precision=ML_Int64),
                timer,
                cpe_measure,
            ),
            Return(cpe_measure),
            # Return(Constant(0, precision = ML_Int32))
        )
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])
コード例 #28
0
    def generate_bench(self, processor, test_num=1000, unroll_factor=10):
        """ Generate a performance bench for self.op_class.

            The bench chains @p unroll_factor dependent operations inside
            a loop executed test_num // unroll_factor times, then prints
            the final value, @p test_num, the elapsed time and the ratio
            elapsed time / test_num.

            :param processor: target object providing
                get_current_timestamp()
            :param test_num: number of operations reported by the bench
            :param unroll_factor: number of chained operations per loop
                iteration
            :return: Statement implementing the bench
        """
        # one random initial value per operand, drawn in self.init_interval
        initial_inputs = [
            Constant(random.uniform(inf(self.init_interval),
                                    sup(self.init_interval)),
                     precision=precision)
            for precision in self.input_precisions
        ]

        # operands are held in local variables whose format carries the
        # "volatile" attribute
        var_inputs = [
            Variable("var_%d" % i,
                     precision=FormatAttributeWrapper(precision, ["volatile"]),
                     var_type=Variable.Local)
            for i, precision in enumerate(self.input_precisions)
        ]

        # NOTE(review): the "[%s]" slot receives
        # self.output_precision.get_display_format(), presumably the printf
        # conversion for the final value passed as first argument; the
        # rest of the format holds three conversions -- confirm
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0: "\"%s[%s] %%lld elts computed "\
                   "in %%lld cycles =>\\n     %%.3f CPE \\n\"" %
                (
                    self.bench_name,
                    self.output_precision.get_display_format()
                ),
                1: FO_Arg(0),
                2: FO_Arg(1),
                3: FO_Arg(2),
                4: FO_Arg(3)
            }, void_function=True
        )
        printf_timing_function = FunctionObject(
            "printf", [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
            ML_Void, printf_timing_op)
        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

        void_function_op = FunctionOperator("(void)",
                                            arity=1,
                                            void_function=True)
        void_function = FunctionObject("(void)", [self.output_precision],
                                       ML_Void, void_function_op)

        # initialization of operation inputs
        init_assign = metaop.Statement()
        for var_input, init_value in zip(var_inputs, initial_inputs):
            init_assign.push(ReferenceAssign(var_input, init_value))

        # test loop
        loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
        # floor division: the loop bound is an integer constant, and true
        # division would hand a float to an ML_Int64 constant
        test_num_cst = Constant(test_num // unroll_factor,
                                precision=ML_Int64,
                                tag="test_num")

        # Goal build a chain of dependant operation to measure
        # elementary operation latency
        local_inputs = tuple(var_inputs)
        local_result = self.op_class(*local_inputs,
                                     precision=self.output_precision,
                                     unbreakable=True)
        for _ in range(unroll_factor - 1):
            # the previous result replaces the first operand
            local_inputs = (local_result,) + tuple(var_inputs[1:])
            local_result = self.op_class(*local_inputs,
                                         precision=self.output_precision,
                                         unbreakable=True)
        # renormalisation
        local_result = self.renorm_function(local_result)

        # variable assignation to build dependency chain
        var_assign = Statement()
        var_assign.push(ReferenceAssign(var_inputs[0], local_result))
        final_value = var_inputs[0]

        # loop increment value
        loop_increment = 1

        test_loop = Loop(
            ReferenceAssign(loop_i, Constant(0, precision=ML_Int32)),
            loop_i < test_num_cst,
            Statement(var_assign,
                      ReferenceAssign(loop_i, loop_i + loop_increment)),
        )

        # bench scheme
        test_scheme = Statement(
            # timer <- start timestamp
            ReferenceAssign(timer, processor.get_current_timestamp()),
            init_assign,
            test_loop,
            # timer <- current timestamp - start timestamp
            ReferenceAssign(
                timer,
                Subtraction(processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            # prevent intermediary variable simplification
            void_function(final_value),
            printf_timing_function(
                final_value, Constant(test_num, precision=ML_Int64), timer,
                Division(Conversion(timer, precision=ML_Binary64),
                         Constant(test_num, precision=ML_Binary64),
                         precision=ML_Binary64))
        )

        return test_scheme
コード例 #29
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() / 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
コード例 #30
0
  def generate_argument_reduction(self, memory_limit):
    """Return the argument reduction parameters used by the generator.

    The exhaustive parameter search (kept in _search_argument_reduction)
    is bypassed here: a fixed configuration (size1=6, prec1=10, size2=12,
    prec2=13) is evaluated and annotated with fixed polynomial degrees.
    NOTE(review): the fixed values are presumably the outcome of an earlier
    run of the search -- confirm before changing them.

    :param memory_limit: memory budget for the tables; it is not used by
        the fixed configuration and is kept for interface compatibility
    :return: the dict returned by eval_argument_reduction(6, 10, 12, 13),
        extended with the keys 'sizeof_tables', 'degree_poly1' and
        'degree_poly2'
    """
    best_arg_reduc = self.eval_argument_reduction(6, 10, 12, 13)
    best_arg_reduc['sizeof_tables'] = (
      best_arg_reduc['sizeof_table1'] + best_arg_reduc['sizeof_table2'])
    best_arg_reduc['degree_poly1'] = 4
    best_arg_reduc['degree_poly2'] = 8
    return best_arg_reduc

  def _search_argument_reduction(self, memory_limit):
    """Iterate through all possible parameters and return the best argument
    reduction found.

    This helper is not called by generate_argument_reduction, which uses a
    fixed configuration instead.

    The order of importance of the characteristics of a good argument
    reduction is:
      1- the argument reduction is valid
      2- the degrees of the polynomials obtained are minimal
      3- the memory used is minimal
    An argument reduction is valid iff:
      - the memory used is less than memory_limit
      - y-1 and z-1 fit into a uint64_t
      - the second argument reduction is useful (i.e. it adds at least
        1 bit to the argument reduction)
    From those validity constraints some bounds on the parameters are
    deduced to reduce the searched space (those bounds are implied by, but
    not equivalent to, the constraints):
      size1 <= log2(memory_limit/17)                       (memory_limit on the first table)
      prec1 < 13 + size1                                   (y-1 fits into a uint64_t)
      size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)
                                                           (memory_limit on both tables)
      size2 >= 1 - log2(midinterval)                       (second arg red should be useful)
      prec2 < 12 - prec1 - log2((y-y1)/y1), for all possible y
                                                           (z-1 fits into a uint64_t)
    It is hard to deduce a tight bound on prec2 from the last inequality;
    a good approximation is size2 ~= max[for y](-log2((y-y1)/y1)), but using
    it may eliminate valid argument reductions.

    :param memory_limit: upper bound on the cumulated size of both tables
    :return: the best dict produced by eval_argument_reduction, extended
        with 'degree_poly1', 'degree_poly2' and 'sizeof_tables', or None
        when no candidate was accepted
    """
    best_arg_reduc = None

    min_size1 = 1
    max_size1 = floor(log(memory_limit/17)/log(2)).getConstantAsInt()
    for size1 in range(max_size1, min_size1 - 1, -1):

      min_prec1 = size1
      max_prec1 = 12 + size1
      for prec1 in range(min_prec1, max_prec1 + 1):

        # sizeof_table1 and mid_interval are needed for the bounds on
        # size2 and prec2
        first_arg_reduc = self.eval_argument_reduction(size1, prec1, prec1, prec1)
        mid_interval = first_arg_reduc['mid_interval']
        sizeof_table1 = first_arg_reduc['sizeof_table1']

        if not (0 <= inf(mid_interval) and sup(mid_interval) < S2**(64 - 52 - prec1)):
          continue
        if not (sizeof_table1 < memory_limit):
          continue

        min_size2 = 1 - ceil(log(sup(mid_interval))/log(2)).getConstantAsInt()
        max_size2 = floor(log((memory_limit - sizeof_table1)/(17 * sup(mid_interval)))/log(2)).getConstantAsInt()
        # The body of the prec2 loop can shrink the interval of valid values
        # for prec2, so min_prec2 and max_prec2 are set here rather than just
        # before the prec2 loop: the tightened bounds must carry over to the
        # next iteration of size2.
        min_prec2 = 0
        max_prec2 = 12 + max_size2 - prec1
        for size2 in range(max_size2, min_size2 - 1, -1):

          max_prec2 = min(max_prec2, 12 + size2 - prec1)
          # the range is built once, so updates to min_prec2 / max_prec2
          # made below only take effect for the next value of size2
          for prec2 in range(max_prec2, min_prec2 - 1, -1):

            arg_reduc = self.eval_argument_reduction(size1, prec1, size2, prec2)
            out_interval = arg_reduc['out_interval']
            sizeof_tables = arg_reduc['sizeof_table1'] + arg_reduc['sizeof_table2']
            if not (0 <= inf(out_interval) and sup(out_interval) < S2**(64 - 52 - prec1 - prec2)):
              # this prec2 is rejected: lower the upper bound for next size2
              max_prec2 = prec2 - 1
              continue
            if memory_limit < sizeof_tables:
              continue

            # guess the degree of the two polynomials (relative error <= 2^-52
            # and absolute error <= 2^-120)
            # note: zero is excluded from out_interval to not perturb sollya
            # (log(1+x)/x is not well defined at 0)
            sollya_out_interval = Interval(S2**(-52 - prec1 - prec2), sup(out_interval))
            guess_degree_poly1 = guessdegree(log(1 + sollya.x)/sollya.x, sollya_out_interval, S2**-52)
            guess_degree_poly2 = guessdegree(log(1 + sollya.x), sollya_out_interval, S2**-120)
            # TODO: detect when guessdegree returns multiple possible degrees
            # (inf != sup of the returned interval) and find the right one
            degree_poly1 = sup(guess_degree_poly1).getConstantAsInt() + 1
            degree_poly2 = sup(guess_degree_poly2).getConstantAsInt()

            if ((best_arg_reduc is not None)
                and (best_arg_reduc['degree_poly1'] < degree_poly1
                     or best_arg_reduc['degree_poly2'] < degree_poly2)):
              # worse than the current best: raise the lower bound on prec2
              # for the next size2 and stop scanning prec2
              min_prec2 = prec2 + 1
              break

            if ((best_arg_reduc is None)
                or (best_arg_reduc['degree_poly1'] > degree_poly1)
                or (best_arg_reduc['degree_poly1'] == degree_poly1
                    and best_arg_reduc['degree_poly2'] > degree_poly2)
                or (best_arg_reduc['degree_poly1'] == degree_poly1
                    and best_arg_reduc['degree_poly2'] == degree_poly2
                    and best_arg_reduc['sizeof_tables'] > sizeof_tables)):
              arg_reduc['degree_poly1'] = degree_poly1
              arg_reduc['degree_poly2'] = degree_poly2
              arg_reduc['sizeof_tables'] = sizeof_tables
              best_arg_reduc = arg_reduc
    return best_arg_reduc