Example #1
0
    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ generate the MPFR emulation statement based on mpfr_cos

        result is the node which receives the emulated value
        mpfr_x is a mpfr_t variable which should have the right precision
        mpfr_rnd is the rounding mode

        returns a Statement assigning the mpfr_cos call result to result
        """
        mpfr_name = "mpfr_cos"
        # operand layout of the generated call: slot 0 is bound to the
        # result, slots 1 and 2 to the first and second function arguments
        operand_layout = {0: FO_Result(0), 1: FO_Arg(0), 2: FO_Arg(1)}
        mpfr_op = FunctionOperator(mpfr_name,
                                   arg_map=operand_layout,
                                   require_header=["mpfr.h"])
        mpfr_func = FunctionObject(mpfr_name, [ML_Mpfr_t, ML_Int32],
                                   ML_Mpfr_t, mpfr_op)
        emulated_value = mpfr_func(mpfr_x, mpfr_rnd)
        return Statement(ReferenceAssign(result, emulated_value))
Example #2
0
 def __init__(self, pattern, arity=1, exu=None, **kw):
     """ initialize both the template format and the machine instruction

     pattern is forwarded to TemplateOperatorFormat.__init__
     arity is the number of operands; it sizes the generated arg_map
     exu is forwarded to MachineInstruction.__init__
     kw holds extra keyword options, forwarded as keywords to
     TemplateOperatorFormat.__init__
     """
     # slot 0 is bound to the result, slots 1..arity to the operands in order
     arg_map = {0: FO_Result()}
     for i in range(arity):
         arg_map[i + 1] = FO_Arg(i)
     # kw must be expanded with ** so that options stay keyword arguments;
     # a single * would pass the dictionary keys as positional arguments
     TemplateOperatorFormat.__init__(self, pattern, arg_map=arg_map, **kw)
     MachineInstruction.__init__(self, exu)
Example #3
0
    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ generate the MPFR emulation statement based on mpfr_log10

        result is the node which receives the emulated value
        mpfr_x is a mpfr_t variable which should have the right precision
        mpfr_rnd is the rounding mode

        returns a Statement assigning the mpfr_log10 call result to result
        """
        mpfr_symbol = "mpfr_log10"
        # slot 0 of the call is bound to the result, slots 1 and 2 to the
        # first and second function arguments
        call_operator = FunctionOperator(
            mpfr_symbol,
            arg_map={0: FO_Result(0), 1: FO_Arg(0), 2: FO_Arg(1)},
            require_header=["mpfr.h"])
        call_object = FunctionObject(mpfr_symbol, [ML_Mpfr_t, ML_Int32],
                                     ML_Mpfr_t, call_operator)
        assignment = ReferenceAssign(result, call_object(mpfr_x, mpfr_rnd))
        return Statement(assignment)
Example #4
0
    def generate_scheme(self):
        """ build a scheme evaluating the function through a mpfr_exp call

        The input variable "x" is converted to ML_Mpfr_t, passed to
        mpfr_exp and the call result is converted to self.precision.

        returns a Statement wrapping the Return of the converted result
        """
        x_var = self.implementation.add_input_variable(
            "x", self.get_input_precision())
        # move the input into the format expected by the mpfr call
        x_mpfr = Conversion(x_var, precision=ML_Mpfr_t)

        mpfr_name = "mpfr_exp"
        # slot 0 of the call is bound to the result, slot 1 to the argument
        mpfr_op = FunctionOperator(mpfr_name,
                                   arg_map={0: FO_Result(0), 1: FO_Arg(0)},
                                   require_header=["mpfr.h"])
        mpfr_func = FunctionObject(mpfr_name, [ML_Mpfr_t], ML_Mpfr_t, mpfr_op)
        mpfr_op.declare_prototype = mpfr_func

        # bring the mpfr result back to the function output precision
        rounded_result = Conversion(mpfr_func(x_mpfr),
                                    precision=self.precision)
        return Statement(Return(rounded_result))
Example #5
0
      #switch_map[sub_half] = Return(-poly_scheme_vector[sub_half])
      #switch_map[half + sub_half] = Return(poly_scheme_vector[sub_half])
      switch_map[(sub_half, half + sub_half)] = Return(factor2 * poly_scheme_vector[sub_half])


    result = SwitchBlock(modk, switch_map)

    #######################################################################
    #                    LARGE ARGUMENT MANAGEMENT                        #
    #                 (lar: Large Argument Reduction)                     #
    #######################################################################

    # payne and hanek argument reduction for large arguments
    #red_func_name = "payne_hanek_cosfp32" # "payne_hanek_fp32_asm"
    red_func_name = "payne_hanek_fp32_asm"
    payne_hanek_func_op = FunctionOperator(red_func_name, arg_map = {0: FO_Arg(0)}, require_header = ["support_lib/ml_red_arg.h"]) 
    payne_hanek_func   = FunctionObject(red_func_name, [ML_Binary32], ML_Binary64, payne_hanek_func_op)
    payne_hanek_func_op.declare_prototype = payne_hanek_func
    #large_arg_red = FunctionCall(payne_hanek_func, vx)
    large_arg_red = payne_hanek_func(vx)
    red_bound     = S2**20
    
    cond = Abs(vx) >= red_bound
    cond.set_attributes(tag = "cond", likely = False)


    
    lar_neark = NearestInteger(large_arg_red, precision = ML_Int64)
    lar_modk = Modulo(lar_neark, Constant(16, precision = ML_Int64), tag = "lar_modk", debug = True) 
    # Modulo is supposed to be already performed (by payne_hanek_cosfp32)
    #lar_modk = NearestInteger(large_arg_red, precision = ML_Int64)
Example #6
0
    def generate_scheme(self):
        """ generate the operation graph of a cosine implementation

        The scheme works on vx, the absolute value of the input "x", and
        selects between two evaluation paths:
        - vx < 2**20: vx is reduced by multiples of pi / 2**frac_pi_index
          (constant split into a high and a low part) and a polynomial is
          selected by a switch on k modulo 2**(frac_pi_index + 1)
        - vx >= 2**20: the reduction is delegated to the external routine
          payne_hanek_fp32_asm and a polynomial is selected by a switch on
          the nearest integer of the reduced value modulo 16

        self.precision must be ML_Binary32 or ML_Binary64 (any other value
        raises KeyError when selecting the debug display object).

        returns the Statement holding the complete scheme
        """
        # declaring CodeFunction and retrieving input variable
        vx = Abs(self.implementation.add_input_variable("x", self.precision),
                 tag="vx")

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        debug_precision = {
            ML_Binary32: debug_ftox,
            ML_Binary64: debug_lftolx
        }[self.precision]

        # NOTE(review): the special-case nodes below (test_nan_or_inf and
        # specific_return) are built but never attached to the returned
        # scheme -- confirm whether this is intended
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        sollya_precision = self.precision.get_sollya_object()
        # the high part of pi / 2**frac_pi_index is rounded on 3 bits less
        # than the format field size
        hi_precision = self.precision.get_field_size() - 3

        # argument reduction
        frac_pi_index = 3
        frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
        inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)
        # computing k = E(x * frac_pi)
        vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
        k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
        fk = Conversion(k, precision=self.precision, tag="fk")

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision)

        # reduced argument in the working precision ...
        red_vx_hi = (vx - inv_frac_pi_cst * fk)
        red_vx_hi.set_attributes(tag="red_vx_hi",
                                 debug=debug_precision,
                                 precision=self.precision)
        red_vx_lo_sub = inv_frac_pi_lo_cst * fk
        red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub",
                                     debug=debug_precision,
                                     unbreakable=True,
                                     precision=self.precision)
        # ... and the same reduction evaluated in ML_Binary64
        vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
        pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
        pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                       precision=ML_Binary64,
                                       debug=debug_lftolx)
        pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d.set_attributes(tag="pre_red_vx_d",
                                    debug=debug_lftolx,
                                    precision=ML_Binary64)

        modk = Modulo(k,
                      2**(frac_pi_index + 1),
                      precision=ML_Int32,
                      tag="switch_value",
                      debug=True)

        # the reduced argument is negated when bit (frac_pi_index - 1) of
        # modk is set
        sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                      2**(frac_pi_index - 1))
        red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
        red_vx.set_attributes(tag="red_vx",
                              debug=debug_precision,
                              precision=self.precision)

        red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
        red_vx_d.set_attributes(tag="red_vx_d",
                                debug=debug_lftolx,
                                precision=ML_Binary64)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        error_goal_approx = S2**-self.precision.get_precision()

        Log.report(Log.Info, "building mathematical polynomial")
        poly_degree_vector = [None] * 2**(frac_pi_index + 1)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        index_relative = []

        # one approximation of cos(x + i * pi / 2**frac_pi_index) per index i
        poly_object_vector = [None] * 2**(frac_pi_index + 1)
        for i in range(2**(frac_pi_index + 1)):
            sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
            degree = int(
                sup(guessdegree(sub_func, approx_interval,
                                error_goal_approx))) + 1

            degree_list = range(degree + 1)
            a_interval = approx_interval
            if i == 0:
                # ad-hoc, TODO: to be cleaned
                degree = 6
                degree_list = range(0, degree + 1, 2)
            elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
                # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
                # must be generated
                degree_list = range(1, degree + 1, 2)

            if i == 3 or i == 5 or i == 7 or i == 9:
                # these indexes get a binary64 constant coefficient
                precision_list = [sollya.binary64
                                  ] + [sollya.binary32] * (degree)
            else:
                precision_list = [sollya.binary32] * (degree + 1)

            poly_degree_vector[i] = degree

            constraint = sollya.absolute
            delta = (2**(frac_pi_index - 3))
            centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
            if centered_i < delta and centered_i > -delta and centered_i != 0:
                constraint = sollya.relative
                index_relative.append(i)
            Log.report(
                Log.Info, "generating approximation for %d/%d" %
                (i, 2**(frac_pi_index + 1)))
            poly_object_vector[
                i], _ = Polynomial.build_from_approximation_with_error(
                    sub_func,
                    degree_list,
                    precision_list,
                    a_interval,
                    constraint,
                    error_function=error_function)

        # unified power map for red_sx^n
        upm = {}
        rel_error_list = []

        poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

        for i in range(2**(frac_pi_index + 1)):
            poly_object = poly_object_vector[i]
            poly_precision = self.precision
            if i == 3 or i == 5 or i == 7 or i == 9:
                # degree 0 and 1 terms are accumulated in binary64, the
                # remaining terms are evaluated in the working precision
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * red_vx)
                poly_hi.set_precision(ML_Binary64)
                red_vx_d_2 = red_vx_d * red_vx_d
                poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2, offset=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_attributes(unbreakable=True)
            elif i == 4:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=ML_Binary64)
                poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_precision(ML_Binary64)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    red_vx,
                    unified_precision=poly_precision,
                    power_map_=upm)

            poly_scheme.set_attributes(tag="poly_cos%dpi%d" %
                                       (i, 2**(frac_pi_index)),
                                       debug=debug_precision)
            poly_scheme_vector[i] = poly_scheme

            # evaluation error estimation, only run for index 3 and only
            # when gappa is available
            if is_gappa_installed() and i == 3:
                opt_scheme = self.opt_engine.optimization_process(
                    poly_scheme,
                    self.precision,
                    copy=True,
                    fuse_fma=self.fuse_fma)

                tag_map = {}
                self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

                gappa_vx = Variable("red_vx",
                                    precision=self.precision,
                                    interval=approx_interval)

                cg_eval_error_copy_map = {
                    tag_map["red_vx"]: gappa_vx,
                    tag_map["red_vx_d"]: gappa_vx,
                }

                # parenthesized single-argument print behaves identically
                # as a Python 2 statement and as a Python 3 function call
                print("opt_scheme")
                print(opt_scheme.get_str(depth=None,
                                         display_precision=True,
                                         memoization_map={}))

                eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_scheme,
                    cg_eval_error_copy_map,
                    gappa_filename="red_arg_%d.g" % i)
                poly_range = cos(approx_interval + i * pi / S2**frac_pi_index)
                rel_error_list.append(eval_error / poly_range)

        half = 2**frac_pi_index
        sub_half = 2**(frac_pi_index - 1)

        # determine if the reduced input is within the second and third quarter (not first nor fourth)
        # to negate the cosine output
        factor_cond = BitLogicAnd(BitLogicXor(
            BitLogicRightShift(modk, frac_pi_index),
            BitLogicRightShift(modk, frac_pi_index - 1)),
                                  1,
                                  tag="factor_cond",
                                  debug=True)

        CM1 = Constant(-1, precision=self.precision)
        C1 = Constant(1, precision=self.precision)
        factor = Select(factor_cond,
                        CM1,
                        C1,
                        tag="factor",
                        debug=debug_precision)
        factor2 = Select(Equal(modk, Constant(sub_half)),
                         CM1,
                         C1,
                         tag="factor2",
                         debug=debug_precision)

        # polynomial i is shared by the switch cases i, half - i and (when
        # i != 0) half + i and 2 * half - i; factor provides the sign
        switch_map = {}
        for i in range(2**(frac_pi_index - 1)):
            switch_case = (i, half - i)
            if i != 0:
                switch_case = switch_case + (half + i, 2 * half - i)
            if poly_scheme_vector[i].get_precision() != self.precision:
                poly_result = Conversion(poly_scheme_vector[i],
                                         precision=self.precision)
            else:
                poly_result = poly_scheme_vector[i]
            switch_map[switch_case] = Return(factor * poly_result)
        switch_map[(sub_half, half + sub_half)] = Return(
            factor2 * poly_scheme_vector[sub_half])

        result = SwitchBlock(modk, switch_map)

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################

        # payne and hanek argument reduction for large arguments
        red_func_name = "payne_hanek_fp32_asm"
        payne_hanek_func_op = FunctionOperator(
            red_func_name,
            arg_map={0: FO_Arg(0)},
            require_header=["support_lib/ml_red_arg.h"])
        payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                          ML_Binary64, payne_hanek_func_op)
        payne_hanek_func_op.declare_prototype = payne_hanek_func
        large_arg_red = payne_hanek_func(vx)
        red_bound = S2**20

        cond = Abs(vx) >= red_bound
        cond.set_attributes(tag="cond", likely=False)

        lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
        lar_modk = Modulo(lar_neark,
                          Constant(16, precision=ML_Int64),
                          tag="lar_modk",
                          debug=True)
        # Modulo is supposed to be already performed (by payne_hanek_cosfp32)
        pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                    precision=ML_Binary64)
        pre_lar_red_vx.set_attributes(precision=ML_Binary64,
                                      debug=debug_lftolx,
                                      tag="pre_lar_red_vx")
        lar_red_vx = Conversion(pre_lar_red_vx,
                                precision=self.precision,
                                debug=debug_precision,
                                tag="lar_red_vx")
        # part of the binary64 reduced value lost by the conversion above
        lar_red_vx_lo = Conversion(
            pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
            precision=self.precision)
        lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                     precision=self.precision)

        lar_k = 3
        # large arg reduction Universal Power Map
        lar_upm = {}
        lar_switch_map = {}
        approx_interval = Interval(-0.5, 0.5)
        for i in range(2**(lar_k + 1)):
            frac_pi = pi / S2**lar_k
            func = cos(frac_pi * i + frac_pi * sollya.x)

            degree = 6
            error_mode = sollya.absolute
            if i % 2**(lar_k) == 2**(lar_k - 1):
                # close to sin(x) cases
                # the free variable is spelled sollya.x, as in the rest of
                # this function
                if i == 2**(lar_k - 1):
                    func = -sin(frac_pi * sollya.x)
                else:
                    func = sin(frac_pi * sollya.x)
                degree_list = range(0, degree + 1, 2)
                precision_list = [sollya.binary32] * len(degree_list)
                # func / x is approximated, then the polynomial is shifted
                # with sub_poly(offset=-1)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func / sollya.x, degree_list, precision_list,
                    approx_interval, error_mode)
                poly_object = poly_object.sub_poly(offset=-1)
            else:
                degree_list = range(degree + 1)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func, degree_list, precision_list, approx_interval,
                    error_mode)

            if i == 3 or i == 5 or i == 7 or i == 9 or i == 11 or i == 13:
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * lar_red_vx)
                poly_hi.set_precision(ML_Binary64)
                pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                pre_poly_scheme.set_attributes(precision=ML_Binary64)
                poly_scheme = Conversion(pre_poly_scheme,
                                         precision=self.precision)
            elif i == 4 or i == 12:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                              precision=self.precision)
                c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                              precision=self.precision)
                poly_hi = polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=3),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                       precision=ML_Binary64)
                poly_scheme = Conversion(FusedMultiplyAdd(
                    c1, lar_red_vx, poly_hi, precision=ML_Binary64) +
                                         c1 * lar_red_vx_lo,
                                         precision=self.precision)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
            poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                       debug=debug_precision)
            lar_switch_map[(i, )] = Return(poly_scheme)

        lar_result = SwitchBlock(lar_modk, lar_switch_map)

        # main scheme
        Log.report(Log.Info, "Construction of the initial MDL scheme")
        scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                           ConditionBlock(cond, lar_result, result))

        return scheme
Example #7
0
    def generate_bench(self, processor, test_num=1000, unroll_factor=10):
        """ generate performance bench for self.op_class

        processor must provide get_current_timestamp(), which is used to
        read the timer before and after the test loop
        test_num is the number of operations reported in the timing message
        and used to compute the cycles-per-element ratio
        unroll_factor is the number of chained self.op_class operations
        built in the loop body; the loop runs test_num // unroll_factor
        iterations

        returns the Statement implementing the bench
        """
        # one random initial value per input, drawn from self.init_interval
        initial_inputs = [
            Constant(random.uniform(inf(self.init_interval),
                                    sup(self.init_interval)),
                     precision=precision)
            for precision in self.input_precisions
        ]

        var_inputs = [
            Variable("var_%d" % i,
                     precision=FormatAttributeWrapper(precision, ["volatile"]),
                     var_type=Variable.Local)
            for i, precision in enumerate(self.input_precisions)
        ]

        # the message template embeds the display format of the output
        # precision, followed by two %lld fields and one %.3f field
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0: "\"%s[%s] %%lld elts computed "\
                   "in %%lld cycles =>\\n     %%.3f CPE \\n\"" %
                (
                    self.bench_name,
                    self.output_precision.get_display_format()
                ),
                1: FO_Arg(0),
                2: FO_Arg(1),
                3: FO_Arg(2),
                4: FO_Arg(3)
            }, void_function=True
        )
        printf_timing_function = FunctionObject(
            "printf", [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
            ML_Void, printf_timing_op)
        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

        void_function_op = FunctionOperator("(void)",
                                            arity=1,
                                            void_function=True)
        void_function = FunctionObject("(void)", [self.output_precision],
                                       ML_Void, void_function_op)

        # initialization of operation inputs
        init_assign = metaop.Statement()
        for var_input, init_value in zip(var_inputs, initial_inputs):
            init_assign.push(ReferenceAssign(var_input, init_value))

        # test loop
        loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
        # floor division keeps the iteration count an integer on Python 3
        # as well (true division would hand a float to an ML_Int64 constant)
        test_num_cst = Constant(test_num // unroll_factor,
                                precision=ML_Int64,
                                tag="test_num")

        # Goal build a chain of dependant operation to measure
        # elementary operation latency
        local_inputs = tuple(var_inputs)
        local_result = self.op_class(*local_inputs,
                                     precision=self.output_precision,
                                     unbreakable=True)
        for i in range(unroll_factor - 1):
            # the previous result replaces the first operand only
            local_inputs = tuple([local_result] + var_inputs[1:])
            local_result = self.op_class(*local_inputs,
                                         precision=self.output_precision,
                                         unbreakable=True)
        # renormalisation
        local_result = self.renorm_function(local_result)

        # variable assignation to build dependency chain
        var_assign = Statement()
        var_assign.push(ReferenceAssign(var_inputs[0], local_result))
        final_value = var_inputs[0]

        # loop increment value
        loop_increment = 1

        test_loop = Loop(
            ReferenceAssign(loop_i, Constant(0, precision=ML_Int32)),
            loop_i < test_num_cst,
            Statement(var_assign,
                      ReferenceAssign(loop_i, loop_i + loop_increment)),
        )

        # bench scheme
        test_scheme = Statement(
            ReferenceAssign(timer, processor.get_current_timestamp()),
            init_assign,
            test_loop,
            # timer now holds the elapsed timestamp difference
            ReferenceAssign(
                timer,
                Subtraction(processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            # prevent intermediary variable simplification
            void_function(final_value),
            printf_timing_function(
                final_value, Constant(test_num, precision=ML_Int64), timer,
                Division(Conversion(timer, precision=ML_Binary64),
                         Constant(test_num, precision=ML_Binary64),
                         precision=ML_Binary64))
        )

        return test_scheme
Example #8
0
    def generate_bench_wrapper(self,
                               test_num=1,
                               loop_num=100000,
                               test_ranges=[Interval(-1.0, 1.0)],
                               debug=False):
        """ generate the bench_wrapper function timing the array implementation

            The generated scheme samples a start timestamp, resets the
            acc_num counter, runs the array test loop loop_num times,
            replaces the timer by the elapsed time, prints acc_num, the
            elapsed time and their ratio (timer / acc_num), and finally
            returns that ratio.

            Args:
                test_num: number of rows of the (size, offset) table handed
                    to the array test wrapper; also the number of random
                    array sizes drawn from self.test_index_range
                loop_num: number of iterations of the timed loop
                test_ranges: list of intervals, zipped with the input
                    precisions to build the random input generators
                    (the default list is only read here, never mutated)
                debug: unused by this method

            Returns:
                a FunctionGroup containing the single bench_wrapper
                CodeFunction
        """
        # interval where the array length is chosen from (randomly)
        index_range = self.test_index_range

        auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

        test_total = test_num

        # number of arrays expected as inputs for tested_function
        NUM_INPUT_ARRAY = 1
        # position of the input array in tested_function operands (generally
        # equals to 1 as to 0-th input is often the destination array)
        INPUT_INDEX_OFFSET = 1

        # sizes of the standard test cases come first, followed by test_num
        # randomly chosen sizes
        # NOTE(review): only the first test_total entries are referenced by
        # the (size, offset) table below while INPUT_ARRAY_SIZE sums every
        # entry; when self.standard_test_cases is not empty the two do not
        # line up -- confirm whether test_total should include them
        TABLE_SIZE_VALUES = [
            len(std_table) for std_table in self.standard_test_cases
        ] + [
            random.randrange(index_range[0], index_range[1] + 1)
            for i in range(test_num)
        ]
        # offset of a sub-array = cumulated size of the preceding sub-arrays
        OFFSET_VALUES = [sum(TABLE_SIZE_VALUES[:i]) for i in range(test_total)]

        # each row stores the (size, offset) pair of one sub-array
        table_size_offset_array = generate_2d_table(
            test_total,
            2,
            ML_UInt32,
            self.uniquify_name("table_size_array"),
            value_gen=(lambda row_id:
                       (TABLE_SIZE_VALUES[row_id], OFFSET_VALUES[row_id])))

        INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [self.get_input_precision(1).get_data_precision()]
        rng_map = [
            get_precision_rng(precision, inf(test_range), sup(test_range))
            for precision, test_range in zip(input_precisions, test_ranges)
        ]

        # generated table of inputs
        input_tables = [
            generate_1d_table(
                INPUT_ARRAY_SIZE,
                self.get_input_precision(INPUT_INDEX_OFFSET +
                                         table_id).get_data_precision(),
                self.uniquify_name("input_table_arg%d" % table_id),
                # table_id is bound as a default argument so that the value
                # generator does not depend on when it gets invoked
                value_gen=(lambda _, table_id=table_id: input_precisions[
                    table_id].round_sollya_object(
                        rng_map[table_id].get_new_value(), sollya.RN)))
            for table_id in range(NUM_INPUT_ARRAY)
        ]

        # generate output_array (declared empty and non-const: it is the
        # destination of the tested function)
        output_array = generate_1d_table(
            INPUT_ARRAY_SIZE,
            output_precision,
            self.uniquify_name("output_array"),
            value_gen=(lambda _: None),
            const=False,
            empty=True)

        # accumulate element number
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        def empty_post_statement_gen(input_tables, output_array,
                                     table_size_offset_array, array_offset,
                                     array_len, test_id):
            # benchmarking: nothing is executed after each array evaluation
            return Statement()

        test_loop = self.get_array_test_wrapper(test_total, tested_function,
                                                table_size_offset_array,
                                                input_tables, output_array,
                                                acc_num,
                                                empty_post_statement_gen)

        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
        # printf(<format>, arg0, arg1, arg2) reporting the bench result
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0:
                "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\""
                % function_name,
                1:
                FO_Arg(0),
                2:
                FO_Arg(1),
                3:
                FO_Arg(2)
            },
            void_function=True)
        printf_timing_function = FunctionObject(
            "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void,
            printf_timing_op)

        vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
        loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")
        loop_increment = 1

        # bench measure of clock per element
        cpe_measure = Division(
            Conversion(timer, precision=ML_Binary64),
            Conversion(acc_num, precision=ML_Binary64),
            precision=ML_Binary64,
            tag="cpe_measure",
        )

        # common test scheme between scalar and vector functions
        test_scheme = Statement(
            self.processor.get_init_timestamp(),
            # timer first holds the start timestamp ...
            ReferenceAssign(timer, self.processor.get_current_timestamp()),
            ReferenceAssign(acc_num, 0),
            Loop(
                ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
                vj < loop_num_cst,
                Statement(test_loop, ReferenceAssign(vj,
                                                     vj + loop_increment))),
            # ... and is then overwritten with the elapsed time
            ReferenceAssign(
                timer,
                Subtraction(self.processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            printf_timing_function(
                Conversion(acc_num, precision=ML_Int64),
                timer,
                cpe_measure,
            ),
            Return(cpe_measure),
        )
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])
Example #9
0
    def generate_array_check_loop(self, input_tables, output_array,
                                  table_size_offset_array, array_offset,
                                  array_len, test_id):
        """ generate the loop checking one sub-array of results

            For each index j in [0, array_len) the entry array_offset + j
            of output_array is tested against the matching entries of the
            expected table through self.accuracy.get_output_check_test.
            When that test triggers, the inputs and the result are printed,
            followed by the expected values; unless self.break_error is
            set, the generated code then returns 1.

            Args:
                input_tables: list of input tables (one per input array)
                output_array: table storing the results to be checked
                table_size_offset_array: table forwarded to
                    self.generate_expected_table
                array_offset: offset of the sub-array within the tables
                array_len: number of entries of the sub-array
                test_id: unused by this method

            Returns:
                the Loop node implementing the check
        """
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_input_function = self.get_printf_input_function()

        NUM_INPUT_ARRAY = len(input_tables)

        # generate the expected table for the whole multi-array
        expected_table = self.generate_expected_table(input_tables,
                                                      table_size_offset_array)

        # inputs for the (vj)-th entry of the sub-array
        local_inputs = tuple(
            TableLoad(input_tables[in_id], array_offset + vj)
            for in_id in range(NUM_INPUT_ARRAY))
        # expected values for the (vj)-th entry of the sub-array
        expected_values = [
            TableLoad(expected_table, array_offset + vj, i)
            for i in range(self.accuracy.get_num_output_value())
        ]
        # local result for the (vj)-th entry of the sub-array
        local_result = TableLoad(output_array, array_offset + vj)

        # statements executed when the check test triggers: both variants
        # print the faulty entry and the expected values (the former
        # break_error variant referenced an undefined name here)
        failure_statements = [
            printf_input_function(*((vj, ) + local_inputs +
                                    (local_result, ))),
            self.accuracy.get_output_print_call(self.function_name,
                                                expected_values),
        ]
        if not self.break_error:
            # without break_error the generated function exits with 1
            failure_statements.append(Return(Constant(1, precision=ML_Int32)))
        return_statement_break = Statement(*failure_statements)

        # loop implementation to check sub-array array_offset
        # results validity
        check_array_loop = Loop(
            ReferenceAssign(vj, 0), vj < array_len,
            Statement(
                ConditionBlock(
                    self.accuracy.get_output_check_test(
                        local_result, expected_values),
                    return_statement_break),
                ReferenceAssign(vj, vj + 1),
            ))
        return check_array_loop
Example #10
0
             type_strict_match(v4float32, v4float32, v4float32):
             StandardAsmOperator("faddwq {} = {}, {}", arity=2, exu=DA_FPU),
         },
     },
 },
 Subtraction: {
     None: {
         lambda _: True: {
             type_strict_match_list([ML_Int32, ML_UInt32], [
                                        ML_UInt32, ML_Int32
                                    ], [ML_Int32, ML_UInt32]):
             # subtract from: op1 - op0 (reverse)
             AdvancedAsmOperator("sbfw {} = {}, {}",
                                 arg_map={
                                     0: FO_Result(),
                                     1: FO_Arg(1),
                                     2: FO_Arg(0)
                                 },
                                 arity=2,
                                 exu=DA_ALU),
             type_strict_match(ML_Binary32, ML_Binary32, ML_Binary32):
             # subtract from: op1 - op0 (reverse)
             AdvancedAsmOperator("fsbfw {} = {}, {}",
                                 arg_map={
                                     0: FO_Result(),
                                     1: FO_Arg(1),
                                     2: FO_Arg(0)
                                 },
                                 arity=2,
                                 exu=DA_FPU),
             type_strict_match(ML_Binary64, ML_Binary64, ML_Binary64):
Example #11
0
  def generate_datafile_testbench(self, tc_list, io_map, input_signals, output_signals, time_step, test_fname="test.input"):
    """ Build a testbench whose stimuli and expected outputs are stored in
        an external data file.

        The test cases of tc_list are dumped into test_fname (one line of
        hexadecimal fields per test case, inputs first, then outputs) and
        the returned testbench process reads that file back line by line.

        Returns a single-element list containing the testbench CodeEntity """
    def hexa_reader_for(signal_format):
        # textio function reading one hexadecimal field of signal_format
        return FunctionObject("hread", [HDL_LINE, signal_format], ML_Void, FunctionOperator("hread", void_function=True, arity=2))

    # textio function reading one binary field
    binary_reader = FunctionObject("read", [HDL_LINE, ML_StdLogic], ML_Void, FunctionOperator("read", void_function=True, arity=2))
    line_var = Variable("input_line", precision=HDL_LINE, var_type=Variable.Local)

    def read_field(field_var, field_format):
        # ML_StdLogic fields use the binary reader, every other format
        # goes through the hexadecimal one
        if field_format is ML_StdLogic:
            return binary_reader(line_var, field_var)
        return hexa_reader_for(field_format)(line_var, field_var)

    # ordered input names: this order defines the column order of the file
    input_signal_list = list(input_signals.keys())
    input_statement = Statement()
    for in_tag in input_signal_list:
        in_format = input_signals[in_tag].precision
        in_var = Variable("v_" + in_tag, precision=in_format, var_type=Variable.Local)
        # read the field into a local variable, then drive the signal
        input_statement.add(read_field(in_var, in_format))
        input_statement.add(ReferenceAssign(input_signals[in_tag], in_var))

    # ordered output names, stored after the inputs on each line
    output_signal_list = list(output_signals.keys())
    output_statement = Statement()
    for out_tag in output_signal_list:
        out_format = output_signals[out_tag].precision
        out_var = Variable("v_" + out_tag, precision=out_format, var_type=Variable.Local)
        # the expected value is read from the same line as the inputs
        output_statement.add(read_field(out_var, out_format))

        out_signal = output_signals[out_tag]
        pass_cond, check_statement = get_output_check_statement(out_signal, out_tag, out_var)

        # " <name>=" followed by the signal value, for every input
        msg_chunks = []
        for msg_tag in input_signal_list:
            msg_chunks.append(" %s=" % msg_tag)
            msg_chunks.append(signal_str_conversion(input_signals[msg_tag], input_signals[msg_tag].precision))
        input_msg = multi_Concatenation(*msg_chunks)

        output_statement.add(check_statement)
        mismatch_msg = multi_Concatenation(
            "unexpected value for inputs ",
            input_msg,
            " expecting :",
            signal_str_conversion(out_var, out_format),
            " got :",
            signal_str_conversion(out_signal, out_format),
            precision=ML_String)
        output_statement.add(Assert(pass_cond, mismatch_msg, severity=Assert.Failure))

    self_component = self.implementation.get_component_object()
    self_instance = self_component(io_map=io_map, tag="tested_entity")
    # never attached to the testbench
    test_statement = Statement()

    def hex_field(field_format, field_value):
        # zero-padded hexadecimal coding, one digit per group of 4 bits
        digit_num = int((field_format.get_bit_size() + 3) / 4)
        coded_value = field_format.get_base_format().get_integer_coding(field_value)
        return ("{:x}").format(coded_value).zfill(digit_num)

    with open(test_fname, "w") as data_file:
        # legend line listing the column tags
        data_file.write("# " + " ".join(input_signal_list + output_signal_list) + "\n")

        for input_values, output_values in tc_list:
            fields = []
            for in_tag in input_signal_list:
                in_value = input_values[in_tag]
                fields.append(hex_field(input_signals[in_tag].get_precision(), in_value))

            for out_tag in output_signal_list:
                out_value = output_values[out_tag]
                fields.append(hex_field(output_signals[out_tag].get_precision(), out_value))
            # one line per test case
            data_file.write(" ".join(fields) + "\n")

    input_stream = Variable("data_file", precision=HDL_FILE, var_type=Variable.Local)
    file_status = Variable("file_status", precision=HDL_OPEN_FILE_STATUS, var_type=Variable.Local)
    end_of_file = FunctionObject("endfile", [HDL_FILE], ML_Bool, FunctionOperator("endfile", arity=1))
    open_file = FunctionObject(
        "FILE_OPEN", [HDL_OPEN_FILE_STATUS, HDL_FILE, ML_String], ML_Void,
        FunctionOperator(
            "FILE_OPEN",
            arg_map={0: FO_Arg(0), 1: FO_Arg(1), 2: FO_Arg(2), 3: "READ_MODE"},
            void_function=True))
    read_line = FunctionObject(
        "readline", [HDL_FILE, HDL_LINE], ML_Void,
        FunctionOperator("readline", void_function=True, arity=2))

    reset_statement = self.get_reset_statement(io_map, time_step)
    open_ok = Constant("OPEN_OK", precision=HDL_OPEN_FILE_STATUS)

    testbench = CodeEntity("testbench")

    # the process parts are built in the order they are executed
    open_call = open_file(file_status, input_stream, test_fname)
    open_check = ConditionBlock(
        Comparison(file_status, open_ok, specifier=Comparison.NotEqual),
        Assert(
            Constant(0, precision=ML_Bool),
            " \"failed to open file {}\"".format(test_fname),
            severity=Assert.Failure))
    # consume legend line
    skip_legend = read_line(input_stream, line_var)
    # one test case per remaining line: apply inputs, wait, check outputs
    replay_loop = WhileLoop(
        LogicalNot(end_of_file(input_stream)),
        Statement(
            read_line(input_stream, line_var),
            input_statement,
            Wait(time_step * (self.stage_num + 2)),
            output_statement))
    # end of test
    end_report = Assert(
        Constant(0, precision=ML_Bool),
        " \"end of test, no error encountered \"",
        severity=Assert.Warning)
    # infinite end loop
    idle_loop = WhileLoop(
        Constant(1, precision=ML_Bool),
        Statement(Wait(time_step * (self.stage_num + 2))))

    test_process = Process(
        reset_statement,
        open_call,
        open_check,
        skip_legend,
        replay_loop,
        end_report,
        idle_loop)

    testbench_scheme = Statement(self_instance, test_process)

    if self.pipelined:
        # NOTE(review): "/" is a true division under Python 3, so this
        # assert does not appear able to reject an odd time_step -- confirm
        half_time_step = time_step / 2
        assert (half_time_step * 2) == time_step

        def drive_clock(level):
            return ReferenceAssign(io_map["clk"], Constant(level, precision=ML_StdLogic))

        # adding clock process for pipelined bench
        clk_process = Process(
            Statement(
                drive_clock(1),
                Wait(half_time_step),
                drive_clock(0),
                Wait(half_time_step)))
        testbench_scheme.push(clk_process)

    testbench.add_process(testbench_scheme)

    return [testbench]