Ejemplo n.º 1
0
    def generate_scheme(self):
        """ main scheme generation """

        int_size = 3
        frac_size = self.width - int_size

        input_precision = hdl_precision_parser("FU%d.%d" %
                                               (int_size, frac_size))
        output_precision = hdl_precision_parser("FS%d.%d" %
                                                (int_size, frac_size))

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)

        var_y = self.implementation.add_input_signal("y", input_precision)

        var_z = self.implementation.add_input_signal("z", input_precision)

        abstract_formulae = var_x

        anchor = FixedPointPosition(abstract_formulae,
                                    -3,
                                    align=FixedPointPosition.FromPointToMSB,
                                    tag="anchor")

        comp = abstract_formulae > anchor

        result = Select(comp, Conversion(var_x, precision=self.precision),
                        Conversion(var_y, precision=self.precision))

        self.implementation.add_output_signal("result", result)

        return [self.implementation]
Ejemplo n.º 2
0
def legalize_Select(optree):
    """ legalize Select operation node by converting if and else inputs to
        Select output format if the bit sizes do not match """
    cond = optree.get_input(0)
    op0 = optree.get_input(1)
    op1 = optree.get_input(2)
    precision = optree.get_precision()
    if precision is None:
        Log.report(Log.Error, "None precision for Select:\n{}", optree)
    if op0.get_precision().get_bit_size() != precision.get_bit_size():
        optree.set_input(
            1,
            Conversion(
                op0,
                precision = precision
            )
        )
    if op1.get_precision().get_bit_size() != precision.get_bit_size():
        optree.set_input(
            2,
            Conversion(
                op1,
                precision = optree.get_precision()
            )
        )
    return optree
Ejemplo n.º 3
0
def legalize_invsqrt_seed(optree):
    """ Legalize an InverseSquareRootSeed optree """
    assert isinstance(optree, ReciprocalSquareRootSeed)
    op_prec = optree.get_precision()
    # input = 1.m_hi-m_lo * 2^e
    # approx = 2^(-int(e/2)) * approx_insqrt(1.m_hi) * (e % 2 ? 1.0 : ~2**-0.5)
    op_input = optree.get_input(0)
    convert_back = False
    approx_prec = ML_Binary32

    if op_prec != approx_prec:
        op_input = Conversion(op_input, precision=ML_Binary32)
        convert_back = True

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    op_exp = ExponentExtraction(op_input,
                                tag="op_exp",
                                debug=debug_multi,
                                precision=ML_Int32)
    neg_half_exp = Division(Negation(op_exp, precision=ML_Int32),
                            Constant(2, precision=ML_Int32),
                            precision=ML_Int32)
    approx_exp = ExponentInsertion(neg_half_exp,
                                   tag="approx_exp",
                                   debug=debug_multi,
                                   precision=approx_prec)
    op_exp_parity = Modulo(op_exp,
                           Constant(2, precision=ML_Int32),
                           precision=ML_Int32)
    approx_exp_correction = Select(Equal(op_exp_parity,
                                         Constant(0, precision=ML_Int32)),
                                   Constant(1.0, precision=approx_prec),
                                   Select(Equal(
                                       op_exp_parity,
                                       Constant(-1, precision=ML_Int32)),
                                          Constant(S2**0.5,
                                                   precision=approx_prec),
                                          Constant(S2**-0.5,
                                                   precision=approx_prec),
                                          precision=approx_prec),
                                   precision=approx_prec,
                                   tag="approx_exp_correction",
                                   debug=debug_multi)
    table_index = invsqrt_approx_table.get_index_function()(op_input)
    table_index.set_attributes(tag="invsqrt_index", debug=debug_multi)
    approx = Multiplication(TableLoad(invsqrt_approx_table,
                                      table_index,
                                      precision=approx_prec),
                            Multiplication(approx_exp_correction,
                                           approx_exp,
                                           precision=approx_prec),
                            tag="invsqrt_approx",
                            debug=debug_multi,
                            precision=approx_prec)
    if approx_prec != op_prec:
        return Conversion(approx, precision=op_prec)
    else:
        return approx
Ejemplo n.º 4
0
def sw_legalize_concatenation(node):
    """ Legalize a RTL Concatenation node into a sub-graph
        of operation compatible with software implementation """
    assert len(node.inputs) == 2
    lhs = node.get_input(0)
    rhs = node.get_input(1)
    return BitLogicOr(BitLogicLeftShift(Conversion(
        lhs, precision=node.get_precision()),
                                        rhs.get_precision().get_bit_size(),
                                        precision=node.get_precision()),
                      Conversion(rhs, precision=node.get_precision()),
                      precision=node.get_precision())
Ejemplo n.º 5
0
def legalize_reciprocal_seed(optree):
    """ Legalize an ReciprocalSeed optree """
    assert isinstance(optree, ReciprocalSeed)
    op_prec = optree.get_precision()
    initial_prec = op_prec
    back_convert = False
    op_input = optree.get_input(0)

    INV_APPROX_TABLE_FORMAT = generic_inv_approx_table.get_storage_precision()

    if op_prec != INV_APPROX_TABLE_FORMAT:
        op_input = Conversion(op_input, precision=INV_APPROX_TABLE_FORMAT)
        op_prec = INV_APPROX_TABLE_FORMAT
        back_convert = True
    # input = 1.m_hi-m_lo * 2^e
    # approx = 2^(-int(e/2)) * approx_insqrt(1.m_hi) * (e % 2 ? 1.0 : ~2**-0.5)

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    int_prec = op_prec.get_integer_format()
    op_sign = CopySign(op_input,
                       Constant(1.0, precision=op_prec),
                       precision=op_prec)
    op_exp = ExponentExtraction(op_input,
                                tag="op_exp",
                                debug=debug_multi,
                                precision=int_prec)
    neg_exp = Negation(op_exp, precision=int_prec)
    approx_exp = ExponentInsertion(neg_exp,
                                   tag="approx_exp",
                                   debug=debug_multi,
                                   precision=op_prec)
    table_index = generic_inv_approx_table.get_index_function()(op_input)
    table_index.set_attributes(tag="inv_index", debug=debug_multi)
    approx = Multiplication(TableLoad(generic_inv_approx_table,
                                      table_index,
                                      precision=op_prec),
                            Multiplication(approx_exp,
                                           op_sign,
                                           precision=op_prec),
                            tag="inv_approx",
                            debug=debug_multi,
                            precision=op_prec)
    if back_convert:
        return Conversion(approx, precision=initial_prec)
    else:
        return approx
Ejemplo n.º 6
0
def propagate_format_to_input(new_format, optree, input_index_list):
    """ Propgate new_format to @p optree's input whose index is listed in
        @p input_index_list """
    for op_index in input_index_list:
        op_input = optree.get_input(op_index)
        if op_input.get_precision() is None:
            op_input.set_precision(new_format)
            index_list = does_node_propagate_format(op_input)
            propagate_format_to_input(new_format, op_input, index_list)
        elif not test_format_equality(new_format, op_input.get_precision()):
            if is_constant(op_input):
                if not is_fixed_point(new_format):
                    Log.report(
                        Log.Error,
                        "format {} during propagation to input {} of {} is not a fixed-point format",
                        new_format, op_input, optree)
                elif format_does_fit(op_input, new_format):
                    Log.report(
                        Log.Info,
                        "Simplify Constant Conversion {} to larger Constant: {}",
                        op_input.get_str(display_precision=True)
                        if Log.is_level_enabled(Log.Info) else "",
                        str(new_format))
                    new_input = op_input.copy()
                    new_input.set_precision(new_format)
                    optree.set_input(op_index, new_input)
                else:
                    Log.report(
                        Log.Error,
                        "Constant is about to be reduced to a too constrained format: {}",
                        op_input.get_str(display_precision=True)
                        if Log.is_level_enabled(Log.Error) else "")
            else:
                new_input = Conversion(op_input, precision=new_format)
                optree.set_input(op_index, new_input)
Ejemplo n.º 7
0
    def generate_scheme(self):
        """ main scheme generation """
        Log.report(Log.Info,
                   "input_precision is {}".format(self.input_precision))
        Log.report(Log.Info,
                   "output_precision is {}".format(self.output_precision))

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", self.input_precision)
        var_y = self.implementation.add_input_signal("y", self.input_precision)
        var_x.set_attributes(debug=debug_fixed)
        var_y.set_attributes(debug=debug_fixed)

        self.implementation.start_new_stage()

        add = var_x + var_y

        self.implementation.start_new_stage()

        sub = add - var_y

        self.implementation.start_new_stage()

        pre_result = sub - var_x

        self.implementation.start_new_stage()

        post_result = pre_result + var_x

        result = Conversion(pre_result, precision=self.output_precision)

        self.implementation.add_output_signal("vr_out", result)

        return [self.implementation]
Ejemplo n.º 8
0
    def generate_dummy_scheme(self):
        Log.report(
            Log.Info,
            "generating MultArray with output precision {precision}".format(
                precision=self.precision))

        acc = None
        a_inputs = {}
        b_inputs = {}

        stage_map = self.instanciate_inputs()

        stage_index_list = sorted(stage_map.keys())
        for stage_id in stage_index_list:
            # synchronizing pipeline stage
            if stage_id is None:
                pass
            else:
                while stage_id > self.implementation.get_current_stage():
                    self.implementation.start_new_stage()
            operation_list = stage_map[stage_id]
            for ctor, operand_list in operation_list:
                new_term = ctor(*tuple(operand_list))
                if acc is None:
                    acc = new_term
                else:
                    acc = Addition(acc, new_term)

        result = Conversion(acc, precision=self.precision)
        self.implementation.add_output_signal("result_o", result)

        return [self.implementation]
Ejemplo n.º 9
0
    def generate_scheme(self):
        """ main scheme generation """
        Log.report(Log.Info, "width parameter is {}".format(self.width))
        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        var_y = self.implementation.add_input_signal("y", input_precision)
        var_x.set_attributes(debug = debug_fixed)
        var_y.set_attributes(debug = debug_fixed)

        sub = var_x - var_y
        c = Constant(0)

        self.implementation.start_new_stage()

        #pre_result = Select(
        #    c > sub,
        #    c,
        #    sub
        #)
        pre_result = Max(0, sub)

        self.implementation.start_new_stage()

        result = Conversion(pre_result + var_x, precision=output_precision)

        self.implementation.add_output_signal("vr_out", result)

        return [self.implementation]
Ejemplo n.º 10
0
    def generate_scheme(self):
        """ main scheme generation """
        Log.report(Log.Info, "width parameter is {}".format(self.width))
        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        var_y = self.implementation.add_input_signal("y", input_precision)
        var_x.set_attributes(debug = debug_fixed)
        var_y.set_attributes(debug = debug_fixed)

        test = (var_x > 1)
        test.set_attributes(tag = "test", debug = debug_std)

        large_add = (var_x + var_y)

        pre_result = Select(
            test,
            1,
            large_add,
            tag = "pre_result",
            debug = debug_fixed
        )

        result = Conversion(pre_result, precision=output_precision)

        self.implementation.add_output_signal("vr_out", result)

        return [self.implementation]
Ejemplo n.º 11
0
def get_output_check_statement(output_signal, output_tag, output_value):
    """ Generate output value check statement """
    test_pass_cond = Comparison(
        output_signal,
        output_value,
        specifier=Comparison.Equal,
        precision=ML_Bool
    )

    check_statement = ConditionBlock(
        LogicalNot(
            test_pass_cond,
            precision = ML_Bool
        ),
        Report(
            Concatenation(
                " result for {}: ".format(output_tag),
                Conversion(
                    output_signal if output_signal.get_precision() is ML_StdLogic else
                    TypeCast(
                        output_signal,
                        precision=ML_StdLogicVectorFormat(
                            output_signal.get_precision().get_bit_size()
                        )
                     ),
                    precision = ML_String
                    ),
                precision = ML_String
            )
        )
    )
    return test_pass_cond, check_statement
Ejemplo n.º 12
0
def legalize_integer_nearest(optree):
    """ transform a NearestInteger node floating-point to integer 
        into a sequence of floating-point NearestInteger and Conversion.
        This conversion is lossy """
    op_input = optree.get_input(0)
    int_precision = {
        v4float32: v4int32,
        ML_Binary32: ML_Int32
    }[optree.get_precision()]
    return Conversion(NearestInteger(op_input, precision=int_precision),
                      precision=optree.get_precision())
Ejemplo n.º 13
0
def signal_str_conversion(optree, op_format):
    """ converision of @p optree from op_format to ML_String """
    return Conversion(
        optree if op_format is ML_StdLogic else
        TypeCast(
            optree,
            precision=ML_StdLogicVectorFormat(
                op_format.get_bit_size()
            )
         ),
        precision=ML_String
    )
Ejemplo n.º 14
0
def simplify_inverse(optree, processor):
    dummy_var = Variable("dummy_var_seed", precision = optree.get_precision())
    dummy_div_seed = DivisionSeed(dummy_var, precision = optree.get_precision())
    inv_approx_table = processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    seed_input = optree.inputs[0]
    c0 = Constant(0, precision = ML_Int32)

    if optree.get_precision() == inv_approx_table.get_storage_precision():
        return TableLoad(inv_approx_table, inv_approx_table.get_index_function()(seed_input), c0, precision = optree.get_precision()) 
    else:
        return Conversion(TableLoad(inv_approx_table, inv_approx_table.get_index_function()(seed_input), c0, precision = inv_approx_table.get_storage_precision()), precision = optree.get_precision()) 
Ejemplo n.º 15
0
def insert_conversion_when_required(op_input, final_precision):
    """ Generate a conversion of op_input to format final_precision if required 

        :param op_input: input operation node
        :type op_input: ML_Operation
        :param final_precision: target format
        :type final_precision: ML_Format
        :return: op_input converted to final_precision if required
        :rtype: ML_Operation """
    # assert not final_precision is None
    if op_input.get_precision() != final_precision:
        return Conversion(op_input, precision=final_precision)
    else:
        return op_input
Ejemplo n.º 16
0
    def generate_scheme(self):
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.get_input_precision(0))

        bf16_params = ML_NewTable(dimensions=[self.table_size], storage_precision=BFloat16)
        for i in range(self.table_size):
            bf16_params[i] = 1.1**i

        conv_vx = Conversion(TableLoad(bf16_params, vx), precision=ML_Binary32, tag="conv_vx", debug=debug_multi)

        result = conv_vx

        scheme = Return(result, precision=self.precision, debug=debug_multi)

        return scheme
Ejemplo n.º 17
0
    def generate_scheme(self):
        """ main scheme generation """
        Log.report(Log.Info, "width parameter is {}".format(self.width))
        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        var_y = self.implementation.add_input_signal("y", input_precision)
        var_x.set_attributes(debug=debug_fixed)
        var_y.set_attributes(debug=debug_fixed)

        test = (var_x > 1)
        test.set_attributes(tag="test", debug=debug_std)

        sub = var_x - var_y
        c = Constant(0)

        pre_result_select = Select(c > sub,
                                   Select(c < var_y,
                                          sub,
                                          Select(LogicalAnd(
                                              c > var_x,
                                              c < var_y,
                                              tag="last_lev_cond"),
                                                 var_x,
                                                 c,
                                                 tag="last_lev_sel"),
                                          tag="pre_select"),
                                   var_y,
                                   tag="pre_result_select")
        pre_result = Max(0, var_x - var_y, tag="pre_result")

        result = Conversion(Addition(pre_result, pre_result_select, tag="add"),
                            precision=output_precision)

        self.implementation.add_output_signal("vr_out", result)

        return [self.implementation]
Ejemplo n.º 18
0
def legalize_vector_reduction_test(optree):
    """ Legalize a vector test (e.g. IsMaskNotAnyZero) to a sub-graph
        of basic operations """
    op_input = optree.get_input(0)
    vector_size = op_input.get_precision().get_vector_size()
    conv_format = {
        2: v2int32,
        4: v4int32,
        8: v8int32,
    }[vector_size]

    cast_format = {
        2: ML_Int64,
        4: ML_Int128,
        8: ML_Int256,
    }[vector_size]
    return Comparison(TypeCast(Conversion(op_input, precision=conv_format),
                               precision=cast_format),
                      Constant(0, precision=cast_format),
                      specifier=Comparison.Equal,
                      precision=ML_Bool)
Ejemplo n.º 19
0
def comp_3to2(a, b, c):
    """ 3 digits to 2 digits compressor """
    s = BitLogicXor(a,
                    BitLogicXor(b, c, precision=ML_StdLogic),
                    precision=ML_StdLogic)
    c = BitLogicOr(BitLogicAnd(a, b, precision=ML_StdLogic),
                   BitLogicOr(BitLogicAnd(a, c, precision=ML_StdLogic),
                              BitLogicAnd(c, b, precision=ML_StdLogic),
                              precision=ML_StdLogic),
                   precision=ML_StdLogic)
    return c, s

    a = TypeCast(a, precision=fixed_point(1, 0, signed=False))
    b = TypeCast(b, precision=fixed_point(1, 0, signed=False))
    c = TypeCast(c, precision=fixed_point(1, 0, signed=False))

    full = TypeCast(Conversion(a + b + c,
                               precision=fixed_point(2, 0, signed=False)),
                    precision=ML_StdLogicVectorFormat(2))
    carry = BitSelection(full, 1)
    digit = BitSelection(full, 0)
    return carry, digit
Ejemplo n.º 20
0
def generate_bitfield_extraction(target_format, input_node, lo_index,
                                 hi_index):
    shift = lo_index
    mask_size = hi_index - lo_index + 1

    input_format = input_node.get_precision().get_base_format()
    if is_fixed_point(input_format) and is_fixed_point(target_format):
        frac_size = target_format.get_frac_size()
        int_size = input_format.get_bit_size() - frac_size
        cast_format = ML_Custom_FixedPoint_Format(int_size,
                                                  frac_size,
                                                  signed=False)
    else:
        cast_format = None

    # 1st step: shifting the input node the right amount
    shifted_node = input_node if shift == 0 else BitLogicRightShift(
        input_node,
        Constant(shift, precision=ML_Int32),
        precision=input_format)
    raw_format = ML_Custom_FixedPoint_Format(input_format.get_bit_size(),
                                             0,
                                             signed=False)
    # 2nd step: masking the input node
    # TODO/FIXME: check thast mask does not overflow or wrap-around
    masked_node = BitLogicAnd(TypeCast(shifted_node, precision=raw_format),
                              Constant((2**mask_size - 1),
                                       precision=raw_format),
                              precision=raw_format)

    if not cast_format is None:
        casted_node = TypeCast(masked_node, precision=cast_format)
    else:
        casted_node = masked_node

    converted_node = Conversion(casted_node, precision=target_format)
    return converted_node
Ejemplo n.º 21
0
def generate_exp_insertion(optree, result_precision):
    """ generate the expanded version of ExponentInsertion
        with @p optree as input and assuming @p result_precision
        as output precision """
    if result_precision.is_vector_format():
        scalar_format = optree.precision.get_scalar_format()
        vector_size = optree.precision.get_vector_size()
        # determine the working format (for expression)
        work_format = VECTOR_TYPE_MAP[result_precision.get_scalar_format().
                                      get_integer_format()][vector_size]
        bias_cst = [-result_precision.get_scalar_format().get_bias()
                    ] * vector_size
        shift_cst = [result_precision.get_scalar_format().get_field_size()
                     ] * vector_size
    else:
        scalar_format = optree.precision
        work_format = result_precision.get_integer_format()
        bias_cst = -result_precision.get_bias()
        shift_cst = result_precision.get_field_size()
    if not is_std_integer_format(scalar_format):
        Log.report(
            Log.Error,
            "{} should be a std integer format in generate_exp_insertion {} with precision {}",
            scalar_format, optree, result_precision)
    assert is_std_integer_format(scalar_format)
    biased_exponent = Addition(Conversion(optree, precision=work_format) if
                               not optree.precision is work_format else optree,
                               Constant(bias_cst, precision=work_format),
                               precision=work_format)
    result = BitLogicLeftShift(biased_exponent,
                               Constant(
                                   shift_cst,
                                   precision=work_format,
                               ),
                               precision=work_format)
    return TypeCast(result, precision=result_precision)
Ejemplo n.º 22
0
    def generate_scalar_scheme(self, vx, n):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        n.set_attributes(tag="n")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          n, positive or null integers
        #
        # rootn(x, n) = x^(1/n)
        #             = exp(1/n * log(x))
        #             = 2^(1/n * log2(x))
        #             = 2^(1/n * (log2(m) + e))
        #

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        use_reciprocal = False

        # non-scaled vx used to compute vx^1
        unmodified_vx = vx

        is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal")
        exp_correction_factor = self.precision.get_mantissa_size()
        mantissa_factor = Constant(2**exp_correction_factor,
                                   tag="mantissa_factor")
        vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx")

        m = MantissaExtraction(vx, tag="m", precision=self.precision)
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        e = Select(is_subnormal,
                   e - exp_correction_factor,
                   e,
                   tag="corrected_e")

        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision,
                                                     basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(
            log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(
            Abs(m, precision=self.precision), log_f, inv_approx_table,
            log_table)
        # floating-point version of n
        n_f = Conversion(n, precision=self.precision, tag="n_f")
        inv_n = Division(Constant(1, precision=self.precision), n_f)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision),
                            log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        if use_reciprocal:
            r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi)
        else:
            r = Division(log_approx, n_f, tag="r", debug=debug_multi)

        # e_n ~ e / n
        e_f = Conversion(e, precision=self.precision, tag="e_f")
        if use_reciprocal:
            e_n = Multiplication(e_f, inv_n, tag="e_n")
        else:
            e_n = Division(e_f, n_f, tag="e_n")
        error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n")
        e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int")
        pre_e_n_frac = e_n - e_n_int
        pre_e_n_frac.set_attributes(tag="pre_e_n_frac")
        e_n_frac = pre_e_n_frac + error_e_n * inv_n
        e_n_frac.set_attributes(tag="e_n_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)
        exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True)
        exp2_r.set_attributes(tag="exp2_r", debug=debug_multi)

        exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac,
                                                       inline_select=True)
        exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi)

        exp2_e_n_int = ExponentInsertion(Conversion(e_n_int,
                                                    precision=int_precision),
                                         precision=self.precision,
                                         tag="exp2_e_n_int")

        n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi)
        n_is_odd = LogicalNot(n_is_even, tag="n_is_odd")
        result_sign = Select(
            n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        # managing n == -1
        if self.expand_div:
            ml_division_args = ML_Division.get_default_args(
                precision=self.precision, input_formats=[self.precision] * 2)
            ml_division = ML_Division(ml_division_args)
            self.division_implementation = ml_division.implementation
            self.division_implementation.set_scheme(
                ml_division.generate_scheme())
            ml_division_fct = self.division_implementation.get_function_object(
            )
        else:
            ml_division_fct = Division

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = ConditionBlock(
            LogicalOr(LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)),
                      LogicalAnd(n_is_even, vx < 0)),
            Return(FP_QNaN(self.precision)),
            Statement(
                ConditionBlock(
                    Equal(n, -1, tag="n_is_mone"),
                    #Return(Division(Constant(1, precision=self.precision), unmodified_vx, tag="div_res", precision=self.precision)),
                    Return(
                        ml_division_fct(Constant(1, precision=self.precision),
                                        unmodified_vx,
                                        tag="div_res",
                                        precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn( ±inf, n) is +∞ for even n< 0.
                    Test(vx, specifier=Test.IsInfty),
                    Statement(
                        ConditionBlock(
                            n < 0,
                            #LogicalAnd(n_is_odd, n < 0),
                            Return(
                                Select(Test(vx,
                                            specifier=Test.IsPositiveInfty),
                                       Constant(FP_PlusZero(self.precision),
                                                precision=self.precision),
                                       Constant(FP_MinusZero(self.precision),
                                                precision=self.precision),
                                       precision=self.precision)),
                            Return(vx),
                        ), ),
                ),
                ConditionBlock(
                    # rootn(±0, n) is ±∞ for odd n < 0.
                    LogicalAnd(LogicalAnd(n_is_odd, n < 0),
                               Equal(vx, 0),
                               tag="n_is_odd_and_neg"),
                    Return(
                        Select(Test(vx, specifier=Test.IsPositiveZero),
                               Constant(FP_PlusInfty(self.precision),
                                        precision=self.precision),
                               Constant(FP_MinusInfty(self.precision),
                                        precision=self.precision),
                               precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn( ±0, n) is +∞ for even n< 0.
                    LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)),
                    Return(FP_PlusInfty(self.precision))),
                ConditionBlock(
                    # rootn(±0, n) is +0 for even n > 0.
                    LogicalAnd(n_is_even, Equal(vx, 0)),
                    Return(vx)),
                ConditionBlock(
                    Equal(n, 1), Return(unmodified_vx),
                    Return(result_sign * exp2_r * exp2_e_n_int *
                           exp2_e_n_frac))))
        return result
Ejemplo n.º 23
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise approximation

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the main interval
        :param max_degree: maximum degree for an approximation on any sub-interval
        :param error_threshold: error bound for an approximation on any sub-interval

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and error
            is the maximum approximation error encountered
        :rtype tuple(ML_Operation, SollyaObject): """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            local_poly_degree_list = list(
                range(1 if even else 0, degree + 1, 2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(cst_value, local_function,
                                                     local_interval,
                                                     sollya.absolute, accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    Log.report(
                        Log.error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio, precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
Ejemplo n.º 24
0
    def check_processor_support(self,
                                optree,
                                memoization_map=None,
                                debug=False,
                                language=C_Code):
        """ check if all precision-instantiated operation are supported by the processor """
        memoization_map = memoization_map if not memoization_map is None else {}
        if debug:
            print("checking processor support: ",
                  self.processor.__class__)  # Debug print
        if optree in memoization_map:
            return True
        if not isinstance(optree, ML_LeafNode):
            for inp in optree.inputs:
                self.check_processor_support(inp,
                                             memoization_map,
                                             debug=debug,
                                             language=language)

            if isinstance(optree, ConditionBlock):
                self.check_processor_support(optree.get_pre_statement(),
                                             memoization_map,
                                             debug=debug,
                                             language=language)
                pass
            elif isinstance(optree, Statement):
                pass
            elif isinstance(optree, Loop):
                pass
            elif isinstance(optree, Return):
                pass
            elif isinstance(optree, ReferenceAssign):
                pass
            elif isinstance(optree, PlaceHolder):
                pass
            elif isinstance(optree, SwitchBlock):
                #self.check_processor_support(optree.get_pre_statement(), memoization_map)

                for op in optree.get_extra_inputs():
                    # TODO: assert case is integer constant
                    self.check_processor_support(op,
                                                 memoization_map,
                                                 debug=debug,
                                                 language=language)
            elif not self.processor.is_supported_operation(
                    optree, debug=debug, language=language):
                # trying operand format escalation
                init_optree = optree
                old_list = optree.inputs
                while False:  #optree.__class__ in type_escalation:
                    match_found = False
                    for result_type_cond in type_escalation[optree.__class__]:
                        if result_type_cond(optree.get_precision()):
                            for op_index in range(len(optree.inputs)):
                                op = optree.inputs[op_index]
                                for op_type_cond in type_escalation[
                                        optree.__class__][result_type_cond]:
                                    if op_type_cond(op.get_precision()):
                                        new_type = type_escalation[
                                            optree.
                                            __class__][result_type_cond][
                                                op_type_cond](optree)
                                        if op.get_precision() != new_type:
                                            # conversion insertion
                                            input_list = list(optree.inputs)
                                            input_list[op_index] = Conversion(
                                                op, precision=new_type)
                                            optree.inputs = tuple(input_list)
                                            match_found = True
                                            break
                            break
                    if not match_found:
                        break
                # checking final processor support
                if not self.processor.is_supported_operation(optree):
                    # look for possible simplification
                    if self.has_support_simplification(optree):
                        simplified_tree = self.get_support_simplification(
                            optree)
                        Log.report(
                            Log.Verbose, "simplifying %s" %
                            optree.get_str(depth=2, display_precision=True))
                        Log.report(
                            Log.Verbose, "into %s" % simplified_tree.get_str(
                                depth=2, display_precision=True))
                        optree.change_to(simplified_tree)
                        if self.processor.is_supported_operation(optree):
                            memoization_map[optree] = True
                            return True

                    print(optree)  # Error print
                    print("pre escalation: ", old_list)  # Error print
                    print(self.processor.get_operation_keys(
                        optree))  # Error print
                    print(
                        optree.get_str(display_precision=True,
                                       display_id=True,
                                       memoization_map={}))  # Error print
                    Log.report(Log.Error, "unsupported operation\n")
        # memoization
        memoization_map[optree] = True
        return True
Ejemplo n.º 25
0
        def recursive_support_check(optree):
            if optree in memoization_map:
                return True

            elif not isinstance(optree, ML_LeafNode):
                # memoization
                memoization_map[optree] = True
                for inp in optree.inputs:
                    recursive_support_check(inp)

                if isinstance(optree, ConditionBlock):
                    pass
                elif isinstance(optree, Statement):
                    pass
                elif isinstance(optree, ConditionalBranch):
                    pass
                elif isinstance(optree, UnconditionalBranch):
                    pass
                elif isinstance(optree, BasicBlock):
                    pass
                elif isinstance(optree, PhiNode):
                    pass
                elif isinstance(optree, Loop):
                    pass
                elif isinstance(optree, Return):
                    pass
                elif isinstance(optree, ReferenceAssign):
                    pass
                elif isinstance(optree, PlaceHolder):
                    pass
                elif isinstance(optree, SwitchBlock):

                    for op in optree.get_extra_inputs():
                        # TODO: assert case is integer constant
                        recursive_support_check(op)
                elif not processor.is_supported_operation(
                        optree, debug=debug, language=language):
                    # trying operand format escalation
                    init_optree = optree
                    old_list = optree.inputs
                    while False:  #optree.__class__ in type_escalation:
                        match_found = False
                        for result_type_cond in type_escalation[
                                optree.__class__]:
                            if result_type_cond(optree.get_precision()):
                                for op_index in range(len(optree.inputs)):
                                    op = optree.inputs[op_index]
                                    for op_type_cond in type_escalation[
                                            optree.
                                            __class__][result_type_cond]:
                                        if op_type_cond(op.get_precision()):
                                            new_type = type_escalation[
                                                optree.
                                                __class__][result_type_cond][
                                                    op_type_cond](optree)
                                            if op.get_precision() != new_type:
                                                # conversion insertion
                                                input_list = list(
                                                    optree.inputs)
                                                input_list[
                                                    op_index] = Conversion(
                                                        op, precision=new_type)
                                                optree.inputs = tuple(
                                                    input_list)
                                                match_found = True
                                                break
                                break
                        if not match_found:
                            break
                    # checking final processor support
                    if not processor.is_supported_operation(optree,
                                                            language=language):
                        # look for possible simplification
                        if has_support_simplification(optree):
                            simplified_tree = get_support_simplification(
                                optree, processor)
                            Log.report(
                                Log.Verbose, "simplifying %s" % optree.get_str(
                                    depth=2, display_precision=True))
                            Log.report(
                                Log.Verbose,
                                "into %s" % simplified_tree.get_str(
                                    depth=2, display_precision=True))
                            optree.change_to(simplified_tree)
                            if processor.is_supported_operation(
                                    optree, language=language):
                                memoization_map[optree] = True
                                return True
                        print("pre escalation node is: ",
                              old_list)  # Error print
                        print("languages is {}".format(language))
                        print("Operation' keys are: {}".format(
                            processor.get_operation_keys(
                                optree)))  # Error print
                        print("Operation tree is: \n",
                              optree.get_str(
                                  display_precision=True,
                                  depth=1,
                                  display_id=True,
                                  memoization_map=None))  # Error print
                        Log.report(
                            Log.Error,
                            "unsupported operation in PassCheckProcessorSupport's check_processor_support {}:\n{}",
                            processor, optree)
            else:
                # memoization
                memoization_map[optree] = True
            return True
Ejemplo n.º 26
0
    def generate_scheme(self):
        # declaring function input variable
        v_x = [
            self.implementation.add_input_variable(
                "x%d" % index, self.get_input_precision(index))
            for index in range(self.arity)
        ]

        double_format = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble
        }[self.precision]

        # testing Add211
        exact_add = Addition(v_x[0],
                             v_x[1],
                             precision=double_format,
                             tag="exact_add")
        # testing Mul211
        exact_mul = Multiplication(v_x[0],
                                   v_x[1],
                                   precision=double_format,
                                   tag="exact_mul")
        # testing Sub211
        exact_sub = Subtraction(v_x[1],
                                v_x[0],
                                precision=double_format,
                                tag="exact_sub")
        # testing Add222
        multi_add = Addition(exact_add,
                             exact_sub,
                             precision=double_format,
                             tag="multi_add")
        # testing Mul222
        multi_mul = Multiplication(multi_add,
                                   exact_mul,
                                   precision=double_format,
                                   tag="multi_mul")
        # testing Add221 and Add212 and Sub222
        multi_sub = Subtraction(Addition(exact_sub,
                                         v_x[1],
                                         precision=double_format,
                                         tag="add221"),
                                Addition(v_x[0],
                                         multi_mul,
                                         precision=double_format,
                                         tag="add212"),
                                precision=double_format,
                                tag="sub222")
        # testing Mul212 and Mul221
        mul212 = Multiplication(multi_sub,
                                v_x[0],
                                precision=double_format,
                                tag="mul212")
        mul221 = Multiplication(exact_mul,
                                v_x[1],
                                precision=double_format,
                                tag="mul221")
        # testing Sub221 and Sub212
        sub221 = Subtraction(mul212,
                             mul221.hi,
                             precision=double_format,
                             tag="sub221")
        sub212 = Subtraction(sub221,
                             mul212.lo,
                             precision=double_format,
                             tag="sub212")
        # testing FMA2111
        fma2111 = FMA(sub221.lo,
                      sub212.hi,
                      mul221.hi,
                      precision=double_format,
                      tag="fma2111")
        # testing FMA2112
        fma2112 = FMA(fma2111.lo,
                      fma2111.hi,
                      fma2111,
                      precision=double_format,
                      tag="fma2112")
        # testing FMA2212
        fma2212 = FMA(fma2112,
                      fma2112.hi,
                      fma2112,
                      precision=double_format,
                      tag="fma2212")
        # testing FMA2122
        fma2122 = FMA(fma2212.lo,
                      fma2212,
                      fma2212,
                      precision=double_format,
                      tag="fma2122")
        # testing FMA22222
        fma2222 = FMA(fma2122,
                      fma2212,
                      fma2111,
                      precision=double_format,
                      tag="fma2222")
        # testing Add122
        add122 = Addition(fma2222,
                          fma2222,
                          precision=self.precision,
                          tag="add122")
        # testing Add112
        add112 = Addition(add122,
                          fma2222,
                          precision=self.precision,
                          tag="add112")
        # testing Add121
        add121 = Addition(fma2222,
                          add112,
                          precision=self.precision,
                          tag="add121")
        # testing subnormalization
        multi_subnormalize = SpecificOperation(
            Addition(add121, add112, precision=double_format),
            Constant(3, precision=self.precision.get_integer_format()),
            specifier=SpecificOperation.Subnormalize,
            precision=double_format,
            tag="multi_subnormalize")
        result = Conversion(multi_subnormalize, precision=self.precision)

        scheme = Statement(Return(result))

        return scheme
Ejemplo n.º 27
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        table_size_log = self.table_size_log
        integer_size = 31
        integer_precision = ML_Int32

        max_bound = sup(abs(self.input_intervals[0]))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(
                local_x
            )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

            sin_local = sin(
                local_x
            )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.get_output_precision()
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(sollya.x), [0, 2],
            [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=self.get_output_precision())

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme
Ejemplo n.º 28
0
def insert_conversion_when_required(op_input, final_precision):
    if op_input.get_precision() != final_precision:
        return Conversion(op_input, precision=final_precision)
    else:
        return op_input
Ejemplo n.º 29
0
    def generate_scalar_scheme(self, vx, vy):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        vy.set_attributes(tag="y")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          n, positive or null integers
        #
        # pow(x, n) = x^(y)
        #             = exp(y * log(x))
        #             = 2^(y * log2(x))
        #             = 2^(y * (log2(m) + e))
        #
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        m = MantissaExtraction(vx, tag="m", precision=self.precision)

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed, language=None,
            table_getter= lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x) # /sollya.log(self.basis)



        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)


        # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
        #
        # log_approx = log2(Abs(m))
        # r = y * log_approx ~ y * log2(m)
        #
        # NOTES: manage cases where e is negative and
        # (y * log2(m)) AND (y * e) could cancel out
        # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT
        # be of opposite signs

        # log2(m) in [0, 1[ so cancellation can occur only if e == -1
        # we split 2^x in 2^x = 2^t0 * 2^t1
        # if e < 0: t0 = y * (log2(m) + e), t1=0
        # else:     t0 = y * log2(m), t1 = y * e

        t_cond = e < 0

        # e_y ~ e * y
        e_f = Conversion(e, precision=self.precision)
        #t0 = Select(t_cond, (e_f + log_approx) * vy, Multiplication(e_f, vy), tag="t0")
        #NearestInteger(t0, precision=self.precision, tag="t0_int")

        EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
        LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
        t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
        t0_frac = Select(t_cond, FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY) ,EY - t0_int, tag="t0_frac")
        #t0_frac.set_attributes(tag="t0_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)

        exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
        exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

        exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision), precision=self.precision, tag="exp2_t0_int")

        t1 = Select(t_cond, Constant(0, precision=self.precision), r)
        exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
        exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

        result_sign = Constant(1.0, precision=self.precision) # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        y_int = NearestInteger(vy, precision=self.precision)
        y_is_integer = Equal(y_int, vy)
        y_is_even = LogicalOr(
            # if y is a number (exc. inf) greater than 2**mantissa_size * 2,
            # then it is an integer multiple of 2 => even
            Abs(vy) >= 2**(self.precision.get_mantissa_size()+1),
            LogicalAnd(
                y_is_integer and Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                # we want to limit the modulo computation to an integer input
                Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
            )
        )
        y_is_odd = LogicalAnd(
            LogicalAnd(
                Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                y_is_integer
            ),
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 1)
        )


        # special cases management
        special_case_results = Statement(
            # x is sNaN OR y is sNaN
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsSignalingNaN), Test(vy, specifier=Test.IsSignalingNaN)),
                Return(FP_QNaN(self.precision))
            ),
            # pow(x, ±0) is 1 if x is not a signaling NaN
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Return(Constant(1.0, precision=self.precision))
            ),
            # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)),
                Return(Select(Test(vx, specifier=Test.IsPositiveZero), FP_PlusInfty(self.precision), FP_MinusInfty(self.precision))),
            ),
            # pow(±0, −∞) is +∞ with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(±0, +∞) is +0 with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is ±0 for finite y>0 an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)),
                Return(vx),
            ),
            # pow(−1, ±∞) is 1 with no exception
            ConditionBlock(
                LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(+1, y) is 1 for any y (even a quiet NaN)
            ConditionBlock(
                vx == 1,
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(x, +∞) is +0 for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +∞ for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +0 for a number y < 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +∞ for a number y > 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(−∞, y) is −0 for finite y < 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy < 0)),
                Return(FP_MinusZero(self.precision)),
            ),
            # pow(−∞, y) is −∞ for finite y > 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy > 0)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusZero(self.precision)),
            ),
            # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
            # TODO: signal divideByZero exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +0 for finite y>0 and not an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusZero(self.precision)),
            ),
        )

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = Statement(
            special_case_results,
            # fallback default cases
            Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
        return result
Ejemplo n.º 30
0
    def generate_bench(self, processor, test_num=1000, unroll_factor=10):
        """ generate performance bench for self.op_class """
        initial_inputs = [
            Constant(random.uniform(inf(self.init_interval),
                                    sup(self.init_interval)),
                     precision=precision)
            for i, precision in enumerate(self.input_precisions)
        ]

        var_inputs = [
            Variable("var_%d" % i,
                     precision=FormatAttributeWrapper(precision, ["volatile"]),
                     var_type=Variable.Local)
            for i, precision in enumerate(self.input_precisions)
        ]

        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0: "\"%s[%s] %%lld elts computed "\
                   "in %%lld cycles =>\\n     %%.3f CPE \\n\"" %
                (
                    self.bench_name,
                    self.output_precision.get_display_format()
                ),
                1: FO_Arg(0),
                2: FO_Arg(1),
                3: FO_Arg(2),
                4: FO_Arg(3)
            }, void_function=True
        )
        printf_timing_function = FunctionObject(
            "printf", [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
            ML_Void, printf_timing_op)
        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

        void_function_op = FunctionOperator("(void)",
                                            arity=1,
                                            void_function=True)
        void_function = FunctionObject("(void)", [self.output_precision],
                                       ML_Void, void_function_op)

        # initialization of operation inputs
        init_assign = metaop.Statement()
        for var_input, init_value in zip(var_inputs, initial_inputs):
            init_assign.push(ReferenceAssign(var_input, init_value))

        # test loop
        loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
        test_num_cst = Constant(test_num / unroll_factor,
                                precision=ML_Int64,
                                tag="test_num")

        # Goal build a chain of dependant operation to measure
        # elementary operation latency
        local_inputs = tuple(var_inputs)
        local_result = self.op_class(*local_inputs,
                                     precision=self.output_precision,
                                     unbreakable=True)
        for i in range(unroll_factor - 1):
            local_inputs = tuple([local_result] + var_inputs[1:])
            local_result = self.op_class(*local_inputs,
                                         precision=self.output_precision,
                                         unbreakable=True)
        # renormalisation
        local_result = self.renorm_function(local_result)

        # variable assignation to build dependency chain
        var_assign = Statement()
        var_assign.push(ReferenceAssign(var_inputs[0], local_result))
        final_value = var_inputs[0]

        # loop increment value
        loop_increment = 1

        test_loop = Loop(
            ReferenceAssign(loop_i, Constant(0, precision=ML_Int32)),
            loop_i < test_num_cst,
            Statement(var_assign,
                      ReferenceAssign(loop_i, loop_i + loop_increment)),
        )

        # bench scheme
        test_scheme = Statement(
            ReferenceAssign(timer, processor.get_current_timestamp()),
            init_assign,
            test_loop,
            ReferenceAssign(
                timer,
                Subtraction(processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            # prevent intermediary variable simplification
            void_function(final_value),
            printf_timing_function(
                final_value, Constant(test_num, precision=ML_Int64), timer,
                Division(Conversion(timer, precision=ML_Binary64),
                         Constant(test_num, precision=ML_Binary64),
                         precision=ML_Binary64))
            # ,Return(Constant(0, precision = ML_Int32))
        )

        return test_scheme