Ejemplo n.º 1
0
  def generate_embedded_testbench(self, tc_list, io_map, input_signals, output_signals, time_step, test_fname="test.input"):
      """ Build a testbench whose stimuli and expected results are embedded
          in the generated test process.

          :param tc_list: iterable of (input_values, output_values) pairs
          :param io_map: mapping handed to the tested component instance;
              its "clk" entry is driven when the design is pipelined
          :param input_signals: not used by this generator
          :param output_signals: forwarded to implement_test_case
          :param time_step: delay unit forwarded to the reset / test-case
              builders and used for the idle wait and the clock half-period
          :param test_fname: not used by this generator
          :return: one-element list holding the testbench CodeEntity
      """
      component_ctor = self.implementation.get_component_object()
      tested_instance = component_ctor(io_map = io_map, tag = "tested_entity")

      # one checking sub-scheme per test case, in tc_list order
      test_statement = Statement()
      for case_index, test_case in enumerate(tc_list):
          input_values, output_values = test_case
          case_scheme = self.implement_test_case(
              io_map, input_values, output_signals, output_values,
              time_step, index=case_index)
          test_statement.add(case_scheme)

      reset_statement = self.get_reset_statement(io_map, time_step)

      testbench = CodeEntity("testbench")

      # assertion on a constant-0 condition, reported with Warning
      # severity: it carries the end-of-test message
      end_of_test = Assert(
          Constant(0, precision = ML_Bool),
          " \"end of test, no error encountered \"",
          severity = Assert.Warning
      )
      # endless wait loop placed after the last test
      idle_forever = WhileLoop(
          Constant(1, precision=ML_Bool),
          Statement(
              Wait(time_step * (self.stage_num + 2)),
          )
      )
      test_process = Process(
          reset_statement, test_statement, end_of_test, idle_forever)

      testbench_scheme = Statement(tested_instance, test_process)

      if self.pipelined:
          half_time_step = time_step / 2
          # NOTE(review): with true division this holds for any int
          # time_step -- confirm whether an evenness check was intended
          assert (half_time_step * 2) == time_step
          # clock process for pipelined bench: clk driven to 1 then to 0,
          # each level followed by a half time step wait
          clk_sequence = []
          for clk_level in (1, 0):
              clk_sequence.append(
                  ReferenceAssign(
                      io_map["clk"],
                      Constant(clk_level, precision = ML_StdLogic)))
              clk_sequence.append(Wait(half_time_step))
          testbench_scheme.push(Process(Statement(*clk_sequence)))

      testbench.add_process(testbench_scheme)
      return [testbench]
Ejemplo n.º 2
0
    def test_ref_assign(self):
        """ Check StaticVectorizer on a predicated ReferenceAssign: the
            linearized most likely path, once variables are instantiated,
            must be the Constant 11 """
        va, vb, vc = (Variable(tag) for tag in ("a", "b", "c"))

        initial_assign = ReferenceAssign(va, Constant(3))
        likely_cond = (va > vb).modify_attributes(likely=True)
        likely_branch = Statement(
            ReferenceAssign(vb, va),
            ReferenceAssign(va, Constant(11)),
            Return(va))
        scheme = Statement(
            initial_assign,
            ConditionBlock(likely_cond, likely_branch),
            ReferenceAssign(va, Constant(7)),
            Return(vb))

        vectorized_path = StaticVectorizer().extract_vectorizable_path(
            scheme, fallback_policy)
        resolved_path = instanciate_variable(
            vectorized_path.linearized_optree,
            vectorized_path.variable_mapping)

        path_is_expected = (
            isinstance(resolved_path, Constant)
            and resolved_path.get_value() == 11)
        if not path_is_expected:
            # dump diagnostic information before failing the assertion
            print("test UT_StaticVectorizer failure")
            print("scheme: {}".format(scheme.get_str()))
            print("linearized_most_likely_path: {}".format(resolved_path))
        self.assertTrue(path_is_expected)
 def expand_sub_ndrange(var_range_list, kernel):
     """ Recursively expand <kernel> into one nested Loop per index range.

         The first range of var_range_list becomes the outermost loop and
         the remaining ranges are expanded recursively inside its body.
         Once no range is left, the kernel itself is expanded and wrapped,
         together with the statements extracted from its placeholders,
         into a Statement.

         The list is no longer consumed with pop(0): var_range_list is
         left untouched so the caller's range list survives the expansion.

         :param var_range_list: sequence of range descriptors exposing
             var_index, first_index, last_index and index_step
         :param kernel: kernel expression to expand in the innermost body
         :return: a Statement (no range left) or a Loop (otherwise)
     """
     if not var_range_list:
         pre_expanded_kernel = expand_kernel_expr(kernel)
         expanded_kernel, statement_list = extract_placeholder(
             pre_expanded_kernel)
         expanded_statement = Statement(*tuple(statement_list))
         # trace output kept unchanged from the original implementation
         print("expand_ndrange: ", expanded_kernel, statement_list)
         if expanded_kernel is not None:
             # append expanded_kernel at the Statement's end once
             # every PlaceHolder's dependency has been resolved
             expanded_statement.add(expanded_kernel)
         return expanded_statement

     # split without mutating the caller's list
     var_range = var_range_list[0]
     inner_ranges = var_range_list[1:]
     return Loop(
         # init statement
         ReferenceAssign(var_range.var_index, var_range.first_index),
         # exit condition
         var_range.var_index <= var_range.last_index,
         # loop body
         Statement(
             expand_sub_ndrange(inner_ranges, kernel),
             # loop iterator increment
             ReferenceAssign(var_range.var_index, var_range.var_index +
                             var_range.index_step)),
     )
    def generate_tensor_check_loop(self, tensor_descriptors, input_tables,
                                   output_tables):
        """ Build the statement checking every output tensor entry against
            its expected value(s).

            For each output tensor a Loop iterates an index over the
            tensor's bounding size, evaluates the accuracy check test on
            the loaded result and, when the test triggers, executes the
            error branch (error detail print + expected value print).
            The error branch additionally returns the constant 1 unless
            self.break_error is set.

            :param tensor_descriptors: pair (input descriptor list,
                output descriptor list)
            :param input_tables: input tables, forwarded to
                generate_expected_table
            :param output_tables: tables holding the computed results,
                indexed like the output descriptor list
            :return: Statement gathering one check Loop per output tensor
        """
        # unpack tensor descriptors tuple (only the output side is used)
        (_input_tensor_descriptor_list,
         output_tensor_descriptor_list) = tensor_descriptors
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_error_detail_function = self.get_printf_error_detail_fct(
            output_tensor_descriptor_list[0])

        # generate the expected table for the whole multi-array
        expected_tables = self.generate_expected_table(tensor_descriptors,
                                                       input_tables)

        # global statement to list all checks
        check_statement = Statement()

        # implement check for each output tensor
        for out_id, out_td in enumerate(output_tensor_descriptor_list):
            # expected values for the (vj)-th entry of the sub-array
            expected_values = [
                TableLoad(expected_tables[out_id], vj, i)
                for i in range(self.accuracy.get_num_output_value())
            ]
            # local result for the (vj)-th entry of the sub-array
            local_result = TableLoad(output_tables[out_id], vj)

            array_len = out_td.get_bounding_size()

            # operations executed when the check test triggers; both
            # variants print the expected values (the former code referenced
            # an undefined name <output_values> when break_error was set)
            failure_ops = [
                printf_error_detail_function(vj, local_result),
                self.accuracy.get_output_print_call(
                    self.function_name, expected_values),
            ]
            if not self.break_error:
                failure_ops.append(Return(Constant(1, precision=ML_Int32)))
            return_statement_break = Statement(*failure_ops)

            check_array_loop = Loop(
                ReferenceAssign(vj, 0), vj < array_len,
                Statement(
                    ConditionBlock(
                        self.accuracy.get_output_check_test(
                            local_result, expected_values),
                        return_statement_break),
                    ReferenceAssign(vj, vj + 1),
                ))
            check_statement.add(check_array_loop)
        return check_statement
Ejemplo n.º 5
0
 def get_reset_statement(self, io_map, time_step):
     """ Build the reset sequence executed at the start of a testbench.

         Returns an empty Statement when self.reset_pipeline is false.
         Otherwise the reset signal is driven to its reset level, held for
         3 time steps, released, and followed by another 3 time steps
         wait; every recirculation signal is finally driven to 0.

         :param io_map: mapping from signal name / tag to signal node
         :param time_step: delay unit used for the waits
         :return: the reset Statement
     """
     reset_statement = Statement()
     if not self.reset_pipeline:
         return reset_statement

     # TODO: fix pipeline register reset
     asserted_level = 0 if self.negate_reset else 1
     released_level = 1 - asserted_level
     reset_signal = io_map[self.reset_name]

     reset_sequence = (
         ReferenceAssign(reset_signal, Constant(asserted_level, precision=ML_StdLogic)),
         # to account for synchronous reset
         Wait(time_step * 3),
         ReferenceAssign(reset_signal, Constant(released_level, precision=ML_StdLogic)),
         Wait(time_step * 3),
     )
     for reset_op in reset_sequence:
         reset_statement.add(reset_op)

     for recirculate_signal in self.recirculate_signal_map.values():
         recirculate_input = io_map[recirculate_signal.get_tag()]
         reset_statement.add(
             ReferenceAssign(recirculate_input, Constant(0, precision=ML_StdLogic)))
     return reset_statement
Ejemplo n.º 6
0
def get_input_assign(input_signal, input_value):
    """ Build the statement assigning <input_value> to <input_signal>,
        the value being wrapped in a Constant of the signal's precision """
    signal_format = input_signal.get_precision()
    typed_value = Constant(input_value, precision=signal_format)
    return ReferenceAssign(input_signal, typed_value)
Ejemplo n.º 7
0
 def add_stage_forward(self, op_dst, op_src, stage):
     """ Record that op_src must be forwarded into op_dst at <stage>.

         The assignment is appended to the stage's list in
         self.stage_forward (created on first use) and op_src is added to
         self.pre_statement.
     """
     trace_message = (
         " adding stage forward {op_src} to {op_dst} @ stage {stage}".
         format(op_src=op_src, op_dst=op_dst, stage=stage))
     Log.report(Log.Verbose, trace_message)

     if stage not in self.stage_forward:
         self.stage_forward[stage] = []
     forward_assign = ReferenceAssign(op_dst, op_src)
     self.stage_forward[stage].append(forward_assign)
     self.pre_statement.add(op_src)
def expand_kernel_expr(kernel, iterator_format=ML_Int32):
    """ Expand a kernel expression into the corresponding MDL graph

        - NDRange nodes are delegated to expand_ndrange
        - Sum nodes become an accumulation Loop wrapped in a PlaceHolder
          whose value is the accumulator variable
        - accessors are delegated to expand_accessor
        - leaf nodes are returned untouched
        - any other node has its inputs expanded in place

        :param kernel: kernel expression to expand
        :param iterator_format: not used by this function
        :return: the expanded node
    """
    if isinstance(kernel, NDRange):
        return expand_ndrange(kernel)

    if isinstance(kernel, Sum):
        iter_range = kernel.index_iter_range
        var_iter = iter_range.var_index
        acc_format = kernel.precision
        # TODO/FIXME to be uniquified
        acc = Variable("acc",
                       var_type=Variable.Local,
                       precision=acc_format)
        # TODO/FIXME implement proper acc init
        zero_value = ([0] * acc_format.get_vector_size()
                      if acc_format.is_vector_format() else 0)
        acc_zero = Constant(zero_value, precision=acc_format)

        # iterator and accumulator initialization
        loop_init = Statement(
            ReferenceAssign(var_iter, iter_range.first_index),
            ReferenceAssign(acc, acc_zero))
        loop_cond = var_iter <= iter_range.last_index
        # accumulate the (recursively expanded) element operation
        expanded_elt = expand_kernel_expr(kernel.elt_operation)
        accumulate = ReferenceAssign(
            acc, Addition(acc, expanded_elt, precision=acc_format))
        # loop iterator increment
        advance = ReferenceAssign(var_iter, var_iter + iter_range.index_step)

        scheme = Loop(loop_init, loop_cond, Statement(accumulate, advance))
        return PlaceHolder(acc, scheme)

    if isinstance(kernel, (ReadAccessor, WriteAccessor)):
        return expand_accessor(kernel)

    if is_leaf_node(kernel):
        return kernel

    # vanilla metalibm ops are left unmodified (except
    # recursive expansion of their inputs)
    for input_index, input_op in enumerate(kernel.inputs):
        kernel.set_input(input_index, expand_kernel_expr(input_op))
    return kernel
Ejemplo n.º 9
0
    def generate_scalar_scheme(self, vx, vy):
        """ Build the scalar scheme computing a remainder of vx by vy.

            The remainder is initialized to vx - trunc(vx / vy) * vy and
            refined by a Loop which runs while |qi| exceeds
            S2 ** (mantissa size of self.precision); each iteration
            recomputes qi = trunc(rem / vy) and rem = rem - qi * vy.
            The scheme returns rem + vy when rem * vx is negative, and rem
            otherwise.

            :param vx: dividend node
            :param vy: divisor node
            :return: the Statement implementing the scheme
        """
        fmt = self.precision
        quotient = Division(vx, vy, precision=fmt)
        trunc_quotient = Trunc(quotient, precision=fmt)
        rem = Variable("rem",
                       var_type=Variable.Local,
                       precision=fmt)
        qi = Variable("qi", var_type=Variable.Local, precision=fmt)
        qi_bound = Constant(S2**fmt.get_mantissa_size())
        init_rem = FusedMultiplyAdd(-trunc_quotient, vy, vx)

        # NOTES: 1 / vy could be factorized to save time, but it would
        # make rem / vy approximate
        loop_init = Statement(
            ReferenceAssign(rem, init_rem),
            ReferenceAssign(qi, trunc_quotient),
        )
        keep_reducing = Abs(qi) > qi_bound
        loop_body = Statement(
            ReferenceAssign(qi, Trunc(rem / vy, precision=fmt)),
            ReferenceAssign(rem, FMA(-qi, vy, rem)))
        iterative_fmod = Loop(loop_init, keep_reducing, loop_body)

        # if rem's sign and vx sign mismatch
        sign_mismatch = (rem * vx < 0.0).modify_attributes(
            tag="update_cond", debug=debug_multi)
        return Statement(
            rem,
            iterative_fmod,
            ConditionBlock(
                sign_mismatch,
                Return(rem + vy),
                Return(rem),
            ))
Ejemplo n.º 10
0
def convert_bit_heap_to_fixed_point(current_bit_heap, signed=False):
    """ Drain a bit heap into a list of fixed-point operands.

        While the heap still holds bits, one bit is popped from every
        index between the heap's min and max index and assigned to the
        matching position of a fresh "op_<n>" signal (0 when no bit is
        available at that index). Each signal is type-cast to a
        fixed-point format and wrapped in a PlaceHolder.

        :param current_bit_heap: heap exposing max_count, min_index,
            max_index and pop_bits
        :param signed: signedness of the generated fixed-point formats
        :return: pair (list of PlaceHolder operands, Statement gathering
            every bit assignment; the same Statement is attached to all
            the place holders)
    """
    # final propagating sum
    op_list = []
    op_statement = Statement()
    while current_bit_heap.max_count() > 0:
        # bounds are sampled once, before any bit is popped
        high_index = current_bit_heap.max_index
        low_index = current_bit_heap.min_index
        op_size = high_index - low_index + 1
        op_format = ML_StdLogicVectorFormat(op_size)
        # operands are numbered in creation order
        op_reduce = Signal("op_%d" % len(op_list),
                           precision=op_format,
                           var_type=Variable.Local)

        for heap_index in range(low_index, high_index + 1):
            out_index = heap_index - low_index
            popped_bits = current_bit_heap.pop_bits(heap_index, 1)
            if len(popped_bits) == 0:
                bit_assign = ReferenceAssign(
                    BitSelection(op_reduce, out_index),
                    Constant(0, precision=ML_StdLogic))
            else:
                assert len(popped_bits) == 1
                bit_assign = ReferenceAssign(
                    BitSelection(op_reduce, out_index),
                    popped_bits[0])
            op_statement.push(bit_assign)

        op_precision = fixed_point(op_size + low_index,
                                   -low_index,
                                   signed=signed)
        typed_operand = TypeCast(op_reduce, precision=op_precision)
        op_list.append(PlaceHolder(typed_operand, op_statement))
    return op_list, op_statement
Ejemplo n.º 11
0
    def add_stage_forward(self, op_dst, op_src, stage):
        """ Register the forwarding of op_src into op_dst for a stage

            The assignment is appended to the stage's list in
            self.stage_forward (created on first use), the bit size of
            op_dst's precision is added to self.register_count and op_src
            is added to self.pre_statement.

            :param op_dst: stage output node (register output)
            :type op_dst: ML_Operation
            :param op_src: stage input node (register entry)
            :type op_src: ML_Operation
            :param stage: destination stage index
            :type stage: int
        """
        Log.report(
            Log.Verbose,
            " adding stage forward {op_src} to {op_dst} @ stage {stage}",
            op_src=op_src, op_dst=op_dst, stage=stage)

        if stage not in self.stage_forward:
            self.stage_forward[stage] = []
        forward_assign = ReferenceAssign(op_dst, op_src)
        self.stage_forward[stage].append(forward_assign)

        # account for the bits of the forwarded destination
        register_width = op_dst.get_precision().get_bit_size()
        self.register_count += register_width
        self.pre_statement.add(op_src)
Ejemplo n.º 12
0
 def recursive_inline(node):
     """ Inline <node>, using names of the enclosing scope:
         memoization_map (cache), inputs_var2value (argument
         substitution) and dst_var (destination of returned values).

         - cached nodes are returned from memoization_map
         - nodes listed in inputs_var2value are replaced by their value
         - Return nodes become an assignment of the inlined returned
           value to dst_var, unless that value already is dst_var
         - leaf nodes are kept as-is
         - any other node has its inputs inlined in place
     """
     if node in memoization_map:
         return memoization_map[node]

     if node in inputs_var2value:
         inlined = inputs_var2value[node]
     elif isinstance(node, Return):
         returned_value = recursive_inline(node.get_input(0))
         if returned_value is dst_var:
             # value already lives in dst_var: returned directly, without
             # registering the Return node in the memoization map
             return returned_value
         inlined = ReferenceAssign(dst_var, returned_value)
     else:
         if not isinstance(node, ML_LeafNode):
             for input_index, input_op in enumerate(node.inputs):
                 node.set_input(input_index, recursive_inline(input_op))
         inlined = node

     memoization_map[node] = inlined
     return inlined
Ejemplo n.º 13
0
  def generate_datafile_testbench(self, tc_list, io_map, input_signals, output_signals, time_step, test_fname="test.input"):
    """ Generate testbench with input and output data externalized in
        a data file

        The test vectors of tc_list are written to the file test_fname
        (one line per test case, one hexadecimal field per signal) while
        the generated test process opens that file, reads it line by line,
        drives the inputs and checks the outputs.

        :param tc_list: iterable of (input_values, output_values) pairs,
            each indexable by signal name
        :param io_map: mapping used to instantiate the tested component,
            build the reset sequence and fetch the "clk" signal
        :param input_signals: mapping from input name to input signal
        :param output_signals: mapping from output name to output signal
        :param time_step: delay unit used for the reset sequence, the
            per-test wait and the clock half-period
        :param test_fname: name of the data file, written here and opened
            by the generated testbench
        :return: one-element list holding the testbench CodeEntity
    """
    # textio function to read hexadecimal text
    def FCT_HexaRead_gen(input_format):
        # builds a fresh "hread" function object for the given format
        legalized_input_format = input_format
        FCT_HexaRead = FunctionObject("hread", [HDL_LINE, legalized_input_format], ML_Void, FunctionOperator("hread", void_function=True, arity=2))
        return FCT_HexaRead
    # textio function to read binary text
    FCT_Read = FunctionObject("read", [HDL_LINE, ML_StdLogic], ML_Void, FunctionOperator("read", void_function=True, arity=2))
    # line buffer shared by every read of the generated process
    input_line = Variable("input_line", precision=HDL_LINE, var_type=Variable.Local)

    # building ordered list of input and output signal names
    input_signal_list = [sname for sname in input_signals.keys()]
    # statement reading each input field from the current line and
    # assigning it to the matching input signal
    input_statement = Statement()
    for input_name in input_signal_list:
        input_format = input_signals[input_name].precision
        input_var = Variable(
            "v_" + input_name,
            precision=input_format,
            var_type=Variable.Local)
        # single-bit values use "read", every other format uses "hread"
        if input_format is ML_StdLogic:
            input_statement.add(FCT_Read(input_line, input_var))
        else:
            input_statement.add(FCT_HexaRead_gen(input_format)(input_line, input_var))
        input_statement.add(ReferenceAssign(input_signals[input_name], input_var))

    output_signal_list = [sname for sname in output_signals.keys()]
    # statement reading each expected output field from the current line
    # and asserting that the output signal matches it
    output_statement = Statement()
    for output_name in output_signal_list:
        output_format = output_signals[output_name].precision
        output_var = Variable(
            "v_" + output_name,
            precision=output_format,
            var_type=Variable.Local)
        if output_format is ML_StdLogic:
            output_statement.add(FCT_Read(input_line, output_var))
        else:
            output_statement.add(FCT_HexaRead_gen(output_format)(input_line, output_var))

        output_signal = output_signals[output_name]
        #value_msg = get_output_value_msg(output_signal, output_value)
        test_pass_cond, check_statement = get_output_check_statement(output_signal, output_name, output_var)

        # message listing every input as " <name>=<value>"; it does not
        # depend on the current output and is rebuilt at each iteration
        input_msg = multi_Concatenation(*tuple(sum([[" %s=" % input_tag, signal_str_conversion(input_signals[input_tag], input_signals[input_tag].precision)] for input_tag in input_signal_list], [])))

        output_statement.add(check_statement)
        assert_statement = Assert(
            test_pass_cond,
            multi_Concatenation(
                "unexpected value for inputs ",
                input_msg,
                " expecting :",
                signal_str_conversion(output_var, output_format),
                " got :",
                signal_str_conversion(output_signal, output_format),
               precision = ML_String
            ),
            severity=Assert.Failure
        )
        output_statement.add(assert_statement)

    self_component = self.implementation.get_component_object()
    self_instance = self_component(io_map = io_map, tag = "tested_entity")
    # NOTE(review): test_statement is never used below
    test_statement = Statement()

    DATA_FILE_NAME = test_fname

    # writing the test vectors into the data file (done at generation time)
    with open(DATA_FILE_NAME, "w") as data_file:
        # dumping column tags
        data_file.write("# " + " ".join(input_signal_list + output_signal_list) + "\n")

        def get_raw_cst_string(cst_format, cst_value):
            # hexadecimal coding of the value, zero-padded to the number
            # of hex digits required by the format's bit size
            size = int((cst_format.get_bit_size() + 3) / 4)
            return ("{:x}").format(cst_format.get_base_format().get_integer_coding(cst_value)).zfill(size)

        for input_values, output_values in tc_list:
            # one line per test case: input fields followed by the
            # expected output fields, in signal list order
            cst_list = []
            for input_name in input_signal_list:
                input_value = input_values[input_name]
                input_format = input_signals[input_name].get_precision()
                cst_list.append(get_raw_cst_string(input_format, input_value))

            for output_name in output_signal_list:
                output_value = output_values[output_name]
                output_format = output_signals[output_name].get_precision()
                cst_list.append(get_raw_cst_string(output_format, output_value))
            # dumping line into file
            data_file.write(" ".join(cst_list) + "\n")

    # file handling objects used by the generated process
    input_stream = Variable("data_file", precision=HDL_FILE, var_type=Variable.Local)
    file_status = Variable("file_status", precision=HDL_OPEN_FILE_STATUS, var_type=Variable.Local)
    FCT_EndFile = FunctionObject("endfile", [HDL_FILE], ML_Bool, FunctionOperator("endfile", arity=1)) 
    # FILE_OPEN is called with a constant 4th argument "READ_MODE"
    FCT_OpenFile = FunctionObject(
        "FILE_OPEN", [HDL_OPEN_FILE_STATUS, HDL_FILE, ML_String], ML_Void,
        FunctionOperator(
            "FILE_OPEN",
            arg_map={0: FO_Arg(0), 1: FO_Arg(1), 2: FO_Arg(2), 3: "READ_MODE"},
            void_function=True))
    FCT_ReadLine =  FunctionObject(
        "readline", [HDL_FILE, HDL_LINE], ML_Void,
        FunctionOperator("readline", void_function=True, arity=2))

    reset_statement = self.get_reset_statement(io_map, time_step)
    OPEN_OK = Constant("OPEN_OK", precision=HDL_OPEN_FILE_STATUS)

    testbench = CodeEntity("testbench")
    test_process = Process(
        reset_statement,
        FCT_OpenFile(file_status, input_stream, DATA_FILE_NAME),
        # failure assertion raised when the file status is not OPEN_OK
        ConditionBlock(
            Comparison(file_status, OPEN_OK, specifier=Comparison.NotEqual),
          Assert(
            Constant(0, precision=ML_Bool),
            " \"failed to open file {}\"".format(DATA_FILE_NAME),
            severity=Assert.Failure
          )
        ),
        # consume legend line
        FCT_ReadLine(input_stream, input_line),
        # one iteration per data line: read, drive inputs, wait, check
        WhileLoop(
            LogicalNot(FCT_EndFile(input_stream)),
            Statement(
                FCT_ReadLine(input_stream, input_line),
                input_statement,
                Wait(time_step * (self.stage_num + 2)),
                output_statement,
            ),
        ),
      # end of test
      Assert(
        Constant(0, precision = ML_Bool),
        " \"end of test, no error encountered \"",
        severity = Assert.Warning
      ),
      # infinite end loop
        WhileLoop(
            Constant(1, precision=ML_Bool),
            Statement(
                Wait(time_step * (self.stage_num + 2)),
            )
        )
    )

    testbench_scheme = Statement(
      self_instance,
      test_process
    )

    if self.pipelined:
        # NOTE(review): with true division the assertion below holds for
        # any int time_step -- confirm whether an evenness check was meant
        half_time_step = time_step / 2
        assert (half_time_step * 2) == time_step
        # adding clock process for pipelined bench
        clk_process = Process(
            Statement(
                ReferenceAssign(
                    io_map["clk"],
                    Constant(1, precision = ML_StdLogic)
                ),
                Wait(half_time_step),
                ReferenceAssign(
                    io_map["clk"],
                    Constant(0, precision = ML_StdLogic)
                ),
                Wait(half_time_step),
            )
        )
        testbench_scheme.push(clk_process)

    testbench.add_process(testbench_scheme)

    return [testbench]
Ejemplo n.º 14
0
def generate_pipeline_stage(entity,
                            reset=False,
                            recirculate=False,
                            one_process_per_stage=True):
    """ Process an entity to generate the required pipeline stages

        Every output assignment and every recirculation control signal of
        the entity is retimed into a RetimeMap; the forwarding assignments
        recorded per stage are then wrapped into clocked processes which
        are added to entity.implementation.

        :param entity: entity to process; this function reads its
            implementation, recirculate_signal_map, clock input and
            (when reset is set) reset_signal
        :param reset: when set, each stage assigns 0 to its forwarded
            targets while entity.reset_signal equals 1
        :param recirculate: when set (and reset is set), a stage only
            forwards while its recirculation signal equals 0
        :param one_process_per_stage: generate one Process per stage
            rather than a single Process gathering every stage
        :return: number of pipeline stages (keys of the stage forward map)
    """
    # NOTE(review): retiming_map is never used below
    retiming_map = {}
    retime_map = RetimeMap()
    output_assign_list = entity.implementation.get_output_assign()
    for output in output_assign_list:
        Log.report(Log.Verbose, "generating pipeline from output {} ", output)
        retime_op(output, retime_map)
    for recirculate_stage in entity.recirculate_signal_map:
        recirculate_ctrl = entity.recirculate_signal_map[recirculate_stage]
        Log.report(Log.Verbose,
                   "generating pipeline from recirculation control signal {}",
                   recirculate_ctrl)
        retime_op(recirculate_ctrl, retime_map)

    process_statement = Statement()

    # adding stage forward process
    clk = entity.get_clk_input()
    clock_statement = Statement()
    # handle towards the first clock Process (in generation order)
    # which must be the one whose pre_statement is filled with
    # signal required to be generated outside the processes
    first_process = False
    for stage_id in sorted(retime_map.stage_forward.keys()):
        # default stage behavior: execute every forwarding assignment
        stage_statement = Statement(*tuple(
            assign for assign in retime_map.stage_forward[stage_id]))

        if reset:
            # reset branch: assign 0 to the target of each forwarding
            reset_statement = Statement()
            for assign in retime_map.stage_forward[stage_id]:
                target = assign.get_input(0)
                reset_value = Constant(0, precision=target.get_precision())
                reset_statement.push(ReferenceAssign(target, reset_value))

            # NOTE(review): this guard is nested under `if reset:`, so
            # recirculate=True has no effect when reset is False --
            # confirm this is intended
            if recirculate:
                # inserting recirculation condition
                recirculate_signal = entity.get_recirculate_signal(stage_id)
                stage_statement = ConditionBlock(
                    Comparison(
                        recirculate_signal,
                        Constant(0,
                                 precision=recirculate_signal.get_precision()),
                        specifier=Comparison.Equal,
                        precision=ML_Bool), stage_statement)

            # reset when the reset signal equals 1, else the (possibly
            # recirculation-guarded) forwarding
            stage_statement = ConditionBlock(
                Comparison(entity.reset_signal,
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool), reset_statement,
                stage_statement)

        # To meet simulation / synthesis tools, we build
        # a single if clock predicate block per stage
        # (predicate: event on clk and clk equal to 1)
        clock_block = ConditionBlock(
            LogicalAnd(Event(clk, precision=ML_Bool),
                       Comparison(clk,
                                  Constant(1, precision=ML_StdLogic),
                                  specifier=Comparison.Equal,
                                  precision=ML_Bool),
                       precision=ML_Bool), stage_statement)

        if one_process_per_stage:
            clock_process = Process(clock_block, sensibility_list=[clk])
            entity.implementation.add_process(clock_process)
            # keeps the process already recorded, if any (presumably
            # Process objects are truthy -- TODO confirm)
            first_process = first_process or clock_process
        else:
            clock_statement.add(clock_block)
    if one_process_per_stage:
        # processes were already added stage by stage
        pass
    else:
        # single process gathering every stage's clock block
        process_statement.add(clock_statement)
        pipeline_process = Process(process_statement, sensibility_list=[clk])
        entity.implementation.add_process(pipeline_process)
        first_process = pipeline_process
    # statement that gather signals which must be pre-computed
    # NOTE(review): first_process is still False here when no stage was
    # generated with one_process_per_stage set; the loop below relies on
    # pre_statement being empty in that case -- verify
    for op in retime_map.pre_statement:
        first_process.add_to_pre_statement(op)
    stage_num = len(retime_map.stage_forward.keys())
    #print "there are %d pipeline stages" % (stage_num)
    return stage_num
Ejemplo n.º 15
0
    def generate_scheme(self):
        """ Generate the scheme applying a function to every element of
            an array.

            The generated function takes three inputs: "dst", "src" and
            "len". A main Loop iterates an index i from 0, computes the
            result for the element(s) loaded from src at i and stores it
            into dst at i. self.multi_elt_num elements are processed per
            iteration; when it is greater than 1 an epilog Loop processes
            the remaining len % multi_elt_num elements one at a time.

            The per-element computation is either a call to the function
            named by self.use_libm_function, or an inlined scheme
            generated from self.function_ctor.

            :return: the main Loop, or a Statement (main Loop + epilog
                Loop) when multi_elt_num > 1
        """
        # declaring target and instantiating optimization engine
        precision_ptr = self.get_input_precision(0)
        index_format = self.get_input_precision(2)
        multi_elt_num = self.multi_elt_num

        dst = self.implementation.add_input_variable("dst", precision_ptr)
        src = self.implementation.add_input_variable("src", precision_ptr)
        n = self.implementation.add_input_variable("len", index_format)

        # loop index and index constant 0
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        CU0 = Constant(0, precision=index_format)

        # format of the value processed per iteration (scalar by default)
        element_format = self.precision

        self.function_list = []

        if multi_elt_num > 1:
            # vector format holding multi_elt_num elements
            element_format = VECTOR_TYPE_MAP[self.precision][multi_elt_num]

        elt_input = TableLoad(src, i, precision=element_format)

        local_exp = Variable("local_exp",
                             precision=element_format,
                             var_type=Variable.Local)

        if self.use_libm_function:
            # external function called once per scalar element
            # NOTE(review): argument and result formats are hard-coded to
            # ML_Binary32 whatever self.precision is -- confirm
            libm_fct_operator = FunctionOperator(self.use_libm_function,
                                                 arity=1)
            libm_fct = FunctionObject(self.use_libm_function, [ML_Binary32],
                                      ML_Binary32, libm_fct_operator)

            if multi_elt_num > 1:
                # call the scalar function on each vector lane and
                # re-assemble the results into a vector
                result_list = [
                    libm_fct(
                        VectorElementSelection(elt_input,
                                               Constant(elt_id,
                                                        precision=ML_Integer),
                                               precision=self.precision))
                    for elt_id in range(multi_elt_num)
                ]
                result = VectorAssembling(*result_list,
                                          precision=element_format)
            else:
                result = libm_fct(elt_input)
            elt_result = ReferenceAssign(local_exp, result)
        else:
            if multi_elt_num > 1:
                # NOTE(review): scalar_result is not used in this branch
                scalar_result = Variable("scalar_result",
                                         precision=self.precision,
                                         var_type=Variable.Local)
                fct_ctor_args = self.function_ctor.get_default_args(
                    precision=self.precision,
                    libm_compliant=False,
                )

                # scalar scheme generated from the function constructor
                meta_function = self.function_ctor(fct_ctor_args)
                exponential_scheme = meta_function.generate_scheme()

                # instanciating required passes for typing
                pass_inst_abstract_prec = PassInstantiateAbstractPrecision(
                    self.processor)
                pass_inst_prec = PassInstantiatePrecision(
                    self.processor, default_precision=None)

                # executing format instantiation passes on optree
                exponential_scheme = pass_inst_abstract_prec.execute_on_optree(
                    exponential_scheme)
                exponential_scheme = pass_inst_prec.execute_on_optree(
                    exponential_scheme)

                vectorizer = StaticVectorizer()

                # extracting scalar argument from meta_exponential meta function
                scalar_input = meta_function.implementation.arg_list[0]

                # vectorize scalar scheme
                vector_result, vec_arg_list, vector_scheme, scalar_callback, scalar_callback_fct = vectorize_function_scheme(
                    vectorizer,
                    self.get_main_code_object(), exponential_scheme,
                    element_format.get_scalar_format(), [scalar_input],
                    multi_elt_num)

                # inline the vector scheme with the loaded vector as argument
                elt_result = inline_function(vector_scheme, vector_result,
                                             {vec_arg_list[0]: elt_input})

                # the value stored by the main loop becomes the vector
                # scheme result
                local_exp = vector_result

                # the scalar callback is registered in function_list and
                # reused by the epilog loop below
                self.function_list.append(scalar_callback_fct)
                libm_fct = scalar_callback

            else:
                scalar_input = elt_input
                scalar_result = local_exp

                # inline the scalar function scheme, its result being
                # written into local_exp
                elt_result = generate_inline_fct_scheme(
                    self.function_ctor, scalar_result, [scalar_input], {
                        "precision": self.precision,
                        "libm_compliant": False
                    })

        CU1 = Constant(1, precision=index_format)

        local_exp_init_value = Constant(0, precision=self.precision)
        if multi_elt_num > 1:
            local_exp_init_value = Constant([0] * multi_elt_num,
                                            precision=element_format)
            # the main loop stops at the largest multiple of multi_elt_num
            # not exceeding n
            remain_n = Modulo(n, multi_elt_num, precision=index_format)
            iter_n = n - remain_n
            CU_ELTNUM = Constant(multi_elt_num, precision=index_format)
            inc = i + CU_ELTNUM
        else:
            # scalar processing: no epilog required
            remain_n = None
            iter_n = n
            inc = i + CU1

        # main loop processing multi_elt_num element(s) per iteration
        main_loop = Loop(
            ReferenceAssign(i, CU0),
            i < iter_n,
            Statement(ReferenceAssign(local_exp, local_exp_init_value),
                      elt_result,
                      TableStore(local_exp, dst, i, precision=ML_Void),
                      ReferenceAssign(i, inc)),
        )
        # epilog to process remaining item (when the length is not a multiple
        # of multi_elt_num)
        if not remain_n is None:
            # TODO/FIXME: try alternative method for processing epilog
            #             by using full vector length and mask
            # the epilog has no init statement: it resumes from the index
            # value left by the main loop
            epilog_loop = Loop(
                Statement(), i < n,
                Statement(
                    TableStore(libm_fct(
                        TableLoad(src, i, precision=self.precision)),
                               dst,
                               i,
                               precision=ML_Void),
                    ReferenceAssign(i, i + CU1),
                ))
            main_loop = Statement(main_loop, epilog_loop)

        return main_loop
Ejemplo n.º 16
0
    def generate_scheme(self):
        """ Build the scheme exponentiating an array, then normalising it.

            The generated function takes three inputs ("dst", "src", "len").
            A first loop stores the exponential of each src element into dst
            while accumulating the sum of those exponentials in "elt_acc";
            a second loop multiplies every dst element by the reciprocal of
            that sum.

            @return Statement chaining the accumulator reset, the
                    exponential loop, the reciprocal node and the
                    normalisation loop
        """
        # array format is taken from input #0, index format from input #2
        array_format = self.get_input_precision(0)
        index_type = self.get_input_precision(2)

        dst = self.implementation.add_input_variable("dst", array_format)
        src = self.implementation.add_input_variable("src", array_format)
        length = self.implementation.add_input_variable("len", index_type)

        idx = Variable("i", precision=index_type, var_type=Variable.Local)
        one = Constant(1, precision=index_type)
        zero = Constant(0, precision=index_type)
        # increment node, only used by the normalisation loop
        next_idx = idx + one

        src_elt = TableLoad(src, idx, precision=self.precision)

        exp_value = Variable("local_exp",
                             precision=self.precision,
                             var_type=Variable.Local)

        if not self.use_libm_function:
            # inline a generated exponential scheme, its first argument
            # being substituted by the element loaded from src
            default_args = ML_Exponential.get_default_args(
                precision=self.precision,
                libm_compliant=False,
                debug=False,
            )
            exp_generator = ML_Exponential(default_args)
            exp_scheme = exp_generator.generate_scheme()
            exp_argument = exp_generator.implementation.arg_list[0]

            exp_compute = inline_function(
                exp_scheme,
                exp_value,
                {exp_argument: src_elt},
            )
        else:
            # NOTE(review): the libm call is hard-wired to expf / ML_Binary32
            #               whatever self.precision is -- confirm intended
            expf_operator = FunctionOperator("expf", arity=1)
            expf_function = FunctionObject("expf", [ML_Binary32], ML_Binary32,
                                           expf_operator)
            exp_compute = ReferenceAssign(exp_value, expf_function(src_elt))

        exp_sum = Variable("elt_acc",
                           precision=self.precision,
                           var_type=Variable.Local)

        # first pass: dst[i] <- exp(src[i]) and exp_sum += exp(src[i])
        exp_init = ReferenceAssign(idx, zero)
        exp_cond = idx < length
        exp_body = Statement(
            ReferenceAssign(exp_value, 0),
            exp_compute,
            TableStore(exp_value, dst, idx, precision=ML_Void),
            ReferenceAssign(exp_sum, exp_sum + exp_value),
            ReferenceAssign(idx, idx + one),
        )
        exp_loop = Loop(exp_init, exp_cond, exp_body)

        # reciprocal of the accumulated sum
        sum_rcp = Division(1,
                           exp_sum,
                           precision=self.precision,
                           tag="sum_rcp",
                           debug=debug_multi)

        # second pass: dst[i] <- dst[i] * (1 / exp_sum)
        div_init = ReferenceAssign(idx, zero)
        div_cond = idx < length
        scaled_elt = Multiplication(
            TableLoad(dst, idx, precision=self.precision), sum_rcp)
        div_body = Statement(
            TableStore(scaled_elt, dst, idx, precision=ML_Void),
            ReferenceAssign(idx, next_idx),
        )
        div_loop = Loop(div_init, div_cond, div_body)

        return Statement(ReferenceAssign(exp_sum, 0), exp_loop, sum_rcp,
                         div_loop)
Ejemplo n.º 17
0
    def generate_bench(self, processor, test_num=1000, unroll_factor=10):
        """ Build the performance bench scheme for self.op_class.

            The bench chains unroll_factor dependent applications of
            self.op_class (the result of one feeding the first operand of
            the next), runs that chain in a loop, and measures the loop with
            processor.get_current_timestamp(). The elapsed count and the
            count divided by test_num are printed through printf.

            @param processor object providing get_current_timestamp()
            @param test_num number of operations accounted for in the report
            @param unroll_factor number of chained operations per iteration
            @return Statement implementing the bench
        """
        # one random start value per operand, drawn inside self.init_interval
        start_values = []
        for operand_format in self.input_precisions:
            random_value = random.uniform(inf(self.init_interval),
                                          sup(self.init_interval))
            start_values.append(
                Constant(random_value, precision=operand_format))

        # operands live in local variables whose format is wrapped with
        # the "volatile" attribute
        operand_vars = []
        for position, operand_format in enumerate(self.input_precisions):
            var_name = "var_%d" % position
            volatile_format = FormatAttributeWrapper(operand_format,
                                                     ["volatile"])
            operand_vars.append(
                Variable(var_name,
                         precision=volatile_format,
                         var_type=Variable.Local))

        # printf wrapper: argument 0 is the format string, arguments 1..4
        # forward the four operands of the FunctionObject
        timing_template = ("\"%s[%s] %%lld elts computed "
                           "in %%lld cycles =>\\n     %%.3f CPE \\n\"")
        timing_format = timing_template % (
            self.bench_name,
            self.output_precision.get_display_format(),
        )
        printf_arg_map = {0: timing_format}
        for arg_index in range(4):
            printf_arg_map[arg_index + 1] = FO_Arg(arg_index)
        printf_timing_op = FunctionOperator("printf",
                                            arg_map=printf_arg_map,
                                            void_function=True)
        printf_timing_function = FunctionObject(
            "printf", [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
            ML_Void, printf_timing_op)
        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

        discard_op = FunctionOperator("(void)",
                                      arity=1,
                                      void_function=True)
        discard_function = FunctionObject("(void)", [self.output_precision],
                                          ML_Void, discard_op)

        # operand initialisation
        init_assign = metaop.Statement()
        for operand_var, start_value in zip(operand_vars, start_values):
            init_assign.push(ReferenceAssign(operand_var, start_value))

        # loop counter and bound
        loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
        test_num_cst = Constant(test_num / unroll_factor,
                                precision=ML_Int64,
                                tag="test_num")

        # chain of dependent operations: each result replaces the first
        # operand of the following operation
        chained_result = self.op_class(*operand_vars,
                                       precision=self.output_precision,
                                       unbreakable=True)
        for _ in range(unroll_factor - 1):
            chained_result = self.op_class(chained_result,
                                           *operand_vars[1:],
                                           precision=self.output_precision,
                                           unbreakable=True)
        # renormalisation
        chained_result = self.renorm_function(chained_result)

        # writing the result back into the first operand carries the
        # dependency across loop iterations
        var_assign = Statement()
        var_assign.push(ReferenceAssign(operand_vars[0], chained_result))
        final_value = operand_vars[0]

        loop_increment = 1

        loop_init = ReferenceAssign(loop_i, Constant(0, precision=ML_Int32))
        loop_cond = loop_i < test_num_cst
        loop_body = Statement(
            var_assign,
            ReferenceAssign(loop_i, loop_i + loop_increment),
        )
        test_loop = Loop(loop_init, loop_cond, loop_body)

        # timer first holds the start timestamp, then the elapsed count
        start_timer = ReferenceAssign(timer,
                                      processor.get_current_timestamp())
        stop_timer = ReferenceAssign(
            timer,
            Subtraction(processor.get_current_timestamp(),
                        timer,
                        precision=ML_Int64))
        # prevent intermediary variable simplification
        keep_result = discard_function(final_value)
        elts_computed = Constant(test_num, precision=ML_Int64)
        cycles_per_elt = Division(Conversion(timer, precision=ML_Binary64),
                                  Constant(test_num, precision=ML_Binary64),
                                  precision=ML_Binary64)
        report = printf_timing_function(final_value, elts_computed, timer,
                                        cycles_per_elt)

        return Statement(
            start_timer,
            init_assign,
            test_loop,
            stop_timer,
            keep_result,
            report,
        )
Ejemplo n.º 18
0
    def get_array_test_wrapper(self,
                               test_num,
                               tested_function,
                               table_size_offset_array,
                               input_tables,
                               output_array,
                               acc_num,
                               post_statement_generator,
                               NUM_INPUT_ARRAY=1):
        """ Build the loop running tested_function on a series of sub-arrays.

            For each test_id in [0, test_num), the sub-array length is read
            from column 0 of table_size_offset_array and its offset from
            column 1. tested_function is then called with the offset output
            pointer, the offset input pointers and the length, followed by
            the statement returned by post_statement_generator.

            @param test_num number of sub-array tests to execute
            @param tested_function callable building the tested call from
                   (output pointer, input pointer(s), length)
            @param table_size_offset_array table indexed by
                   (test_id, 0) for the size and (test_id, 1) for the offset
            @param input_tables indexable collection of input tables, the
                   first NUM_INPUT_ARRAY entries are used
            @param output_array table receiving the results
            @param acc_num variable incremented by each sub-array length
                   (converted to acc_num.precision)
            @param post_statement_generator callable invoked once with
                   (input_tables, output_array, table_size_offset_array,
                    array_offset, array_len, test_id); the statement it
                   returns is executed after each tested call
            @param NUM_INPUT_ARRAY number of input arrays forwarded to
                   tested_function
            @return Statement containing the test loop
        """
        test_id = Variable("test_id",
                           precision=ML_Int32,
                           var_type=Variable.Local)
        test_num_cst = Constant(test_num, precision=ML_Int32, tag="test_num")

        array_len = Variable("len",
                             precision=ML_UInt32,
                             var_type=Variable.Local)

        # offset of the current sub-array (column 1 of the size/offset table)
        array_offset = TableLoad(table_size_offset_array, test_id, 1)

        def shifted(base_table):
            """ address of base_table advanced by array_offset """
            return Addition(
                base_table,
                array_offset,
                precision=base_table.get_precision_as_pointer_format())

        # input pointers are built before the output pointer
        input_pointers = [
            shifted(input_tables[table_id])
            for table_id in range(NUM_INPUT_ARRAY)
        ]
        output_pointer = shifted(output_array)
        call_args = [output_pointer] + input_pointers + [array_len]
        function_call = tested_function(*call_args)

        post_statement = post_statement_generator(input_tables, output_array,
                                                  table_size_offset_array,
                                                  array_offset, array_len,
                                                  test_id)

        loop_increment = 1

        loop_init = ReferenceAssign(test_id, Constant(0, precision=ML_Int32))
        loop_cond = test_id < test_num_cst
        # length of the current sub-array (column 0 of the size/offset table)
        load_len = ReferenceAssign(
            array_len, TableLoad(table_size_offset_array, test_id, 0))
        accumulate_len = ReferenceAssign(
            acc_num,
            acc_num + Conversion(array_len, precision=acc_num.precision))
        loop_body = Statement(
            load_len,
            function_call,
            post_statement,
            accumulate_len,
            ReferenceAssign(test_id, test_id + loop_increment),
        )
        test_loop = Loop(loop_init, loop_cond, loop_body)

        # the functional loop is wrapped into a dedicated statement
        test_statement = Statement()
        test_statement.add(test_loop)

        return test_statement
Ejemplo n.º 19
0
    def generate_scheme(self):
        """ Build the scheme computing the remainder and/or quotient of x / y.

            Inputs "x" and "y" are declared with self.precision; in FULL_MODE
            an extra pointer input "quo" receives the quotient.
            What is returned by the generated code depends on self.mode:
            - QUOTIENT_MODE: the quotient is returned
            - REMAINDER_MODE: the remainder is returned
            - FULL_MODE: the quotient is stored through "quo" and the
              remainder is returned

            Special cases (NaN operand, zero or infinite x / y,
            |x| < |y * 0.5|, x == y, x == -y) are handled by early returns;
            all other inputs go through an iterative subtract-and-shift
            sequence guarded by a watchdog counter (initialised to 5000)
            which makes the generated code return -1 when it expires.
            The quotient is finally reduced modulo 2**self.quotient_size and
            given the sign of x / y.

            @return Statement implementing the operation
            @raise NotImplementedError if self.mode is not one of
                   QUOTIENT_MODE, REMAINDER_MODE or FULL_MODE
        """
        int_precision = self.precision.get_integer_format()
        # We wish to compute vx / vy
        vx = self.implementation.add_input_variable("x", self.precision, interval=self.input_intervals[0])
        vy = self.implementation.add_input_variable("y", self.precision, interval=self.input_intervals[1])
        if self.mode is FULL_MODE:
            # quotient is returned through a pointer argument
            quo = self.implementation.add_input_variable("quo", ML_Pointer_Format(int_precision))

        i = Variable("i", precision=int_precision, var_type=Variable.Local)
        q = Variable("q", precision=int_precision, var_type=Variable.Local)

        # shorthands building integer / floating-point constants
        CI = lambda v: Constant(v, precision=int_precision)
        CF = lambda v: Constant(v, precision=self.precision)

        vx_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="vx_subnormal")
        vy_subnormal = Test(vy, specifier=Test.IsSubnormal, tag="vy_subnormal")

        DELTA_EXP = self.precision.get_mantissa_size()
        scale_factor = Constant(2.0**DELTA_EXP, precision=self.precision)
        # NOTE(review): inv_scale_factor, normalized_vx and normalized_vy are
        #               not referenced anywhere else in this method
        inv_scale_factor = Constant(2.0**-DELTA_EXP, precision=self.precision)

        normalized_vx = Select(vx_subnormal, vx * scale_factor, vx, tag="scaled_vx")
        normalized_vy = Select(vy_subnormal, vy * scale_factor, vy, tag="scaled_vy")

        real_ex = ExponentExtraction(vx, tag="real_ex", precision=int_precision)
        real_ey = ExponentExtraction(vy, tag="real_ey", precision=int_precision)

        # if real_e<x/y> is +1023 then it may Overflow in -real_ex for ExponentInsertion
        # which only supports downto -1022 before falling into subnormal numbers (which are
        # not supported by ExponentInsertion)
        # => each exponent is split in two halves, applied one after the other
        real_ex_h0 = real_ex / 2
        real_ex_h1 = real_ex - real_ex_h0

        real_ey_h0 = real_ey / 2
        real_ey_h1 = real_ey - real_ey_h0

        EI = lambda v: ExponentInsertion(v, precision=self.precision)

        mx = Abs((vx * EI(-real_ex_h0)) * EI(-real_ex_h1), tag="mx")
        my = Abs((vy * EI(-real_ey_h0)) * EI(-real_ey_h1), tag="pre_my")

        # scale_ey is used to regain the unscaling of mx in the first loop
        # if real_ey >= real_ex, the first loop is never executed
        # so a different scaling is required
        mx_unscaling = Select(real_ey < real_ex, real_ey, real_ex)
        ey_half0 = (mx_unscaling) / 2
        ey_half1 = (mx_unscaling) - ey_half0

        scale_ey_half0 = ExponentInsertion(ey_half0, precision=self.precision, tag="scale_ey_half0")
        scale_ey_half1 = ExponentInsertion(ey_half1, precision=self.precision, tag="scale_ey_half1")

        # if vy is subnormal we want to normalize it
        # (an earlier variant also required vx not to be subnormal:
        #  LogicalAnd(vy_subnormal, LogicalNot(vx_subnormal)))
        normal_cond = vy_subnormal
        my = Select(normal_cond, Abs(MantissaExtraction(vy * scale_factor)), my, tag="my")


        # vx / vy = vx * 2^-ex * 2^(ex-ey) / (vy * 2^-ey)
        # vx % vy

        # NOTE(review): post_mx is declared but not used below
        post_mx = Variable("post_mx", precision=self.precision, var_type=Variable.Local)

        # scaling for half comparison: the test is mx > |vy * 0.5| when vy is
        # normal, and (mx * 2.0) > |vy| when vy is subnormal
        VY_SCALING = Select(vy_subnormal, 1.0, 0.5, precision=self.precision)
        VX_SCALING = Select(vy_subnormal, 2.0, 1.0, precision=self.precision)

        def LogicalXor(a, b):
            """ exclusive or built from and / or / not nodes """
            return LogicalOr(LogicalAnd(a, LogicalNot(b)), LogicalAnd(LogicalNot(a), b))

        # remainder takes the sign of vx, quotient the sign of vx / vy
        rem_sign = Select(vx < 0, CF(-1), CF(1), precision=self.precision, tag="rem_sign")
        quo_sign = Select(LogicalXor(vx <0, vy < 0), CI(-1), CI(1), precision=int_precision, tag="quo_sign")

        # counter shared by the three loops below, bounding their total
        # number of iterations
        loop_watchdog = Variable("loop_watchdog", precision=ML_Int32, var_type=Variable.Local)

        loop = Statement(
            real_ex, real_ey, mx, my, loop_watchdog,
            ReferenceAssign(loop_watchdog, 5000),
            ReferenceAssign(q, CI(0)),
            # step 1: one quotient bit per unit of exponent difference
            Loop(
                ReferenceAssign(i, CI(0)), i < (real_ex - real_ey),
                Statement(
                    ReferenceAssign(i, i+CI(1)),
                    ReferenceAssign(q, ((q << 1) + Select(mx >= my, CI(1), CI(0))).modify_attributes(tag="step1_q")),
                    ReferenceAssign(mx, (CF(2) * (mx - Select(mx >= my, my, CF(0)))).modify_attributes(tag="step1_mx")),
                    # loop watchdog
                    ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                    ConditionBlock(loop_watchdog < 0, Return(-1)),
                ),
            ),
            # unscaling remainder
            ReferenceAssign(mx, ((mx * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem")),
            ReferenceAssign(my, ((my * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem_my")),
            # step 2: halve my until it is no longer greater than |vy|
            Loop(
                Statement(), (my > Abs(vy)),
                Statement(
                    ReferenceAssign(q, ((q << 1) + Select(mx >= Abs(my), CI(1), CI(0))).modify_attributes(tag="step2_q")),
                    ReferenceAssign(mx, (mx - Select(mx >= Abs(my), Abs(my), CF(0))).modify_attributes(tag="step2_mx")),
                    ReferenceAssign(my, (my * 0.5).modify_attributes(tag="step2_my")),
                    # loop watchdog
                    ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                    ConditionBlock(loop_watchdog < 0, Return(-1)),
                ),
            ),
            ReferenceAssign(q, q << 1),
            # step 3: subtract |vy| while mx is greater than |vy|
            Loop(
                ReferenceAssign(i, CI(0)), mx > Abs(vy),
                Statement(
                    ReferenceAssign(q, (q + Select(mx > Abs(vy), CI(1), CI(0))).modify_attributes(tag="step3_q")),
                    ReferenceAssign(mx, (mx - Select(mx > Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="step3_mx")),
                    # loop watchdog
                    ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                    ConditionBlock(loop_watchdog < 0, Return(-1)),
                ),
            ),
            ReferenceAssign(q, q + Select(mx >= Abs(vy), CI(1), CI(0))),
            ReferenceAssign(mx, (mx - Select(mx >= Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="pre_half_mx")),
            ConditionBlock(
                # actual comparison is mx > | abs(vy * 0.5) |; to avoid rounding
                # effects when vy is subnormal, both sides are multiplied by 2
                # in that case (see VX_SCALING / VY_SCALING)
                ((mx * VX_SCALING) > Abs(vy * VY_SCALING)).modify_attributes(tag="half_test"),
                Statement(
                    ReferenceAssign(q, q + CI(1)),
                    ReferenceAssign(mx, (mx - Abs(vy)))
                )
            ),
            ConditionBlock(
                # if the remainder is exactly half of |vy| (the divisor)
                # we need to make sure the quotient is even
                LogicalAnd(
                    Equal(mx * VX_SCALING, Abs(vy * VY_SCALING)),
                    Equal(Modulo(q, CI(2)), CI(1)),
                ),
                Statement(
                    ReferenceAssign(q, q + CI(1)),
                    ReferenceAssign(mx, (mx - Abs(vy)))
                )
            ),
            ReferenceAssign(mx, rem_sign * mx),
            # quotient is reduced modulo 2**quotient_size (on its unsigned
            # cast) before its sign is applied
            ReferenceAssign(q,
                Modulo(TypeCast(q, precision=self.precision.get_unsigned_integer_format()), Constant(2**self.quotient_size, precision=self.precision.get_unsigned_integer_format()), tag="mod_q")
            ),
            ReferenceAssign(q, quo_sign * q),
        )

        # NOTES: Warning QuotientReturn must always precede RemainderReturn
        if self.mode is QUOTIENT_MODE:
            # only the quotient is returned, remainder emission is a no-op
            QuotientReturn = Return
            RemainderReturn = lambda _: Statement()
        elif self.mode is REMAINDER_MODE:
            # only the remainder is returned, quotient emission is a no-op
            QuotientReturn = lambda _: Statement()
            RemainderReturn = Return
        elif self.mode is FULL_MODE:
            # quotient is stored through the "quo" pointer, remainder returned
            QuotientReturn = lambda v: ReferenceAssign(Dereference(quo, precision=int_precision), v)
            RemainderReturn = Return
        else:
            # NotImplemented is a comparison sentinel, not an exception:
            # raising it would trigger a TypeError
            raise NotImplementedError(
                "unsupported mode: {}".format(self.mode))

        # quotient invalid value
        QUO_INVALID_VALUE = 0

        mod_scheme = Statement(
            # x or y is NaN, a NaN is returned
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsNaN), Test(vy, specifier=Test.IsNaN)),
                Statement(
                    QuotientReturn(QUO_INVALID_VALUE),
                    RemainderReturn(FP_QNaN(self.precision))
                ),
            ),
            # y is zero, a NaN is returned
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Statement(
                    QuotientReturn(QUO_INVALID_VALUE),
                    RemainderReturn(FP_QNaN(self.precision))
                ),
            ),
            # x is zero, x is returned
            ConditionBlock(
                Test(vx, specifier=Test.IsZero),
                Statement(
                    QuotientReturn(0),
                    RemainderReturn(vx)
                ),
            ),
            # x is infinite, a NaN is returned
            ConditionBlock(
                Test(vx, specifier=Test.IsInfty),
                Statement(
                    QuotientReturn(QUO_INVALID_VALUE),
                    RemainderReturn(FP_QNaN(self.precision))
                )
            ),
            # y is infinite, x is returned
            ConditionBlock(
                Test(vy, specifier=Test.IsInfty),
                Statement(
                    QuotientReturn(0),
                    RemainderReturn(vx),
                )
            ),
            # |x| < |y / 2|, x is returned with a zero quotient
            ConditionBlock(
                Abs(vx) < Abs(vy * 0.5),
                Statement(
                    QuotientReturn(0),
                    RemainderReturn(vx),
                )
            ),
            ConditionBlock(
                Equal(vx, vy),
                Statement(
                    QuotientReturn(1),
                    # 0 with the same sign as x
                    RemainderReturn(vx - vx),
                ),
            ),
            ConditionBlock(
                Equal(vx, -vy),
                Statement(
                    # quotient is -1
                    QuotientReturn(-1),
                    # 0 with the same sign as x
                    RemainderReturn(vx - vx),
                ),
            ),
            loop,
            QuotientReturn(q),
            RemainderReturn(mx),
        )

        # NOTE(review): quo_scheme is built but never returned nor used;
        #               only mod_scheme is returned
        quo_scheme = Statement(
            # x or y is NaN, a NaN is returned
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsNaN), Test(vy, specifier=Test.IsNaN)),
                Return(QUO_INVALID_VALUE),
            ),
            #
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Return(QUO_INVALID_VALUE),
            ),
            ConditionBlock(
                Test(vx, specifier=Test.IsZero),
                Return(0),
            ),
            ConditionBlock(
                Test(vx, specifier=Test.IsInfty),
                Return(QUO_INVALID_VALUE),
            ),
            ConditionBlock(
                Test(vy, specifier=Test.IsInfty),
                Return(QUO_INVALID_VALUE),
            ),
            ConditionBlock(
                Abs(vx) < Abs(vy * 0.5),
                Return(0),
            ),
            ConditionBlock(
                Equal(vx, vy),
                Return(1),
            ),
            ConditionBlock(
                Equal(vx, -vy),
                Return(-1),
            ),
            loop,
            Return(q),

        )

        return mod_scheme
Ejemplo n.º 20
0
def generate_pipeline_stage(entity, reset=False, recirculate=False, one_process_per_stage=True, synchronous_reset=True, negate_reset=False):
    """ Insert the clocked processes implementing an entity's pipeline.

        Every output assignment and every recirculation control signal of
        the entity is retimed; the register assignments collected for each
        stage are then wrapped in a clock-edge predicate and added to the
        entity, either as one process per stage or as a single process.

        :param entity: input entity to pipeline
        :type entity: ML_EntityBasis
        :param reset: if set, each pipeline register gets a reset-to-zero
            assignment
        :type reset: bool
        :param recirculate: if set, stage registers are only updated when
            the stage recirculation signal equals 0 (NOTE(review): this is
            only evaluated when reset is also set)
        :type recirculate: bool
        :param one_process_per_stage: generate a separate process for each
            pipeline stage (else a unique process gathers all the stages)
        :type one_process_per_stage: bool
        :param synchronous_reset: if set the reset is tested inside the
            clocked block, else outside of it with the reset signal added
            to the process sensitivity list
        :type synchronous_reset: bool
        :param negate_reset: if set the reset is active when the reset
            signal is 0 (else 1)
        :type negate_reset: bool
        :return: number of pipeline stages
        :rtype: int
    """
    retime_map = RetimeMap()
    for output in entity.implementation.get_output_assign():
        Log.report(Log.Verbose, "generating pipeline from output {} ", output)
        retime_op(output, retime_map)
    for recirculate_stage in entity.recirculate_signal_map:
        recirculate_ctrl = entity.recirculate_signal_map[recirculate_stage]
        Log.report(Log.Verbose, "generating pipeline from recirculation control signal {}", recirculate_ctrl)
        retime_op(recirculate_ctrl, retime_map)

    # containers used when a single process gathers every stage
    process_statement = Statement()
    clk = entity.get_clk_input()
    clock_statement = Statement()
    global_reset_statement = Statement()

    def reset_is_active():
        """ predicate: reset signal is at its active level """
        return Comparison(
            entity.reset_signal,
            Constant(0 if negate_reset else 1, precision=ML_StdLogic),
            specifier=Comparison.Equal, precision=ML_Bool
        )

    def clock_rising_edge():
        """ predicate: event on clk and clk equal to 1 """
        return LogicalAnd(
            Event(clk, precision=ML_Bool),
            Comparison(
                clk,
                Constant(1, precision=ML_StdLogic),
                specifier=Comparison.Equal,
                precision=ML_Bool
            ),
            precision=ML_Bool
        )

    Log.report(Log.Info, "design has {} flip-flop(s).", retime_map.register_count)

    # first clock Process in generation order: it is the one receiving
    # the operations which must be generated outside of the processes
    first_process = False
    for stage_id in sorted(retime_map.stage_forward.keys()):
        stage_assigns = retime_map.stage_forward[stage_id]
        stage_statement = Statement(*stage_assigns)

        if reset:
            # every register written by this stage is reset to zero
            reset_statement = Statement()
            for assign in stage_assigns:
                register = assign.get_input(0)
                zero = Constant(0, precision=register.get_precision())
                reset_statement.push(ReferenceAssign(register, zero))

            if recirculate:
                # registers are only updated when the stage recirculation
                # signal is 0
                stage_ctrl = entity.get_recirculate_signal(stage_id)
                no_recirculation = Comparison(
                    stage_ctrl,
                    Constant(0, precision=stage_ctrl.get_precision()),
                    specifier=Comparison.Equal,
                    precision=ML_Bool
                )
                stage_statement = ConditionBlock(no_recirculation,
                                                 stage_statement)

            if not synchronous_reset:
                # asynchronous reset lives outside of the clocked block:
                # it is gathered here for the single-process layout
                global_reset_statement.add(reset_statement)
            else:
                # clocked reset: reset has priority over register update
                stage_statement = ConditionBlock(
                    reset_is_active(),
                    reset_statement,
                    stage_statement
                )

        # To meet simulation / synthesis tools, a single clock predicate
        # block is built per stage
        clock_block = ConditionBlock(clock_rising_edge(), stage_statement)

        if not one_process_per_stage:
            clock_statement.add(clock_block)
            continue

        if reset and not synchronous_reset:
            # asynchronous reset wraps the clocked block and the process
            # is also sensitive to the reset signal
            clock_block = ConditionBlock(
                reset_is_active(),
                reset_statement,
                clock_block
            )
            clock_process = Process(clock_block, sensibility_list=[clk, entity.reset_signal])
        else:
            # no reset, or synchronous reset (already inside clock_block)
            clock_process = Process(clock_block, sensibility_list=[clk])
        entity.implementation.add_process(clock_process)

        if not first_process:
            first_process = clock_process

    if not one_process_per_stage:
        # a unique process gathers the clocked blocks of every stage
        process_statement.add(clock_statement)
        sensitivity = [clk]
        if not synchronous_reset:
            process_statement.add(global_reset_statement)
            sensitivity.append(entity.reset_signal)
        pipeline_process = Process(process_statement, sensibility_list=sensitivity)
        entity.implementation.add_process(pipeline_process)
        first_process = pipeline_process

    # operations which must be pre-computed are attached to the first process
    for op in retime_map.pre_statement:
        first_process.add_to_pre_statement(op)

    stage_num = len(retime_map.stage_forward.keys())
    Log.report(Log.Info, "there are {} pipeline stage(s)", stage_num)
    return stage_num
Ejemplo n.º 21
0
    def generate_auto_test(self,
                           test_num=10,
                           test_range=Interval(-1.0, 1.0),
                           debug=False,
                           time_step=10):
        """ time_step: duration of a stage (in ns) """
        # instanciating tested component
        # map of input_tag -> input_signal and output_tag -> output_signal
        io_map = {}
        # map of input_tag -> input_signal, excludind commodity signals
        # (e.g. clock and reset)
        input_signals = {}
        # map of output_tag -> output_signal
        output_signals = {}
        # excluding clock and reset signals from argument list
        # reduced_arg_list = [input_port for input_port in self.implementation.get_arg_list() if not input_port.get_tag() in ["clk", "reset"]]
        reduced_arg_list = self.implementation.get_arg_list()
        for input_port in reduced_arg_list:
            input_tag = input_port.get_tag()
            input_signal = Signal(input_tag + "_i",
                                  precision=input_port.get_precision(),
                                  var_type=Signal.Local)
            io_map[input_tag] = input_signal
            if not input_tag in ["clk", "reset"]:
                input_signals[input_tag] = input_signal
        for output_port in self.implementation.get_output_port():
            output_tag = output_port.get_tag()
            output_signal = Signal(output_tag + "_o",
                                   precision=output_port.get_precision(),
                                   var_type=Signal.Local)
            io_map[output_tag] = output_signal
            output_signals[output_tag] = output_signal

        # building list of test cases
        tc_list = []

        self_component = self.implementation.get_component_object()
        self_instance = self_component(io_map=io_map, tag="tested_entity")
        test_statement = Statement()

        # initializing random test case generator
        self.init_test_generator()

        # Appending standard test cases if required
        if self.auto_test_std:
            tc_list += self.standard_test_cases

        for i in range(test_num):
            input_values = self.generate_test_case(input_signals, io_map, i,
                                                   test_range)
            tc_list.append((input_values, None))

        def compute_results(tc):
            """ update test case with output values if required """
            input_values, output_values = tc
            if output_values is None:
                return input_values, self.numeric_emulate(input_values)
            else:
                return tc

        # filling output values
        tc_list = [compute_results(tc) for tc in tc_list]

        for input_values, output_values in tc_list:
            input_msg = ""

            # Adding input setting
            for input_tag in input_values:
                input_signal = io_map[input_tag]
                # FIXME: correct value generation depending on signal precision
                input_value = input_values[input_tag]
                test_statement.add(
                    ReferenceAssign(
                        input_signal,
                        Constant(input_value,
                                 precision=input_signal.get_precision())))
                value_msg = input_signal.get_precision().get_cst(
                    input_value, language=VHDL_Code).replace('"', "'")
                value_msg += " / " + hex(input_signal.get_precision(
                ).get_base_format().get_integer_coding(input_value))
                input_msg += " {}={} ".format(input_tag, value_msg)
            test_statement.add(Wait(time_step * self.stage_num))
            # Adding output value comparison
            for output_tag in output_signals:
                output_signal = output_signals[output_tag]
                output_value = Constant(
                    output_values[output_tag],
                    precision=output_signal.get_precision())
                output_precision = output_signal.get_precision()
                expected_dec = output_precision.get_cst(
                    output_values[output_tag],
                    language=VHDL_Code).replace('"', "'")
                expected_hex = " / " + hex(
                    output_precision.get_base_format().get_integer_coding(
                        output_values[output_tag]))
                value_msg = "{} / {}".format(expected_dec, expected_hex)

                test_pass_cond = Comparison(output_signal,
                                            output_value,
                                            specifier=Comparison.Equal,
                                            precision=ML_Bool)

                test_statement.add(
                    ConditionBlock(
                        LogicalNot(test_pass_cond, precision=ML_Bool),
                        Report(
                            Concatenation(
                                " result for {}: ".format(output_tag),
                                Conversion(TypeCast(
                                    output_signal,
                                    precision=ML_StdLogicVectorFormat(
                                        output_signal.get_precision(
                                        ).get_bit_size())),
                                           precision=ML_String),
                                precision=ML_String))))
                test_statement.add(
                    Assert(
                        test_pass_cond,
                        "\"unexpected value for inputs {input_msg}, output {output_tag}, expecting {value_msg}, got: \""
                        .format(input_msg=input_msg,
                                output_tag=output_tag,
                                value_msg=value_msg),
                        severity=Assert.Failure))

        testbench = CodeEntity("testbench")
        test_process = Process(
            test_statement,
            # end of test
            Assert(Constant(0, precision=ML_Bool),
                   " \"end of test, no error encountered \"",
                   severity=Assert.Failure))

        testbench_scheme = Statement(self_instance, test_process)

        if self.pipelined:
            half_time_step = time_step / 2
            assert (half_time_step * 2) == time_step
            # adding clock process for pipelined bench
            clk_process = Process(
                Statement(
                    ReferenceAssign(io_map["clk"],
                                    Constant(1, precision=ML_StdLogic)),
                    Wait(half_time_step),
                    ReferenceAssign(io_map["clk"],
                                    Constant(0, precision=ML_StdLogic)),
                    Wait(half_time_step),
                ))
            testbench_scheme.push(clk_process)

        testbench.add_process(testbench_scheme)

        return [testbench]
Ejemplo n.º 22
0
    def generate_auto_test(self,
                           test_num=10,
                           test_range=Interval(-1.0, 1.0),
                           debug=False,
                           time_step=10):
        """ Build a testbench entity exercising the implemented component.

        The bench drives the standard test cases (when self.auto_test_std is
        set) followed by test_num generated cases; expected outputs that are
        not provided are obtained from self.numeric_emulate.

        Args:
            test_num: number of generated test cases
            test_range: interval forwarded to self.generate_test_case
            debug: not used by this method
            time_step: duration of a stage (in ns)

        Returns:
            a single-element list containing the testbench CodeEntity
        """
        # tag -> local signal, for every port of the tested entity
        io_map = {}
        # tag -> local signal, data inputs only (clk and reset are left out)
        input_signals = {}
        # tag -> local signal, output ports
        output_signals = {}

        # NOTE: clk and reset are still part of io_map, only input_signals
        #       filters them out
        for port in self.implementation.get_arg_list():
            tag = port.get_tag()
            signal = Signal(tag + "_i",
                            precision=port.get_precision(),
                            var_type=Signal.Local)
            io_map[tag] = signal
            if tag not in ("clk", "reset"):
                input_signals[tag] = signal

        for port in self.implementation.get_output_port():
            tag = port.get_tag()
            signal = Signal(tag + "_o",
                            precision=port.get_precision(),
                            var_type=Signal.Local)
            io_map[tag] = signal
            output_signals[tag] = signal

        # instantiating the tested component on the local signals
        component = self.implementation.get_component_object()
        tested_instance = component(io_map=io_map, tag="tested_entity")
        stimuli = Statement()

        # initializing random test case generator
        self.init_test_generator()

        # standard test cases (if required) come first, generated ones after;
        # generated cases carry None as output until emulation fills it in
        pending_cases = []
        if self.auto_test_std:
            pending_cases.extend(self.standard_test_cases)
        for case_index in range(test_num):
            stimulus = self.generate_test_case(input_signals, io_map,
                                               case_index, test_range)
            pending_cases.append((stimulus, None))

        # filling missing output values through numeric emulation
        completed_cases = []
        for case in pending_cases:
            stimulus, expected = case
            if expected is None:
                case = (stimulus, self.numeric_emulate(stimulus))
            completed_cases.append(case)

        # one test statement per (inputs, expected outputs) pair
        for stimulus, expected in completed_cases:
            stimuli.add(
                self.implement_test_case(io_map, stimulus, output_signals,
                                         expected, time_step))

        testbench = CodeEntity("testbench")
        # end of test: assertion on a constant-0 condition placed after the
        # last test case
        end_of_test = Assert(Constant(0, precision=ML_Bool),
                             " \"end of test, no error encountered \"",
                             severity=Assert.Failure)
        test_process = Process(stimuli, end_of_test)

        testbench_scheme = Statement(tested_instance, test_process)

        if self.pipelined:
            # clock process for pipelined bench: clk is set to 1 then to 0,
            # each level being held for half a time step
            half_period = time_step / 2
            assert (half_period * 2) == time_step
            clk_signal = io_map["clk"]
            clk_high = ReferenceAssign(clk_signal,
                                       Constant(1, precision=ML_StdLogic))
            hold_high = Wait(half_period)
            clk_low = ReferenceAssign(clk_signal,
                                      Constant(0, precision=ML_StdLogic))
            hold_low = Wait(half_period)
            testbench_scheme.push(
                Process(Statement(clk_high, hold_high, clk_low, hold_low)))

        testbench.add_process(testbench_scheme)

        return [testbench]
Ejemplo n.º 23
0
    def generate_bench_wrapper(self,
                               test_num=1,
                               loop_num=100000,
                               test_ranges=[Interval(-1.0, 1.0)],
                               debug=False):
        """ Build the "bench_wrapper" function timing the tested function.

        The generated function runs the array test loop loop_num times
        between two timestamp reads, prints the element count, the elapsed
        time and their ratio, then returns that ratio.

        Args:
            test_num: number of randomly sized sub-arrays
            loop_num: number of iterations of the timed loop
            test_ranges: intervals paired (zip) with the input precisions to
                build the random value generators
            debug: not used by this method

        Returns:
            FunctionGroup containing the single bench_wrapper CodeFunction
        """
        # interval from which each sub-array length is randomly chosen
        index_range = self.test_index_range

        auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        # NOTE: the two function objects below are built but not referenced
        #       in the rest of this method
        failure_report_function = FunctionObject(
            "report_failure", [], ML_Void,
            FunctionOperator("report_failure"))
        printf_success_function = FunctionObject(
            "printf", [], ML_Void,
            FunctionOperator(
                "printf",
                arg_map={0: "\"test successful %s\\n\"" % function_name},
                void_function=True))

        output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

        test_total = test_num

        # number of arrays expected as inputs for tested_function
        input_array_count = 1
        # position of the input array in tested_function operands (generally
        # equal to 1 as the 0-th operand is often the destination array)
        input_index_offset = 1

        # sizes of the standard test arrays first, then one random size per
        # generated sub-array
        table_sizes = [
            len(std_table) for std_table in self.standard_test_cases
        ]
        for _ in range(test_num):
            table_sizes.append(
                random.randrange(index_range[0], index_range[1] + 1))

        # start offset of each of the first test_total sub-arrays
        table_offsets = []
        next_offset = 0
        for row_id in range(test_total):
            table_offsets.append(next_offset)
            next_offset += table_sizes[row_id]

        def size_and_offset(row_id):
            """ (size, offset) pair stored in row row_id """
            return (table_sizes[row_id], table_offsets[row_id])

        table_size_offset_array = generate_2d_table(
            test_total,
            2,
            ML_UInt32,
            self.uniquify_name("table_size_array"),
            value_gen=size_and_offset)

        total_elt_count = sum(table_sizes)

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [self.get_input_precision(1).get_data_precision()]
        rng_map = []
        for precision, test_range in zip(input_precisions, test_ranges):
            rng_map.append(
                get_precision_rng(precision, inf(test_range),
                                  sup(test_range)))

        # generated tables of inputs
        input_tables = []
        for table_id in range(input_array_count):

            def draw_input(_):
                """ new random value rounded to the table's precision """
                rounder = input_precisions[table_id].round_sollya_object
                return rounder(rng_map[table_id].get_new_value(), sollya.RN)

            input_tables.append(
                generate_1d_table(
                    total_elt_count,
                    self.get_input_precision(input_index_offset +
                                             table_id).get_data_precision(),
                    self.uniquify_name("input_table_arg%d" % table_id),
                    value_gen=draw_input))

        # output array, declared empty and non-const
        output_array = generate_1d_table(
            total_elt_count,
            output_precision,
            self.uniquify_name("output_array"),
            value_gen=(lambda _: None),
            const=False,
            empty=True)

        # accumulated number of elements
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        def empty_post_statement_gen(input_tables, output_array,
                                     table_size_offset_array, array_offset,
                                     array_len, test_id):
            """ no statement is added after each sub-array execution """
            return Statement()

        test_loop = self.get_array_test_wrapper(test_total, tested_function,
                                                table_size_offset_array,
                                                input_tables, output_array,
                                                acc_num,
                                                empty_post_statement_gen)

        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
        timing_format = (
            "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\""
            % function_name)
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0: timing_format,
                1: FO_Arg(0),
                2: FO_Arg(1),
                3: FO_Arg(2)
            },
            void_function=True)
        printf_timing_function = FunctionObject(
            "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void,
            printf_timing_op)

        vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
        loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")

        # bench measure: elapsed time divided by number of elements
        cpe_measure = Division(
            Conversion(timer, precision=ML_Binary64),
            Conversion(acc_num, precision=ML_Binary64),
            precision=ML_Binary64,
            tag="cpe_measure",
        )

        # steps of the bench scheme, listed in execution order
        init_timestamp = self.processor.get_init_timestamp()
        start_timer = ReferenceAssign(timer,
                                      self.processor.get_current_timestamp())
        reset_elt_count = ReferenceAssign(acc_num, 0)
        timed_loop = Loop(
            ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
            vj < loop_num_cst,
            Statement(test_loop, ReferenceAssign(vj, vj + 1)))
        # timer is overwritten with (current timestamp - start timestamp)
        stop_timer = ReferenceAssign(
            timer,
            Subtraction(self.processor.get_current_timestamp(),
                        timer,
                        precision=ML_Int64))
        report_timing = printf_timing_function(
            Conversion(acc_num, precision=ML_Int64),
            timer,
            cpe_measure,
        )

        auto_test.set_scheme(
            Statement(
                init_timestamp,
                start_timer,
                reset_elt_count,
                timed_loop,
                stop_timer,
                report_timing,
                Return(cpe_measure),
            ))
        return FunctionGroup([auto_test])
Ejemplo n.º 24
0
# registering ssa translation pass
Log.report(LOG_PASS_INFO, "Registering ssa translation pass")
Pass.register(Pass_SSATranslate)
# registering basic-block simplification pass
Log.report(LOG_PASS_INFO, "Registering basic-block simplification pass")
Pass.register(Pass_BBSimplification)

if __name__ == "__main__":
    # small hand-built program, only constructed when this module is executed
    # as a script: four basic blocks and two variables
    bb_root, bb_1, bb_2, bb_3 = (
        BasicBlock(tag=bb_tag)
        for bb_tag in ("bb_root", "bb_1", "bb_2", "bb_3"))
    var_x, var_y = (
        Variable(var_name, precision=None) for var_name in ("x", "y"))

    # bb_root: x <- 1, y <- 2, then conditional branch on (x > y) with
    # bb_1 and bb_2 as targets
    bb_root.add(ReferenceAssign(var_x, 1))
    bb_root.add(ReferenceAssign(var_y, 2))
    bb_root.add(ConditionalBranch(var_x > var_y, bb_1, bb_2))

    # bb_1: x <- 2, then branch to bb_3
    bb_1.add(ReferenceAssign(var_x, 2))
    bb_1.add(UnconditionalBranch(bb_3))

    # bb_2: y <- 3, then branch to bb_3
    bb_2.add(ReferenceAssign(var_y, 3))
    bb_2.add(UnconditionalBranch(bb_3))

    # bb_3: y <- x
    bb_3.add(ReferenceAssign(var_y, var_x))

    # gathering every block into a single list tagged "main"
    program_bb_list = BasicBlockList(tag="main")
    for bb in (bb_root, bb_1, bb_2, bb_3):
        program_bb_list.add(bb)
Ejemplo n.º 25
0
    def generate_array_check_loop(self, input_tables, output_array,
                                  table_size_offset_array, array_offset,
                                  array_len, test_id):
        """ Build the loop checking one sub-array of results.

        For each index j in [0, array_len), the result stored in
        output_array at (array_offset + j) is submitted, together with the
        matching expected values, to self.accuracy.get_output_check_test.
        When that condition holds, the inputs, the result and the expected
        values are printed; unless self.break_error is set, the generated
        code then also returns 1.

        Args:
            input_tables: sequence of input tables (one per input array)
            output_array: table holding the results to check
            table_size_offset_array: table forwarded to
                self.generate_expected_table
            array_offset: start offset of the checked sub-array in the tables
            array_len: number of entries of the checked sub-array
            test_id: not used by this method

        Returns:
            the Loop node implementing the check
        """
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_input_function = self.get_printf_input_function()

        # generate the expected table for the whole multi-array
        expected_table = self.generate_expected_table(input_tables,
                                                      table_size_offset_array)

        # inputs for the (vj)-th entry of the sub-array
        local_inputs = tuple(
            TableLoad(input_table, array_offset + vj)
            for input_table in input_tables)
        # expected values for the (vj)-th entry of the sub-array
        expected_values = [
            TableLoad(expected_table, array_offset + vj, i)
            for i in range(self.accuracy.get_num_output_value())
        ]
        # local result for the (vj)-th entry of the sub-array
        local_result = TableLoad(output_array, array_offset + vj)

        # statements executed when the check condition holds: print the entry
        # index, its inputs and result, then the expected values.
        # Both configurations print expected_values (the break_error variant
        # previously referenced an undefined name and raised NameError).
        report_steps = [
            printf_input_function(*((vj, ) + local_inputs +
                                    (local_result, ))),
            self.accuracy.get_output_print_call(self.function_name,
                                                expected_values),
        ]
        if not self.break_error:
            # without break_error the generated code returns 1 right after
            # the report; with it the loop carries on with the next entry
            report_steps.append(Return(Constant(1, precision=ML_Int32)))
        return_statement_break = Statement(*report_steps)

        # loop implementation to check sub-array array_offset
        # results validity
        check_array_loop = Loop(
            ReferenceAssign(vj, 0), vj < array_len,
            Statement(
                ConditionBlock(
                    self.accuracy.get_output_check_test(
                        local_result, expected_values),
                    return_statement_break),
                ReferenceAssign(vj, vj + 1),
            ))
        return check_array_loop