예제 #1
0
def _make_thread_func(name, iet, root, threads, sregistry):
    # Create the SharedData, that is the data structure that will be used by the
    # main thread to pass information dows to the child thread(s)
    required, parameters, dynamic_parameters = diff_parameters(iet, root)
    parameters = sorted(parameters, key=lambda i: i.is_Function)  # Allow casting
    sdata = SharedData(name=sregistry.make_name(prefix='sdata'), npthreads=threads.size,
                       fields=required, dynamic_fields=dynamic_parameters)

    sbase = sdata.symbolic_base
    sid = sdata.symbolic_id

    # Create a Callable to initialize `sdata` with the known const values
    iname = 'init_%s' % sdata.dtype._type_.__name__
    ibody = [DummyExpr(FieldFromPointer(i._C_name, sbase), i._C_symbol)
             for i in parameters]
    ibody.extend([
        BlankLine,
        DummyExpr(FieldFromPointer(sdata._field_id, sbase), sid),
        DummyExpr(FieldFromPointer(sdata._field_flag, sbase), 1)
    ])
    iparameters = parameters + [sdata, sid]
    isdata = Callable(iname, ibody, 'void', iparameters, 'static')

    # Prepend the SharedData fields available upon thread activation
    preactions = [DummyExpr(i, FieldFromPointer(i.name, sbase))
                  for i in dynamic_parameters]

    # Append the flag reset
    postactions = [List(body=[
        BlankLine,
        DummyExpr(FieldFromPointer(sdata._field_flag, sbase), 1)
    ])]

    iet = List(body=preactions + [iet] + postactions)

    # The thread has work to do when it receives the signal that all locks have
    # been set to 0 by the main thread
    iet = Conditional(CondEq(FieldFromPointer(sdata._field_flag, sbase), 2), iet)

    # The thread keeps spinning until the alive flag is set to 0 by the main thread
    iet = While(CondNe(FieldFromPointer(sdata._field_flag, sbase), 0), iet)

    # pthread functions expect exactly one argument, a void*, and must return void*
    tretval = 'void*'
    tparameter = VoidPointer('_%s' % sdata.name)

    # Unpack `sdata`
    unpack = [PointerCast(sdata, tparameter), BlankLine]
    for i in parameters:
        if i.is_AbstractFunction:
            unpack.extend([Dereference(i, sdata), PointerCast(i)])
        else:
            unpack.append(DummyExpr(i, FieldFromPointer(i.name, sbase)))
    unpack.append(DummyExpr(sid, FieldFromPointer(sdata._field_id, sbase)))
    unpack.append(BlankLine)
    iet = List(body=unpack + [iet, BlankLine, Return(Macro('NULL'))])

    tfunc = ThreadFunction(name, iet, tretval, tparameter, 'static')

    return tfunc, isdata, sdata
예제 #2
0
def opsit(trees, count, name_to_ops_dat, block, dims):
    """
    Given an affine tree, generate a Callable representing an OPS Kernel.

    Parameters
    ----------
    tree : IterationTree
        IterationTree containing the loop to extract into an OPS Kernel
    count : int
        Generated kernel counters
    """
    node_factory = OPSNodeFactory()
    expressions = []

    expressions.extend(
        *[FindNodes(Expression).visit(tree.inner) for tree in trees])

    ops_expressions = [
        Expression(make_ops_ast(expr.expr, node_factory))
        for expr in expressions
    ]

    parameters = sorted(node_factory.ops_params,
                        key=lambda i: (i.is_Constant, i.name))

    stencil_arrays_initializations = []
    par_to_ops_stencil = {}

    for p in parameters:
        if isinstance(p, OpsAccessible):
            stencil, initialization = to_ops_stencil(
                p, node_factory.ops_args_accesses[p])

            par_to_ops_stencil[p] = stencil
            stencil_arrays_initializations.append(initialization)

    ops_kernel = Callable(namespace['ops_kernel'](count), ops_expressions,
                          "void", parameters)

    ops_par_loop_init, ops_par_loop_call = create_ops_par_loop(
        trees, ops_kernel, parameters, block, name_to_ops_dat,
        node_factory.ops_args, par_to_ops_stencil, dims)

    pre_time_loop = stencil_arrays_initializations + ops_par_loop_init

    return pre_time_loop, ops_kernel, ops_par_loop_call
예제 #3
0
def opsit(trees, count):
    """
    Given an affine tree, generate a Callable representing an OPS Kernel.

    Parameters
    ----------
    tree : IterationTree
        IterationTree containing the loop to extract into an OPS Kernel
    count : int
        Generated kernel counters
    """
    node_factory = OPSNodeFactory()
    expressions = []
    ops_expressions = []

    for tree in trees:
        expressions.extend(FindNodes(Expression).visit(tree.inner))

    for expr in expressions:
        ops_expressions.append(
            Expression(make_ops_ast(expr.expr, node_factory)))

    parameters = sorted(node_factory.ops_params,
                        key=lambda i: (i.is_Constant, i.name))

    ops_kernel = Callable(namespace['ops_kernel'](count), ops_expressions,
                          "void", parameters)

    stencil_arrays_initializations = itertools.chain(*[
        to_ops_stencil(p, node_factory.ops_args_accesses[p])
        for p in parameters if isinstance(p, OpsAccessible)
    ])

    pre_time_loop = stencil_arrays_initializations

    return pre_time_loop, ops_kernel