Example #1
    def __init__(self):
        self.context = cl.clCreateContextFromType()
        self.queue = cl.clCreateCommandQueue(self.context)
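These snippets presumably rely on the pycl bindings being imported under the alias `cl`. A minimal, self-contained sketch of the same setup (assuming pycl is installed and a default device is available) would be:

import pycl as cl  # assumption: the examples import pycl as `cl`

context = cl.clCreateContextFromType()    # context on the default device type
queue = cl.clCreateCommandQueue(context)  # queue on a device chosen by pycl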
Example #2
    def __init__(self):
        self.context = cl.clCreateContextFromType()
        self.queue = cl.clCreateCommandQueue(self.context)
Example #3
    def __init__(self):
        self.context = cl.clCreateContextFromType()
        self.queue = cl.clCreateCommandQueue(
            context=self.context
        )  #, properties=cl.cl_command_queue_properties.CL_QUEUE_PROFILING_ENABLE)
        self.device = self.queue.device
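The trailing comment in Example #3 suggests the queue can also be created with profiling enabled. A hedged variant using only the names from that comment (untested sketch, not part of the original example) might look like:

    def __init__(self):
        self.context = cl.clCreateContextFromType()
        # sketch: pass the profiling flag that Example #3 leaves commented out
        self.queue = cl.clCreateCommandQueue(
            context=self.context,
            properties=cl.cl_command_queue_properties.CL_QUEUE_PROFILING_ENABLE,
        )
        self.device = self.queue.device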
Example #4
    def transform(self, py_ast, program_config):
        """
        Convert the Python AST to a C AST according to the directions
        given in program_config.
        """
        arg_config, tuner_config = program_config
        dot = DotWriter()

        # hack: reset the intermediate-vector ID counters between invocations
        ComputedVector._next_id = 0
        CopiedVector._next_id = 0

        # set up OpenCL context and memory spaces
        import pycl
        context = pycl.clCreateContextFromType(pycl.CL_DEVICE_TYPE_ALL)
        queues = [pycl.clCreateCommandQueue(context, dev) for dev in context.devices]
        c_func = ElementwiseFunction(context, queues)

        memories = [MainMemory()] + [OclMemory(q) for q in queues]
        main_memory = memories[0]
        ptrs = arg_config['ptrs']
        dtype, length = ptrs[0]._dtype_, arg_config['len']

        # pull stuff out of autotuner
        distribute_directives = tuner_config['distribute']
        reassoc_directives = tuner_config['reassociate']
        locs = [memories[loc] for loc in tuner_config['locs']]
        fusion_directives = tuner_config['fusion']
        parallelize_directives = tuner_config['parallelize']

        dot.write(py_ast)

        # run basic conversions
        proj = PyBasicConversions().visit(py_ast)
        dot.write(proj)

        # run platform-independent transformations
        proj = ApplyDistributiveProperty(distribute_directives).visit(proj)
        dot.write(proj)

        proj = ApplyAssociativeProperty(reassoc_directives).visit(proj)
        dot.write(proj)

        # set parameter types
        proj = VectorFinder(ptrs, main_memory).visit(proj)
        dot.write(proj)

        proj = LocationTagger(locs).visit(proj)
        dot.write(proj)

        proj = InsertIntermediates(main_memory).visit(proj)
        dot.write(proj)

        proj = CopyInserter(main_memory).visit(proj)
        dot.write(proj)

        proj = DoFusion(fusion_directives).visit(proj)
        dot.write(proj)

        proj = AllocateIntermediates(dtype, length).visit(proj)
        dot.write(proj, "postintermed")

        py_op = proj.find(FunctionDecl, name="py_op")
        schedules = FindParallelism(parallelize_directives).visit(py_op)
        py_op.defn = parallelize_tasks(schedules)
        dot.write(proj, "postparallel")

        proj = KernelOutliner(length).visit(proj)
        dot.write(proj)

        proj = LowerKernelCalls().visit(proj)
        dot.write(proj)

        proj = RefConverter().visit(proj)
        dot.write(proj)

        proj = LowerLoopsAndCopies(length).visit(proj)
        dot.write(proj)

        zipper = ArgZipper()
        proj = zipper.visit(Lifter().visit(proj))
        c_func.extra_args = zipper.extra_args
        c_func.answer = zipper.answer
        dot.write(proj)

        fn = proj.find(FunctionDecl)
        return c_func.finalize("py_op", proj, fn.get_type())
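The transform above unpacks program_config into arg_config and tuner_config and reads a fixed set of keys from each. A purely illustrative sketch of that shape (key names taken from the code, all values made up) could be:

# hypothetical program_config; only the key names come from the code above
arg_config = {
    'ptrs': ...,          # typed pointer arguments; ptrs[0]._dtype_ supplies the dtype
    'len': 1024,          # number of elements to process
}
tuner_config = {
    'distribute': ...,    # directives for ApplyDistributiveProperty
    'reassociate': ...,   # directives for ApplyAssociativeProperty
    'locs': [0, 1],       # indices into [MainMemory()] + one OclMemory per queue
    'fusion': ...,        # directives for DoFusion
    'parallelize': ...,   # directives for FindParallelism
}
program_config = (arg_config, tuner_config)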
Example #5
    def __init__(self):
        self.context = cl.clCreateContextFromType()
        self.queue = cl.clCreateCommandQueue(self.context, device=TARGET_GPU)
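Example #5 presupposes a module-level TARGET_GPU (Example #6 below shows one way it is set up). A self-contained sketch that instead picks a device from the context (an assumption, not the original selection logic) might be:

import pycl as cl  # assumption: pycl imported as `cl`

context = cl.clCreateContextFromType()
device = context.devices[0]  # first device on the context; the original targets a specific GPU
queue = cl.clCreateCommandQueue(context, device=device)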
Example #6

#
### Specialist-Written Code ###
#
# The code below is written by an industry SPECIALIST. It is meant to be more
# complicated and requires specialized knowledge to write.
#


#
# Global Constants
#

WORK_GROUP_SIZE = 1024
devices = cl.clCreateContextFromType().devices + cl.clCreateContext().devices
TARGET_GPU = devices[1]
ITERATIONS = 0
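
# Sketch (not part of the original code): TARGET_GPU = devices[1] assumes at
# least two OpenCL devices are visible; a defensive variant would be
#     TARGET_GPU = devices[1] if len(devices) > 1 else devices[0]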


class ConcreteReduction(ConcreteSpecializedFunction):
    def __init__(self):
        self.context = cl.clCreateContextFromType()
        self.queue = cl.clCreateCommandQueue(self.context, device=TARGET_GPU)

    def finalize(self, kernel, tree, entry_name, entry_type):
        self.kernel = kernel
        self._c_function = self._compile(entry_name, tree, entry_type)
        return self

    def __call__(self, A):