コード例 #1
0
def TestCompileExec():
    """Compile a small IL kernel, run it once on device 0, and print timings.

    The kernel text is compiled with ``cal_exec.compile``.  Two SIZE x SIZE
    ``FMT_FLOAT32_4`` remote buffers are allocated and bound to the ``i0``
    and ``o0`` registers, ``o1`` and ``g[]`` are described as local
    bindings, and the image is run over the (0, 0, SIZE, SIZE) domain.
    The compile time, the buffer handles, both binding dictionaries and the
    run time are printed.

    The remote buffers and the compiled image are released in a ``finally``
    clause, so they are freed even when allocation or execution raises.

    Returns:
        None
    """
    import time

    SIZE = 1024

    # Adjacent string literals are joined by the parser, giving the same
    # kernel text as explicit "+" concatenation.
    kernel = (
        "il_ps_2_0\n"
        "dcl_input_position_interp(linear_noperspective) v0\n"
        "dcl_output_generic o0\n"
        "dcl_output_generic o1\n"
        "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
        "sample_resource(0)_sampler(0) o0, v0.xyxx\n"
        "mov g[0], r0\n"
        "end\n"
    )

    started = time.time()
    image = cal_exec.compile(kernel)
    elapsed = time.time() - started
    # A single pre-formatted string prints the same text as the Python 2
    # statement form ``print "label", value`` and also parses as a call.
    print("compile time %s" % (elapsed,))

    # None marks "not allocated yet" so the cleanup below only frees what
    # was actually obtained.
    in_buf = None
    out_buf = None
    try:
        in_buf = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
        out_buf = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
        print("input %s" % (in_buf,))
        print("output %s" % (out_buf,))

        remote = {"o0": out_buf, "i0": in_buf}
        local = {
            "o1": (SIZE, SIZE, cal_exec.FMT_FLOAT32_4),
            "g[]": (4096, 4096, cal_exec.FMT_FLOAT32_4),
        }
        domain = (0, 0, SIZE, SIZE)
        print("remote bindings %s" % (remote,))
        print("local bindings %s" % (local,))

        # Arguments: image, dev num, (x, y, w, h), local and remote bindings.
        started = time.time()
        cal_exec.run_stream(image, 0, domain, local, remote)
        elapsed = time.time() - started
        print("run time %s" % (elapsed,))
    finally:
        # Same release order as the straight-line version: input, output,
        # then the compiled image.
        if in_buf is not None:
            cal_exec.free_remote(in_buf)
        if out_buf is not None:
            cal_exec.free_remote(out_buf)
        cal_exec.free_image(image)
    return
コード例 #2
0
def TestCompileExec():
  """Compile a small IL kernel, run it once on device 0, and print timings.

  The kernel text is compiled with ``cal_exec.compile``.  Two SIZE x SIZE
  ``FMT_FLOAT32_4`` remote buffers are allocated and bound to the ``i0``
  and ``o0`` registers, ``o1`` and ``g[]`` are described as local
  bindings, and the image is run over the (0, 0, SIZE, SIZE) domain.
  The compile time, the buffer handles, both binding dictionaries and the
  run time are printed.

  The remote buffers and the compiled image are released in a ``finally``
  clause, so they are freed even when allocation or execution raises.

  Returns:
      None
  """
  import time

  SIZE = 1024

  # Adjacent string literals are joined by the parser, giving the same
  # kernel text as explicit "+" concatenation.
  kernel = (
      "il_ps_2_0\n"
      "dcl_input_position_interp(linear_noperspective) v0\n"
      "dcl_output_generic o0\n"
      "dcl_output_generic o1\n"
      "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
      "sample_resource(0)_sampler(0) o0, v0.xyxx\n"
      "mov g[0], r0\n"
      "end\n"
  )

  started = time.time()
  image = cal_exec.compile(kernel)
  elapsed = time.time() - started
  # A single pre-formatted string prints the same text as the Python 2
  # statement form ``print "label", value`` and also parses as a call.
  print("compile time %s" % (elapsed,))

  # None marks "not allocated yet" so the cleanup below only frees what
  # was actually obtained.
  in_buf = None
  out_buf = None
  try:
    in_buf = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
    out_buf = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
    print("input %s" % (in_buf,))
    print("output %s" % (out_buf,))

    remote = {"o0": out_buf, "i0": in_buf}
    local = {
        "o1": (SIZE, SIZE, cal_exec.FMT_FLOAT32_4),
        "g[]": (4096, 4096, cal_exec.FMT_FLOAT32_4),
    }
    domain = (0, 0, SIZE, SIZE)
    print("remote bindings %s" % (remote,))
    print("local bindings %s" % (local,))

    # Arguments: image, dev num, (x, y, w, h), local and remote bindings.
    started = time.time()
    cal_exec.run_stream(image, 0, domain, local, remote)
    elapsed = time.time() - started
    print("run time %s" % (elapsed,))
  finally:
    # Same release order as the straight-line version: input, output,
    # then the compiled image.
    if in_buf is not None:
      cal_exec.free(in_buf)
    if out_buf is not None:
      cal_exec.free(out_buf)
    cal_exec.free_image(image)
  return
コード例 #3
0
class Processor(spe.Processor):
    exec_module = cal_exec

    def __init__(self, device):
        spe.Processor.__init__(self)

        if device < 0 or device > N_GPUS:
            raise Exception("Invalid device number %d" % device)

        self.device = device
        return

    def execute(self, code, domain=None, async=False):
        code.cache_code()

        if domain is None:
            try:
                input = code.get_remote_binding("i0")
            except KeyError:
                raise Exception(
                    "No domain specified and no remote i0 register bound")

            domain = (0, 0, input.gpu_width, len(input) / input.gpu_width)

        if async:
            th = cal_exec.run_stream_async(code.render_code, self.device,
                                           domain, code._local_bindings,
                                           code._remote_bindings,
                                           code._copy_bindings)
            return (th, code)
        else:
            cal_exec.run_stream(code.render_code, self.device, domain,
                                code._local_bindings, code._remote_bindings,
                                code._copy_bindings)

            try:
                import numpy

                for (key, arr) in code._remote_bindings_data.items():
                    if isinstance(arr, extarray.extarray):
                        arr.set_memory(arr.gpu_mem_handle[0],
                                       arr.data_len * arr.itemsize)
                    elif isinstance(arr, numpy.ndarray):
                        cal_exec.set_ndarray_ptr(arr,
                                                 code._remote_bindings[key][0])

                for (key, arr) in code._copy_bindings_data.items():
                    if isinstance(arr, extarray.extarray):
                        arr.set_memory(arr.gpu_mem_handle[0],
                                       arr.data_len * arr.itemsize)
                    elif isinstance(arr, numpy.ndarray):
                        cal_exec.set_ndarray_ptr(arr,
                                                 code._remote_bindings[key][0])

            except ImportError:
                for arr in code._remote_bindings_data.values():
                    arr.set_memory(arr.gpu_mem_handle[0],
                                   arr.data_len * arr.itemsize)
                for arr in code._copy_bindings_data.values():
                    arr.set_memory(arr.gpu_mem_handle[0],
                                   arr.data_len * arr.itemsize)
            return
コード例 #4
0
      if isinstance(arr, extarray.extarray):
        domain = (0, 0, arr.gpu_width, arr.gpu_height)
      elif isinstance(arr, numpy.ndarray):
        domain = (0, 0, arr.base.width, arr.base.height)
      elif isinstance(arr, LocalMemory):
        domain = (0, 0, arr.width, arr.height)
      else:
        raise Exception("Invalid o0 binding!")

    if async:
      th = cal_exec.run_stream_async(prgm.render_code,
          self.ctx, domain, prgm._bindings)
      return (th, prgm)
    else:
      cal_exec.run_stream(prgm.render_code, self.ctx, domain, prgm._bindings)

      # Go through the bindings and re-set all the pointers
      #  When a kernel is executed, remote memory has to be unmapped and
      #  remapped, meaning the memory location can change.
      for (key, arr) in prgm._bindings_data.items():
        binding = prgm._bindings[key]
        if isinstance(arr, extarray.extarray):
          arr.set_memory(binding[1], arr.data_len * arr.itemsize)
        elif isinstance(arr, numpy.ndarray) and HAS_NUMPY:
          cal_exec.set_ndarray_ptr(arr, binding[1])
      return


  def join(self, hdl):
    # TODO - do something better to differentiate