コード例 #1
0
    def __init__(self, balljoint, texture):
        """Set up GPU-based magnetic-field interpolation for a ball joint.

        Uploads `texture` (per-angle magnetic field samples) as a CUDA
        multichannel 2D texture, allocates per-sensor host buffers, and
        starts a ROS node that publishes joint states.

        :param balljoint: object whose ``config['sensor_pos']`` lists sensor
            positions (one entry per sensor).
        :param texture: field-sample array accepted by
            ``drv.make_multichannel_2d_array`` (C order).
        """
        self.balljoint = balljoint
        self.tex = texture
        # Kernel and texture reference come from self.mod, which must have
        # been compiled elsewhere in this class (not visible here).
        self.interpol = self.mod.get_function("MagneticFieldInterpolateKernel")
        self.texref = self.mod.get_texref('tex')
        drv.bind_array_to_texref(
            drv.make_multichannel_2d_array(self.tex, order="C"), self.texref)
        # Normalized coordinates + LINEAR filtering enable hardware
        # interpolation of the field samples.  Axis 0 wraps while axis 1
        # clamps -- presumably a periodic vs. bounded angle; TODO confirm.
        self.texref.set_flags(drv.TRSF_NORMALIZED_COORDINATES)
        self.texref.set_filter_mode(drv.filter_mode.LINEAR)
        self.texref.set_address_mode(0, drv.address_mode.WRAP)
        self.texref.set_address_mode(1, drv.address_mode.CLAMP)
        self.sensor_pos = balljoint.config['sensor_pos']
        self.number_of_sensors = len(self.sensor_pos)
        # (N, 3) float32 host buffers: kernel input, kernel output, and the
        # target field values to match against.
        self.input = np.zeros((self.number_of_sensors, 3),
                              dtype=np.float32,
                              order='C')
        self.output = np.zeros((self.number_of_sensors, 3),
                               dtype=np.float32,
                               order='C')
        self.b_target = np.zeros((self.number_of_sensors, 3),
                                 dtype=np.float32,
                                 order='C')

        # 16x16 thread blocks; grid rounded up to cover the sensor count.
        # NOTE(review): both divmods use number_of_sensors, giving an
        # N x N grid -- verify the kernel really indexes sensors in 2-D.
        self.bdim = (16, 16, 1)
        dx, mx = divmod(self.number_of_sensors, self.bdim[0])
        dy, my = divmod(self.number_of_sensors, self.bdim[1])
        self.gdim = (int((dx + (mx > 0))), int((dy + (my > 0))))
        rospy.init_node('BallJointPoseestimator', anonymous=True)
        self.joint_state = rospy.Publisher('/external_joint_states',
                                           sensor_msgs.msg.JointState,
                                           queue_size=1)
コード例 #2
0
    def test_multichannel_2d_texture(self):
        # Round-trip a random 4-channel array through a float4 2D texture
        # and check the kernel reads back exactly what was uploaded.
        mod = SourceModule("""
        #define CHANNELS 4
        texture<float4, 2, cudaReadModeElementType> mtx_tex;

        __global__ void copy_texture(float *dest)
        {
          int row = threadIdx.x;
          int col = threadIdx.y;
          int w = blockDim.y;
          float4 texval = tex2D(mtx_tex, row, col);
          dest[(row*w+col)*CHANNELS + 0] = texval.x;
          dest[(row*w+col)*CHANNELS + 1] = texval.y;
          dest[(row*w+col)*CHANNELS + 2] = texval.z;
          dest[(row*w+col)*CHANNELS + 3] = texval.w;
        }
        """)

        copy_kernel = mod.get_function("copy_texture")
        texref = mod.get_texref("mtx_tex")

        num_channels = 4
        tex_shape = (5, 6)
        src = np.asarray(np.random.randn(*((num_channels, ) + tex_shape)),
                         dtype=np.float32,
                         order="F")
        cuda_array = drv.make_multichannel_2d_array(src, order="F")
        drv.bind_array_to_texref(cuda_array, texref)

        result = np.zeros(tex_shape + (num_channels, ), dtype=np.float32)
        copy_kernel(drv.Out(result),
                    block=tex_shape + (1, ),
                    texrefs=[texref])
        # Channel axis moves from front to back for the comparison.
        expected = src.transpose(1, 2, 0)
        assert la.norm(result - expected) == 0
コード例 #3
0
ファイル: test_driver.py プロジェクト: timlegrand/pycuda
    def test_multichannel_2d_texture(self):
        """Upload a 4-channel F-ordered array as a float4 texture and verify
        a kernel copying it out reproduces the input exactly."""
        mod = SourceModule(
            """
        #define CHANNELS 4
        texture<float4, 2, cudaReadModeElementType> mtx_tex;

        __global__ void copy_texture(float *dest)
        {
          int row = threadIdx.x;
          int col = threadIdx.y;
          int w = blockDim.y;
          float4 texval = tex2D(mtx_tex, row, col);
          dest[(row*w+col)*CHANNELS + 0] = texval.x;
          dest[(row*w+col)*CHANNELS + 1] = texval.y;
          dest[(row*w+col)*CHANNELS + 2] = texval.z;
          dest[(row*w+col)*CHANNELS + 3] = texval.w;
        }
        """
        )

        copy_texture = mod.get_function("copy_texture")
        mtx_tex = mod.get_texref("mtx_tex")

        # Source is channel-first (4, 5, 6); F order is what
        # make_multichannel_2d_array expects here.
        shape = (5, 6)
        channels = 4
        a = np.asarray(np.random.randn(*((channels,) + shape)), dtype=np.float32, order="F")
        drv.bind_array_to_texref(drv.make_multichannel_2d_array(a, order="F"), mtx_tex)

        # One thread per texel; kernel writes channel-last layout.
        dest = np.zeros(shape + (channels,), dtype=np.float32)
        copy_texture(drv.Out(dest), block=shape + (1,), texrefs=[mtx_tex])
        # Move the channel axis to the back to match dest's layout.
        reshaped_a = a.transpose(1, 2, 0)
        # print reshaped_a
        # print dest
        assert la.norm(dest - reshaped_a) == 0
コード例 #4
0
def create_2d_rgba_texture(a, module, variable, point_sampling=False):
    """Bind a 2D RGBA array to the named texture reference of a CUDA module.

    :param a: image data; made C-contiguous before upload.
    :param module: compiled CUDA module declaring the texture.
    :param variable: name of the texture reference inside the module.
    :param point_sampling: if True use nearest-neighbour (POINT) sampling,
        otherwise bilinear (LINEAR) filtering.
    :returns: the configured texture reference.
    """
    a = numpy.ascontiguousarray(a)
    out_texref = module.get_texref(variable)
    cuda.bind_array_to_texref(
        cuda.make_multichannel_2d_array(a, order='C'), out_texref)
    # One statement per line (original used PEP 8-violating one-liners).
    if point_sampling:
        out_texref.set_filter_mode(cuda.filter_mode.POINT)
    else:
        out_texref.set_filter_mode(cuda.filter_mode.LINEAR)
    return out_texref
コード例 #5
0
ファイル: texture.py プロジェクト: hvcl/Vivaldi
def create_2d_rgba_texture(a, module, variable, point_sampling=False):
    """Upload `a` as a C-ordered multichannel 2D CUDA array and bind it to
    the module's texture reference named `variable`; return the reference."""
    contiguous = numpy.ascontiguousarray(a)
    texref = module.get_texref(variable)
    cuda.bind_array_to_texref(
        cuda.make_multichannel_2d_array(contiguous, order='C'), texref)
    mode = cuda.filter_mode.POINT if point_sampling else cuda.filter_mode.LINEAR
    texref.set_filter_mode(mode)
    return texref
コード例 #6
0
	def yt_to_function(self, tf):
		# Convert a yt transfer function into an interleaved RGBA lookup
		# table and upload it as a CUDA multichannel 2D array.
		size = tf.nbins
		# dstack of four (1, size) rows -> shape (1, size, 4), F-ordered.
		self.interleaved = np.asarray(np.dstack((tf.red.y.reshape(1, size),
					         	 tf.green.y.reshape(1, size),
							 tf.blue.y.reshape(1, size),
							 tf.alpha.y.reshape(1, size))),
					      dtype=np.float32, order='F')

		# Transpose to (4, size, 1) before upload -- presumably the layout
		# make_multichannel_2d_array expects for order='F'; TODO confirm.
		self.cuda_transfer_array = drv.make_multichannel_2d_array(np.asarray(self.interleaved.transpose((2,1,0)),
			 							     dtype=np.float32, order='F'), order='F')
コード例 #7
0
def tex2DToGPU(tex):
    """Upload a 2D texture to a CUDA array.

    Grayscale input (rank-2) is uploaded directly; color input is padded
    with an all-ones fourth channel before the multichannel upload.
    """
    if len(tex.shape) == 2:
        # Single channel: no padding or dtype conversion needed here.
        return cuda.np_to_array(tex, 'C')

    # Three channels: append a constant alpha/padding channel, then make the
    # result contiguous float32 for the multichannel upload.
    padded = np.dstack((tex, np.ones((tex.shape[0], tex.shape[1]))))
    padded = np.ascontiguousarray(padded).astype(np.float32)
    return cuda.make_multichannel_2d_array(padded, 'C')
コード例 #8
0
    def yt_to_function(self, tf):
        """Build an interleaved RGBA table from a yt transfer function and
        upload it as an F-ordered CUDA multichannel 2D array."""
        nbins = tf.nbins
        channel_rows = (tf.red.y.reshape(1, nbins),
                        tf.green.y.reshape(1, nbins),
                        tf.blue.y.reshape(1, nbins),
                        tf.alpha.y.reshape(1, nbins))
        # (1, nbins, 4) float32, Fortran-ordered.
        self.interleaved = np.asarray(np.dstack(channel_rows),
                                      dtype=np.float32, order='F')
        # Channel-first (4, nbins, 1) layout for the upload.
        transposed = np.asarray(self.interleaved.transpose((2, 1, 0)),
                                dtype=np.float32, order='F')
        self.cuda_transfer_array = drv.make_multichannel_2d_array(
            transposed, order='F')
コード例 #9
0
	def arrays_to_transfer_function(self, arrays):
		# Build an interleaved RGBA lookup table from four 1-D channel
		# arrays and upload it as a CUDA multichannel 2D array.

		(r_array, g_array, b_array, a_array) = arrays
		(size, ) = r_array.shape


		# dstack of four (1, size) rows -> shape (1, size, 4), F-ordered.
		self.interleaved = np.asarray(np.dstack((r_array.reshape(1, size),
										g_array.reshape(1, size),
										b_array.reshape(1, size),
										a_array.reshape(1, size))),
                                                            dtype=np.float32, order='F')
		# Transpose to (4, size, 1) before upload -- presumably the layout
		# make_multichannel_2d_array expects for order='F'; TODO confirm.
		self.cuda_transfer_array = drv.make_multichannel_2d_array(np.asarray(self.interleaved.transpose((2,1,0)),
													   dtype=np.float32, order='F'), order='F')
コード例 #10
0
    def arrays_to_transfer_function(self, arrays):
        """Interleave four 1-D channel arrays into an RGBA table and upload
        it as an F-ordered CUDA multichannel 2D array."""
        (r_array, g_array, b_array, a_array) = arrays
        (size, ) = r_array.shape

        channel_rows = (r_array.reshape(1, size),
                        g_array.reshape(1, size),
                        b_array.reshape(1, size),
                        a_array.reshape(1, size))
        # (1, size, 4) float32, Fortran-ordered.
        self.interleaved = np.asarray(np.dstack(channel_rows),
                                      dtype=np.float32, order='F')
        # Channel-first (4, size, 1) layout for the upload.
        transposed = np.asarray(self.interleaved.transpose((2, 1, 0)),
                                dtype=np.float32, order='F')
        self.cuda_transfer_array = drv.make_multichannel_2d_array(
            transposed, order='F')
コード例 #11
0
    def __init__(self, img_path):
	# Load a light-field image plus its lenslet/optics metadata, compile a
	# CUDA view-synthesis kernel for it, bind the image as a texture, and
	# set up the Traits UI plot.
	# WARNING(review): this body mixes tab and space indentation -- it is
	# Python 2 only and will not tokenize under Python 3.
	super(LFapplication, self).__init__()

	#
	# Load image data
	#
	base_path = os.path.splitext(img_path)[0]
	lenslet_path = base_path + '-lenslet.txt'
	optics_path = base_path + '-optics.txt'

	with open(lenslet_path, 'r') as f:
            # NOTE(review): eval() of file content is unsafe for untrusted
            # files; a literal parser would be safer.
            tmp = eval(f.readline())
	    x_offset, y_offset, right_dx, right_dy, down_dx, down_dy = \
              np.array(tmp, dtype=np.float32)

	with open(optics_path, 'r') as f:
            for line in f:
                name, val = line.strip().split()
                try:
                    setattr(self, name, np.float32(val))
                # NOTE(review): bare except silently drops any malformed or
                # non-numeric optics entry.
                except:
                    pass

        max_angle = math.atan(self.pitch/2/self.flen)

        #
	# Prepare image
	#
	im_pil = Image.open(img_path)
        if im_pil.mode == 'RGB':
            self.NCHANNELS = 3
            w, h = im_pil.size
            # RGB is padded to 4 float32 channels (alpha left at zero) for
            # the multichannel texture upload below.
            im = np.zeros((h, w, 4), dtype=np.float32)
            im[:, :, :3] = np.array(im_pil).astype(np.float32)
            self.LF_dim = (ceil(h/down_dy), ceil(w/right_dx), 3)
        else:
            self.NCHANNELS = 1
            im = np.array(im_pil.getdata()).reshape(im_pil.size[::-1]).astype(np.float32)
            h, w = im.shape
            self.LF_dim = (ceil(h/down_dy), ceil(w/right_dx))

        # Sub-lenslet offsets and angle-to-pixel ratios used by the kernel.
        x_start = x_offset - int(x_offset / right_dx) * right_dx
        y_start = y_offset - int(y_offset / down_dy) * down_dy
        x_ratio = self.flen * right_dx / self.pitch
        y_ratio = self.flen * down_dy / self.pitch

        #
        # Generate the cuda kernel
        #
        mod_LFview = pycuda.compiler.SourceModule(
            _kernel_tpl.render(
                newiw=self.LF_dim[1],
                newih=self.LF_dim[0],
                oldiw=w,
                oldih=h,
                x_start=x_start,
                y_start=y_start,
                x_ratio=x_ratio,
                y_ratio=y_ratio,
                x_step=right_dx,
                y_step=down_dy,
                NCHANNELS=self.NCHANNELS
                )
            )
        
        self.LFview_func = mod_LFview.get_function("LFview_kernel")
        self.texref = mod_LFview.get_texref("tex")
        
        #
	# Now generate the cuda texture
	#
        if self.NCHANNELS == 3:
            cuda.bind_array_to_texref(
                cuda.make_multichannel_2d_array(im, order="C"),
                self.texref
                )
        else:
            cuda.matrix_to_texref(im, self.texref, order="C")
            
	#
	# We could set the next if we wanted to address the image
	# in normalized coordinates ( 0 <= coordinate < 1.)
	# texref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
	#
	self.texref.set_filter_mode(cuda.filter_mode.LINEAR)

        #
	# Prepare the traits
	#
        self.add_trait('X_angle', Range(-max_angle, max_angle, 0.0))
        self.add_trait('Y_angle', Range(-max_angle, max_angle, 0.0))
	
        self.plotdata = ArrayPlotData(LF_img=self.sampleLF())
	self.LF_img = Plot(self.plotdata)
        if self.NCHANNELS == 3:
            self.LF_img.img_plot("LF_img")
        else:
            self.LF_img.img_plot("LF_img", colormap=gray)
コード例 #12
0
ファイル: pythonBPDebug.py プロジェクト: dwm203/osubp
    f.close()
    return src

# Compile the backprojection CUDA kernel.
# NOTE(review): include paths are machine-specific (hard-coded SDK and
# MATLAB install locations) -- adjust per host.
src = grabsource('PyCUDABackProjectionKernel.cu')
mod = SourceModule(src, 
        include_dirs=['.','/home/aldebrn/NVIDIA_GPU_Computing_SDK/C/common/inc',
            '/home/aldebrn/matlab2009a/extern/include'])

def complex_to_2chan(x):
    """Interleave real/imag parts of `x` into a trailing 2-channel axis,
    returning a fresh C-ordered float32 array."""
    interleaved = numpy.dstack((x.real, x.imag))
    return numpy.array(interleaved, dtype=numpy.float32, order='C', copy=True)
def four_to_4chan(x, y, z, w):
    """Pack four equally-shaped arrays into a trailing 4-channel axis,
    returning a fresh C-ordered float32 array."""
    stacked = numpy.dstack((x, y, z, w))
    return numpy.array(stacked, dtype=numpy.float32, order='C', copy=True)

# Bind the range projections (complex -> 2-channel float32) to a texture
# with linear filtering so the kernel gets hardware interpolation.
tex_projections = mod.get_texref('tex_projections')
arr_projections = drv.make_multichannel_2d_array(complex_to_2chan(rp), order='C') 
tex_projections.set_filter_mode(drv.filter_mode.LINEAR)
tex_projections.set_array(arr_projections)


# Per-pulse platform position (AntX, AntY, AntZ) and range R0, packed as a
# 4-channel texture.  No filter mode is set here -- presumably point
# sampling is intended; confirm against the kernel.
tex_platform_info = mod.get_texref('tex_platform_info')
arr_platform_info = drv.make_multichannel_2d_array(four_to_4chan(
    mdouble(data.AntX), mdouble(data.AntY), mdouble(data.AntZ), 
    mdouble(data.R0)), order='C')
tex_platform_info.set_array(arr_platform_info)

# Host-side copy of the same packed platform data.
platform_info = four_to_4chan(mdouble(data.AntX), mdouble(data.AntY), 
        mdouble(data.AntZ), mdouble(data.R0))

# height, width, num_channels for order == 'C'
コード例 #13
0
def complex_to_2chan(x):
    """Return a fresh C-ordered float32 array with the real and imaginary
    parts of `x` interleaved along a trailing channel axis."""
    return numpy.array(
        numpy.dstack((x.real, x.imag)),
        dtype=numpy.float32,
        order='C',
        copy=True,
    )


def four_to_4chan(x, y, z, w):
    """Return a fresh C-ordered float32 array packing the four inputs along
    a trailing channel axis."""
    return numpy.array(
        numpy.dstack((x, y, z, w)),
        dtype=numpy.float32,
        order='C',
        copy=True,
    )


# Bind the range projections (complex -> 2-channel float32) to a texture
# with linear filtering for hardware interpolation.
tex_projections = mod.get_texref('tex_projections')
arr_projections = drv.make_multichannel_2d_array(complex_to_2chan(rp),
                                                 order='C')
tex_projections.set_filter_mode(drv.filter_mode.LINEAR)
tex_projections.set_array(arr_projections)

# Per-pulse platform position (AntX, AntY, AntZ) and range R0 packed as a
# 4-channel texture.  No filter mode is set -- presumably point sampling is
# intended; confirm against the kernel.
tex_platform_info = mod.get_texref('tex_platform_info')
arr_platform_info = drv.make_multichannel_2d_array(four_to_4chan(
    mdouble(data.AntX), mdouble(data.AntY), mdouble(data.AntZ),
    mdouble(data.R0)),
                                                   order='C')
tex_platform_info.set_array(arr_platform_info)

# Host-side copy of the same packed platform data.
platform_info = four_to_4chan(mdouble(data.AntX), mdouble(data.AntY),
                              mdouble(data.AntZ), mdouble(data.R0))

# height, width, num_channels for order == 'C'
コード例 #14
0
ファイル: GPU_unit.py プロジェクト: Anukura/Vivaldi
def run_function(function_package, function_name):
	# Launch one named CUDA kernel for a Vivaldi function package:
	# upload transfer-function textures and globals, locate or allocate the
	# output device buffer, marshal every argument onto the GPU, run the
	# kernel on stream 0 and record an event for later synchronization.
	# Returns (device pointer of the output, its data package).
	# NOTE(review): Python 2 code (print statements); relies on many
	# module-level globals (data_list, stream_list, Event_dict, ...).
	# global variables
	global FD
	
	# initialize variables
	fp = function_package
	func_output = fp.output
	u, ss, sp = func_output.get_id()
	data_halo = func_output.data_halo
	
	args = fp.function_args
	work_range = fp.work_range

	stream = stream_list[0]
	mod = source_module_dict[function_name]

#	cuda.memcpy_htod_async(bandwidth, numpy.float32(fp.TF_bandwidth), stream=stream)
#	cuda.memcpy_htod_async(front_back, numpy.int32(fp.front_back), stream=stream)

	# Transfer-function texture references and the bandwidth global live in
	# the compiled module.
	tf = mod.get_texref('TFF')	
	tf1 = mod.get_texref('TFF1')
	bandwidth,_ = mod.get_global('TF_bandwidth')
	
	if fp.Sliders != None:
		sld,_ = mod.get_global('slider')
		sld_op,_ = mod.get_global('slider_opacity')

		cuda.memcpy_htod_async(sld, fp.Sliders, stream=stream)
		cuda.memcpy_htod_async(sld_op, fp.Slider_opacity, stream=stream)
	
	if fp.transN != 0:
		tf = mod.get_texref('TFF')
		tf1 = mod.get_texref('TFF1')
		bandwidth,_ = mod.get_global('TF_bandwidth')

		# Cache the latest transfer-function tables in module globals so
		# later launches can rebind without a new upload from fp.
		if fp.update_tf == 1 and fp.trans_tex != None:
			global tfTex
			tfTex = fp.trans_tex
		if fp.update_tf2 == 1 and fp.trans_tex != None:
			global tfTex2
			tfTex2  = fp.trans_tex

		# 256-entry RGBA lookup tables, bound with linear filtering.
		tf.set_filter_mode(cuda.filter_mode.LINEAR)
		tf1.set_filter_mode(cuda.filter_mode.LINEAR)
		cuda.bind_array_to_texref(cuda.make_multichannel_2d_array(tfTex.reshape(1,256,4), order='C'), tf)
		cuda.bind_array_to_texref(cuda.make_multichannel_2d_array(tfTex2.reshape(1,256,4), order='C'), tf1)
		cuda.memcpy_htod_async(bandwidth, numpy.float32(fp.TF_bandwidth), stream=stream)
	
	cuda_args = []

	# Does a device buffer for the output already exist in the registry?
	data_exist = True
	if u not in data_list: data_exist = False
	elif ss not in data_list[u]: data_exist = False
	elif sp not in data_list[u][ss]: data_exist = False

	if data_exist:
		# initialize variables
		data_package = data_list[u][ss][sp]
		dp = data_package

		if dp.devptr == None:
			wait_data_arrive(data_package, stream=stream)
		###########################
		devptr = dp.devptr
		output_range = dp.data_range
		full_output_range = dp.full_data_range
		buffer_range = dp.buffer_range
		buffer_halo = dp.buffer_halo
		
		ad = data_range_to_cuda_in(output_range, full_output_range, buffer_range, buffer_halo=buffer_halo, stream=stream)
		cuda_args += [ad]
		output_package = dp
		
		FD.append(ad)
	else:
		# No registered buffer: allocate fresh device memory for the output.
		bytes = func_output.data_bytes
		devptr, usage = malloc_with_swap_out(bytes)
		log("created output data bytes %s"%(str(func_output.data_bytes)),'detail',log_type)
		data_range = func_output.data_range
		full_data_range = func_output.full_data_range
		buffer_range = func_output.buffer_range
		buffer_halo = func_output.buffer_halo
	
		ad = data_range_to_cuda_in(data_range, full_data_range, buffer_range, buffer_halo=buffer_halo, stream=stream)

		cuda_args += [ad]
		output_package = func_output
		output_package.set_usage(usage)
		
		if False:
			print "OUTPUT"
			print "OUTPUT_RANGE", data_range
			print "OUTPUT_FULL_RANGE",  full_data_range
			
		FD.append(ad)

	# set work range
	block, grid = range_to_block_grid(work_range)
	cuda_args = [devptr] + cuda_args
	
#	print "GPU", rank, "BEFORE RECV", time.time()
	# Recv data from other process
	for data_package in args:
		u = data_package.get_unique_id()
		data_name = data_package.data_name
		if data_name not in work_range and u != '-1':
			wait_data_arrive(data_package, stream=stream)

#	print "GPU", rank, "Recv Done", time.time()
	# set cuda arguments 
	# Three argument kinds: work-range bounds, inline scalar constants
	# (unique id '-1'), and registered device buffers.
	for data_package in args:
		data_name = data_package.data_name
		data_dtype = data_package.data_dtype
		data_contents_dtype = data_package.data_contents_dtype

		u = data_package.get_unique_id()
		if data_name in work_range:
			cuda_args.append(numpy.int32(work_range[data_name][0]))
			cuda_args.append(numpy.int32(work_range[data_name][1]))
		elif u == '-1':
			data = data_package.data
			dtype = data_package.data_contents_dtype
			if dtype == 'int': 
				cuda_args.append(numpy.int32(data))
			elif dtype == 'float':
				cuda_args.append(numpy.float32(data))
			elif dtype == 'double':
				cuda_args.append(numpy.float64(data))
			else:
				cuda_args.append(numpy.float32(data)) # temp
		else:
			ss = data_package.get_split_shape()
			sp = data_package.get_split_position()
			dp = data_list[u][ss][sp] # it must be fixed to data_package later

			memory_type = dp.memory_type
			if memory_type == 'devptr':

				cuda_args.append(dp.devptr)
				data_range = dp.data_range
				full_data_range = dp.full_data_range
				if False:
					print "DP", dp.info()
					print_devptr(dp.devptr, dp)
				ad = data_range_to_cuda_in(data_range, full_data_range, data_halo=dp.data_halo, stream=stream)

				cuda_args.append(ad)
				FD.append(ad)

	# set modelview matrix
	func = mod.get_function(function_name)
	mmtx,_ = mod.get_global('modelview')
	inv_mmtx, _ = mod.get_global('inv_modelview')
	inv_m = numpy.linalg.inv(fp.mmtx)
	cuda.memcpy_htod_async(mmtx, fp.mmtx.reshape(16), stream=stream)
	cuda.memcpy_htod_async(inv_mmtx, inv_m.reshape(16), stream=stream)

	stream_list[0].synchronize()

	if log_type in ['time','all']:
		start = time.time()
	
#	st = time.time()
	# Launch, then record an event so consumers can wait on completion.
	kernel_finish = cuda.Event()
	func( *cuda_args, block=block, grid=grid, stream=stream_list[0])
	kernel_finish.record(stream=stream_list[0])
#	ctx.synchronize()
#	print "GPU TIME", time.time() - st
#	print "FFFFOOo", func_output.info()
#	print_devptr(cuda_args[0], func_output)
	u, ss, sp = func_output.get_id()
	target = (u,ss,sp)

	Event_dict[target] = kernel_finish
	if target not in valid_list:
		valid_list.append(target)

	#################################################################################
	# finish
	if log_type in ['time','all']:
		t = (time.time() - start)
		ms = 1000*t

		log("rank%d, %s,  \"%s\", u=%d, GPU%d function running,,, time: %.3f ms "%(rank, func_output.data_name, function_name, u, device_number,  ms),'time',log_type)
	#log("rank%d, \"%s\", GPU%d function finish "%(rank, function_name, device_number),'general',log_type)

	###################################################################################
	# decrease retain_count
	for data_package in args:
		u = data_package.get_unique_id()
		if u != '-1':
			mem_release(data_package)

#	print "Release", time.time()
	return devptr, output_package
コード例 #15
0
        sensor = ball.gen_sensors_custom(pos, [[0, 0, 0]])
        val = sensor[0].getB(magnets)
        tex[i, j, 0] = val[0]
        tex[i, j, 1] = val[1]
        tex[i, j, 2] = val[2]
        # print(val)
        x_angle_queries[k] = theta / 180.0
        y_angle_queries[k] = phi / 180.0
        k += 1

print(texture_shape)

# Bind the sampled magnetic-field texture and enable hardware interpolation
# via normalized coordinates + linear filtering.
interpol = mod.get_function("MagneticFieldInterpolateKernel")
texref = mod.get_texref('tex')

drv.bind_array_to_texref(drv.make_multichannel_2d_array(tex, order="C"),
                         texref)
texref.set_flags(drv.TRSF_NORMALIZED_COORDINATES)
texref.set_filter_mode(drv.filter_mode.LINEAR)
# NOTE(review): both axes WRAP here, whereas the class-based variant of this
# code uses WRAP/CLAMP -- confirm which addressing is intended.
texref.set_address_mode(0, drv.address_mode.WRAP)
texref.set_address_mode(1, drv.address_mode.WRAP)

# number_of_queries = 100
# x_angle_queries = np.random.rand(number_of_queries)
# y_angle_queries = np.random.rand(number_of_queries)
# x_angle_queries = x_angles
# y_angle_queries = y_angles
# x_angle_queries = np.float32(np.arange(0,1,1/number_of_queries))
# y_angle_queries = np.float32(np.arange(0,1,1/number_of_queries))
# x_angle_queries = np.zeros(number_of_samples*number_of_samples,dtype=np.float32)
# y_angle_queries = np.zeros(number_of_samples*number_of_samples,dtype=np.float32)
コード例 #16
0
    def __init__(self, img_path):
        """Load a light-field image and its lenslet/optics metadata, compile
        a CUDA view-synthesis kernel for it, bind the image as a texture,
        and prepare the Traits UI plot.

        :param img_path: path to the light-field image; ``<base>-lenslet.txt``
            and ``<base>-optics.txt`` must sit next to it.
        """
        super(LFapplication, self).__init__()

        #
        # Load image data
        #
        base_path = os.path.splitext(img_path)[0]
        lenslet_path = base_path + '-lenslet.txt'
        optics_path = base_path + '-optics.txt'

        with open(lenslet_path, 'r') as f:
            # NOTE(review): eval() of file content is unsafe for untrusted
            # files; a literal parser would be safer.
            tmp = eval(f.readline())
            x_offset, y_offset, right_dx, right_dy, down_dx, down_dy = \
                     np.array(tmp, dtype=np.float32)

        with open(optics_path, 'r') as f:
            for line in f:
                name, val = line.strip().split()
                try:
                    setattr(self, name, np.float32(val))
                # NOTE(review): bare except silently drops any malformed or
                # non-numeric optics entry.
                except:
                    pass

        max_angle = math.atan(self.pitch / 2 / self.flen)

        #
        # Prepare image
        #
        im_pil = Image.open(img_path)
        if im_pil.mode == 'RGB':
            self.NCHANNELS = 3
            w, h = im_pil.size
            # RGB is padded to 4 float32 channels (alpha left at zero) for
            # the multichannel texture upload below.
            im = np.zeros((h, w, 4), dtype=np.float32)
            im[:, :, :3] = np.array(im_pil).astype(np.float32)
            self.LF_dim = (ceil(h / down_dy), ceil(w / right_dx), 3)
        else:
            self.NCHANNELS = 1
            im = np.array(im_pil.getdata()).reshape(im_pil.size[::-1]).astype(
                np.float32)
            h, w = im.shape
            self.LF_dim = (ceil(h / down_dy), ceil(w / right_dx))

        # Sub-lenslet offsets and angle-to-pixel ratios baked into the kernel.
        x_start = x_offset - int(x_offset / right_dx) * right_dx
        y_start = y_offset - int(y_offset / down_dy) * down_dy
        x_ratio = self.flen * right_dx / self.pitch
        y_ratio = self.flen * down_dy / self.pitch

        #
        # Generate the cuda kernel
        #
        mod_LFview = pycuda.compiler.SourceModule(
            _kernel_tpl.render(newiw=self.LF_dim[1],
                               newih=self.LF_dim[0],
                               oldiw=w,
                               oldih=h,
                               x_start=x_start,
                               y_start=y_start,
                               x_ratio=x_ratio,
                               y_ratio=y_ratio,
                               x_step=right_dx,
                               y_step=down_dy,
                               NCHANNELS=self.NCHANNELS))

        self.LFview_func = mod_LFview.get_function("LFview_kernel")
        self.texref = mod_LFview.get_texref("tex")

        #
        # Now generate the cuda texture
        #
        if self.NCHANNELS == 3:
            cuda.bind_array_to_texref(
                cuda.make_multichannel_2d_array(im, order="C"), self.texref)
        else:
            cuda.matrix_to_texref(im, self.texref, order="C")

        #
        # We could set the next if we wanted to address the image
        # in normalized coordinates ( 0 <= coordinate < 1.)
        # texref.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
        #
        self.texref.set_filter_mode(cuda.filter_mode.LINEAR)

        #
        # Prepare the traits
        #
        self.add_trait('X_angle', Range(-max_angle, max_angle, 0.0))
        self.add_trait('Y_angle', Range(-max_angle, max_angle, 0.0))

        self.plotdata = ArrayPlotData(LF_img=self.sampleLF())
        self.LF_img = Plot(self.plotdata)
        if self.NCHANNELS == 3:
            self.LF_img.img_plot("LF_img")
        else:
            self.LF_img.img_plot("LF_img", colormap=gray)
コード例 #17
0
ファイル: pythonBP.py プロジェクト: dwm203/osubp
# Range-bin geometry derived from the frequency step deltaF and FFT size.
R0 = float64(clight) / (2.0 * float64(a2f(data.deltaF)))
Nfft = nint(a2f(data.Nfft))
rmin = -R0 / 2.0
rmax = (Nfft * 0.5 - 1.0) * R0 / float(Nfft)

# Per-pulse platform position (AntX, AntY, AntZ) and range R0, packed as a
# 4-channel array.
platform_info = four_to_4chan(mdouble(data.AntX), mdouble(data.AntY), 
        mdouble(data.AntZ), mdouble(data.R0))

# Load CUDA source file
src = grabsource('PyCUDABackProjectionKernel.cu')
mod = SourceModule(src, 
        include_dirs=['.'])

# Set up CUDA texture for range projections
# (complex -> 2-channel float32, linear filtering for interpolation)
tex_projections = mod.get_texref('tex_projections')
arr_projections = drv.make_multichannel_2d_array(complex_to_2chan(rp),
        order='C')
tex_projections.set_filter_mode(drv.filter_mode.LINEAR)
tex_projections.set_array(arr_projections)

# Run!
backprojection_loop = mod.get_function('backprojection_loop')
backprojection_loop(
        drv.Out(im), 
        nint(a2f(data.Np)),
        nint(Nimg_height),
        nfloat(delta_pixel_x), 
        nfloat(delta_pixel_y),
        nint(a2f(data.Nfft)),
        drv.In(pi_4_f0__clight),
        nfloat(numpy.min(data.x_vec)),
        nfloat(numpy.min(data.y_vec)),
コード例 #18
0
ファイル: GPU_unit.py プロジェクト: hvcl-old/Vivaldi
def run_function(function_package):
    # Launch the CUDA kernel described by a Vivaldi function package:
    # rebind transfer-function textures if requested, locate or allocate the
    # output device buffer, marshal every argument onto the GPU, run the
    # kernel on stream 0 and record an event for later synchronization.
    # Returns (device pointer of the output, its data package).
    # NOTE(review): Python 2 code (print statements); relies on many
    # module-level globals (data_list, stream_list, tf, tf1, mod, mmtx, ...).
    # global variables
    global FD
    global tb_cnt

    # initialize variables
    fp = function_package
    func_output = fp.output
    u = func_output.unique_id
    ss = func_output.split_shape
    sp = func_output.split_position
    data_halo = func_output.data_halo
    function_name = fp.function_name

    args = fp.function_args
    work_range = fp.work_range

    tb_cnt = 0

    stream = stream_list[0]

    #	cuda.memcpy_htod_async(bandwidth, numpy.float32(fp.TF_bandwidth), stream=stream)
    #	cuda.memcpy_htod_async(front_back, numpy.int32(fp.front_back), stream=stream)
    # Rebind the 256-entry RGBA transfer-function textures when flagged.
    if fp.update_tf == 1:
        tf.set_filter_mode(cuda.filter_mode.LINEAR)
        cuda.bind_array_to_texref(
            cuda.make_multichannel_2d_array(fp.trans_tex.reshape(1, 256, 4),
                                            order='C'), tf)
        cuda.memcpy_htod_async(bandwidth,
                               numpy.float32(fp.TF_bandwidth),
                               stream=stream)
    if fp.update_tf2 == 1:
        tf1.set_filter_mode(cuda.filter_mode.LINEAR)
        cuda.bind_array_to_texref(
            cuda.make_multichannel_2d_array(fp.trans_tex.reshape(1, 256, 4),
                                            order='C'), tf1)
        cuda.memcpy_htod_async(bandwidth,
                               numpy.float32(fp.TF_bandwidth),
                               stream=stream)

    cuda_args = []

    # Does a device buffer for the output already exist in the registry?
    data_exist = True
    if u not in data_list: data_exist = False
    elif ss not in data_list[u]: data_exist = False
    elif sp not in data_list[u][ss]: data_exist = False

    if data_exist:
        # initialize variables
        data_package = data_list[u][ss][sp]
        dp = data_package

        if dp.devptr == None:
            wait_data_arrive(data_package, stream=stream)
        ###########################
        devptr = dp.devptr
        output_range = dp.data_range
        full_output_range = dp.full_data_range
        ad = data_range_to_cuda_in(output_range,
                                   full_output_range,
                                   stream=stream)
        cuda_args += [ad]
        output_package = dp

        FD.append(ad)
    else:
        # No registered buffer: allocate fresh device memory for the output.
        bytes = func_output.buffer_bytes
        devptr, usage = malloc_with_swap_out(bytes)
        log("created output data bytes %s" % (str(func_output.buffer_bytes)),
            'detail', log_type)
        data_range = func_output.data_range
        full_data_range = func_output.full_data_range
        buffer_range = func_output.buffer_range
        buffer_halo = func_output.buffer_halo
        ad = data_range_to_cuda_in(data_range,
                                   full_data_range,
                                   buffer_range,
                                   buffer_halo=buffer_halo,
                                   stream=stream)

        cuda_args += [ad]
        output_package = func_output
        output_package.buffer_bytes = usage

        if False:
            print "OUTPUT"
            print "OUTPUT_RANGE", data_range
            print "OUTPUT_FULL_RANGE", full_data_range

        FD.append(ad)

    # set work range
    block, grid = range_to_block_grid(work_range)
    # set block and grid
    #	log("work_range "+str(work_range),'detail',log_type)
    #	log("block %s grid %s"%(str(block),str(grid)),'detail',log_type)

    cuda_args = [devptr] + cuda_args

    #	print "GPU", rank, "BEFORE RECV", time.time()
    # Recv data from other process
    for data_package in args:
        u = data_package.unique_id
        data_name = data_package.data_name
        if data_name not in work_range and u != -1:
            wait_data_arrive(data_package, stream=stream)

#	print "GPU", rank, "Recv Done", time.time()
# set cuda arguments
    # Three argument kinds: work-range bounds, inline scalar constants
    # (unique id -1), and registered device buffers.
    for data_package in args:
        data_name = data_package.data_name
        data_dtype = data_package.data_dtype
        data_contents_dtype = data_package.data_contents_dtype

        u = data_package.unique_id

        if data_name in work_range:
            cuda_args.append(numpy.int32(work_range[data_name][0]))
            cuda_args.append(numpy.int32(work_range[data_name][1]))

        elif u == -1:
            data = data_package.data
            dtype = type(data)

            if dtype in [int]: data = numpy.float32(data)
            if dtype in [float]: data = numpy.float32(data)

            cuda_args.append(numpy.float32(data))  # temp
        else:
            ss = data_package.split_shape
            sp = data_package.split_position
            dp = data_list[u][ss][
                sp]  # it must be fixed to data_package latter

            memory_type = dp.memory_type
            if memory_type == 'devptr':

                cuda_args.append(dp.devptr)
                data_range = dp.data_range
                full_data_range = dp.full_data_range
                buffer_range = dp.buffer_range

                if False:
                    print "DATA_NAME", data_name
                    print "DATA_RANGE", data_range
                    print "FULL_DATA_RANGE", full_data_range
                    print "BUFFER_RANGE", buffer_range
                    print "DATA_HALO", dp.data_halo
                    print "BUFFER_HALO", dp.buffer_halo
                    print dp

                    print_devptr(dp.devptr, dp)
                ad = data_range_to_cuda_in(data_range,
                                           full_data_range,
                                           buffer_range,
                                           data_halo=dp.data_halo,
                                           buffer_halo=dp.buffer_halo,
                                           stream=stream)

                cuda_args.append(ad)
                FD.append(ad)

#	log("function cuda name %s"%(function_name),'detail',log_type)

#	if function_name in func_dict:
#		func = func_dict[function_name]
#	else:

# set modelview matrix
    cuda.memcpy_htod_async(mmtx, fp.mmtx.reshape(16), stream=stream)
    cuda.memcpy_htod_async(inv_mmtx, fp.inv_mmtx.reshape(16), stream=stream)

    try:
        if Debug:
            print "Function name: ", function_name
        func = mod.get_function(function_name.strip())
    except:
        print "Function not found ERROR"
        print "Function name: " + function_name
        assert (False)

    stream_list[0].synchronize()

    if log_type in ['time', 'all']:
        start = time.time()

    # Launch, then record an event so consumers can wait on completion.
    kernel_finish = cuda.Event()
    func(*cuda_args, block=block, grid=grid, stream=stream_list[0])
    kernel_finish.record(stream=stream_list[0])
    """
	try:
		a = numpy.empty((30,30),dtype=numpy.int32)
		cuda.memcpy_dtoh(a, cuda_args[0])
		print a[10:-10,10:-10]
	except:
		print "Fail", function_name
		print "Fp.output", fp.output
		pass
	"""

    u = func_output.unique_id
    ss = func_output.split_shape
    sp = func_output.split_position

    target = (u, ss, sp)

    Event_dict[target] = kernel_finish
    if target not in valid_list:
        valid_list.append(target)

    #################################################################################
    # finish
    if log_type in ['time', 'all']:
        t = (time.time() - start)
        ms = 1000 * t

        log(
            "rank%d, %s,  \"%s\", u=%d, GPU%d function running,,, time: %.3f ms "
            %
            (rank, func_output.data_name, function_name, u, device_number, ms),
            'time', log_type)
    #log("rank%d, \"%s\", GPU%d function finish "%(rank, function_name, device_number),'general',log_type)

    ###################################################################################
    # decrease retain_count
    for data_package in args:
        u = data_package.unique_id
        if u != -1:
            mem_release(data_package)

#	print "Release", time.time()
    return devptr, output_package
コード例 #19
0
# tex = np.float32(np.random.rand(number_of_samples, number_of_samples,4))
# tex = np.float32(np.arange(number_of_samples*number_of_samples*4).reshape(number_of_samples,number_of_samples,4))
# Build the field texture: channels 0-2 hold the unit-sphere direction for
# spherical angles (x_angles[i]*pi, y_angles[j]*pi); channel 3 stays zero
# (padding for the 4-channel texture).
tex = np.zeros((number_of_samples,number_of_samples,4),dtype=np.float32)
for i in range(number_of_samples):
    for j in range(number_of_samples):
        tex[i,j,0] = sin(x_angles[i]*pi)*cos(y_angles[j]*pi)
        tex[i,j,1] = sin(x_angles[i]*pi)*sin(y_angles[j]*pi)
        tex[i,j,2] = cos(x_angles[i]*pi)

# print(tex)

# Bind the samples and enable hardware interpolation via normalized
# coordinates + linear filtering.  No address modes are set here, unlike
# the other variants of this script -- confirm the default is intended.
interpol = mod.get_function("MagneticFieldInterpolateKernel")
texref = mod.get_texref('tex')

drv.bind_array_to_texref(
                drv.make_multichannel_2d_array(tex, order="C"),
                texref
                )
texref.set_flags(drv.TRSF_NORMALIZED_COORDINATES)
texref.set_filter_mode(drv.filter_mode.LINEAR)

# number_of_queries = 100
# x_angle_queries = np.random.rand(number_of_queries)
# y_angle_queries = np.random.rand(number_of_queries)
# x_angle_queries = x_angles
# y_angle_queries = y_angles+0.1
# x_angle_queries = np.float32(np.arange(0,1,1/number_of_queries))
# y_angle_queries = np.float32(np.arange(0,1,1/number_of_queries))
# Query buffers: one (x, y) normalized-angle pair per texture sample.
x_angle_queries = np.zeros(number_of_samples*number_of_samples,dtype=np.float32)
y_angle_queries = np.zeros(number_of_samples*number_of_samples,dtype=np.float32)
k = 0
コード例 #20
0
def run_function(function_package):
	# global variables
	global FD
	global tb_cnt
	
	# initialize variables
	fp = function_package
	func_output = fp.output
	u = func_output.unique_id
	ss = func_output.split_shape
	sp = func_output.split_position
	data_halo = func_output.data_halo
	function_name = fp.function_name

	args = fp.function_args
	work_range = fp.work_range

	tb_cnt = 0
	
	stream = stream_list[0]

#	cuda.memcpy_htod_async(bandwidth, numpy.float32(fp.TF_bandwidth), stream=stream)
#	cuda.memcpy_htod_async(front_back, numpy.int32(fp.front_back), stream=stream)
	if fp.update_tf == 1:
		tf.set_filter_mode(cuda.filter_mode.LINEAR)
		cuda.bind_array_to_texref(cuda.make_multichannel_2d_array(fp.trans_tex.reshape(1,256,4), order='C'), tf)
		cuda.memcpy_htod_async(bandwidth, numpy.float32(fp.TF_bandwidth), stream=stream)
	if fp.update_tf2 == 1:
		tf1.set_filter_mode(cuda.filter_mode.LINEAR)
		cuda.bind_array_to_texref(cuda.make_multichannel_2d_array(fp.trans_tex.reshape(1,256,4), order='C'), tf1)
		cuda.memcpy_htod_async(bandwidth, numpy.float32(fp.TF_bandwidth), stream=stream)
	
	cuda_args = []

	data_exist = True
	if u not in data_list: data_exist = False
	elif ss not in data_list[u]: data_exist = False
	elif sp not in data_list[u][ss]: data_exist = False

	if data_exist:
		# initialize variables
		data_package = data_list[u][ss][sp]
		dp = data_package

		if dp.devptr == None:
			wait_data_arrive(data_package, stream=stream)
		###########################
		devptr = dp.devptr
		output_range = dp.data_range
		full_output_range = dp.full_data_range
		ad = data_range_to_cuda_in(output_range, full_output_range, stream=stream)
		cuda_args += [ad]
		output_package = dp
		
		FD.append(ad)
	else:
		bytes = func_output.buffer_bytes
		devptr, usage = malloc_with_swap_out(bytes)
		log("created output data bytes %s"%(str(func_output.buffer_bytes)),'detail',log_type)
		data_range = func_output.data_range
		full_data_range = func_output.full_data_range
		buffer_range = func_output.buffer_range
		buffer_halo = func_output.buffer_halo
		ad = data_range_to_cuda_in(data_range, full_data_range, buffer_range, buffer_halo=buffer_halo, stream=stream)

		cuda_args += [ad]
		output_package = func_output
		output_package.buffer_bytes = usage
		
		if False:
			print "OUTPUT"
			print "OUTPUT_RANGE", data_range
			print "OUTPUT_FULL_RANGE",  full_data_range
			
		FD.append(ad)

	# set work range
	block, grid = range_to_block_grid(work_range)
	# set block and grid
#	log("work_range "+str(work_range),'detail',log_type)
#	log("block %s grid %s"%(str(block),str(grid)),'detail',log_type)

	cuda_args = [devptr] + cuda_args

#	print "GPU", rank, "BEFORE RECV", time.time()
	# Recv data from other process
	for data_package in args:
		u = data_package.unique_id
		data_name = data_package.data_name
		if data_name not in work_range and u != -1:
			wait_data_arrive(data_package, stream=stream)

#	print "GPU", rank, "Recv Done", time.time()
	# set cuda arguments 
	for data_package in args:
		data_name = data_package.data_name
		data_dtype = data_package.data_dtype
		data_contents_dtype = data_package.data_contents_dtype

		u = data_package.unique_id

		if data_name in work_range:
			cuda_args.append( numpy.int32(work_range[data_name][0]))
			cuda_args.append( numpy.int32(work_range[data_name][1]))

		elif u == -1:
			data = data_package.data
			dtype = type(data)

			if dtype in [int]: data = numpy.float32(data)
			if dtype in [float]: data = numpy.float32(data)

			cuda_args.append(numpy.float32(data)) # temp
		else:
			ss = data_package.split_shape
			sp = data_package.split_position
			dp = data_list[u][ss][sp] # it must be fixed to data_package latter

			memory_type = dp.memory_type
			if memory_type == 'devptr':

				cuda_args.append(dp.devptr)
				data_range = dp.data_range
				full_data_range = dp.full_data_range
				buffer_range = dp.buffer_range
				
				if False:
					print "DATA_NAME", data_name
					print "DATA_RANGE", data_range
					print "FULL_DATA_RANGE", full_data_range
					print "BUFFER_RANGE", buffer_range
					print "DATA_HALO", dp.data_halo
					print "BUFFER_HALO", dp.buffer_halo
					print dp
	
					print_devptr(dp.devptr, dp)
				ad = data_range_to_cuda_in(data_range, full_data_range, buffer_range, data_halo=dp.data_halo, buffer_halo=dp.buffer_halo, stream=stream)

				cuda_args.append(ad)
				FD.append(ad)
		
#	log("function cuda name %s"%(function_name),'detail',log_type)

#	if function_name in func_dict:
#		func = func_dict[function_name]
#	else:


	# set modelview matrix
	cuda.memcpy_htod_async(mmtx, fp.mmtx.reshape(16), stream=stream)
	cuda.memcpy_htod_async(inv_mmtx, fp.inv_mmtx.reshape(16), stream=stream)

	try:
		if Debug:
			print "Function name: ", function_name
		func = mod.get_function(function_name.strip())
	except:
		print "Function not found ERROR"
		print "Function name: " + function_name
		assert(False)
	
	stream_list[0].synchronize()

	if log_type in ['time','all']:
		start = time.time()

	kernel_finish = cuda.Event()
	func( *cuda_args, block=block, grid=grid, stream=stream_list[0])
	kernel_finish.record(stream=stream_list[0])

	"""
	try:
		a = numpy.empty((30,30),dtype=numpy.int32)
		cuda.memcpy_dtoh(a, cuda_args[0])
		print a[10:-10,10:-10]
	except:
		print "Fail", function_name
		print "Fp.output", fp.output
		pass
	"""
	
	u = func_output.unique_id
	ss = func_output.split_shape
	sp = func_output.split_position

	target = (u,ss,sp)

	Event_dict[target] = kernel_finish
	if target not in valid_list:
		valid_list.append(target)

	#################################################################################
	# finish
	if log_type in ['time','all']:
		t = (time.time() - start)
		ms = 1000*t

		log("rank%d, %s,  \"%s\", u=%d, GPU%d function running,,, time: %.3f ms "%(rank, func_output.data_name, function_name, u, device_number,  ms),'time',log_type)
	#log("rank%d, \"%s\", GPU%d function finish "%(rank, function_name, device_number),'general',log_type)

	###################################################################################
	# decrease retain_count
	for data_package in args:
		u = data_package.unique_id
		if u != -1:
			mem_release(data_package)

#	print "Release", time.time()
	return devptr, output_package