def test_multi_accumulate(H, N, F, T, n_cycles):
    """Check that XcorrLite accumulates correctly over ``n_cycles`` calls.

    The same random int8 input of shape ``(H, F, N, T, 2)`` is passed to
    ``_bf.XcorrLite`` once with ``reset=1`` and then ``n_cycles - 1`` more
    times with ``reset=0``.  After every call the GPU output is copied back
    and compared with a running sum of ``compute_xcorr_cpu(d)``.

    Raises:
        AssertionError: if GPU and CPU results differ beyond
            ``np.allclose`` tolerance on any cycle.
    """
    # NOTE: single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.

    # Create complex data
    xcorr = np.zeros((H, F, N, N * 2), dtype='float32')
    xcorr_bf = bf.ndarray(xcorr, dtype='f32', space='cuda')
    d = np.random.randint(64, size=(H, F, N, T, 2), dtype='int8')

    print("Computing Xcorr on CPU...")
    xcorr_cpu = compute_xcorr_cpu(d)

    print("Running xcorr_lite...")
    d_gpu = bf.ndarray(d, dtype='i8', space='cuda')
    # First call passes reset=1.
    _bf.XcorrLite(d_gpu.as_BFarray(), xcorr_bf.as_BFarray(), np.int32(1))
    xcorr_gpu = np.array(xcorr_bf.copy('system'))

    print("Testing first integration cycle...")
    assert np.allclose(xcorr_gpu.squeeze(), xcorr_cpu.squeeze())

    print("Running loop ...")
    # Every later call passes reset=0 so the output buffer keeps accumulating.
    no_reset = np.int32(0)
    for ii in range(1, n_cycles):
        print("Run xcorr_lite...")
        _bf.XcorrLite(d_gpu.as_BFarray(), xcorr_bf.as_BFarray(), no_reset)

        print("Copy result from GPU...")
        xcorr_gpu = np.array(xcorr_bf.copy('system'))
        # The CPU reference accumulates the same contribution each cycle.
        xcorr_cpu += compute_xcorr_cpu(d)

        print("Testing integration cycle %i /%i..." % (ii + 1, n_cycles))
        assert np.allclose(xcorr_gpu.squeeze(), xcorr_cpu.squeeze())
def _create_data(self, data_size, ntime, nchan, npol, dtype=numpy.complex64):
    """Build random complex test data as a ``bifrost.ndarray``.

    Real and imaginary parts are drawn independently from a unit normal
    distribution (real part first) into a complex64 array of shape
    ``(ntime, nchan, npol, data_size)``.

    Args:
        data_size, ntime, nchan, npol: Axis lengths of the generated array.
        dtype: Requested output dtype.  Anything other than
            ``numpy.complex64`` / ``'cf32'`` is produced by passing the
            complex64 data through ``quantize``.

    Returns:
        A ``bifrost.ndarray`` holding the (possibly quantized) data.
    """
    ishape = (ntime, nchan, npol, data_size)

    # Fill the complex array component-wise; the float64 draws are narrowed
    # to float32 on assignment.
    samples = numpy.empty(ishape, dtype=numpy.complex64)
    samples.real = numpy.random.normal(0, 1.0, size=ishape)
    samples.imag = numpy.random.normal(0, 1.0, size=ishape)
    data = bifrost.ndarray(samples)

    if dtype not in (numpy.complex64, 'cf32'):
        data_quantized = bifrost.ndarray(shape=data.shape, dtype=dtype)
        quantize(data, data_quantized)
        data = data_quantized
    return data
def test_xcorr(H, N, F, T):
    """Compare ``_bf.XcorrLite`` with ``compute_xcorr_cpu`` on random input.

    Runs eight iterations.  Each one draws fresh int8 data of shape
    ``(H, F, N, T, 2)``, calls the kernel with ``reset=1`` into the same
    output buffer, and checks the result against the CPU reference.

    Raises:
        AssertionError: if GPU and CPU results differ beyond
            ``np.allclose`` tolerance.
    """
    # NOTE: the print calls are written so that they emit the same text under
    # Python 2 (statement with a parenthesised expression) and Python 3.
    reset = 1

    # Create complex data
    xcorr = np.zeros((H, F, N, N * 2), dtype='float32')
    xcorr_bf = bf.ndarray(xcorr, dtype='f32', space='cuda')

    for ii in range(8):
        print("---- Iteration %i ----" % ii)

        print("Generate test vectors...")
        d = np.random.randint(64, size=(H, F, N, T, 2), dtype='int8')
        # Two spaces after the colon reproduce the original
        # `print "Data shape: ", d.shape` output exactly.
        print("Data shape:  %s" % (d.shape,))
        print("Xcorr shape: %s" % (xcorr.shape,))

        print("Computing Xcorr on CPU...")
        xcorr_cpu = compute_xcorr_cpu(d)

        print("Copying data to GPU...")
        d_gpu = bf.ndarray(d, dtype='i8', space='cuda')

        print("Run xcorr_lite...")
        _bf.XcorrLite(d_gpu.as_BFarray(), xcorr_bf.as_BFarray(),
                      np.int32(reset))

        print("Copy result from GPU...")
        xcorr_gpu = np.array(xcorr_bf.copy('system'))

        print("Comparing CPU to GPU...")
        assert np.allclose(xcorr_gpu.squeeze(), xcorr_cpu.squeeze())
def test_2d_decimate_active(self):
    """Run a 2-D FIR with ``init(coeffs, 2)`` twice and check the second pass.

    The reference is built per column by calling ``lfilter`` twice with the
    filter state carried over, then keeping every second output sample.
    """
    reference_input = np.random.normal(size=self.shape2D)
    reference_input = reference_input.astype(np.float32).view(np.complex64)

    idata = bf.ndarray(reference_input, space='cuda')
    n_rows, n_cols = idata.shape[0], idata.shape[1]
    # Output holds half as many rows as the input.
    odata = bf.empty((n_rows // 2, n_cols), dtype=idata.dtype, space='cuda')

    # Replicate the 1-D taps once per column.
    taps = self.coeffs * 1.0
    taps = taps.reshape(taps.shape + (1, ))
    taps = np.repeat(taps, n_cols, axis=1)
    taps.shape = (taps.shape[0], n_cols)
    taps = bf.ndarray(taps, space='cuda')

    fir = Fir()
    fir.init(taps, 2)
    # Two executions: the second one starts from the state left by the first.
    for _ in range(2):
        fir.execute(idata, odata)
    result = odata.copy('system')

    for col in range(reference_input.shape[1]):
        column = reference_input[:, col]
        state = lfiltic(self.coeffs, 1.0, 0.0)
        _, state = lfilter(self.coeffs, 1.0, column, zi=state)
        expected, state = lfilter(self.coeffs, 1.0, column, zi=state)
        compare(result[:, col], expected[0::2])
def test_array(testvec):
    """Exercise ``_bf.XcorrLiteAccumulate`` with and without reset.

    Starting from a zeroed float32 output, the kernel is called 101 times
    with reset ``0``; after the k-th call the output must equal ``k`` times
    the input.  A final call with reset ``1`` must bring the output back to
    one copy of the input.
    """
    # Create test vectors
    src = bf.ndarray(np.array(testvec), dtype='i32', space='cuda')
    acc = bf.ndarray(np.zeros_like(testvec, dtype='float32'),
                     dtype='f32', space='cuda')

    # First accumulation
    _bf.XcorrLiteAccumulate(src.as_BFarray(), acc.as_BFarray(), 0)

    # Copy back from GPU
    acc_host = np.array(acc.copy('system'))
    src_host = np.array(src.copy('system'))
    assert np.allclose(src_host.astype('float32'), acc_host)

    # Keep accumulating: after this loop body runs, n_calls kernels have run.
    for n_calls in range(2, 102):
        _bf.XcorrLiteAccumulate(src.as_BFarray(), acc.as_BFarray(), 0)
        acc_host = np.array(acc.copy('system'))
        assert np.allclose(src_host.astype('float32') * n_calls, acc_host)

    # Test reset
    _bf.XcorrLiteAccumulate(src.as_BFarray(), acc.as_BFarray(), np.int32(1))
    acc_host = np.array(acc.copy('system'))
    assert np.allclose(src_host.astype('float32'), acc_host)
def test_2d_active(self):
    """Run a 2-D FIR with ``init(coeffs, 1)`` twice in ``cuda_managed`` space.

    The second execution is compared, column by column, with a second
    ``lfilter`` pass whose initial state comes from the first pass.
    """
    reference_input = np.random.normal(size=self.shape2D)
    reference_input = reference_input.astype(np.float32).view(np.complex64)

    idata = bf.ndarray(reference_input, space='cuda_managed')
    odata = bf.empty_like(idata)
    n_cols = idata.shape[1]

    # Replicate the 1-D taps once per column.
    taps = self.coeffs * 1.0
    taps = taps.reshape(taps.shape + (1, ))
    taps = np.repeat(taps, n_cols, axis=1)
    taps.shape = (taps.shape[0], n_cols)
    taps = bf.ndarray(taps, space='cuda_managed')

    fir = Fir()
    fir.init(taps, 1)
    for _ in range(2):
        fir.execute(idata, odata)
    # odata is read directly below, so wait for the stream first.
    stream_synchronize()

    for col in range(reference_input.shape[1]):
        column = reference_input[:, col]
        state = lfiltic(self.coeffs, 1.0, 0.0)
        _, state = lfilter(self.coeffs, 1.0, column, zi=state)
        expected, state = lfilter(self.coeffs, 1.0, column, zi=state)
        compare(odata[:, col], expected)
def run_test_r2c_dtype(self, shape, axes, dtype=np.float32, scale=1., misalign=0):
    """Check a real-to-complex FFT in ``cuda_managed`` space for ``dtype``.

    Normally distributed data is scaled by ``scale`` and cast to ``dtype``.
    The last axis is padded by ``misalign`` elements and then sliced so the
    FFT input starts ``misalign`` elements into each row.  The result is
    compared with ``gold_rfftn`` applied to the un-scaled float32 data.
    """
    samples = np.random.normal(size=shape).astype(np.float32)
    samples = (samples * scale).astype(dtype)

    # Force misaligned data
    padded = np.resize(samples, shape[:-1] + (shape[-1] + misalign, ))
    idata = bf.ndarray(padded, space='cuda_managed')[..., misalign:]
    known_data = padded[..., misalign:]

    # The transformed (last listed) axis keeps n // 2 + 1 outputs.
    last_axis = axes[-1]
    oshape = list(shape)
    oshape[last_axis] = shape[last_axis] // 2 + 1
    odata = bf.ndarray(shape=oshape, dtype='cf32', space='cuda_managed')

    fft = Fft()
    fft.init(idata, odata, axes=axes)
    fft.execute(idata, odata)
    stream_synchronize()

    expected = gold_rfftn(known_data.astype(np.float32) / scale, axes=axes)
    compare(odata, expected)
def test_3d_initial(self):
    """Run a 3-D FIR once and compare every ``[:, i, j]`` series to ``lfilter``.

    The reference starts from the state returned by
    ``lfiltic(self.coeffs, 1.0, 0.0)``.
    """
    reference_input = np.random.normal(size=self.shape3D)
    reference_input = reference_input.astype(np.float32).view(np.complex64)

    idata = bf.ndarray(reference_input, space='cuda')
    odata = bf.empty_like(idata)
    dim1, dim2 = idata.shape[1], idata.shape[2]

    # Replicate the 1-D taps for every (i, j) pair of trailing indices.
    taps = self.coeffs * 1.0
    taps = taps.reshape(taps.shape + (1, ))
    taps = np.repeat(taps, dim1 * dim2, axis=1)
    taps.shape = (taps.shape[0], dim1, dim2)
    taps = bf.ndarray(taps, space='cuda')

    fir = Fir()
    fir.init(taps, 1)
    fir.execute(idata, odata)
    result = odata.copy('system')

    for i in range(reference_input.shape[1]):
        for j in range(reference_input.shape[2]):
            state = lfiltic(self.coeffs, 1.0, 0.0)
            expected, state = lfilter(self.coeffs, 1.0,
                                      reference_input[:, i, j], zi=state)
            compare(result[:, i, j], expected)
def run_test_r2c_dtype(self, shape, axes, dtype=np.float32, scale=1., misalign=0):
    """Check a real-to-complex FFT on ``cuda`` data stored as ``dtype``.

    Uniform samples mapped to [-1, 1) are scaled by ``scale`` and cast to
    ``dtype``.  The last axis is padded by ``misalign`` elements and then
    sliced so the FFT input starts ``misalign`` elements into each row.
    The output is checked against ``gold_rfftn`` with ``RTOL`` / ``ATOL``.
    """
    samples = np.random.uniform(size=shape).astype(np.float32) * 2 - 1
    samples = (samples * scale).astype(dtype)

    # Force misaligned data
    padded = np.resize(samples, shape[:-1] + (shape[-1] + misalign, ))
    idata = bf.ndarray(padded, space='cuda')[..., misalign:]
    known_data = padded[..., misalign:]

    # The transformed (last listed) axis keeps n // 2 + 1 outputs.
    last_axis = axes[-1]
    oshape = list(shape)
    oshape[last_axis] = shape[last_axis] // 2 + 1
    odata = bf.ndarray(shape=oshape, dtype='cf32', space='cuda')

    fft = Fft()
    fft.init(idata, odata, axes=axes)
    fft.execute(idata, odata)

    expected = gold_rfftn(known_data.astype(np.float32) / scale, axes=axes)
    np.testing.assert_allclose(odata.copy('system'), expected, RTOL, ATOL)
def run_unpack_to_cf32_test(self, iarray):
    """Unpack ``iarray`` into a ``cf32`` array and compare with fixed values."""
    expected = bf.ndarray(
        [[0 + 1j, 2 + 3j],
         [4 + 5j, 6 + 7j],
         [-8 - 7j, -6 - 5j]],
        dtype='cf32')
    unpacked = bf.ndarray(shape=iarray.shape, dtype='cf32')
    bf.unpack.unpack(iarray, unpacked)
    np.testing.assert_equal(unpacked, expected)
def run_unpack_to_ci8_test(self, iarray):
    """Unpack ``iarray`` into a ``ci8`` array and compare with fixed values."""
    expected = bf.ndarray(
        [[(0, 1), (2, 3)],
         [(4, 5), (6, 7)],
         [(-8, -7), (-6, -5)]],
        dtype='ci8')
    unpacked = bf.ndarray(shape=iarray.shape, dtype='ci8')
    bf.unpack.unpack(iarray, unpacked)
    np.testing.assert_equal(unpacked, expected)
def run_unpack_to_ci8_test(self, iarray):
    """Unpack a ``cuda_managed`` copy of ``iarray`` to ``ci8`` and verify it.

    The output array also lives in ``cuda_managed`` space and is compared
    with the fixed expected values after ``stream_synchronize()``.
    """
    expected = bf.ndarray(
        [[(0, 1), (2, 3)],
         [(4, 5), (6, 7)],
         [(-8, -7), (-6, -5)]],
        dtype='ci8')
    unpacked = bf.ndarray(shape=iarray.shape, dtype='ci8',
                          space='cuda_managed')
    managed_input = iarray.copy(space='cuda_managed')
    bf.unpack(managed_input, unpacked)
    stream_synchronize()
    np.testing.assert_equal(unpacked, expected)
def run_test_r2c(self, shape, axes):
    """Check a real-to-complex FFT of uniform float32 data against ``gold_rfftn``."""
    samples = np.random.uniform(size=shape).astype(np.float32)
    idata = bf.ndarray(samples, space='cuda')

    # The transformed (last listed) axis keeps n // 2 + 1 outputs.
    last_axis = axes[-1]
    oshape = list(shape)
    oshape[last_axis] = shape[last_axis] // 2 + 1
    odata = bf.ndarray(shape=oshape, dtype='cf32', space='cuda')

    fft = Fft()
    fft.init(idata, odata, axes=axes)
    fft.execute(idata, odata)

    expected = gold_rfftn(samples, axes=axes)
    np.testing.assert_allclose(odata.copy('system'), expected, RTOL, ATOL)
def run_quantize_from_cf32_test(self, out_dtype):
    """Quantize a fixed ``cf32`` array to ``out_dtype`` and check the result."""
    source = bf.ndarray(
        [[0.4 + 0.5j, 1.4 + 1.5j],
         [2.4 + 2.5j, 3.4 + 3.5j],
         [4.4 + 4.5j, 5.4 + 5.5j]],
        dtype='cf32')
    expected = bf.ndarray(
        [[(0, 0), (1, 2)],
         [(2, 2), (3, 4)],
         [(4, 4), (5, 6)]],
        dtype=out_dtype)
    quantized = bf.ndarray(shape=source.shape, dtype=out_dtype)
    bf.quantize(source, quantized)
    np.testing.assert_equal(quantized, expected)
def run_quantize_from_f32_test(self, out_dtype):
    """Quantize an integer-valued ``f32`` ramp to ``out_dtype`` on the GPU.

    When ``out_dtype`` contains ``'i'`` the ramp is ``-128 .. 126``;
    otherwise it is ``0 .. 254``.  The quantized output must equal the same
    ramp stored directly as ``out_dtype``.
    """
    signed = 'i' in out_dtype

    def ramp():
        # A fresh array on every call, as the input and the expected output
        # are built from separate arrays.
        values = np.arange(255)
        return values - 128 if signed else values

    source = bf.ndarray(ramp(), dtype='f32')
    expected = bf.ndarray(ramp(), dtype=out_dtype)

    quantized = bf.ndarray(shape=source.shape, dtype=out_dtype, space='cuda')
    bf.quantize.quantize(source.copy(space='cuda'), quantized)
    np.testing.assert_equal(quantized.copy(space='system'), expected)
def run_test_c2c_impl(self, shape, axes, inverse=False, fftshift=False):
    """Check a complex-to-complex FFT against the numpy-based reference.

    Args:
        shape: Shape of the complex input array.
        axes: Axes to transform.
        inverse: Passed to ``fft.execute``; selects the inverse transform
            and the ``gold_ifftn`` reference.
        fftshift: Passed to ``fft.init`` as ``apply_fftshift``; the
            reference applies the matching numpy shift.

    Raises:
        AssertionError: if the result differs from the reference beyond
            ``RTOL`` / ``ATOL``.
    """
    shape = list(shape)
    shape[-1] *= 2  # For complex
    known_data = np.random.uniform(size=shape).astype(np.float32).view(
        np.complex64)
    idata = bf.ndarray(known_data, space='cuda')
    odata = bf.empty_like(idata)

    fft = Fft()
    fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
    fft.execute(idata, odata, inverse)

    if inverse:
        if fftshift:
            known_data = np.fft.ifftshift(known_data, axes=axes)
        # Note: Numpy applies normalization while CUFFT does not
        norm = reduce(lambda a, b: a * b,
                      [known_data.shape[d] for d in axes])
        known_result = gold_ifftn(known_data, axes=axes) * norm
    else:
        known_result = gold_fftn(known_data, axes=axes)
        if fftshift:
            known_result = np.fft.fftshift(known_result, axes=axes)

    np.testing.assert_allclose(odata.copy('system'), known_result,
                               RTOL, ATOL)
def _create_locs(self, data_size, ntime, nchan, npol, loc_min, loc_max):
    """Return random int32 locations as a ``bifrost.ndarray``.

    The array has shape ``(3, ntime, nchan, npol, data_size)``.  Values are
    uniform draws from ``[loc_min, loc_max)`` converted to int32.
    """
    per_axis_shape = (ntime, nchan, npol, data_size)
    drawn = numpy.random.uniform(loc_min, loc_max,
                                 size=(3, ) + per_axis_shape)
    as_int = drawn.astype(numpy.int32)
    return bifrost.ndarray(numpy.copy(as_int, order='C'))
def run_test_c2r(self, shape, axes):
    """Check a complex-to-real FFT producing ``shape`` against ``gold_irfftn``.

    The reference is multiplied by the product of the transformed axis
    lengths before the comparison.
    """
    last_axis = axes[-1]
    complex_shape = list(shape)
    complex_shape[last_axis] = shape[last_axis] // 2 + 1
    complex_shape[-1] *= 2  # For complex

    spectrum = np.random.uniform(size=complex_shape).astype(np.float32)
    spectrum = spectrum.view(np.complex64)
    idata = bf.ndarray(spectrum, space='cuda')
    odata = bf.ndarray(shape=shape, dtype='f32', space='cuda')

    fft = Fft()
    fft.init(idata, odata, axes=axes)
    fft.execute(idata, odata)

    # Note: Numpy applies normalization while CUFFT does not
    axis_lengths = [shape[ax] for ax in axes]
    norm = reduce(lambda acc, length: acc * length, axis_lengths)
    expected = gold_irfftn(spectrum, axes=axes) * norm
    np.testing.assert_allclose(odata.copy('system'), expected, RTOL, ATOL)
def run_simple_test(self, axes, dtype):
    """Transpose a fixed 23x37x51 array on the GPU and compare with numpy.

    The input holds ``arange(43401) % 251`` and is stored as ``dtype``.
    """
    host_input = np.arange(43401).reshape((23, 37, 51)) % 251
    gpu_input = bf.ndarray(host_input, dtype=dtype, space='cuda')
    gpu_output = bf.empty_like(gpu_input.transpose(axes))
    bf.transpose.transpose(gpu_output, gpu_input, axes)
    expected = host_input.transpose(axes)
    np.testing.assert_equal(gpu_output.copy('system'), expected)
def _create_locs(self, data_size, ntime, nchan, npol, loc_min, loc_max):
    """Return random int32 x/y/z locations as one ``bifrost.ndarray``.

    Args:
        data_size, ntime, nchan, npol: Axis lengths of each location array.
        loc_min, loc_max: Bounds passed to ``numpy.random.uniform``.

    Returns:
        A ``bifrost.ndarray`` of shape ``(3, ntime, nchan, npol, data_size)``
        whose leading axis holds the x, y and z locations in that order.
    """
    ishape = (ntime, nchan, npol, data_size)
    # Draw x, then y, then z so the random stream is consumed in the same
    # order as before.
    xlocs = numpy.random.uniform(loc_min, loc_max, size=ishape)
    ylocs = numpy.random.uniform(loc_min, loc_max, size=ishape)
    zlocs = numpy.random.uniform(loc_min, loc_max, size=ishape)

    # Stack first and convert once; astype() already returns a fresh
    # C-ordered array, so no extra copies are needed.
    locs = numpy.stack((xlocs, ylocs, zlocs)).astype(numpy.int32)
    return bifrost.ndarray(locs)
def test_getitem(self):
    """Indexing a ``cuda`` array must match indexing the reference array.

    Covers an integer index, a one-element tuple index, a scalar element
    and a two-axis slice.
    """
    gpu_array = bf.ndarray(self.known_vals, space='cuda')

    first_row = gpu_array[0].copy('system')
    np.testing.assert_equal(first_row, self.known_array[0])

    first_row_via_tuple = gpu_array[(0, )].copy('system')
    np.testing.assert_equal(first_row_via_tuple, self.known_array[(0, )])

    corner = int(gpu_array[0, 0])
    np.testing.assert_equal(corner, self.known_array[0, 0])

    block = gpu_array[:1, 1:].copy('system')
    np.testing.assert_equal(block, self.known_array[:1, 1:])
def test_repr(self):
    """``repr`` of a ``cuda`` f32 array must match the reference array's repr.

    Only the text from the first ``'('`` onwards is compared.
    """
    def from_first_paren(text):
        # Note: This chops off the class name
        return text[text.find('('):]

    gpu_array = bf.ndarray(self.known_vals, dtype='f32', space='cuda')
    actual = from_first_paren(repr(gpu_array))
    expected = from_first_paren(repr(self.known_array))
    self.assertEqual(actual, expected)
def run_positions_test(self, grid_size, illum_size, data_size, ntime, npol,
                       nchan, polmajor, dtype=numpy.complex64):
    """Check that ``romein.set_positions`` replaces the locations used.

    The gridder is initialised with the generated locations, then given the
    same locations shifted by ``TEST_OFFSET`` through ``set_positions``.
    Its output must match ``naive_romein`` evaluated at the shifted
    locations.

    Args:
        grid_size: Side length of the square output grid.
        illum_size: Side length of the square illumination pattern.
        data_size: Number of data points per (time, channel, pol).
        ntime, npol, nchan: Axis lengths.
        polmajor: Passed to ``romein.init``; when false, data, locations
            and illumination are transposed before gridding.
        dtype: Data dtype forwarded to ``_create_data``.

    Raises:
        AssertionError: if the two grids differ beyond rtol=1e-4, atol=1e-5.
    """
    TEST_OFFSET = 1
    gridshape = (ntime, nchan, npol, grid_size, grid_size)

    # Create grid and illumination pattern
    grid = bifrost.ndarray(numpy.zeros(shape=gridshape,
                                       dtype=numpy.complex64))
    illum = self._create_illum(illum_size, data_size, ntime, npol, nchan)

    # Create data
    data = self._create_data(data_size, ntime, nchan, npol, dtype=dtype)

    # Create the locations
    locs = self._create_locs(data_size, ntime, nchan, npol, illum_size,
                             grid_size - illum_size)

    # Grid using a naive method, at the offset positions
    gridnaive = self.naive_romein(gridshape, illum, data,
                                  locs[0, :] + TEST_OFFSET,
                                  locs[1, :] + TEST_OFFSET,
                                  locs[2, :] + TEST_OFFSET,
                                  ntime, npol, nchan, data_size)

    # Transpose for non pol-major kernels
    if not polmajor:
        data = data.transpose((0, 1, 3, 2)).copy()
        locs = locs.transpose((0, 1, 2, 4, 3)).copy()
        illum = illum.transpose((0, 1, 3, 2, 4, 5)).copy()

    grid = grid.copy(space='cuda')
    data = data.copy(space='cuda')
    illum = illum.copy(space='cuda')
    locs = locs.copy(space='cuda')

    # Initialise with the un-shifted locations ...
    self.romein.init(locs, illum, grid_size, polmajor=polmajor)

    # ... then apply the offset on the host and hand the shifted locations
    # to the gridder.
    locs = locs.copy(space='system')
    locs += TEST_OFFSET
    locs = locs.copy(space='cuda')
    self.romein.set_positions(locs)

    self.romein.execute(data, grid)
    grid = grid.copy(space="system")

    # Compare the two methods
    numpy.testing.assert_allclose(grid, gridnaive, 1e-4, 1e-5)
def run_test_c2r_impl(self, shape, axes, fftshift=False):
    """Check a complex-to-real FFT, optionally with fftshift, via ``compare``.

    The last transformed axis holds ``n // 2 + 1`` complex inputs and
    ``2 * (n // 2)`` real outputs, where ``n`` is its length in ``shape``.
    The reference is ``gold_irfftn`` multiplied by the product of the
    transformed axis lengths of ``shape``.
    """
    last_axis = axes[-1]
    n_complex = shape[last_axis] // 2 + 1

    oshape = list(shape)
    oshape[last_axis] = (n_complex - 1) * 2

    ishape = list(shape)
    ishape[last_axis] = n_complex
    ishape[-1] *= 2  # For complex

    spectrum = np.random.normal(size=ishape).astype(np.float32)
    spectrum = spectrum.view(np.complex64)
    idata = bf.ndarray(spectrum, space='cuda')
    odata = bf.ndarray(shape=oshape, dtype='f32', space='cuda')

    fft = Fft()
    fft.init(idata, odata, axes=axes, apply_fftshift=fftshift)
    fft.execute(idata, odata)

    # Note: Numpy applies normalization while CUFFT does not
    axis_lengths = [shape[ax] for ax in axes]
    norm = reduce(lambda acc, length: acc * length, axis_lengths)
    if fftshift:
        spectrum = np.fft.ifftshift(spectrum, axes=axes)
    expected = gold_irfftn(spectrum, axes=axes) * norm
    compare(odata.copy('system'), expected)
def run_simple_test(self, axes, dtype, shape):
    """Transpose an array of ``shape`` on the GPU and compare with numpy.

    The input holds ``arange(prod(shape)) % 251`` cast to ``dtype``.
    """
    n_elements = reduce(lambda acc, length: acc * length, shape)
    host_input = np.arange(n_elements).reshape(shape) % 251
    host_input = host_input.astype(dtype)
    expected = host_input.transpose(axes)

    gpu_input = bf.ndarray(host_input, space='cuda')
    gpu_output = bf.empty_like(gpu_input.transpose(axes))
    bf.transpose.transpose(gpu_output, gpu_input, axes)

    np.testing.assert_array_equal(gpu_output.copy('system'), expected)
def test_repr(self):
    """``repr`` of a ``cuda`` f32 array must match the reference, ignoring spaces.

    Only the text from the first ``'('`` onwards is compared, and all space
    characters are removed first because the indentation differs.
    """
    def normalised(text):
        # Note: This chops off the class name
        tail = text[text.find('('):]
        # Remove whitespace (for some reason the indentation differs)
        return tail.replace(' ', '')

    gpu_array = bf.ndarray(self.known_vals, dtype='f32', space='cuda')
    actual = normalised(repr(gpu_array))
    expected = normalised(repr(self.known_array))
    self.assertEqual(actual, expected)
def on_sequence(self, iseq):
    """Validate the configuration for a new sequence and build its output header.

    Loads the weights from ``self.weights_file`` (via ``hkl.load``), checks
    their shape and dtype, uploads them to the GPU as ``self.weights``, and
    checks the input tensor's labels, dtype and ``gulp_nframe``.  All
    problems found are printed before a single error is raised.

    Args:
        iseq: Input sequence; only ``iseq.header`` is read.

    Returns:
        A deep copy of the input header whose ``_tensor`` entry describes
        the ``(time, freq, beam, fine_time)`` output with dtype ``'f32'``.

    Raises:
        RuntimeError: if the weights file is unset, the weights have the
            wrong shape/dtype, or the input tensor is not as expected.
    """
    self.frame_count = 0
    ihdr = iseq.header
    itensor = ihdr['_tensor']

    # Explicit checks are used instead of `assert` so that validation still
    # runs when Python is started with -O.
    to_raise = False

    if self.weights_file in ('', None):
        to_raise = True
        print('ERR: need to specify weights hickle file')
    else:
        w = hkl.load(self.weights_file)
        expected_shape = (self.n_chan, self.n_beam, self.n_pol, self.n_ant)
        names = w.dtype.names
        weights_ok = (
            w.shape == expected_shape
            # A dtype without (at least two) named fields is invalid too.
            and names is not None
            and len(names) >= 2
            and names[0] == 're'
            and names[1] == 'im'
            and str(w.dtype[0]) == 'int8'
        )
        if weights_ok:
            self.weights = bf.ndarray(w, dtype='ci8', space='cuda')
        else:
            # Do not upload weights that failed validation; report instead.
            print('ERR: beam weight shape/dtype is incorrect')
            print('ERR: beam weights shape is: %s' % str(w.shape))
            print('ERR: shape should be %s' % str(expected_shape))
            print('ERR: dtype should be int8, dtype: %s' % w.dtype.str)
            to_raise = True

    tensor_ok = (
        itensor['labels'] == ['time', 'freq', 'fine_time', 'pol', 'station']
        and itensor['dtype'] == 'ci8'
        and ihdr['gulp_nframe'] == 1
    )
    if not tensor_ok:
        print('ERR: gulp_nframe %s (must be 1!)' % str(ihdr['gulp_nframe']))
        print('ERR: Frame shape %s' % str(itensor['shape']))
        print('ERR: Frame labels %s' % str(itensor['labels']))
        print('ERR: Frame dtype %s' % itensor['dtype'])
        to_raise = True

    if to_raise:
        raise RuntimeError('Correlator block misconfiguration. Check tensor labels, dtype, shape, gulp size).')

    ohdr = deepcopy(ihdr)
    otensor = ohdr['_tensor']

    # output is (time, channel, beam, fine_time)
    ft0, fts = itensor['scales'][2]
    otensor['shape'] = [itensor['shape'][0],
                        itensor['shape'][1],
                        self.n_beam,
                        itensor['shape'][2] // self.n_avg]
    otensor['labels'] = ['time', 'freq', 'beam', 'fine_time']
    otensor['scales'] = [itensor['scales'][0],
                         itensor['scales'][1],
                         [0, 0],
                         [ft0, fts / self.n_avg]]
    otensor['units'] = [itensor['units'][0],
                        itensor['units'][1],
                        None,
                        itensor['units'][2]]
    otensor['dtype'] = 'f32'
    return ohdr
def test_complex_integer(self):
    """``bf.map`` must copy complex-integer arrays to the same dtype and to cf32.

    For each of ``ci4``, ``ci8``, ``ci16`` and ``ci32`` a random array is
    mapped element-wise (``b(i) = a(i)``) to an output of the same dtype and
    to a ``cf32`` output.  Input and output are then converted to ordinary
    complex values on the host and compared.
    """
    def decode_ci4(packed):
        # ci4 is different: both components share the single 're_im' byte.
        # Each nibble is moved to the top of an int8 and divided by 16.
        value = np.int8(packed['re_im'] & 0xF0) + 1j * np.int8(
            (packed['re_im'] & 0x0F) << 4)
        value /= 16
        return value

    n = 7919
    for in_dtype in ('ci4', 'ci8', 'ci16', 'ci32'):
        source = bf.ndarray(shape=(n, ), dtype=in_dtype, space='system')
        try:
            source['re'] = np.random.randint(256, size=n)
            source['im'] = np.random.randint(256, size=n)
        except ValueError:
            # ci4 is different
            source['re_im'] = np.random.randint(256, size=n)

        for out_dtype in (in_dtype, 'cf32'):
            a = source.copy(space='cuda')
            b = bf.ndarray(shape=(n, ), dtype=out_dtype, space='cuda')
            bf.map('b(i) = a(i)', {'a': a, 'b': b},
                   shape=a.shape, axis_names=('i', ))

            a = a.copy(space='system')
            try:
                a = a['re'] + 1j * a['im']
            except ValueError:
                a = decode_ci4(a)

            b = b.copy(space='system')
            try:
                b = b['re'] + 1j * b['im']
            except ValueError:
                b = decode_ci4(b)
            except IndexError:
                # pass through cf32
                pass

            np.testing.assert_equal(a, b)
def _create_illum(self, illum_size, data_size, ntime, npol, nchan, dtype=numpy.complex64):
    """Return an all-ones illumination pattern as a ``bifrost.ndarray``.

    The array has shape
    ``(ntime, nchan, npol, data_size, illum_size, illum_size)`` and the
    given ``dtype``.
    """
    pattern_shape = (ntime, nchan, npol, data_size, illum_size, illum_size)
    ones = numpy.ones(shape=pattern_shape, dtype=dtype)
    return bifrost.ndarray(numpy.copy(ones, order='C'))
def naive_romein(self, grid_shape, illum, data, xlocs, ylocs, zlocs, ntime, npol, nchan, ndata, dtype=numpy.complex64):
    """Grid ``data`` onto a zeroed grid with plain Python loops.

    For every data point, ``datapoint * illum`` is added to the grid patch
    whose corner is at ``(ylocs, xlocs)`` for that point.  ``zlocs``,
    ``ntime``, ``npol`` and ``nchan`` are accepted but not used here.

    Returns:
        The filled grid, created with ``bifrost.zeros(grid_shape, dtype)``.
    """
    # Unpack the ci4 data to ci8, if needed
    if data.dtype == ci4:
        unpacked = bifrost.ndarray(shape=data.shape, dtype='ci8')
        unpack(data, unpacked)
        data = unpacked

    # Create the output grid
    grid = bifrost.zeros(shape=grid_shape, dtype=dtype)

    # Combine axes: everything ahead of the per-point axes becomes one index.
    x_corner = xlocs.reshape(-1, ndata)
    y_corner = ylocs.reshape(-1, ndata)
    points = data.reshape(-1, ndata)
    grid_flat = grid.reshape(-1, grid.shape[-2], grid.shape[-1])
    illum_flat = illum.reshape(-1, ndata, illum.shape[-2], illum.shape[-1])

    patch_rows = illum_flat.shape[-2]
    patch_cols = illum_flat.shape[-1]
    needs_cast = data.dtype != dtype

    # Excruciatingly slow, but it's just for testing purposes...
    # Could probably use a blas based function for simplicity.
    for plane in range(points.shape[0]):
        for idx in range(ndata):
            value = points[plane, idx]
            if needs_cast:
                try:
                    # Two-component values are combined as re + 1j * im.
                    value = dtype(value[0] + 1j * value[1])
                except IndexError:
                    value = dtype(value)

            col0 = x_corner[plane, idx]
            row0 = y_corner[plane, idx]
            for row in range(row0, row0 + patch_rows):
                for col in range(col0, col0 + patch_cols):
                    weight = illum_flat[plane, idx, row - row0, col - col0]
                    grid_flat[plane, row, col] += value * weight
    return grid