def _make_trf_conj(self):
    return Transformation(
        [
            Parameter('output', Annotation(self._system.unitary, 'o')),
            Parameter('input', Annotation(self._system.unitary, 'i'))
        ],
        """
        ${output.store_same}(${conj}(${input.load_same}));
        """,
        render_kwds=dict(conj=functions.conj(self._system.unitary.dtype)))
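# Illustrative sketch (not part of the computation above): elementwise, the
# transformation is equivalent to numpy.conj applied to an array shaped like
# self._system.unitary; the array `u` below is a hypothetical stand-in.
# When connected to a computation parameter (see the alpha/beta _build_plan
# below, where it is attached to MatrixMul's matrix_b), the conjugation is
# fused into that computation's memory loads instead of running as a
# separate kernel.
import numpy

u = (numpy.random.rand(4, 4) + 1j * numpy.random.rand(4, 4)).astype(numpy.complex128)
out = numpy.conj(u)  # what ${conj}(${input.load_same}) computes per element
assert numpy.allclose(out.imag, -u.imag)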
def _build_plan(self, plan_factory, device_params, output, input_):
    plan = plan_factory()

    # The input holds N/2 + 1 complex coefficients of a real-valued signal;
    # the inverse transform is computed via a single complex FFT of size N/2.
    N = (input_.shape[-1] - 1) * 2

    # Twiddle factors for the pre-processing step.
    WNmk = numpy.exp(-2j * numpy.pi * numpy.arange(N // 2) / N)
    A = 0.5 * (1 - 1j * WNmk)
    B = 0.5 * (1 + 1j * WNmk)

    A_arr = plan.persistent_array(A.conj())
    B_arr = plan.persistent_array(B.conj())

    cfft_arr = Type(input_.dtype, input_.shape[:-1] + (N // 2,))
    cfft = FFT(cfft_arr, axes=(len(input_.shape) - 1,))

    # Post-processing transformation: produce the real-valued output
    # from the half-size complex FFT result.
    prepare_output = prepare_irfft_output(cfft.parameter.output)
    cfft.parameter.output.connect(
        prepare_output, prepare_output.input, real_output=prepare_output.output)

    temp = plan.temp_array_like(cfft.parameter.input)

    batch_size = helpers.product(output.shape[:-1])

    # Pre-processing kernel: combine the input coefficients with A* and B*
    # to form the input of the half-size complex FFT.
    plan.kernel_call(
        TEMPLATE.get_def('prepare_irfft_input'),
        [temp, input_, A_arr, B_arr],
        global_size=(batch_size, N // 2),
        render_kwds=dict(
            slices=(len(input_.shape) - 1, 1),
            N=N,
            mul=functions.mul(input_.dtype, input_.dtype),
            conj=functions.conj(input_.dtype)))

    plan.computation_call(cfft, output, temp, inverse=True)

    return plan
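# A minimal NumPy check (illustrative only, not part of the plan above) of the
# identity the plan relies on: an N-point inverse real FFT can be computed from
# a single N/2-point complex inverse FFT. Given the conjugated A and B arrays
# passed above, 'prepare_irfft_input' presumably forms
# Z[k] = X[k]*conj(A[k]) + conj(X[N/2-k])*conj(B[k]), and 'prepare_irfft_output'
# interleaves the real and imaginary parts of the half-size result.
import numpy

N = 16
x = numpy.random.rand(N)
X = numpy.fft.rfft(x)                      # N/2 + 1 complex coefficients

k = numpy.arange(N // 2)
WNmk = numpy.exp(-2j * numpy.pi * k / N)
A = 0.5 * (1 - 1j * WNmk)
B = 0.5 * (1 + 1j * WNmk)

Z = X[k] * A.conj() + X[N // 2 - k].conj() * B.conj()   # prepare_irfft_input
z = numpy.fft.ifft(Z)                                   # half-size inverse FFT

x_rec = numpy.empty(N)
x_rec[0::2] = z.real                                    # prepare_irfft_output
x_rec[1::2] = z.imag

assert numpy.allclose(x_rec, x)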
def _build_plan(self, plan_factory, device_params, alpha, beta, alpha_i, beta_i, seed):
    plan = plan_factory()

    system = self._system
    representation = self._representation

    unitary = plan.persistent_array(self._system.unitary)

    needs_noise_matrix = (
        representation != Representation.POSITIVE_P and system.needs_noise_matrix())

    mmul = MatrixMul(alpha, unitary, transposed_b=True)

    if not needs_noise_matrix:

        # TODO: this could be sped up for repr != POSITIVE_P,
        # since in that case alpha == conj(beta), and we don't need to do two multiplications.
        mmul_beta = MatrixMul(beta, unitary, transposed_b=True)
        trf_conj = self._make_trf_conj()
        mmul_beta.parameter.matrix_b.connect(trf_conj, trf_conj.output, matrix_b_p=trf_conj.input)

        plan.computation_call(mmul, alpha, alpha_i, unitary)
        plan.computation_call(mmul_beta, beta, beta_i, unitary)

    else:

        noise_matrix = system.noise_matrix()
        noise_matrix_dev = plan.persistent_array(noise_matrix)

        # If we're here, it's not positive-P, and alpha == conj(beta).
        # This means we can just calculate alpha, and then build beta from it.

        w = plan.temp_array_like(alpha)
        temp_alpha = plan.temp_array_like(alpha)

        plan.computation_call(mmul, temp_alpha, alpha_i, unitary)

        bijection = philox(64, 2)

        # Keeping the kernel the same so it can be cached.
        # The seed will be passed as the computation parameter instead.
        keygen = KeyGenerator.create(bijection, seed=numpy.int32(0))

        sampler = normal_bm(bijection, numpy.float64)

        plan.kernel_call(
            TEMPLATE.get_def("generate_apply_matrix_noise"),
            [w, seed],
            kernel_name="generate_apply_matrix_noise",
            global_size=alpha.shape,
            render_kwds=dict(
                bijection=bijection,
                keygen=keygen,
                sampler=sampler,
                mul_cr=functions.mul(numpy.complex128, numpy.float64),
                add_cc=functions.add(numpy.complex128, numpy.complex128),
                ))

        noise = plan.temp_array_like(alpha)
        plan.computation_call(mmul, noise, w, noise_matrix_dev)

        plan.kernel_call(
            TEMPLATE.get_def("add_noise"),
            [alpha, beta, temp_alpha, noise],
            kernel_name="add_noise",
            global_size=alpha.shape,
            render_kwds=dict(
                add=functions.add(numpy.complex128, numpy.complex128),
                conj=functions.conj(numpy.complex128)))

    return plan
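# Rough NumPy model of the plan above (illustrative only; the function name
# `apply_matrix_model` is hypothetical). Assumptions: MatrixMul with
# transposed_b=True computes a @ b.T, the template kernel
# 'generate_apply_matrix_noise' fills w with standard complex-normal samples
# (any extra scaling done in the template is omitted here), and 'add_noise'
# computes alpha = temp_alpha + noise, beta = conj(alpha).
import numpy

def apply_matrix_model(alpha_i, beta_i, unitary, noise_matrix=None, rng=None):
    if noise_matrix is None:
        # No noise matrix: two multiplications; beta uses the elementwise
        # conjugate of the unitary (the trf_conj transformation above).
        alpha = alpha_i @ unitary.T
        beta = beta_i @ unitary.conj().T
    else:
        # Noise matrix present (not positive-P, so beta == conj(alpha)):
        # add matrix-correlated Gaussian noise to the transformed alpha.
        rng = rng or numpy.random.default_rng()
        w = rng.normal(size=alpha_i.shape) + 1j * rng.normal(size=alpha_i.shape)
        alpha = alpha_i @ unitary.T + w @ noise_matrix.T
        beta = alpha.conj()
    return alpha, beta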
def test_conj(thr, out_code, in_codes):
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    check_func(thr, functions.conj(in_dtypes[0]), numpy.conj, out_dtype, in_dtypes)