def derive_out_dtype(out_dtype, *in_dtypes):
    expected_dtype = dtypes.result_type(*in_dtypes)
    if out_dtype is None:
        out_dtype = expected_dtype
    else:
        check_information_loss(out_dtype, expected_dtype)
    return out_dtype
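# A minimal usage sketch for derive_out_dtype (hypothetical values; assumes
# `dtypes` is reikna.cluda.dtypes and that `check_information_loss`, defined
# elsewhere in this module, raises when the requested out_dtype cannot
# represent the expected result):
import numpy
from reikna.cluda import dtypes

# With out_dtype=None, the result type of the inputs is used.
assert derive_out_dtype(None, numpy.float32, numpy.complex64) == numpy.complex64
# An explicit out_dtype is kept as long as it loses no information.
assert derive_out_dtype(
    numpy.complex128, numpy.float32, numpy.complex64) == numpy.complex128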
def get_kprop_trf(state_arr, ksquared_arr, coeffs, exp=False):
    compound_dtype = dtypes.result_type(coeffs.dtype, ksquared_arr.dtype)
    return Transformation(
        [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('input', Annotation(state_arr, 'i')),
            Parameter('ksquared', Annotation(ksquared_arr, 'i')),
            Parameter('dt', Annotation(ksquared_arr.dtype))],
        """
        %if max(coeffs.values) > 0:
        ${ksquared.ctype} ksquared = ${ksquared.load_idx}(${', '.join(idxs[2:])});
        %endif

        ${dtypes.ctype(compound_dtype)} compound_coeff = ${dtypes.c_constant(0, compound_dtype)};

        %for pwr, values in coeffs.values.items():
        {
            ${dtypes.ctype(coeffs.dtype)} value;

            %for comp in range(output.shape[1]):
            ${'if' if comp == 0 else 'else if'} (${idxs[1]} == ${comp})
            {
                value = ${dtypes.c_constant(values[comp], coeffs.dtype)};
            }
            %endfor

            compound_coeff = compound_coeff + ${mul_kc}(
                %if pwr == 0:
                ${dt}
                %elif pwr == 2:
                -ksquared * ${dt}
                %else:
                pow(-ksquared, ${pwr // 2}) * ${dt}
                %endif
                , value);
        }
        %endfor

        ${output.store_same}(${mul_ic}(
            ${input.load_same},
            %if exp is not None:
            ${exp}(compound_coeff)
            %else:
            compound_coeff
            %endif
            ));
        """,
        render_kwds=dict(
            coeffs=coeffs,
            compound_dtype=compound_dtype,
            mul_ic=functions.mul(state_arr.dtype, compound_dtype, out_dtype=state_arr.dtype),
            mul_kc=functions.mul(ksquared_arr.dtype, coeffs.dtype, out_dtype=compound_dtype),
            exp=functions.exp(compound_dtype) if exp else None))
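# A hypothetical sketch of wrapping the transformation above into an
# executable kernel (the shapes, the _Coeffs stand-in, and the use of
# reikna.algorithms.PureParallel are illustrative assumptions, not part of
# this module). The state is laid out as (trajectories, components, *spatial),
# ksquared matches the spatial dimensions, and `coeffs` is assumed to expose
# a `dtype` plus a `values` dict mapping even powers of k to per-component
# coefficient lists.
import numpy
from reikna.core import Type
from reikna.algorithms import PureParallel

class _Coeffs(object):
    # Stand-in for the coefficient container expected by get_kprop_trf.
    def __init__(self, dtype, values):
        self.dtype = dtype
        self.values = values

state = Type(numpy.complex64, shape=(16, 2, 256))
ksquared = Type(numpy.float32, shape=(256,))
coeffs = _Coeffs(numpy.complex64, {2: [0.5j, 0.5j]})

# With exp=True the kernel multiplies each state component by
# exp(sum_pwr value_pwr * (-k^2)^(pwr/2) * dt), i.e. the momentum-space
# propagator of a split-step spectral method.
kprop = PureParallel.from_trf(
    get_kprop_trf(state, ksquared, coeffs, exp=True), guiding_array='input')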
def generate_dtypes(out_code, in_codes):
    test_dtype = lambda idx: dict(i=numpy.int32, f=numpy.float32, c=numpy.complex64)[idx]
    in_dtypes = list(map(test_dtype, in_codes))
    out_dtype = dtypes.result_type(*in_dtypes) if out_code == 'auto' else test_dtype(out_code)

    if not any(map(dtypes.is_double, in_dtypes)):
        # numpy thinks that int32 * float32 == float64,
        # but we still need to run this test on older videocards
        if dtypes.is_complex(out_dtype):
            out_dtype = numpy.complex64
        elif dtypes.is_real(out_dtype):
            out_dtype = numpy.float32

    return out_dtype, in_dtypes
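# A small usage sketch for the test helper above (the dtype codes are single
# characters: 'i' -> int32, 'f' -> float32, 'c' -> complex64, and 'auto'
# infers the output dtype from the inputs):
import numpy

out_dtype, in_dtypes = generate_dtypes('auto', 'if')
# numpy.result_type(int32, float32) is float64, but since none of the inputs
# is double precision, the output is demoted to float32 so the test can run
# on single-precision-only devices.
assert in_dtypes == [numpy.int32, numpy.float32]
assert out_dtype == numpy.float32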
def __init__(self, a_arr, b_arr, out_arr=None, block_width_override=None,
        transposed_a=False, transposed_b=False):

    if len(a_arr.shape) == 1:
        a_arr = Type(a_arr.dtype, shape=(1,) + a_arr.shape)
    if len(b_arr.shape) == 1:
        b_arr = Type(b_arr.dtype, shape=b_arr.shape + (1,))

    a_batch_shape = a_arr.shape[:-2]
    b_batch_shape = b_arr.shape[:-2]
    a_outer_size = a_arr.shape[-1 if transposed_a else -2]
    convolution_size = a_arr.shape[-2 if transposed_a else -1]
    b_outer_size = b_arr.shape[-2 if transposed_b else -1]

    if out_arr is None:
        out_dtype = dtypes.result_type(a_arr.dtype, b_arr.dtype)

        batch_len = max(len(a_batch_shape), len(b_batch_shape))
        batch_shape = b_batch_shape if helpers.product(a_batch_shape) == 1 else a_batch_shape
        batch_shape = (1,) * (batch_len - len(batch_shape)) + batch_shape

        out_shape = batch_shape + (a_outer_size, b_outer_size)
        out_arr = Type(out_dtype, shape=out_shape)

    Computation.__init__(self, [
        Parameter('output', Annotation(out_arr, 'o')),
        Parameter('matrix_a', Annotation(a_arr, 'i')),
        Parameter('matrix_b', Annotation(b_arr, 'i'))])

    self._block_width_override = block_width_override
    self._a_outer_size = a_outer_size
    self._convolution_size = convolution_size
    self._b_outer_size = b_outer_size
    self._transposed_a = transposed_a
    self._transposed_b = transposed_b
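# A hypothetical usage sketch, assuming this __init__ belongs to a
# MatrixMul-style Computation as in reikna.linalg.MatrixMul. A batched `a`
# (10 matrices of 200x100) times a shared `b` (100x50) broadcasts `b` over
# the batch, so the derived output shape is (10, 200, 50):
import numpy
from reikna.cluda import any_api
from reikna.linalg import MatrixMul

api = any_api()
thr = api.Thread.create()

a = numpy.random.normal(size=(10, 200, 100)).astype(numpy.float32)
b = numpy.random.normal(size=(100, 50)).astype(numpy.float32)

a_dev = thr.to_device(a)
b_dev = thr.to_device(b)
res_dev = thr.array((10, 200, 50), numpy.float32)

dot = MatrixMul(a_dev, b_dev, out_arr=res_dev).compile(thr)
dot(res_dev, a_dev, b_dev)
assert numpy.allclose(res_dev.get(), a @ b, atol=1e-2)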