def __abs__(self): """Return a `GPUArray` of the absolute values of the elements of `self`. """ result = self._new_like_me() if self.dtype == np.float32: fname = "fabsf" elif self.dtype == np.float64: fname = "fabs" else: fname = "abs" if issubclass(self.dtype.type, np.complexfloating): from pytools import match_precision out_dtype = match_precision(np.dtype(np.float64), self.dtype) result = self._new_like_me(out_dtype) else: out_dtype = self.dtype func = elementwise.get_unary_func_kernel(fname, self.dtype, out_dtype=out_dtype) func.prepared_async_call(self._grid, self._block, None, self.gpudata, result.gpudata, self.mem_size) return result
def f(array, stream=None):
    result = array._new_like_me()

    if array.dtype == np.float32:
        func_name = name + "f"
    else:
        func_name = name

    func = elementwise.get_unary_func_kernel(func_name, array.dtype)
    func.set_block_shape(*array._block)
    func.prepared_async_call(array._grid, stream,
            array.gpudata, result.gpudata, array.mem_size)

    return result
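# Note: the variant above targets the older PyCUDA prepared-call API, where
# the block shape is fixed up front via set_block_shape() and
# prepared_async_call() takes (grid, stream, *args). The later variants below
# pass the block shape directly:
#
#   func.prepared_async_call(array._grid, array._block, stream,
#           array.gpudata, result.gpudata, array.mem_size)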
def f(array, stream_or_out=None, **kwargs):
    out, stream = None, None

    # Deprecated positional argument: may be a Stream or an output array.
    if stream_or_out is not None:
        warnings.warn(
            "please use 'out' or 'stream' keyword arguments",
            DeprecationWarning,
        )
        if isinstance(stream_or_out, Stream):
            stream = stream_or_out
        else:
            out = stream_or_out

    if "out" in kwargs:
        out = kwargs["out"]
    if "stream" in kwargs:
        stream = kwargs["stream"]

    if array.dtype == np.float32:
        func_name = name + "f"
    else:
        func_name = name

    if not array.flags.forc:
        raise RuntimeError(
            "only contiguous arrays may "
            "be used as arguments to this operation"
        )

    if out is None:
        out = array._new_like_me()
    else:
        assert out.dtype == array.dtype
        assert out.strides == array.strides
        assert out.shape == array.shape

    func = elementwise.get_unary_func_kernel(func_name, array.dtype)
    func.prepared_async_call(
        array._grid,
        array._block,
        stream,
        array.gpudata,
        out.gpudata,
        array.mem_size,
    )

    return out
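# Usage sketch for the keyword API above (`_demo_exp_out` is a hypothetical
# helper; it assumes the factory bound the function as pycuda.cumath.exp and
# that a context exists):
def _demo_exp_out():
    import pycuda.autoinit  # noqa: F401
    import pycuda.cumath as cumath
    import pycuda.gpuarray as gpuarray
    import numpy as np

    x = gpuarray.to_gpu(np.linspace(0.0, 1.0, 8).astype(np.float32))
    out = gpuarray.empty_like(x)
    cumath.exp(x, out=out)  # write into a preallocated buffer
    assert np.allclose(out.get(), np.exp(x.get()), rtol=1e-6)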
def f(array, stream=None):
    result = array._new_like_me()

    if array.dtype == np.float32:
        func_name = name + "f"
    else:
        func_name = name

    if not array.flags.forc:
        raise RuntimeError("only contiguous arrays may "
                "be used as arguments to this operation")

    func = elementwise.get_unary_func_kernel(func_name, array.dtype)
    func.prepared_async_call(array._grid, array._block, stream,
            array.gpudata, result.gpudata, array.mem_size)

    return result
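# Each `f` above is a closure over `name`, produced by a factory in the style
# of pycuda.cumath. A minimal self-contained sketch of that pattern — the
# factory name and bindings here are illustrative, not copied from the source:
import numpy as np
from pycuda import elementwise

def _make_unary_array_func(name):
    def f(array, stream=None):
        if not array.flags.forc:
            raise RuntimeError("only contiguous arrays may "
                    "be used as arguments to this operation")
        # CUDA math functions have an "f"-suffixed single-precision variant.
        func_name = name + "f" if array.dtype == np.float32 else name
        result = array._new_like_me()
        func = elementwise.get_unary_func_kernel(func_name, array.dtype)
        func.prepared_async_call(array._grid, array._block, stream,
                array.gpudata, result.gpudata, array.mem_size)
        return result
    f.__name__ = name
    return f

# Typical bindings, one per C math function:
exp = _make_unary_array_func("exp")
log = _make_unary_array_func("log")
sqrt = _make_unary_array_func("sqrt")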
def __abs__(self): """Return a `GPUArray` of the absolute values of the elements of `self`. """ result = self._new_like_me() if self.dtype == np.float32: fname = "fabsf" elif self.dtype == np.float64: fname = "fabs" else: fname = "abs" func = elementwise.get_unary_func_kernel(fname, self.dtype) func.prepared_async_call(self._grid, self._block, None, self.gpudata, result.gpudata, self.mem_size) return result
def init():
    from pycuda import elementwise

    global all_kernels
    global exp_func
    global log_func

    all_kernels_code = {
        'sign': {
            'float': ("float *mat, float *target",
                      "target[i] = (mat[i] > 0.) - (mat[i] < 0);"),
            'double': ("double *mat, double *target",
                       "target[i] = (mat[i] > 0.) - (mat[i] < 0);")
        },

        'sigmoid': {
            'float': ("float *mat",
                      "mat[i] = 1. / (1. + __expf(-mat[i]))"),
            'double': ("double *mat",
                       "mat[i] = 1. / (1. + exp(-mat[i]))")
        },

        'df_sigmoid': {
            'float': ("float *mat, float *target",
                      """const float f = mat[i];
                         target[i] = f * (1 - f);"""),
            'double': ("double *mat, double *target",
                       """const double f = mat[i];
                          target[i] = f * (1 - f);""")
        },

        'tanh_inplace': {
            'float': ("float *mat",
                      "mat[i] = tanhf(mat[i]);"),
            'double': ("double *mat",
                       "mat[i] = tanh(mat[i]);")
        },

        'df_tanh': {
            'float': ("float *mat, float *target",
                      """float f = mat[i];
                         target[i] = 1 - pow(f, 2);"""),
            'double': ("double *mat, double *target",
                       """double f = mat[i];
                          target[i] = 1 - pow(f, 2);""")
        },

        'relu': {
            'float': ("float *mat",
                      "if (mat[i] < 0.) mat[i] = 0."),
            'double': ("double *mat",
                       "if (mat[i] < 0.) mat[i] = 0.")
        },

        'df_relu': {
            'float': ("float *mat, float *target",
                      """if (mat[i] <= 0.)
                             target[i] = 0.;
                         else
                             target[i] = 1.;"""),
            'double': ("double *mat, double *target",
                       """if (mat[i] <= 0.)
                              target[i] = 0.;
                          else
                              target[i] = 1.;""")
        },

        'sample_dropout_mask': {
            'float': ("float *mat, float *target, char *dropout_mask, "
                      "float *dropout_prob_array, float dropout_probability",
                      """if (dropout_prob_array[i] <= dropout_probability) {
                             dropout_mask[i] = 0.;
                             target[i] = 0.;
                         } else {
                             dropout_mask[i] = 1.;
                             if (target != mat)
                                 target[i] = mat[i];
                         }
                      """),
            'double': ("double *mat, double *target, char *dropout_mask, "
                       "double *dropout_prob_array, double dropout_probability",
                       """if (dropout_prob_array[i] <= dropout_probability) {
                              dropout_mask[i] = 0.;
                              target[i] = 0.;
                          } else {
                              dropout_mask[i] = 1.;
                              if (target != mat)
                                  target[i] = mat[i];
                          }
                       """)
        },

        'apply_dropout_mask': {
            'float': ("float *mat, char *mask",
                      "if (mask[i] == 0.) mat[i] = 0;"),
            'double': ("double *mat, char *mask",
                       "if (mask[i] == 0.) mat[i] = 0;"),
        },

        'nan_to_zeros': {
            'float': ("float *mat, float *target",
                      "target[i] = isnan(mat[i]) ? 0. : mat[i];"),
            'double': ("double *mat, double *target",
                       "target[i] = isnan(mat[i]) ? 0. : mat[i];")
        },

        'mult_matrix': {
            'float': ("const float *a, const float *b, float *c",
                      "c[i] = a[i] * b[i];"),
            'double': ("const double *a, const double *b, double *c",
                       "c[i] = a[i] * b[i];")
        },

        'substract_matrix': {
            'float': ("const float *a, const float *b, float *c",
                      "c[i] = a[i] - b[i];"),
            'double': ("const double *a, const double *b, double *c",
                       "c[i] = a[i] - b[i];")
        }
    }

    all_kernels = {
        name: Kernel(name,
                     val['float'][0], val['float'][1],
                     val['double'][0], val['double'][1])
        for name, val in all_kernels_code.items()
    }

    exp_func = elementwise.get_unary_func_kernel('expf', np.float32)
    log_func = elementwise.get_unary_func_kernel('logf', np.float32)
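# `Kernel` is defined elsewhere in this package; a minimal sketch of one
# plausible implementation on top of pycuda.elementwise.ElementwiseKernel
# (an assumption for illustration, not the package's actual class):
import numpy as np
from pycuda.elementwise import ElementwiseKernel

class Kernel(object):
    """Pair a float32 and a float64 kernel and dispatch on input dtype."""

    def __init__(self, name, signature_float, code_float,
                 signature_double, code_double):
        self.name = name
        # ElementwiseKernel compiles lazily, on first call.
        self.kernel_float = ElementwiseKernel(
            signature_float, code_float, name)
        self.kernel_double = ElementwiseKernel(
            signature_double, code_double, name)

    def __call__(self, *args):
        # Dispatch on the dtype of the first GPUArray argument.
        if args[0].dtype == np.float64:
            return self.kernel_double(*args)
        return self.kernel_float(*args)

# e.g. all_kernels['tanh_inplace'](mat_gpu) applies tanh in place.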
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from . import eps
from .reductions import max_by_axis, matrix_sum_out_axis
from .matrix import add_vec_to_mat
from .elementwise import nan_to_zeros
from pycuda import cumath, gpuarray, elementwise
import numpy as np

exp_func = elementwise.get_unary_func_kernel('__expf', np.float32)


def logsumexp(mat, tmp=None):
    max_dim = max_by_axis(mat, 1)
    if tmp is None:
        tmp = gpuarray.empty_like(mat)
    add_vec_to_mat(mat, max_dim, 0, target=tmp, substract=True)

    exp_func.prepared_async_call(tmp._grid, tmp._block, None,
            tmp.gpudata, tmp.gpudata, tmp.mem_size)
    # tmp = cumath.exp(tmp)
    tmp = matrix_sum_out_axis(tmp, 1)
    tmp = cumath.log(tmp)
    max_dim += tmp

    return max_dim
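# Numeric sanity sketch for logsumexp (`_check_logsumexp` is a hypothetical
# helper, not part of the original module): row-wise the result should equal
# max_j + log(sum_k exp(mat[j, k] - max_j)).
def _check_logsumexp(mat_gpu):
    m = mat_gpu.get()
    row_max = m.max(axis=1, keepdims=True)
    ref = row_max[:, 0] + np.log(np.exp(m - row_max).sum(axis=1))
    got = logsumexp(mat_gpu).get().ravel()
    assert np.allclose(got, ref, rtol=1e-4)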