def redraw(self):
    """Render the queued pictures as a grid of subplots on one figure.

    Returns the figure, or None when there is nothing to draw.
    """
    images = self._pics_to_draw
    if images is None or not len(images):
        self.warning("No pics to draw")
        return None
    fig = self.pp.figure(self.name)
    total = len(images)
    # Grid: roughly square, with the column count aligned to column_align.
    cols = roundup(int(numpy.round(numpy.sqrt(total))), self.column_align)
    rows = int(numpy.ceil(total / cols))
    for idx, img in enumerate(images):
        axes = fig.add_subplot(rows, cols, idx + 1)
        axes.axis('off')
        if len(img.shape) == 3:
            # 3D array: treated as a color image.
            axes.imshow(img, interpolation="nearest")
        else:
            axes.imshow(img, interpolation="nearest", cmap=self.cm.gray)
    self.show_figure(fig)
    fig.canvas.draw()
    return fig
def _gpu_fill(self, nbytes):
    """Launch the device kernel to generate at least *nbytes* random bytes.

    Raises error.Bug when the rounded-up request exceeds the output buffer.
    """
    # One kernel round yields num_states * 16 eight-byte words.
    round_bytes = self.num_states * 16 * 8
    requested = roundup(nbytes, round_bytes)
    if requested > self.output.nbytes:
        raise error.Bug("nbytes > self.output.nbytes")
    self.unmap_vectors(self.states, self.output)
    # Arg 1 tells the kernel how many rounds to run.
    self.cl_const[0] = requested // round_bytes
    self.set_arg(1, self.cl_const)
    self.execute_kernel(self._global_size, self._local_size)
def ocl_run(self):
    """Benchmark the OpenCL kernel and return the measured time."""
    side = roundup(self.size, self.block_size)
    gsize = (side, side)
    lsize = (self.block_size, self.block_size)
    # Drain previously queued work so timing starts from an idle queue.
    self.device.queue_.flush()
    self.device.queue_.finish()

    def run_batch(repeats):
        # Enqueue `repeats` launches, then block until all complete.
        for _ in range(repeats):
            self.execute_kernel(gsize, lsize)
        self.device.queue_.flush()
        self.device.queue_.finish()

    if self.dry_run_first:
        run_batch(1)  # warm-up launch, excluded from the measurement
    return timeit(run_batch, self.repeats)[1]
def initialize(self, device, **kwargs):
    """Allocate and seed the PRNG state and output buffers on *device*.

    Args:
        device: compute device the vectors are placed on.
        **kwargs: forwarded to the base class initialize().
    """
    super(Uniform, self).initialize(device, **kwargs)
    if not self.states or self.states.size != self.num_states * 16:
        # 16 64-bit words of generator state per stream, stored as uint32
        # (hence the * 2 element count).
        self.states.reset(numpy.empty(self.num_states * 16 * 2,
                                      dtype=numpy.uint32))
        # Seed with the full uint32 range [0, 2**32 - 1].  randint's upper
        # bound is exclusive; the previous (1 << 32) + 1 bound could draw
        # 2**32, which wraps to 0 on assignment into the uint32 array and
        # biased the seed toward zero.
        self.states.mem[:] = self.prng.randint(0, 1 << 32, self.states.size)
    if not self.output or self.output.nbytes < self.output_bytes:
        # Round the requested size up to a whole number of generator rounds.
        self.output_bytes = roundup(self.output_bytes,
                                    self.num_states * 16 * 8)
        self.output.reset(numpy.zeros(self.output_bytes, numpy.uint8))
    else:
        self.output_bytes = self.output.nbytes
    self.init_vectors(self.states, self.output)
def numpy_fill(self, nbytes):
    """CPU fallback: fill the output buffer with at least *nbytes* of
    random data using the numpy generator implementation.

    Raises error.Bug when the rounded-up request exceeds the output buffer.
    """
    round_bytes = self.num_states * 16 * 8
    size = roundup(nbytes, round_bytes)
    if size > self.output.nbytes:
        raise error.Bug("nbytes > self.output.nbytes")
    self.states.map_write()
    self.output.map_invalidate()
    rounds = size // round_bytes
    # Multiplier constant fed to _next_rand, plus two scratch words.
    mult = numpy.array([1181783497276652981], dtype=numpy.uint64)
    t0 = numpy.zeros(1, dtype=numpy.uint64)
    t1 = numpy.zeros(1, dtype=numpy.uint64)
    # Reinterpret the uint32 state storage as rows of 16 uint64 words.
    state_words = self.states.mem.view(dtype=numpy.uint64)
    state_words = state_words.reshape(state_words.size // 16, 16)
    out = self.output.mem.view(dtype=numpy.uint64)
    for state_idx in range(self.num_states):
        state = state_words[state_idx]
        self.p = 0  # reset the per-state word pointer used by _next_rand
        # Each state writes rounds * 16 words, interleaved across the
        # output with stride num_states.
        pos = state_idx
        for _ in range(rounds * 16):
            out[pos] = self._next_rand(state, t0, t1, mult)
            pos += self.num_states
def ocl_init(self):
    """Build the OpenCL program and set up the four training kernels
    (distances, argmin, gravity, gradient) with their arguments.
    """
    # Place all participating vectors on the device.
    self.input.initialize(self.device)
    self.weights.initialize(self.device)
    self.winners.initialize(self.device)
    self.argmins.initialize(self.device)
    self._distances.initialize(self.device)
    self._coords.initialize(self.device)
    batch_size = self.input.mem.shape[0]
    # Split the neuron range into chunks that fit the device work-group;
    # ensure at least 2 neurons per chunk.
    chunk_size = self._neurons_number // self.device.max_group_size
    if chunk_size < 2:
        chunk_size = self._neurons_number // 2 + 1
    self.argmin_group_size = int(
        numpy.ceil(float(self._neurons_number) / chunk_size))
    block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
        kernel="matrix_multiplication", dtype=self.input.dtype)
    # Compile-time constants baked into the kernel source.
    defines = {
        'BLOCK_SIZE': block_size,
        'VECTOR_OPT': int(bool(vector_opt)),
        'BATCH': batch_size,
        'SAMPLE_LENGTH': self._sample_length,
        'NEURONS_NUMBER': self._neurons_number,
        'CHUNK_SIZE': chunk_size,
        'GRADIENT_CHUNK_SIZE': self.device.max_group_size,
        # e.g. "float2" — OpenCL vector type matching the coords array.
        'coord_type': "%s%d" % (opencl_types.numpy_dtype_to_opencl(
            self._coords.mem.dtype), self._coords.mem.shape[-1])
    }
    if self.weights_transposed:
        defines['WEIGHTS_TRANSPOSED'] = 1
    self.build_program(defines, "%s_%d_%d_%d" % (
        self.__class__.__name__, batch_size,
        self._sample_length, self._neurons_number),
        dtype=self.weights.mem.dtype)
    self.ocl_consts_ = numpy.zeros(1, dtype=self.weights.mem.dtype)
    self._krn_distances_ = self.get_kernel("calculate_distances")
    self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                  self._distances.devmem)
    self._krn_argmin_ = self.get_kernel("calculate_argmin")
    self._krn_argmin_.set_args(self._distances.devmem, self.argmins.devmem,
                               self.winners.devmem)
    self._krn_gravity_ = self.get_kernel("compute_gravity")
    self._krn_gravity_.set_args(self.argmins.devmem, self._coords.devmem)
    # NOTE(review): arg 2 is skipped here — presumably set per-iteration
    # elsewhere (e.g. a scalar constant); confirm against the run() path.
    self._krn_gravity_.set_arg(3, self._distances.devmem)
    self._krn_apply_gradient_ = self.get_kernel("apply_gradient")
    self._krn_apply_gradient_.set_args(self.input.devmem,
                                       self._distances.devmem)
    # Same pattern: arg 2 left for later, weights bound as arg 3.
    self._krn_apply_gradient_.set_arg(3, self.weights.devmem)
    # Launch geometry for the distance kernel, padded to the block size.
    self._gs_distance = [
        roundup(self._neurons_number, block_size),
        roundup(batch_size, block_size)
    ]
    self._ls_distance = [block_size, block_size]
def ocl_init(self):
    """Build the OpenCL program for the forward pass and configure the
    distance/argmin (and optional set_total) kernels.
    """
    batch_size = self.input.mem.shape[0]
    self.output.initialize(self.device)
    if self.argmins is None:
        # No precomputed argmins: we must compute distances ourselves.
        self.input.initialize(self.device)
        self.weights.initialize(self.device)
        self._distances.initialize(self.device)
    elif self.total is None:
        # Argmins are supplied and no totals are tracked: nothing to build.
        return
    if self.total is not None:
        self.total.initialize(self.device)
    copy_chunk_size = int(
        numpy.ceil(batch_size / self.device.max_group_size))
    # Split the neuron range into work-group-sized chunks (min 2 neurons).
    chunk_size = self.neurons_number // self.device.max_group_size
    if chunk_size < 2:
        chunk_size = self.neurons_number // 2 + 1
    self.argmin_group_size = \
        int(numpy.ceil(self.neurons_number / chunk_size))
    block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
        kernel="matrix_multiplication", dtype=self.input.dtype)
    # Compile-time constants baked into the kernel source.
    defines = {
        'BLOCK_SIZE': block_size,
        'VECTOR_OPT': int(bool(vector_opt)),
        'BATCH': batch_size,
        'SAMPLE_LENGTH': self.sample_length,
        'NEURONS_NUMBER': self.neurons_number,
        'CHUNK_SIZE': chunk_size,
        'COPY_CHUNK_SIZE': copy_chunk_size,
    }
    if self.weights_transposed:
        defines['WEIGHTS_TRANSPOSED'] = 1
    self.build_program(defines, "%s_%d_%d_%d" % (
        self.__class__.__name__, batch_size,
        self.sample_length, self.neurons_number),
        dtype=self.weights.mem.dtype)
    if self.total is not None:
        self._set_total_global_size_ = \
            [int(numpy.ceil(batch_size / copy_chunk_size))]
        self._krn_set_total_ = self.get_kernel("set_total")
        # cl.skip leaves arg 1 unset — presumably bound per run; verify.
        self._krn_set_total_.set_args(self.output.devmem, cl.skip,
                                      self.total.devmem)
    if self.argmins is not None:
        # Distances/argmin kernels are only needed when we compute argmins.
        return
    self._krn_distances_ = self.get_kernel("calculate_distances")
    self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                  self._distances.devmem)
    self._krn_argmin_ = self.get_kernel("calculate_argmin")
    # Last arg intentionally None — no winners buffer in the forward pass.
    self._krn_argmin_.set_args(self._distances.devmem, self.output.devmem,
                               None)
    # Launch geometry for the distance kernel, padded to the block size.
    self._gs_distance = [
        roundup(self.neurons_number, block_size),
        roundup(batch_size, block_size)
    ]
    self._ls_distance = [block_size, block_size]
def initialize(self, device, **kwargs):
    """Allocate/validate all gradient, accumulator and moment buffers and
    place every participating vector on *device*.

    kwargs may override the learning-rate / decay / moment settings.
    """
    super(GradientDescentBase, self).initialize(device, **kwargs)
    if self.weights:
        assert len(self.weights.shape) == 2
        # Logical shape: reversed when the weights matrix is stored
        # transposed.
        self.weights_shape = (tuple(reversed(self.weights.shape))
                              if self.weights_transposed
                              else self.weights.shape)
    else:
        self.weights_shape = None
    # kwargs take precedence over the previously configured values.
    self.learning_rate = kwargs.get("learning_rate", self.learning_rate)
    self.weights_decay = kwargs.get("weights_decay", self.weights_decay)
    self.gradient_moment = kwargs.get("gradient_moment",
                                      self.gradient_moment)
    self.learning_rate_bias = kwargs.get("learning_rate_bias",
                                         self.learning_rate_bias)
    self.weights_decay_bias = kwargs.get("weights_decay_bias",
                                         self.weights_decay_bias)
    self.gradient_moment_bias = kwargs.get("gradient_moment_bias",
                                           self.gradient_moment_bias)
    # Weight gradient: allocate if absent, otherwise sanity-check its size.
    if self.weights:
        if not self.gradient_weights:
            self.gradient_weights.reset(numpy.zeros_like(self.weights.mem))
        else:
            assert self.gradient_weights.size == self.weights.size
    if self.weights and self.accumulate_gradient:
        if not self.accumulated_gradient_weights:
            self.accumulated_gradient_weights.reset(
                numpy.zeros_like(self.weights.mem))
        else:
            assert (self.accumulated_gradient_weights.size ==
                    self.weights.size)
    # Moment buffer is also needed when running inside a larger workflow
    # (not standalone), even with zero moment.
    if self.weights and (self.gradient_moment or not self.is_standalone):
        if not self.gradient_weights_with_moment:
            self.gradient_weights_with_moment.reset(
                numpy.zeros_like(self.weights.mem))
        else:
            assert self.gradient_weights_with_moment.size == \
                self.weights.size
    # Bias buffers follow the same pattern, gated on include_bias.
    if (self.include_bias and self.bias and
            (not self.gradient_bias or
             self.gradient_bias.size != self.bias.size)):
        self.gradient_bias.reset(numpy.zeros_like(self.bias.mem))
    if (self.include_bias and self.bias and self.accumulate_gradient and
            (not self.accumulated_gradient_bias or
             self.accumulated_gradient_bias.size != self.bias.size)):
        self.accumulated_gradient_bias.reset(
            numpy.zeros_like(self.bias.mem))
    if (self.include_bias and self.bias and
            (self.gradient_moment_bias or not self.is_standalone)):
        if not self.gradient_bias_with_moment:
            self.gradient_bias_with_moment.reset(
                numpy.zeros_like(self.bias.mem))
        else:
            assert self.gradient_bias_with_moment.size == self.bias.size
    dtype = self.err_output.dtype
    if self.need_err_input:
        if not self.err_input:
            self.err_input.reset(numpy.zeros(self.input.shape, dtype))
        else:
            assert self.err_input.shape == self.input.shape
    if self.weights:
        side = self.weights_shape[0]
        other = self.weights.size // side
        if self.factor_ortho:
            # Column sums are only needed for the orthogonality penalty.
            if not self.col_sums:
                self.col_sums.reset(numpy.zeros(other, dtype=dtype))
            else:
                assert self.col_sums.size == other
            self.col_sums.initialize(self.device)
        # Reduction width, capped by the row length and padded to 32.
        self.reduce_size = roundup(min(self.reduce_size, other), 32)
        self.weights.initialize(self.device)
    for vec in self.bias, self.input, self.err_input:
        if vec:
            vec.initialize(self.device)
    self.init_vectors(self.err_output, self.gradient_weights,
                      self.gradient_bias,
                      self.accumulated_gradient_weights,
                      self.accumulated_gradient_bias,
                      self.gradient_weights_with_moment,
                      self.gradient_bias_with_moment)
def initialize(self, device, **kwargs):
    """Allocate/validate gradient, accumulator and moment buffers, then
    place every participating vector on *device*.

    kwargs may override the learning-rate / decay / moment settings.
    """
    super(GradientDescentBase, self).initialize(device, **kwargs)
    if self.weights:
        assert len(self.weights.shape) == 2
        # Logical shape is reversed when the matrix is stored transposed.
        self.weights_shape = tuple(reversed(self.weights.shape)) \
            if self.weights_transposed else self.weights.shape
    else:
        self.weights_shape = None
    # kwargs take precedence over the previously configured values.
    self.learning_rate = kwargs.get("learning_rate", self.learning_rate)
    self.weights_decay = kwargs.get("weights_decay", self.weights_decay)
    self.gradient_moment = kwargs.get("gradient_moment",
                                      self.gradient_moment)
    self.learning_rate_bias = kwargs.get("learning_rate_bias",
                                         self.learning_rate_bias)
    self.weights_decay_bias = kwargs.get("weights_decay_bias",
                                         self.weights_decay_bias)
    self.gradient_moment_bias = kwargs.get("gradient_moment_bias",
                                           self.gradient_moment_bias)
    # Weight gradient: allocate when absent, otherwise check its size.
    if self.weights:
        if not self.gradient_weights:
            self.gradient_weights.reset(numpy.zeros_like(self.weights.mem))
        else:
            assert self.gradient_weights.size == self.weights.size
    if self.weights and self.accumulate_gradient:
        if not self.accumulated_gradient_weights:
            self.accumulated_gradient_weights.reset(
                numpy.zeros_like(self.weights.mem))
        else:
            assert self.accumulated_gradient_weights.size == \
                self.weights.size
    # Moment buffer is also required when not running standalone.
    if self.weights and (self.gradient_moment or not self.is_standalone):
        if not self.gradient_weights_with_moment:
            self.gradient_weights_with_moment.reset(
                numpy.zeros_like(self.weights.mem))
        else:
            assert self.gradient_weights_with_moment.size == \
                self.weights.size
    # Bias buffers follow the same pattern, gated on include_bias.
    if self.include_bias and self.bias and \
            (not self.gradient_bias or
             self.gradient_bias.size != self.bias.size):
        self.gradient_bias.reset(numpy.zeros_like(self.bias.mem))
    if (self.include_bias and self.bias and self.accumulate_gradient and
            (not self.accumulated_gradient_bias or
             self.accumulated_gradient_bias.size != self.bias.size)):
        self.accumulated_gradient_bias.reset(
            numpy.zeros_like(self.bias.mem))
    if self.include_bias and self.bias and \
            (self.gradient_moment_bias or not self.is_standalone):
        if not self.gradient_bias_with_moment:
            self.gradient_bias_with_moment.reset(
                numpy.zeros_like(self.bias.mem))
        else:
            assert self.gradient_bias_with_moment.size == self.bias.size
    dtype = self.err_output.dtype
    if self.need_err_input:
        if not self.err_input:
            self.err_input.reset(numpy.zeros(self.input.shape, dtype))
        else:
            assert self.err_input.shape == self.input.shape
    if self.weights:
        side = self.weights_shape[0]
        other = self.weights.size // side
        if self.factor_ortho:
            # Column sums are only needed for the orthogonality penalty.
            if not self.col_sums:
                self.col_sums.reset(numpy.zeros(other, dtype=dtype))
            else:
                assert self.col_sums.size == other
            self.col_sums.initialize(self.device)
        # Reduction width, capped by the row length and padded to 32.
        self.reduce_size = roundup(min(self.reduce_size, other), 32)
        self.weights.initialize(self.device)
    for vec in self.bias, self.input, self.err_input:
        if vec:
            vec.initialize(self.device)
    self.init_vectors(
        self.err_output,
        self.gradient_weights,
        self.gradient_bias,
        self.accumulated_gradient_weights,
        self.accumulated_gradient_bias,
        self.gradient_weights_with_moment,
        self.gradient_bias_with_moment,
    )
def ocl_init(self):
    """Build the OpenCL program and set up the four training kernels
    (distances, argmin, gravity, gradient) with their arguments.
    """
    # Place all participating vectors on the device.
    self.input.initialize(self.device)
    self.weights.initialize(self.device)
    self.winners.initialize(self.device)
    self.argmins.initialize(self.device)
    self._distances.initialize(self.device)
    self._coords.initialize(self.device)
    batch_size = self.input.mem.shape[0]
    # Split the neuron range into work-group-sized chunks (min 2 neurons).
    chunk_size = self._neurons_number // self.device.max_group_size
    if chunk_size < 2:
        chunk_size = self._neurons_number // 2 + 1
    self.argmin_group_size = int(numpy.ceil(float(self._neurons_number) /
                                            chunk_size))
    block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
        kernel="matrix_multiplication", dtype=self.input.dtype)
    # Compile-time constants baked into the kernel source.
    defines = {
        'BLOCK_SIZE': block_size,
        'VECTOR_OPT': int(bool(vector_opt)),
        'BATCH': batch_size,
        'SAMPLE_LENGTH': self._sample_length,
        'NEURONS_NUMBER': self._neurons_number,
        'CHUNK_SIZE': chunk_size,
        'GRADIENT_CHUNK_SIZE': self.device.max_group_size,
        # e.g. "float2" — OpenCL vector type matching the coords array.
        'coord_type': "%s%d" % (
            opencl_types.numpy_dtype_to_opencl(self._coords.mem.dtype),
            self._coords.mem.shape[-1])
    }
    if self.weights_transposed:
        defines['WEIGHTS_TRANSPOSED'] = 1
    self.build_program(defines, "%s_%d_%d_%d" % (
        self.__class__.__name__, batch_size,
        self._sample_length, self._neurons_number),
        dtype=self.weights.mem.dtype)
    self.ocl_consts_ = numpy.zeros(1, dtype=self.weights.mem.dtype)
    self._krn_distances_ = self.get_kernel("calculate_distances")
    self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                  self._distances.devmem)
    self._krn_argmin_ = self.get_kernel("calculate_argmin")
    self._krn_argmin_.set_args(self._distances.devmem, self.argmins.devmem,
                               self.winners.devmem)
    self._krn_gravity_ = self.get_kernel("compute_gravity")
    self._krn_gravity_.set_args(self.argmins.devmem, self._coords.devmem)
    # NOTE(review): arg 2 is skipped here — presumably set per-iteration
    # elsewhere (e.g. a scalar constant); confirm against the run() path.
    self._krn_gravity_.set_arg(3, self._distances.devmem)
    self._krn_apply_gradient_ = self.get_kernel("apply_gradient")
    self._krn_apply_gradient_.set_args(self.input.devmem,
                                       self._distances.devmem)
    # Same pattern: arg 2 left for later, weights bound as arg 3.
    self._krn_apply_gradient_.set_arg(3, self.weights.devmem)
    # Launch geometry for the distance kernel, padded to the block size.
    self._gs_distance = [roundup(self._neurons_number, block_size),
                         roundup(batch_size, block_size)]
    self._ls_distance = [block_size, block_size]
def ocl_init(self):
    """Build the OpenCL program for the forward pass and configure the
    distance/argmin (and optional set_total) kernels.
    """
    batch_size = self.input.mem.shape[0]
    self.output.initialize(self.device)
    if self.argmins is None:
        # No precomputed argmins: we must compute distances ourselves.
        self.input.initialize(self.device)
        self.weights.initialize(self.device)
        self._distances.initialize(self.device)
    elif self.total is None:
        # Argmins are supplied and no totals are tracked: nothing to build.
        return
    if self.total is not None:
        self.total.initialize(self.device)
    copy_chunk_size = int(numpy.ceil(batch_size /
                                     self.device.max_group_size))
    # Split the neuron range into work-group-sized chunks (min 2 neurons).
    chunk_size = self.neurons_number // self.device.max_group_size
    if chunk_size < 2:
        chunk_size = self.neurons_number // 2 + 1
    self.argmin_group_size = \
        int(numpy.ceil(self.neurons_number / chunk_size))
    block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
        kernel="matrix_multiplication", dtype=self.input.dtype)
    # Compile-time constants baked into the kernel source.
    defines = {
        'BLOCK_SIZE': block_size,
        'VECTOR_OPT': int(bool(vector_opt)),
        'BATCH': batch_size,
        'SAMPLE_LENGTH': self.sample_length,
        'NEURONS_NUMBER': self.neurons_number,
        'CHUNK_SIZE': chunk_size,
        'COPY_CHUNK_SIZE': copy_chunk_size,
    }
    if self.weights_transposed:
        defines['WEIGHTS_TRANSPOSED'] = 1
    self.build_program(defines, "%s_%d_%d_%d" % (
        self.__class__.__name__, batch_size,
        self.sample_length, self.neurons_number),
        dtype=self.weights.mem.dtype)
    if self.total is not None:
        self._set_total_global_size_ = \
            [int(numpy.ceil(batch_size / copy_chunk_size))]
        self._krn_set_total_ = self.get_kernel("set_total")
        # cl.skip leaves arg 1 unset — presumably bound per run; verify.
        self._krn_set_total_.set_args(self.output.devmem, cl.skip,
                                      self.total.devmem)
    if self.argmins is not None:
        # Distances/argmin kernels are only needed when we compute argmins.
        return
    self._krn_distances_ = self.get_kernel("calculate_distances")
    self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
                                  self._distances.devmem)
    self._krn_argmin_ = self.get_kernel("calculate_argmin")
    # Last arg intentionally None — no winners buffer in the forward pass.
    self._krn_argmin_.set_args(self._distances.devmem, self.output.devmem,
                               None)
    # Launch geometry for the distance kernel, padded to the block size.
    self._gs_distance = [roundup(self.neurons_number, block_size),
                         roundup(batch_size, block_size)]
    self._ls_distance = [block_size, block_size]