def redraw(self):
    """Render every pending picture onto a grid of subplots.

    Returns:
        The figure that was drawn, or None (after logging a warning) when
        ``self._pics_to_draw`` is None or empty.
    """
    images = self._pics_to_draw
    if images is None or len(images) == 0:
        self.warning("No pics to draw")
        return None

    figure = self.pp.figure(self.name)

    total = len(images)
    # Start from a roughly square grid; the column count is then passed
    # through roundup() together with self.column_align.
    n_cols = roundup(int(numpy.round(numpy.sqrt(total))), self.column_align)
    n_rows = int(numpy.ceil(total / n_cols))

    # Walk the grid cells in subplot order, never addressing more cells
    # than the grid holds nor more pictures than are available.
    for cell in range(min(total, n_rows * n_cols)):
        image = images[cell]
        ax = figure.add_subplot(n_rows, n_cols, cell + 1)
        ax.axis('off')
        if len(image.shape) == 3:
            ax.imshow(image, interpolation="nearest")
        else:
            # Anything that is not 3-dimensional gets the gray colormap.
            ax.imshow(image, interpolation="nearest", cmap=self.cm.gray)

    self.show_figure(figure)
    figure.canvas.draw()
    return figure
Exemple #2
0
    def redraw(self):
        """Lay the queued pictures out on a subplot grid and draw the figure.

        Returns:
            The figure that was drawn, or None when there is nothing to draw
            (a warning is logged in that case).
        """
        pictures = self._pics_to_draw
        if pictures is None or len(pictures) == 0:
            self.warning("No pics to draw")
            return None

        figure = self.pp.figure(self.name)

        count = len(pictures)
        side = int(numpy.round(numpy.sqrt(count)))
        n_cols = roundup(side, self.column_align)
        n_rows = int(numpy.ceil(count / n_cols))

        # Fill the grid in subplot order (1-based) and stop at whichever
        # runs out first: the pictures or the grid cells.
        for slot in range(1, min(count, n_rows * n_cols) + 1):
            picture = pictures[slot - 1]
            ax = figure.add_subplot(n_rows, n_cols, slot)
            ax.axis('off')
            options = {"interpolation": "nearest"}
            if len(picture.shape) != 3:
                # Only 3-dimensional pictures are shown without a colormap.
                options["cmap"] = self.cm.gray
            ax.imshow(picture, **options)

        self.show_figure(figure)
        figure.canvas.draw()
        return figure
Exemple #3
0
 def _gpu_fill(self, nbytes):
     """Launch the device kernel to produce at least ``nbytes`` of output.

     ``nbytes`` is first passed through roundup() with the number of bytes
     generated per round (``num_states * 16 * 8``); the resulting number of
     rounds is handed to the kernel through ``self.cl_const``.

     Raises:
         error.Bug: if the rounded request exceeds ``self.output.nbytes``.
     """
     round_bytes = self.num_states * 16 * 8
     nbytes = roundup(nbytes, round_bytes)
     if nbytes > self.output.nbytes:
         raise error.Bug("nbytes > self.output.nbytes")
     self.unmap_vectors(self.states, self.output)
     rounds = nbytes // round_bytes
     self.cl_const[0] = rounds
     # Kernel argument 1 carries the number of rounds to run.
     self.set_arg(1, self.cl_const)
     self.execute_kernel(self._global_size, self._local_size)
    def ocl_run(self):
        """Time ``self.repeats`` kernel launches on the device.

        The queue is flushed and finished before timing starts and again
        after each batch of launches. When ``self.dry_run_first`` is set, one
        untimed launch is performed first.

        Returns:
            Element 1 of the value returned by ``timeit(execute, repeats)``.
        """
        padded = roundup(self.size, self.block_size)
        global_size = (padded, padded)
        local_size = (self.block_size, self.block_size)

        def drain():
            # Push queued commands to the device and wait for completion.
            self.device.queue_.flush()
            self.device.queue_.finish()

        def execute(repeats):
            launched = 0
            while launched < repeats:
                self.execute_kernel(global_size, local_size)
                launched += 1
            drain()

        drain()

        if self.dry_run_first:
            execute(1)

        return timeit(execute, self.repeats)[1]
Exemple #5
0
    def initialize(self, device, **kwargs):
        """Allocate (or reuse) the generator states and the output buffer.

        The states vector holds ``num_states * 16 * 2`` uint32 values. It is
        (re)allocated and seeded from ``self.prng`` only when it is empty or
        has a different size, so already-sized states are kept as they are.

        The output buffer is (re)allocated as zeros when it is empty or
        smaller than ``self.output_bytes``; the requested size is first passed
        through roundup() with ``num_states * 16 * 8``. Otherwise
        ``self.output_bytes`` is set to the existing buffer size.

        Args:
            device: forwarded to the superclass ``initialize``.
            **kwargs: forwarded to the superclass ``initialize``.
        """
        super(Uniform, self).initialize(device, **kwargs)

        # The reuse check must use the same element count as the allocation.
        # Comparing against num_states * 16 (half the allocated size) could
        # never match, so the states were re-seeded on every call.
        states_size = self.num_states * 16 * 2
        if not self.states or self.states.size != states_size:
            self.states.reset(numpy.empty(states_size, dtype=numpy.uint32))
            # NOTE(review): if self.prng.randint excludes the upper bound as
            # numpy's randint does, (1 << 32) + 1 allows the value 1 << 32,
            # which does not fit in uint32 - confirm the intended bound.
            self.states.mem[:] = self.prng.randint(0, (1 << 32) + 1,
                                                   self.states.size)

        if not self.output or self.output.nbytes < self.output_bytes:
            self.output_bytes = roundup(self.output_bytes,
                                        self.num_states * 16 * 8)
            self.output.reset(numpy.zeros(self.output_bytes, numpy.uint8))
        else:
            # Existing buffer is large enough: adopt its actual size.
            self.output_bytes = self.output.nbytes

        self.init_vectors(self.states, self.output)
Exemple #6
0
    def numpy_fill(self, nbytes):
        """Fill the output buffer on the host using ``self._next_rand``.

        ``nbytes`` is passed through roundup() with the number of bytes
        produced per round (``num_states * 16 * 8``). The states and the
        output are viewed as uint64; the values generated from state ``i``
        are stored at output indices ``i, i + num_states, i + 2 * num_states``
        and so on, 16 values per round.

        Raises:
            error.Bug: if the rounded request exceeds ``self.output.nbytes``.
        """
        round_bytes = self.num_states * 16 * 8
        nbytes = roundup(nbytes, round_bytes)
        if nbytes > self.output.nbytes:
            raise error.Bug("nbytes > self.output.nbytes")
        self.states.map_write()
        self.output.map_invalidate()
        # Each round yields 16 values per state.
        draws_per_state = (nbytes // round_bytes) * 16

        const64 = numpy.array([1181783497276652981], dtype=numpy.uint64)
        tmp0 = numpy.zeros(1, dtype=numpy.uint64)
        tmp1 = numpy.zeros(1, dtype=numpy.uint64)

        words = self.states.mem.view(dtype=numpy.uint64)
        state_rows = words.reshape(words.size // 16, 16)
        out64 = self.output.mem.view(dtype=numpy.uint64)
        for index in range(self.num_states):
            state = state_rows[index]
            # Reset self.p before generating from each state.
            self.p = 0
            stride = self.num_states
            last = index + draws_per_state * stride
            for slot in range(index, last, stride):
                out64[slot] = self._next_rand(state, tmp0, tmp1, const64)
Exemple #7
0
    def ocl_init(self):
        """Prepare device buffers, the compiled program and its kernels.

        Initializes the vectors used by this unit on ``self.device``, builds
        the program with preprocessor defines derived from the problem
        dimensions, fetches the four kernels and binds their buffer
        arguments, and stores the global/local work sizes used for the
        distance kernel.
        """
        device = self.device
        for vector in (self.input, self.weights, self.winners, self.argmins,
                       self._distances, self._coords):
            vector.initialize(device)

        neurons = self._neurons_number
        batch_size = self.input.mem.shape[0]

        # Neurons per chunk; when the device group size leaves fewer than
        # two, use about half of the neurons per chunk instead.
        chunk_size = neurons // device.max_group_size
        if chunk_size < 2:
            chunk_size = neurons // 2 + 1
        self.argmin_group_size = int(
            numpy.ceil(float(neurons) / chunk_size))

        block_size, vector_opt = device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        coords_mem = self._coords.mem
        coord_type = "%s%d" % (
            opencl_types.numpy_dtype_to_opencl(coords_mem.dtype),
            coords_mem.shape[-1])
        defines = {
            'BLOCK_SIZE': block_size,
            'VECTOR_OPT': int(bool(vector_opt)),
            'BATCH': batch_size,
            'SAMPLE_LENGTH': self._sample_length,
            'NEURONS_NUMBER': neurons,
            'CHUNK_SIZE': chunk_size,
            'GRADIENT_CHUNK_SIZE': device.max_group_size,
            'coord_type': coord_type,
        }
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1
        program_name = "%s_%d_%d_%d" % (
            self.__class__.__name__, batch_size, self._sample_length, neurons)
        self.build_program(defines, program_name,
                           dtype=self.weights.mem.dtype)

        self.ocl_consts_ = numpy.zeros(1, dtype=self.weights.mem.dtype)

        krn = self._krn_distances_ = self.get_kernel("calculate_distances")
        krn.set_args(self.input.devmem, self.weights.devmem,
                     self._distances.devmem)

        krn = self._krn_argmin_ = self.get_kernel("calculate_argmin")
        krn.set_args(self._distances.devmem, self.argmins.devmem,
                     self.winners.devmem)

        # For the next two kernels the leading arguments and argument 3 are
        # bound here; argument 2 is not set in this method.
        krn = self._krn_gravity_ = self.get_kernel("compute_gravity")
        krn.set_args(self.argmins.devmem, self._coords.devmem)
        krn.set_arg(3, self._distances.devmem)

        krn = self._krn_apply_gradient_ = self.get_kernel("apply_gradient")
        krn.set_args(self.input.devmem, self._distances.devmem)
        krn.set_arg(3, self.weights.devmem)

        self._gs_distance = [roundup(neurons, block_size),
                             roundup(batch_size, block_size)]
        self._ls_distance = [block_size] * 2
Exemple #8
0
    def ocl_init(self):
        """Prepare device buffers, the compiled program and its kernels.

        The output vector is always initialized. When ``self.argmins`` is
        None this unit computes distances and argmins itself, so the input,
        weights and distance vectors are initialized and the corresponding
        kernels are set up. When ``self.argmins`` is provided, only the
        ``set_total`` kernel is needed, and nothing else is done if
        ``self.total`` is None as well.
        """
        device = self.device
        batch_size = self.input.mem.shape[0]
        self.output.initialize(device)

        own_argmins = self.argmins is None
        with_total = self.total is not None
        if own_argmins:
            for vector in (self.input, self.weights, self._distances):
                vector.initialize(device)
        elif not with_total:
            # argmins are supplied and no total is requested: nothing to
            # build.
            return
        if with_total:
            self.total.initialize(device)

        max_group = device.max_group_size
        neurons = self.neurons_number
        copy_chunk_size = int(numpy.ceil(batch_size / max_group))
        chunk_size = neurons // max_group
        if chunk_size < 2:
            chunk_size = neurons // 2 + 1
        self.argmin_group_size = int(numpy.ceil(neurons / chunk_size))

        block_size, vector_opt = device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        defines = {
            'BLOCK_SIZE': block_size,
            'VECTOR_OPT': int(bool(vector_opt)),
            'BATCH': batch_size,
            'SAMPLE_LENGTH': self.sample_length,
            'NEURONS_NUMBER': neurons,
            'CHUNK_SIZE': chunk_size,
            'COPY_CHUNK_SIZE': copy_chunk_size,
        }
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1
        program_name = "%s_%d_%d_%d" % (
            self.__class__.__name__, batch_size, self.sample_length, neurons)
        self.build_program(defines, program_name,
                           dtype=self.weights.mem.dtype)

        if with_total:
            self._set_total_global_size_ = [
                int(numpy.ceil(batch_size / copy_chunk_size))]
            self._krn_set_total_ = self.get_kernel("set_total")
            # The middle argument is passed as cl.skip here.
            self._krn_set_total_.set_args(self.output.devmem, cl.skip,
                                          self.total.devmem)
        if not own_argmins:
            return

        krn = self._krn_distances_ = self.get_kernel("calculate_distances")
        krn.set_args(self.input.devmem, self.weights.devmem,
                     self._distances.devmem)

        krn = self._krn_argmin_ = self.get_kernel("calculate_argmin")
        krn.set_args(self._distances.devmem, self.output.devmem, None)

        self._gs_distance = [roundup(neurons, block_size),
                             roundup(batch_size, block_size)]
        self._ls_distance = [block_size] * 2
Exemple #9
0
    def initialize(self, device, **kwargs):
        """Allocate gradient buffers and initialize all vectors.

        Hyper-parameters present in ``kwargs`` override the current attribute
        values. Buffers that are still empty are allocated as zeros shaped
        like the weights/bias; buffers that already exist are checked with
        assertions (or, for the plain and accumulated bias gradients,
        re-allocated on a size mismatch).

        Args:
            device: forwarded to the superclass ``initialize``.
            **kwargs: forwarded to the superclass ``initialize``; may also
                carry "learning_rate", "weights_decay", "gradient_moment"
                and their "*_bias" counterparts.
        """
        super(GradientDescentBase, self).initialize(device, **kwargs)

        def zeros_or_check(vector, template):
            # Allocate zeros shaped like the template when the vector is
            # empty; otherwise the sizes must already agree.
            if vector:
                assert vector.size == template.size
            else:
                vector.reset(numpy.zeros_like(template.mem))

        if not self.weights:
            self.weights_shape = None
        else:
            assert len(self.weights.shape) == 2
            shape = self.weights.shape
            self.weights_shape = (tuple(reversed(shape))
                                  if self.weights_transposed else shape)

        for option in ("learning_rate", "weights_decay", "gradient_moment",
                       "learning_rate_bias", "weights_decay_bias",
                       "gradient_moment_bias"):
            setattr(self, option, kwargs.get(option, getattr(self, option)))

        if self.weights:
            zeros_or_check(self.gradient_weights, self.weights)
            if self.accumulate_gradient:
                zeros_or_check(self.accumulated_gradient_weights,
                               self.weights)
            if self.gradient_moment or not self.is_standalone:
                zeros_or_check(self.gradient_weights_with_moment,
                               self.weights)

        has_bias = bool(self.include_bias and self.bias)

        # The plain and accumulated bias gradients are silently re-created
        # on a size mismatch instead of asserting.
        if has_bias and (not self.gradient_bias or
                         self.gradient_bias.size != self.bias.size):
            self.gradient_bias.reset(numpy.zeros_like(self.bias.mem))

        if (has_bias and self.accumulate_gradient and
                (not self.accumulated_gradient_bias or
                 self.accumulated_gradient_bias.size != self.bias.size)):
            self.accumulated_gradient_bias.reset(
                numpy.zeros_like(self.bias.mem))

        if has_bias and (self.gradient_moment_bias or
                         not self.is_standalone):
            zeros_or_check(self.gradient_bias_with_moment, self.bias)

        dtype = self.err_output.dtype
        if self.need_err_input:
            if self.err_input:
                assert self.err_input.shape == self.input.shape
            else:
                self.err_input.reset(numpy.zeros(self.input.shape, dtype))

        if self.weights:
            side = self.weights_shape[0]
            other = self.weights.size // side
            if self.factor_ortho:
                if self.col_sums:
                    assert self.col_sums.size == other
                else:
                    self.col_sums.reset(numpy.zeros(other, dtype=dtype))
                self.col_sums.initialize(self.device)
            self.reduce_size = roundup(min(self.reduce_size, other), 32)
            self.weights.initialize(self.device)

        for vec in (self.bias, self.input, self.err_input):
            if vec:
                vec.initialize(self.device)
        self.init_vectors(self.err_output, self.gradient_weights,
                          self.gradient_bias,
                          self.accumulated_gradient_weights,
                          self.accumulated_gradient_bias,
                          self.gradient_weights_with_moment,
                          self.gradient_bias_with_moment)
Exemple #10
0
    def initialize(self, device, **kwargs):
        """Set up hyper-parameters, gradient buffers and device vectors.

        Values for "learning_rate", "weights_decay", "gradient_moment" and
        their "*_bias" variants are taken from ``kwargs`` when present.
        Missing gradient buffers are created as zeros shaped like the weights
        or bias; existing ones are validated with assertions, except the
        plain and accumulated bias gradients, which are re-created when their
        size differs from the bias.

        Args:
            device: forwarded to the superclass ``initialize``.
            **kwargs: forwarded to the superclass ``initialize``.
        """
        super(GradientDescentBase, self).initialize(device, **kwargs)

        if self.weights:
            assert len(self.weights.shape) == 2
            if self.weights_transposed:
                self.weights_shape = tuple(reversed(self.weights.shape))
            else:
                self.weights_shape = self.weights.shape
        else:
            self.weights_shape = None

        overridable = (
            "learning_rate",
            "weights_decay",
            "gradient_moment",
            "learning_rate_bias",
            "weights_decay_bias",
            "gradient_moment_bias",
        )
        for key in overridable:
            setattr(self, key, kwargs.get(key, getattr(self, key)))

        if self.weights:
            if self.gradient_weights:
                assert self.gradient_weights.size == self.weights.size
            else:
                self.gradient_weights.reset(numpy.zeros_like(self.weights.mem))

            if self.accumulate_gradient:
                if self.accumulated_gradient_weights:
                    assert self.accumulated_gradient_weights.size == self.weights.size
                else:
                    self.accumulated_gradient_weights.reset(numpy.zeros_like(self.weights.mem))

            if self.gradient_moment or not self.is_standalone:
                if self.gradient_weights_with_moment:
                    assert self.gradient_weights_with_moment.size == self.weights.size
                else:
                    self.gradient_weights_with_moment.reset(numpy.zeros_like(self.weights.mem))

        use_bias = bool(self.include_bias and self.bias)
        if use_bias:
            # Unlike the weight buffers, these two are re-created rather
            # than asserted when the size does not match the bias.
            if not self.gradient_bias or self.gradient_bias.size != self.bias.size:
                self.gradient_bias.reset(numpy.zeros_like(self.bias.mem))

            if self.accumulate_gradient and (
                not self.accumulated_gradient_bias
                or self.accumulated_gradient_bias.size != self.bias.size
            ):
                self.accumulated_gradient_bias.reset(numpy.zeros_like(self.bias.mem))

            if self.gradient_moment_bias or not self.is_standalone:
                if self.gradient_bias_with_moment:
                    assert self.gradient_bias_with_moment.size == self.bias.size
                else:
                    self.gradient_bias_with_moment.reset(numpy.zeros_like(self.bias.mem))

        dtype = self.err_output.dtype
        if self.need_err_input:
            if self.err_input:
                assert self.err_input.shape == self.input.shape
            else:
                self.err_input.reset(numpy.zeros(self.input.shape, dtype))

        if self.weights:
            side = self.weights_shape[0]
            other = self.weights.size // side
            if self.factor_ortho:
                if self.col_sums:
                    assert self.col_sums.size == other
                else:
                    self.col_sums.reset(numpy.zeros(other, dtype=dtype))
                self.col_sums.initialize(self.device)
            self.reduce_size = roundup(min(self.reduce_size, other), 32)
            self.weights.initialize(self.device)

        for vec in (self.bias, self.input, self.err_input):
            if vec:
                vec.initialize(self.device)
        self.init_vectors(
            self.err_output,
            self.gradient_weights,
            self.gradient_bias,
            self.accumulated_gradient_weights,
            self.accumulated_gradient_bias,
            self.gradient_weights_with_moment,
            self.gradient_bias_with_moment,
        )
Exemple #11
0
    def ocl_init(self):
        """Initialize vectors on the device, build the program, bind kernels.

        Computes the chunking parameters from the number of neurons and the
        device's maximum group size, passes them to ``build_program`` as
        defines, then fetches the distance, argmin, gravity and
        gradient-application kernels and binds their buffer arguments.
        """
        self.input.initialize(self.device)
        self.weights.initialize(self.device)
        self.winners.initialize(self.device)
        self.argmins.initialize(self.device)
        self._distances.initialize(self.device)
        self._coords.initialize(self.device)

        batch_size = self.input.mem.shape[0]
        group_limit = self.device.max_group_size
        per_group = self._neurons_number // group_limit
        # Fewer than two neurons per chunk is replaced by about half of
        # all neurons per chunk.
        chunk_size = (per_group if per_group >= 2
                      else self._neurons_number // 2 + 1)
        groups = numpy.ceil(float(self._neurons_number) / chunk_size)
        self.argmin_group_size = int(groups)

        block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        ocl_coord_dtype = opencl_types.numpy_dtype_to_opencl(
            self._coords.mem.dtype)
        defines = {}
        defines['BLOCK_SIZE'] = block_size
        defines['VECTOR_OPT'] = int(bool(vector_opt))
        defines['BATCH'] = batch_size
        defines['SAMPLE_LENGTH'] = self._sample_length
        defines['NEURONS_NUMBER'] = self._neurons_number
        defines['CHUNK_SIZE'] = chunk_size
        defines['GRADIENT_CHUNK_SIZE'] = self.device.max_group_size
        defines['coord_type'] = "%s%d" % (ocl_coord_dtype,
                                          self._coords.mem.shape[-1])
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1

        cache_name = "%s_%d_%d_%d" % (self.__class__.__name__,
                                      batch_size, self._sample_length,
                                      self._neurons_number)
        self.build_program(defines, cache_name,
                           dtype=self.weights.mem.dtype)

        self.ocl_consts_ = numpy.zeros(1, dtype=self.weights.mem.dtype)

        self._krn_distances_ = self.get_kernel("calculate_distances")
        self._krn_distances_.set_args(
            self.input.devmem, self.weights.devmem, self._distances.devmem)

        self._krn_argmin_ = self.get_kernel("calculate_argmin")
        self._krn_argmin_.set_args(
            self._distances.devmem, self.argmins.devmem, self.winners.devmem)

        # Argument 2 of the two kernels below is not bound in this method.
        self._krn_gravity_ = self.get_kernel("compute_gravity")
        self._krn_gravity_.set_args(
            self.argmins.devmem, self._coords.devmem)
        self._krn_gravity_.set_arg(3, self._distances.devmem)

        self._krn_apply_gradient_ = self.get_kernel("apply_gradient")
        self._krn_apply_gradient_.set_args(
            self.input.devmem, self._distances.devmem)
        self._krn_apply_gradient_.set_arg(3, self.weights.devmem)

        gs_neurons = roundup(self._neurons_number, block_size)
        gs_batch = roundup(batch_size, block_size)
        self._gs_distance = [gs_neurons, gs_batch]
        self._ls_distance = [block_size, block_size]
Exemple #12
0
    def ocl_init(self):
        """Initialize vectors on the device, build the program, bind kernels.

        Two modes are visible here. With ``self.argmins`` set to None the
        unit sets up the distance and argmin kernels itself. With
        ``self.argmins`` provided, only the ``set_total`` kernel is prepared,
        and the method returns right after initializing the output if
        ``self.total`` is None too.
        """
        batch_size = self.input.mem.shape[0]
        self.output.initialize(self.device)

        needs_argmin_kernels = self.argmins is None
        tracks_total = self.total is not None
        if not needs_argmin_kernels and not tracks_total:
            # Neither the argmin kernels nor set_total are needed.
            return
        if needs_argmin_kernels:
            self.input.initialize(self.device)
            self.weights.initialize(self.device)
            self._distances.initialize(self.device)
        if tracks_total:
            self.total.initialize(self.device)

        group_limit = self.device.max_group_size
        copy_chunk_size = int(numpy.ceil(batch_size / group_limit))
        per_group = self.neurons_number // group_limit
        chunk_size = (per_group if per_group >= 2
                      else self.neurons_number // 2 + 1)
        self.argmin_group_size = int(
            numpy.ceil(self.neurons_number / chunk_size))

        block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
            kernel="matrix_multiplication", dtype=self.input.dtype)

        defines = {}
        defines['BLOCK_SIZE'] = block_size
        defines['VECTOR_OPT'] = int(bool(vector_opt))
        defines['BATCH'] = batch_size
        defines['SAMPLE_LENGTH'] = self.sample_length
        defines['NEURONS_NUMBER'] = self.neurons_number
        defines['CHUNK_SIZE'] = chunk_size
        defines['COPY_CHUNK_SIZE'] = copy_chunk_size
        if self.weights_transposed:
            defines['WEIGHTS_TRANSPOSED'] = 1

        cache_name = "%s_%d_%d_%d" % (self.__class__.__name__,
                                      batch_size, self.sample_length,
                                      self.neurons_number)
        self.build_program(defines, cache_name,
                           dtype=self.weights.mem.dtype)

        if tracks_total:
            work_items = int(numpy.ceil(batch_size / copy_chunk_size))
            self._set_total_global_size_ = [work_items]
            self._krn_set_total_ = self.get_kernel("set_total")
            # The middle argument is passed as cl.skip here.
            self._krn_set_total_.set_args(
                self.output.devmem, cl.skip, self.total.devmem)
        if not needs_argmin_kernels:
            return

        self._krn_distances_ = self.get_kernel("calculate_distances")
        self._krn_distances_.set_args(
            self.input.devmem, self.weights.devmem, self._distances.devmem)

        self._krn_argmin_ = self.get_kernel("calculate_argmin")
        self._krn_argmin_.set_args(
            self._distances.devmem, self.output.devmem, None)

        gs_neurons = roundup(self.neurons_number, block_size)
        gs_batch = roundup(batch_size, block_size)
        self._gs_distance = [gs_neurons, gs_batch]
        self._ls_distance = [block_size, block_size]