Example #1
    def initialize(self, device, **kwargs):
        super(FullBatchLoader, self).initialize(device=device, **kwargs)
        assert self.total_samples > 0
        self.analyze_original_dataset()
        self._map_original_labels()

        if isinstance(self.device, NumpyDevice):
            return

        self.info("Will try to store the entire dataset on the device")
        try:
            self.init_vectors(self.original_data, self.minibatch_data)
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the " "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the " "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        if self.has_labels:
            self.init_vectors(self._mapped_original_labels_, self.minibatch_labels)

        if not self.shuffled_indices:
            self.shuffled_indices.mem = numpy.arange(self.total_samples, dtype=Loader.LABEL_DTYPE)
        self.init_vectors(self.shuffled_indices, self.minibatch_indices)
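The pattern above (try to place the whole dataset on the accelerator, fall back to NumPy when the device reports an allocation failure) can be shown in isolation. A minimal, framework-free sketch: DeviceOutOfMemory and allocate() are hypothetical stand-ins for CLRuntimeError/CUDARuntimeError and init_vectors():

# Hypothetical stand-in for CLRuntimeError / CUDARuntimeError above.
class DeviceOutOfMemory(Exception):
    pass

def place_on_device(allocate, warn=print):
    """Try allocate(); on device OOM, warn and signal a NumPy fallback."""
    try:
        allocate()
    except DeviceOutOfMemory:
        warn("Failed to store the entire dataset on the device")
        return False  # the caller sets force_numpy and switches devices
    return True

def _too_big():
    raise DeviceOutOfMemory()

assert place_on_device(_too_big) is False
assert place_on_device(lambda: None) is True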
Example #2
    def initialize(self, device, **kwargs):
        if device is None:
            raise ValueError("device may not be None")
        if not isinstance(device, Device):
            raise TypeError("deviec must be of type %s" % Device)
        if self._force_numpy:
            device = NumpyDevice()

        # Scan class hierarchy
        checked = []  # kept only for the exception message
        for cls in type(device).mro():
            if not hasattr(cls, "BACKEND"):
                continue
            checked.append(cls)
            try:
                self.verify_interface(INTERFACE_MAPPING[cls])
                break
            except NotImplementedError:
                pass
        else:
            raise NotImplementedError("%s does not implement any of %s" %
                                      (type(self), checked))

        if not device.is_attached(self.thread_pool):
            device.thread_pool_attach(self.thread_pool)
        try:
            super(AcceleratedUnit, self).initialize(**kwargs)
        except AttributeError:
            pass
        self.device = device
        self.intel_opencl_workaround = (
            self.intel_opencl_workaround and
            isinstance(device, OpenCLDevice) and
            device.device_info.is_cpu)
        if isinstance(self.device, NumpyDevice) and \
                not self._numpy_run_jitted_ and \
                not root.common.engine.disable_numba:
            if jit is None and root.common.warnings.numba:
                self.warning(
                    "Numba (http://numba.pydata.org) was not found, "
                    "numpy_run() is going to be slower. Ignore this warning "
                    "by setting root.common.warnings.numba to False.")
            else:
                self.numpy_run = jit(nopython=True, nogil=True)(self.numpy_run)
                self.debug("Jitted numpy_run() with numba")
                self._numpy_run_jitted_ = True
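The tail of this method JIT-compiles numpy_run() with numba when it is present. A standalone sketch of that optional-dependency pattern; the sum-of-squares kernel is illustrative, while the jit(nopython=True, nogil=True) call matches the usage above:

import numpy

try:
    from numba import jit
except ImportError:
    jit = None

def numpy_run(a):
    # Plain-Python loop: slow under CPython, fast once numba compiles it
    s = 0.0
    for x in a:
        s += x * x
    return s

if jit is not None:
    numpy_run = jit(nopython=True, nogil=True)(numpy_run)

print(numpy_run(numpy.arange(10, dtype=numpy.float64)))  # 285.0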
Example #3
    def _after_backend_init(self):
        try:
            self.fill_indices(0, min(self.max_minibatch_size, self.total_samples))
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)
Example #4
    def __init__(self, workflow, **kwargs):
        super(AcceleratedUnit, self).__init__(workflow, **kwargs)
        self._device = NumpyDevice()
        self._cache = kwargs.get("cache", True)
        # Yup, this is right - self._force_numpy is initialized in
        # init_unpickled
        self._force_numpy = kwargs.get("force_numpy", self._force_numpy)
        self.intel_opencl_workaround = \
            root.common.engine.force_numpy_run_on_intel_opencl
Example #5
    def _after_backend_init(self):
        try:
            self.fill_indices(
                0, min(self.max_minibatch_size, self.total_samples))
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)
Example #6
    def __init__(self, interactive=False, **kwargs):
        super(Launcher, self).__init__()
        self._initialized = False
        self._running = False
        parser = Launcher.init_parser(**kwargs)
        self.args, _ = parser.parse_known_args(self.argv)
        self.args.master_address = self.args.master_address.strip()
        self.args.listen_address = self.args.listen_address.strip()
        self.testing = self.args.test
        self.args.matplotlib_backend = self.args.matplotlib_backend.strip()
        self._slaves = [
            x.strip() for x in self.args.nodes.split(',') if x.strip() != ""
        ]
        self._slave_launch_transform = self.args.slave_launch_transform
        if self._slave_launch_transform.find("%s") < 0:
            raise ValueError("Slave launch command transform must contain %s")

        if self.args.log_file != "":
            log_file = self.args.log_file
            if self.args.log_file_pid:
                log_base_name = os.path.splitext(os.path.basename(log_file))
                log_file = os.path.join(
                    os.path.dirname(log_file), "%s.%d%s" %
                    (log_base_name[0], os.getpid(), log_base_name[1]))
            logger.Logger.redirect_all_logging_to_file(log_file)

        self._result_file = self.args.result_file

        self.info("My Python is %s %s", platform.python_implementation(),
                  platform.python_version())
        self.info("My PID is %d", os.getpid())
        self.info("My time is %s", datetime.datetime.now())
        self.id = str(uuid.uuid4()) if not self.is_slave else None
        self.log_id = self.args.log_id or self.id
        if self.logs_to_mongo:
            if self.mongo_log_addr == "":
                self.args.log_mongo = root.common.mongodb_logging_address
            if not self.is_slave:
                logger.Logger.duplicate_all_logging_to_mongo(
                    self.args.log_mongo, self.log_id, "master")

        self._monkey_patch_twisted_failure()
        self._lock = threading.Lock()
        self._webagg_port = 0
        self._agent = None
        self._workflow = None
        self._start_time = None
        self._device = NumpyDevice()
        self._interactive = interactive
        self._reactor_thread = None
        self._notify_update_interval = kwargs.get(
            "status_update_interval", root.common.web.notification_interval)
        if self.args.yarn_nodes is not None and self.is_master:
            self._discover_nodes_from_yarn(self.args.yarn_nodes)
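The log_file_pid branch above builds a per-process log name by splicing the PID between the file's stem and extension. A small standalone sketch of the same splitext/join steps (the path is illustrative):

import os

def pid_log_name(log_file, pid=None):
    """Turn "run.log" into "run.<pid>.log", keeping the directory."""
    pid = os.getpid() if pid is None else pid
    stem, ext = os.path.splitext(os.path.basename(log_file))
    return os.path.join(os.path.dirname(log_file),
                        "%s.%d%s" % (stem, pid, ext))

print(pid_log_name("/var/log/veles/run.log", pid=12345))
# /var/log/veles/run.12345.log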
Example #7
    def __init__(self, data=None, shallow_pickle=False):
        super(Array, self).__init__()
        self._device = NumpyDevice()
        self.mem = data
        self._max_value = 1.0
        self._shallow_pickle = shallow_pickle
        if six.PY3:
            # Workaround for the unspecified destructor call order;
            # this bug is hard to reduce to something reportable
            Array.__vectors__.add(weakref.ref(self))
            if not Array.__registered:
                atexit.register(Array.reset_all)
                Array.__registered = True
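Array's constructor registers every instance in a class-level set of weak references and installs a single atexit hook, so buffers can be released at interpreter shutdown without keeping the instances alive. A self-contained sketch of that pattern with simplified (non-mangled) names:

import atexit
import weakref

class Tracked(object):
    _instances = set()
    _registered = False

    def __init__(self):
        # A weak reference does not prolong the instance's lifetime
        Tracked._instances.add(weakref.ref(self))
        if not Tracked._registered:
            atexit.register(Tracked.reset_all)
            Tracked._registered = True

    @staticmethod
    def reset_all():
        for ref in Tracked._instances:
            obj = ref()  # None if the instance was already collected
            if obj is not None:
                obj.reset()

    def reset(self):
        pass  # release device buffers and the like here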
Example #8
    def initialize(self, device, **kwargs):
        super(FullBatchLoader, self).initialize(device=device, **kwargs)
        assert self.total_samples > 0
        self.analyze_original_dataset()
        self._map_original_labels()

        if isinstance(self.device, NumpyDevice):
            return

        self.info("Will try to store the entire dataset on the device")
        try:
            self.init_vectors(self.original_data, self.minibatch_data)
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        if self.has_labels:
            self.init_vectors(self._mapped_original_labels_,
                              self.minibatch_labels)

        if not self.shuffled_indices:
            self.shuffled_indices.mem = numpy.arange(self.total_samples,
                                                     dtype=Loader.LABEL_DTYPE)
        self.init_vectors(self.shuffled_indices, self.minibatch_indices)
Example #9
    def init_unpickled(self):
        super(AcceleratedWorkflow, self).init_unpickled()
        self._power_ = 0
        self._last_power_measurement_time = 0
        self.device = NumpyDevice()
        self._power_measure_time_interval = 120
Example #10
class FullBatchLoader(AcceleratedUnit, FullBatchLoaderBase):
    """Loads data entire in memory.

    Attributes:
        validation_ratio: specifies which part of the merged train +
        validation set goes to validation.
        original_data: original data (Array).
        original_labels: original labels (Array, dtype=Loader.LABEL_DTYPE)
            (in case of classification).

    Should be overridden in the child class:
        load_data()
    """
    def __init__(self, workflow, **kwargs):
        super(FullBatchLoader, self).__init__(workflow, **kwargs)
        self.verify_interface(IFullBatchLoader)

    def init_unpickled(self):
        super(FullBatchLoader, self).init_unpickled()
        self._original_data_ = memory.Array()
        self._original_labels_ = []
        self._mapped_original_labels_ = memory.Array()
        self.sources_["fullbatch_loader"] = {}
        self._global_size = None
        self._krn_const = numpy.zeros(2, dtype=Loader.LABEL_DTYPE)

    @Loader.shape.getter
    def shape(self):
        """
        Takes the shape from original_data.
        :return: Sample's shape.
        """
        if not self.original_data:
            raise AttributeError("Must first initialize original_data")
        return self.original_data[0].shape

    @property
    def on_device(self):
        return not self.force_numpy

    @on_device.setter
    def on_device(self, value):
        if not isinstance(value, bool):
            raise TypeError("on_device must be boolean (got %s)" % type(value))
        self.force_numpy = not value

    @property
    def original_data(self):
        return self._original_data_

    @property
    def original_labels(self):
        return self._original_labels_

    @property
    def validation_ratio(self):
        return getattr(self, "_validation_ratio", None)

    @validation_ratio.setter
    def validation_ratio(self, value):
        if value is None:
            self._validation_ratio = None
            return
        if isinstance(value, int):
            if value != 0:
                raise ValueError("validation_ratio must be in [0, 1).")
            self._validation_ratio = 0.0
            return
        if not isinstance(value, float):
            raise TypeError("validation_ratio must be a float")
        if value < 0 or value >= 1:
            raise ValueError("validation_ratio must be in [0, 1).")
        self._validation_ratio = value

    def get_ocl_defines(self):
        """Add definitions before building the kernel during initialize().
        """
        return {}

    def initialize(self, device, **kwargs):
        super(FullBatchLoader, self).initialize(device=device, **kwargs)
        assert self.total_samples > 0
        self.analyze_original_dataset()
        self._map_original_labels()

        if isinstance(self.device, NumpyDevice):
            return

        self.info("Will try to store the entire dataset on the device")
        try:
            self.init_vectors(self.original_data, self.minibatch_data)
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        if self.has_labels:
            self.init_vectors(self._mapped_original_labels_,
                              self.minibatch_labels)

        if not self.shuffled_indices:
            self.shuffled_indices.mem = numpy.arange(self.total_samples,
                                                     dtype=Loader.LABEL_DTYPE)
        self.init_vectors(self.shuffled_indices, self.minibatch_indices)

    def _gpu_init(self):
        defines = {
            "LABELS": int(self.has_labels),
            "SAMPLE_SIZE": self.original_data.sample_size,
            "MAX_MINIBATCH_SIZE": self.max_minibatch_size,
            "original_data_dtype":
                numpy_dtype_to_opencl(self.original_data.dtype),
            "minibatch_data_dtype":
                numpy_dtype_to_opencl(self.minibatch_data.dtype)
        }
        defines.update(self.get_ocl_defines())

        self.build_program(defines,
                           "fullbatch_loader",
                           dtype=self.minibatch_data.dtype)
        self.assign_kernel("fill_minibatch_data_labels")

        if not self.has_labels:
            self.set_args(self.original_data, self.minibatch_data,
                          self.device.skip(2), self.shuffled_indices,
                          self.minibatch_indices)
        else:
            self.set_args(self.original_data, self.minibatch_data,
                          self.device.skip(2), self._mapped_original_labels_,
                          self.minibatch_labels, self.shuffled_indices,
                          self.minibatch_indices)

    def _after_backend_init(self):
        try:
            self.fill_indices(0,
                              min(self.max_minibatch_size, self.total_samples))
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the "
                             "device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)

    def numpy_run(self):
        Loader.run(self)

    def ocl_run(self):
        self.numpy_run()

    def cuda_run(self):
        self.numpy_run()

    def ocl_init(self):
        self._gpu_init()
        self._global_size = (self.max_minibatch_size,
                             self.minibatch_data.sample_size)
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        block_size = self.device.suggest_block_size(self._kernel_)
        self._global_size = (int(
            numpy.ceil(self.minibatch_data.size / block_size)), 1, 1)
        self._local_size = (block_size, 1, 1)

    def on_before_create_minibatch_data(self):
        self._has_labels = len(self.original_labels) > 0
        try:
            super(FullBatchLoader, self).on_before_create_minibatch_data()
        except AttributeError:
            pass
        self._resize_validation()

    def create_minibatch_data(self):
        self.minibatch_data.reset(
            numpy.zeros((self.max_minibatch_size, ) + self.shape,
                        dtype=self.dtype))

    def create_originals(self, dshape, labels=True):
        """
        Create original_data.mem and original_labels.mem.
        :param dshape: Future original_data.shape[1:]
        """
        self.original_data.reset(
            numpy.zeros((self.total_samples, ) + dshape, self.dtype))
        if not labels:
            return
        self._mapped_original_labels_.reset(
            numpy.zeros(self.total_samples, Loader.LABEL_DTYPE))
        del self.original_labels[:]
        self.original_labels.extend(None for _ in range(self.total_samples))

    def fill_indices(self, start_offset, count):
        if isinstance(self.device, NumpyDevice):
            return super(FullBatchLoader,
                         self).fill_indices(start_offset, count)

        self.unmap_vectors(self.original_data, self.minibatch_data,
                           self.shuffled_indices, self.minibatch_indices)

        if self.has_labels:
            self.unmap_vectors(self._mapped_original_labels_,
                               self.minibatch_labels)

        self._krn_const[0:2] = start_offset, count
        self._kernel_.set_arg(2, self._krn_const[0:1])
        self._kernel_.set_arg(3, self._krn_const[1:2])
        self.execute_kernel(self._global_size, self._local_size)

        # No further processing needed, so return True
        return True

    def fill_minibatch(self):
        for i, sample_index in enumerate(
                self.minibatch_indices.mem[:self.minibatch_size]):
            # int() is required by (guess what...) PyPy
            self.minibatch_data[i] = self.original_data[int(sample_index)]
            if self.has_labels:
                self.minibatch_labels[i] = \
                    self._mapped_original_labels_[int(sample_index)]

    def map_minibatch_labels(self):
        pass

    def analyze_dataset(self):
        """
        Override.
        """
        pass

    def normalize_minibatch(self):
        """
        Override.
        """
        pass

    def analyze_original_dataset(self):
        self.info("Normalizing to %s...", self.normalization_type)
        self.debug("Data range: (%.6f, %.6f), " %
                   (self.original_data.min(), self.original_data.max()))
        if self.class_lengths[TRAIN] > 0:
            self.normalizer.analyze(
                self.original_data[self.class_end_offsets[VALID]:])
        self.normalizer.normalize(self.original_data.mem)
        self.debug("Normalized data range: (%.6f, %.6f), " %
                   (self.original_data.min(), self.original_data.max()))

    def _resize_validation(self):
        """Extracts validation dataset from joined validation and train
        datasets randomly.

        We will rearrange indexes only.
        """
        rand = self.prng
        ratio = self.validation_ratio
        if ratio is None:
            return
        if ratio <= 0:  # Dispose validation set
            self.class_lengths[TRAIN] += self.class_lengths[VALID]
            self.class_lengths[VALID] = 0
            if self.shuffled_indices.mem is None:
                self.shuffled_indices.mem = numpy.arange(
                    self.total_samples, dtype=Loader.LABEL_DTYPE)
            return
        offs_test = self.class_lengths[TEST]
        offs = offs_test
        train_samples = self.class_lengths[VALID] + self.class_lengths[TRAIN]
        total_samples = train_samples + offs
        original_labels = self.original_labels

        if self.shuffled_indices.mem is None:
            self.shuffled_indices.mem = numpy.arange(total_samples,
                                                     dtype=Loader.LABEL_DTYPE)
        self.shuffled_indices.map_write()
        shuffled_indices = self.shuffled_indices.mem

        # If there are no labels
        if not self.has_labels:
            n = int(numpy.round(ratio * train_samples))
            while n > 0:
                i = rand.randint(offs, offs + train_samples)

                # Swap indexes
                shuffled_indices[offs], shuffled_indices[i] = \
                    shuffled_indices[i], shuffled_indices[offs]

                offs += 1
                n -= 1
            self.class_lengths[VALID] = offs - offs_test
            self.class_lengths[TRAIN] = \
                total_samples - self.class_lengths[VALID] - offs_test
            return

        # If there are labels
        nn = {}
        for i in shuffled_indices[offs:]:
            l = original_labels[i]
            nn[l] = nn.get(l, 0) + 1
        n = 0
        for l in nn.keys():
            n_train = nn[l]
            nn[l] = max(int(numpy.round(ratio * nn[l])), 1)
            if nn[l] >= n_train:
                raise ValueError("There are too few labels for class %s: %s" %
                                 (l, n_train))
            n += nn[l]
        while n > 0:
            i = rand.randint(offs, offs_test + train_samples)
            l = original_labels[shuffled_indices[i]]
            if nn[l] <= 0:
                # Move unused label to the end

                # Swap indexes
                ii = shuffled_indices[offs_test + train_samples - 1]
                shuffled_indices[offs_test + train_samples - 1] = \
                    shuffled_indices[i]
                shuffled_indices[i] = ii

                train_samples -= 1
                continue
            # Swap indexes
            ii = shuffled_indices[offs]
            shuffled_indices[offs] = shuffled_indices[i]
            shuffled_indices[i] = ii

            nn[l] -= 1
            n -= 1
            offs += 1
        self.class_lengths[VALID] = offs - offs_test
        self.class_lengths[TRAIN] = (total_samples -
                                     self.class_lengths[VALID] - offs_test)

    def _map_original_labels(self):
        self._has_labels = len(self.original_labels) > 0
        if not self.has_labels:
            return
        if len(self.labels_mapping) > 0:
            self._init_mapped_original_labels()
            return
        if len(self.original_labels) != self.original_data.shape[0]:
            raise ValueError(
                "original_labels and original_data must have the same length "
                "(%d vs %d)" %
                (len(self.original_labels), self.original_data.shape[0]))

        for ind, lbl in enumerate(self.original_labels):
            self._samples_mapping[lbl].add(ind)

        different_labels = tuple(
            Counter(self.original_labels[i]
                    for i in self.shuffled_indices[
                        self.class_end_offsets[c] - self.class_lengths[c]:
                        self.class_end_offsets[c]])
            for c in range(3))
        self._setup_labels_mapping(different_labels)
        self._init_mapped_original_labels()

    def _init_mapped_original_labels(self):
        self._mapped_original_labels_.reset(
            numpy.zeros(self.total_samples, Loader.LABEL_DTYPE))
        for i, label in enumerate(self.original_labels):
            self._mapped_original_labels_[i] = self.labels_mapping[label]
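Both FullBatchLoader listings implement _resize_validation() by swapping entries of the shuffled index array in place, so the samples themselves never move. A self-contained sketch of the unstratified branch (the labeled branch additionally balances per-class counts); here the region bounds stay fixed while candidates are drawn:

import numpy

def split_validation(indices, start, length, ratio, rng):
    """Swap a random `ratio` share of indices[start:start+length] to the
    front of the region; that prefix becomes the validation set."""
    n = int(numpy.round(ratio * length))
    end = start + length
    for offs in range(start, start + n):
        i = rng.randint(offs, end)  # pick any index not yet placed
        indices[offs], indices[i] = indices[i], indices[offs]
    return n  # validation length; the rest of the region stays TRAIN

rng = numpy.random.RandomState(0)
idx = numpy.arange(10)
print(split_validation(idx, 0, 10, 0.3, rng), idx)  # 3 and a permuted range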
Example #11
class FullBatchLoader(AcceleratedUnit, FullBatchLoaderBase):
    """Loads data entire in memory.

    Attributes:
        validation_ratio: specifies which part of the merged train +
        validation set goes to validation.
        original_data: original data (Array).
        original_labels: original labels (Array, dtype=Loader.LABEL_DTYPE)
            (in case of classification).

    Should be overridden in the child class:
        load_data()
    """

    def __init__(self, workflow, **kwargs):
        super(FullBatchLoader, self).__init__(workflow, **kwargs)
        self.verify_interface(IFullBatchLoader)

    def init_unpickled(self):
        super(FullBatchLoader, self).init_unpickled()
        self._original_data_ = memory.Array()
        self._original_labels_ = []
        self._mapped_original_labels_ = memory.Array()
        self.sources_["fullbatch_loader"] = {}
        self._global_size = None
        self._krn_const = numpy.zeros(2, dtype=Loader.LABEL_DTYPE)

    @Loader.shape.getter
    def shape(self):
        """
        Takes the shape from original_data.
        :return: Sample's shape.
        """
        if not self.original_data:
            raise AttributeError("Must first initialize original_data")
        return self.original_data[0].shape

    @property
    def on_device(self):
        return not self.force_numpy

    @on_device.setter
    def on_device(self, value):
        if not isinstance(value, bool):
            raise TypeError("on_device must be boolean (got %s)" % type(value))
        self.force_numpy = not value

    @property
    def original_data(self):
        return self._original_data_

    @property
    def original_labels(self):
        return self._original_labels_

    @property
    def validation_ratio(self):
        return getattr(self, "_validation_ratio", None)

    @validation_ratio.setter
    def validation_ratio(self, value):
        if value is None:
            self._validation_ratio = None
            return
        if isinstance(value, int):
            if value != 0:
                raise ValueError("validation_ratio must be in [0, 1).")
            self._validation_ratio = 0.0
            return
        if not isinstance(value, float):
            raise TypeError("validation_ratio must be a float")
        if value < 0 or value >= 1:
            raise ValueError("validation_ratio must be in [0, 1).")
        self._validation_ratio = value

    def get_ocl_defines(self):
        """Add definitions before building the kernel during initialize().
        """
        return {}

    def initialize(self, device, **kwargs):
        super(FullBatchLoader, self).initialize(device=device, **kwargs)
        assert self.total_samples > 0
        self.analyze_original_dataset()
        self._map_original_labels()

        if isinstance(self.device, NumpyDevice):
            return

        self.info("Will try to store the entire dataset on the device")
        try:
            self.init_vectors(self.original_data, self.minibatch_data)
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the " "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the " "device")
                self.force_numpy = True
                self.device = NumpyDevice()
                return
            else:
                raise from_none(e)
        if self.has_labels:
            self.init_vectors(self._mapped_original_labels_, self.minibatch_labels)

        if not self.shuffled_indices:
            self.shuffled_indices.mem = numpy.arange(self.total_samples, dtype=Loader.LABEL_DTYPE)
        self.init_vectors(self.shuffled_indices, self.minibatch_indices)

    def _gpu_init(self):
        defines = {
            "LABELS": int(self.has_labels),
            "SAMPLE_SIZE": self.original_data.sample_size,
            "MAX_MINIBATCH_SIZE": self.max_minibatch_size,
            "original_data_dtype": numpy_dtype_to_opencl(self.original_data.dtype),
            "minibatch_data_dtype": numpy_dtype_to_opencl(self.minibatch_data.dtype),
        }
        defines.update(self.get_ocl_defines())

        self.build_program(defines, "fullbatch_loader", dtype=self.minibatch_data.dtype)
        self.assign_kernel("fill_minibatch_data_labels")

        if not self.has_labels:
            self.set_args(
                self.original_data,
                self.minibatch_data,
                self.device.skip(2),
                self.shuffled_indices,
                self.minibatch_indices,
            )
        else:
            self.set_args(
                self.original_data,
                self.minibatch_data,
                self.device.skip(2),
                self._mapped_original_labels_,
                self.minibatch_labels,
                self.shuffled_indices,
                self.minibatch_indices,
            )

    def _after_backend_init(self):
        try:
            self.fill_indices(0, min(self.max_minibatch_size, self.total_samples))
        except CLRuntimeError as e:
            if e.code == CL_MEM_OBJECT_ALLOCATION_FAILURE:
                self.warning("Failed to store the entire dataset on the " "device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)
        except CUDARuntimeError as e:
            if e.code == CUDA_ERROR_OUT_OF_MEMORY:
                self.warning("Failed to store the entire dataset on the " "device")
                self.force_numpy = True
                self.device = NumpyDevice()
            else:
                raise from_none(e)

    def numpy_run(self):
        Loader.run(self)

    def ocl_run(self):
        self.numpy_run()

    def cuda_run(self):
        self.numpy_run()

    def ocl_init(self):
        self._gpu_init()
        self._global_size = (self.max_minibatch_size, self.minibatch_data.sample_size)
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        block_size = self.device.suggest_block_size(self._kernel_)
        self._global_size = (int(numpy.ceil(self.minibatch_data.size / block_size)), 1, 1)
        self._local_size = (block_size, 1, 1)

    def on_before_create_minibatch_data(self):
        self._has_labels = len(self.original_labels) > 0
        try:
            super(FullBatchLoader, self).on_before_create_minibatch_data()
        except AttributeError:
            pass
        self._resize_validation()

    def create_minibatch_data(self):
        self.minibatch_data.reset(numpy.zeros((self.max_minibatch_size,) + self.shape, dtype=self.dtype))

    def create_originals(self, dshape, labels=True):
        """
        Create original_data.mem and original_labels.mem.
        :param dshape: Future original_data.shape[1:]
        """
        self.original_data.reset(numpy.zeros((self.total_samples,) + dshape, self.dtype))
        if not labels:
            return
        self._mapped_original_labels_.reset(numpy.zeros(self.total_samples, Loader.LABEL_DTYPE))
        del self.original_labels[:]
        self.original_labels.extend(None for _ in range(self.total_samples))

    def fill_indices(self, start_offset, count):
        if isinstance(self.device, NumpyDevice):
            return super(FullBatchLoader, self).fill_indices(start_offset, count)

        self.unmap_vectors(self.original_data, self.minibatch_data, self.shuffled_indices, self.minibatch_indices)

        if self.has_labels:
            self.unmap_vectors(self._mapped_original_labels_, self.minibatch_labels)

        self._krn_const[0:2] = start_offset, count
        self._kernel_.set_arg(2, self._krn_const[0:1])
        self._kernel_.set_arg(3, self._krn_const[1:2])
        self.execute_kernel(self._global_size, self._local_size)

        # No further processing needed, so return True
        return True

    def fill_minibatch(self):
        for i, sample_index in enumerate(self.minibatch_indices.mem[: self.minibatch_size]):
            # int() is required by (guess what...) PyPy
            self.minibatch_data[i] = self.original_data[int(sample_index)]
            if self.has_labels:
                self.minibatch_labels[i] = self._mapped_original_labels_[int(sample_index)]

    def map_minibatch_labels(self):
        pass

    def analyze_dataset(self):
        """
        Override.
        """
        pass

    def normalize_minibatch(self):
        """
        Override.
        """
        pass

    def analyze_original_dataset(self):
        self.info("Normalizing to %s...", self.normalization_type)
        self.debug("Data range: (%.6f, %.6f), " % (self.original_data.min(), self.original_data.max()))
        if self.class_lengths[TRAIN] > 0:
            self.normalizer.analyze(self.original_data[self.class_end_offsets[VALID] :])
        self.normalizer.normalize(self.original_data.mem)
        self.debug("Normalized data range: (%.6f, %.6f), " % (self.original_data.min(), self.original_data.max()))

    def _resize_validation(self):
        """Extracts validation dataset from joined validation and train
        datasets randomly.

        We will rearrange indexes only.
        """
        rand = self.prng
        ratio = self.validation_ratio
        if ratio is None:
            return
        if ratio <= 0:  # Dispose validation set
            self.class_lengths[TRAIN] += self.class_lengths[VALID]
            self.class_lengths[VALID] = 0
            if self.shuffled_indices.mem is None:
                self.shuffled_indices.mem = numpy.arange(self.total_samples, dtype=Loader.LABEL_DTYPE)
            return
        offs_test = self.class_lengths[TEST]
        offs = offs_test
        train_samples = self.class_lengths[VALID] + self.class_lengths[TRAIN]
        total_samples = train_samples + offs
        original_labels = self.original_labels

        if self.shuffled_indices.mem is None:
            self.shuffled_indices.mem = numpy.arange(total_samples, dtype=Loader.LABEL_DTYPE)
        self.shuffled_indices.map_write()
        shuffled_indices = self.shuffled_indices.mem

        # If there are no labels
        if not self.has_labels:
            n = int(numpy.round(ratio * train_samples))
            while n > 0:
                i = rand.randint(offs, offs + train_samples)

                # Swap indexes
                shuffled_indices[offs], shuffled_indices[i] = shuffled_indices[i], shuffled_indices[offs]

                offs += 1
                n -= 1
            self.class_lengths[VALID] = offs - offs_test
            self.class_lengths[TRAIN] = total_samples - self.class_lengths[VALID] - offs_test
            return

        # If there are labels
        nn = {}
        for i in shuffled_indices[offs:]:
            l = original_labels[i]
            nn[l] = nn.get(l, 0) + 1
        n = 0
        for l in nn.keys():
            n_train = nn[l]
            nn[l] = max(int(numpy.round(ratio * nn[l])), 1)
            if nn[l] >= n_train:
                raise ValueError("There are too few labels for class %s: %s" % (l, n_train))
            n += nn[l]
        while n > 0:
            i = rand.randint(offs, offs_test + train_samples)
            l = original_labels[shuffled_indices[i]]
            if nn[l] <= 0:
                # Move unused label to the end

                # Swap indexes
                ii = shuffled_indices[offs_test + train_samples - 1]
                shuffled_indices[offs_test + train_samples - 1] = shuffled_indices[i]
                shuffled_indices[i] = ii

                train_samples -= 1
                continue
            # Swap indexes
            ii = shuffled_indices[offs]
            shuffled_indices[offs] = shuffled_indices[i]
            shuffled_indices[i] = ii

            nn[l] -= 1
            n -= 1
            offs += 1
        self.class_lengths[VALID] = offs - offs_test
        self.class_lengths[TRAIN] = total_samples - self.class_lengths[VALID] - offs_test

    def _map_original_labels(self):
        self._has_labels = len(self.original_labels) > 0
        if not self.has_labels:
            return
        if len(self.labels_mapping) > 0:
            self._init_mapped_original_labels()
            return
        if len(self.original_labels) != self.original_data.shape[0]:
            raise ValueError(
                "original_labels and original_data must have the same length "
                "(%d vs %d)" % (len(self.original_labels), self.original_data.shape[0])
            )

        for ind, lbl in enumerate(self.original_labels):
            self._samples_mapping[lbl].add(ind)

        different_labels = tuple(
            Counter(
                self.original_labels[i]
                for i in self.shuffled_indices[
                    self.class_end_offsets[c] - self.class_lengths[c] : self.class_end_offsets[c]
                ]
            )
            for c in range(3)
        )
        self._setup_labels_mapping(different_labels)
        self._init_mapped_original_labels()

    def _init_mapped_original_labels(self):
        self._mapped_original_labels_.reset(numpy.zeros(self.total_samples, Loader.LABEL_DTYPE))
        for i, label in enumerate(self.original_labels):
            self._mapped_original_labels_[i] = self.labels_mapping[label]
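As the docstring says, concrete loaders override load_data(). A hypothetical minimal subclass, sketched only from the attributes visible in these listings (class_lengths, create_originals(), original_data, original_labels, total_samples); the shapes and the label scheme are illustrative, not taken from the source:

class RandomFullBatchLoader(FullBatchLoader):
    """Hypothetical loader: 1000 random 28x28 samples in 10 classes."""

    def load_data(self):
        self.class_lengths[TEST] = 0
        self.class_lengths[VALID] = 100
        self.class_lengths[TRAIN] = 900
        self.create_originals((28, 28))  # allocates original_data and labels
        self.original_data.mem[:] = numpy.random.rand(
            *self.original_data.shape)
        for i in range(self.total_samples):
            self.original_labels[i] = i % 10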