コード例 #1
0
    def compute_indices(self, dataset):
        """Split the dataset indices into one training and one test subset.

        Sizes are validated first, then the split itself is delegated to
        `train_test_split`, driven by the `train_size`, `test_size`,
        `random_state` and `shuffle` attributes of this object.
        The computed indices are also stored in `_tr_idx` / `_ts_idx`.

        Parameters
        ----------
        dataset : CDataset
            Dataset to split.

        Returns
        -------
        tr_idx, ts_idx : CArray
            Arrays with the tr/ts indices, as exposed by the
            `tr_idx` / `ts_idx` attributes.

        Raises
        ------
        ValueError
            If `train_size` or `test_size` is a float smaller than
            ``1 / dataset.num_samples`` or an integer smaller than 1.

        """
        n_samples = dataset.num_samples
        # Smallest fraction of the dataset that still selects one sample
        smallest_frac = 1 / n_samples

        train_size = self.train_size
        train_frac_too_small = \
            is_float(train_size) and train_size < smallest_frac
        if train_frac_too_small or (is_int(train_size) and train_size < 1):
            raise ValueError(
                "train_size should be at least 1 or {:}".format(smallest_frac))

        test_size = self.test_size
        test_frac_too_small = \
            is_float(test_size) and test_size < smallest_frac
        if test_frac_too_small or (is_int(test_size) and test_size < 1):
            raise ValueError(
                "test_size should be at least 1 or {:}".format(smallest_frac))

        # Indices 0..n_samples-1 as a ndarray, to be partitioned
        all_indices = CArray.arange(n_samples).tondarray()
        split_tr, split_ts = train_test_split(
            all_indices,
            train_size=self.train_size,
            test_size=self.test_size,
            random_state=self.random_state,
            shuffle=self.shuffle)

        self._tr_idx = CArray(split_tr)
        self._ts_idx = CArray(split_ts)

        return self.tr_idx, self.ts_idx
コード例 #2
0
        def _check_tolist(array):
            """Check the output of `array.tolist()` for several shapes.

            For each tested `shape` (including None, i.e. no reshape),
            the result must be a list whose length equals the first
            expected dimension and, when a second dimension is expected,
            whose items all have that length.

            Parameters
            ----------
            array : object
                Array under test. Must expose `tolist`, `size`, `shape`
                and `ndim`.

            """
            self.logger.info("array:\n{:}".format(array))

            n_elems = array.size
            tested_shapes = [
                None,
                n_elems,
                (n_elems, ),
                (1, n_elems),
                (n_elems, 1),
                (1, 1, n_elems),
            ]

            for shape in tested_shapes:

                as_list = array.tolist(shape=shape)
                self.logger.info("array.tolist(shape={:}):\n{:}".format(
                    shape, as_list))

                self.assertIsInstance(as_list, list)

                if shape is None:  # No reshape, use the array's own shape
                    expected_shape = array.shape
                    has_inner_dim = array.ndim > 1
                else:  # Reshape after casting
                    # An integer shape stands for a 1-dim shape
                    expected_shape = (shape, ) if is_int(shape) else shape
                    has_inner_dim = len(expected_shape) > 1

                self.assertEqual(len(as_list), expected_shape[0])
                if has_inner_dim:
                    for item in as_list:
                        self.assertEqual(len(item), expected_shape[1])
コード例 #3
0
    def is_attack_class(self, y):
        """Tell whether the input class(es) can be attacked.

        The attack classes setting is either the string 'all' (every
        class can be manipulated) or a collection of class labels.

        Parameters
        ----------
        y : int or CArray
            Single label or CArray of labels of the classes to be checked.

        Returns
        -------
        bool or CArray
            For an integer input, True if class y can be manipulated by
            the attacker, False otherwise. For a CArray input, a boolean
            CArray of the same shape with one True/False value for each
            input label.

        Raises
        ------
        TypeError
            If y is neither an integer nor a CArray.

        """
        if is_int(y):
            if self._attack_classes == 'all':
                return True  # every class can be manipulated
            # y can be manipulated only if it matches an attack class
            return True if CArray(y == self._attack_classes).any() else False

        if not isinstance(y, CArray):
            raise TypeError("y can be an integer or a CArray")

        attackable = CArray.zeros(shape=y.shape, dtype=bool)
        if self.attack_classes == 'all':
            attackable[:] = True  # every class can be manipulated
            return attackable

        # Flag the labels matching each of the attack classes
        for cls_pos in range(self.attack_classes.size):
            attackable[y == self.attack_classes[cls_pos]] = True
        return attackable
    def __getitem__(self, i):
        """Return the (sample, label) pair stored at position `i`.

        The sample is extracted as a ndarray, passed through
        `self.transform` (if set) and converted to a float tensor.
        If no labels are stored, a tensor holding -1 is returned
        as label.

        Parameters
        ----------
        i : int
            Index of the desired sample.

        Returns
        -------
        sample : torch.Tensor
            The sample, cast to float.
        label : torch.Tensor
            The corresponding label (a single value for 1-dim labels,
            the whole i-th row otherwise), or a tensor holding -1.

        Raises
        ------
        ValueError
            If `i` is not an integer.

        """
        if not is_int(i):
            raise ValueError("only integer indexing is supported")

        sample = CArray(self._samples[i, :]).tondarray()

        transform = self.transform
        if transform is not None:
            sample = transform(sample)

        # The transform may or may not have produced a tensor already
        if not isinstance(sample, torch.Tensor):
            sample = torch.from_numpy(sample)

        labels = self._labels
        if labels is None:
            label = torch.tensor(-1)  # Tensor with null label
        elif labels.ndim == 1:  # (num_samples, )
            label = torch.tensor(labels[i].item())
        else:  # (num_samples, num_classes)
            label = CArray(labels[i, :]).tondarray()
            if not isinstance(label, torch.Tensor):
                label = torch.from_numpy(label)

        return sample.float(), label
コード例 #5
0
def check_binary_labels(labels):
    """Check if input labels are binary {0, +1}.

    Inputs which are neither an integer nor a CArray pass the
    check silently.

    Parameters
    ----------
    labels : CArray or int
        Labels to be checked. Each one is expected to be 0 or +1.

    Raises
    ------
    ValueError
        If input labels are not binary.

    """
    if is_int(labels) and labels != 0 and labels != 1:
        # Single label different from both the admitted values
        not_binary = True
    elif isinstance(labels, CArray):
        # Any element different from both the admitted values
        differs_from_zero = labels != 0
        not_binary = differs_from_zero.logical_and(labels != 1).any()
    else:
        not_binary = False

    if not_binary:
        raise ValueError("input labels should be binary in {0, +1} interval.")
def tuple_atomic_tolist(idx):
    """Wrap the atomic elements of a tuple into single-item lists.

    Elements recognized as integers or booleans (by `is_int` and
    `is_bool`) are replaced by a list containing the element.
    Any other element is left untouched.

    Parameters
    ----------
    idx : tuple
        Tuple which elements have to be converted.

    Returns
    -------
    out_tuple : tuple
        Converted tuple.

    Raises
    ------
    TypeError
        If `idx` is not a tuple.

    """
    if not is_tuple(idx):
        raise TypeError("input must be a tuple")

    converted = []
    for item in idx:
        if is_int(item) or is_bool(item):
            converted.append([item])
        else:
            converted.append(item)
    return tuple(converted)
コード例 #7
0
        def _check_tondarray(array):
            """Check the output of `array.tondarray()` for several shapes.

            For each tested `shape` (including None, i.e. no reshape),
            the result must be a `np.ndarray` with as many elements as
            the input array and with the expected shape.

            Parameters
            ----------
            array : object
                Array under test. Must expose `tondarray`, `size`
                and `shape`.

            """
            self.logger.info("array:\n{:}".format(array))

            n_elems = array.size
            tested_shapes = [
                None,
                n_elems,
                (n_elems, ),
                (1, n_elems),
                (n_elems, 1),
                (1, 1, n_elems),
            ]

            for shape in tested_shapes:

                converted = array.tondarray(shape=shape)
                self.logger.info("array.tondarray(shape={:}):\n{:}".format(
                    shape, converted))

                self.assertIsInstance(converted, np.ndarray)

                self.assertEqual(array.size, converted.size)

                if shape is None:  # No reshape, shape must be preserved
                    expected_shape = array.shape
                elif is_int(shape):  # An integer stands for a 1-dim shape
                    expected_shape = (shape, )
                else:
                    expected_shape = shape
                self.assertEqual(expected_shape, converted.shape)
    def compute_indices(self, dataset):
        """Split the dataset indices according to the samples' timestamps.

        Samples having `timestamp <= self.th_timestamp` are candidates
        for the training set, all the others for the test set.
        From each group, the desired number of samples is then selected,
        randomly if `self.shuffle` is True, from the head of the group
        otherwise. The computed indices are also stored in
        `_tr_idx` / `_ts_idx`.

        Parameters
        ----------
        dataset : CDataset
            Dataset to split. Its header must provide the `timestamp`
            and `timestamp_fmt` attributes.

        Returns
        -------
        tr_idx, ts_idx : CArray
            Arrays with the tr/ts indices, as exposed by the
            `tr_idx` / `ts_idx` attributes.

        Raises
        ------
        AttributeError
            If the dataset header lacks `timestamp` or `timestamp_fmt`.
        ValueError
            If no sample falls before (or at) the threshold, or none
            falls after it, or if an integer `train_size` / `test_size`
            is outside the range of available samples.

        """
        if not (hasattr(dataset.header, 'timestamp') and
                hasattr(dataset.header, 'timestamp_fmt')):
            raise AttributeError("dataset must contain `timestamp` and "
                                 "'timestamp_fmt' information")

        timestamps = dataset.header.timestamp
        fmt = dataset.header.timestamp_fmt

        # Training candidates: samples having `timestamp <= th`
        tr_mask = CArray(
            [datetime.strptime(stamp, fmt) <= self.th_timestamp
             for stamp in timestamps])
        # Test candidates: all the other samples
        ts_mask = tr_mask.logical_not()

        # Number of available train/test candidates
        max_tr = tr_mask.sum()
        max_ts = dataset.num_samples - max_tr

        if max_tr == 0:
            raise ValueError("no samples with timestamp <= {:}. "
                             "Cannot split dataset.".format(self.th_timestamp))

        if max_ts == 0:
            raise ValueError("no samples with timestamp > {:}. "
                             "Cannot split dataset.".format(self.th_timestamp))

        # Actual number of desired train samples
        if not is_int(self.train_size):
            # Proportion of the available train samples (at least 1)
            tr_size = int(max(1, round(max_tr * self.train_size)))
        elif 1 <= self.train_size <= max_tr:
            tr_size = self.train_size  # Valid integer, use it directly
        else:
            raise ValueError(
                "train_size should be between 1 and {:}".format(max_tr))

        # Actual number of desired test samples
        if not is_int(self.test_size):
            # Proportion of the available test samples (at least 1)
            ts_size = int(max(1, round(max_ts * self.test_size)))
        elif 1 <= self.test_size <= max_ts:
            ts_size = self.test_size  # Valid integer, use it directly
        else:
            raise ValueError(
                "test_size should be between 1 and {:}".format(max_ts))

        # Turn the boolean masks into arrays of indices
        tr_candidates = CArray(tr_mask.find(tr_mask))
        ts_candidates = CArray(ts_mask.find(ts_mask))

        if self.shuffle is True:  # Randomly pick the desired indices
            chosen_tr = CArray.randsample(
                tr_candidates, shape=(tr_size, ),
                random_state=self.random_state)
            chosen_ts = CArray.randsample(
                ts_candidates, shape=(ts_size, ),
                random_state=self.random_state)
        else:  # Just take the leading indices
            chosen_tr = tr_candidates[:tr_size]
            chosen_ts = ts_candidates[:ts_size]

        self._tr_idx = chosen_tr
        self._ts_idx = chosen_ts

        return self.tr_idx, self.ts_idx