def compute_indices(self, dataset):
    """Split the dataset sample indices into a training and a test set.

    Parameters
    ----------
    dataset : CDataset
        Dataset to split. Only `dataset.num_samples` is read here.

    Returns
    -------
    tr_idx, ts_idx : CArray
        The `tr_idx` and `ts_idx` attributes of this object, read back
        after the computed indices are stored in `_tr_idx` / `_ts_idx`.

    Raises
    ------
    ValueError
        If `train_size` or `test_size` is a float smaller than
        `1 / dataset.num_samples`, or an integer smaller than 1.

    """
    # Smallest fraction of the dataset that still selects one sample
    min_set_perc = 1 / dataset.num_samples

    # Same lower-bound rule for both sizes; train is checked first
    size_checks = (
        (self.train_size, "train_size should be at least 1 or {:}"),
        (self.test_size, "test_size should be at least 1 or {:}"),
    )
    for size, msg in size_checks:
        float_too_small = is_float(size) and size < min_set_perc
        if float_too_small or (is_int(size) and size < 1):
            raise ValueError(msg.format(min_set_perc))

    # Split the plain range of sample indices
    all_idx = CArray.arange(dataset.num_samples).tondarray()
    train_part, test_part = train_test_split(
        all_idx,
        train_size=self.train_size,
        test_size=self.test_size,
        random_state=self.random_state,
        shuffle=self.shuffle)

    self._tr_idx = CArray(train_part)
    self._ts_idx = CArray(test_part)

    return self.tr_idx, self.ts_idx
def _check_tolist(array):
    """Check `array.tolist(shape=...)` for a set of target shapes.

    For every candidate shape the result must be a list whose outer
    length (and, when more than one dimension is requested, the length
    of each first-level element) matches the expected dimensions.
    `self` is taken from the enclosing test method.
    """
    self.logger.info("array:\n{:}".format(array))

    candidate_shapes = [
        None,
        array.size,
        (array.size, ),
        (1, array.size),
        (array.size, 1),
        (1, 1, array.size),
    ]

    for shape in candidate_shapes:
        as_list = array.tolist(shape=shape)
        self.logger.info("array.tolist(shape={:}):\n{:}".format(
            shape, as_list))

        self.assertIsInstance(as_list, list)

        if shape is None:
            # No reshape requested: compare against the array's own shape
            self.assertEqual(len(as_list), array.shape[0])
            if array.ndim > 1:
                for row in as_list:
                    self.assertEqual(len(row), array.shape[1])
            continue

        # Reshape after casting; an integer shape means a 1-dim shape
        dims = (shape, ) if is_int(shape) else shape
        self.assertEqual(len(as_list), dims[0])
        if len(dims) > 1:
            for row in as_list:
                self.assertEqual(len(row), dims[1])
def is_attack_class(self, y):
    """Tell whether the given class label(s) can be attacked.

    Parameters
    ----------
    y : int or CArray
        Single label, or CArray of labels, to be checked.

    Returns
    -------
    bool or CArray
        For an integer `y`, True if `y` can be manipulated by the
        attacker, False otherwise. For a CArray `y`, a boolean CArray
        of the same shape with one True/False value per input label.

    Raises
    ------
    TypeError
        If `y` is neither an integer nor a CArray.

    """
    if is_int(y):
        if self._attack_classes == 'all':
            return True  # every class can be manipulated
        # True only if y matches at least one attackable class
        return bool(CArray(y == self._attack_classes).any())

    if not isinstance(y, CArray):
        raise TypeError("y can be an integer or a CArray")

    attackable = CArray.zeros(shape=y.shape, dtype=bool)

    if self.attack_classes == 'all':
        attackable[:] = True  # every class can be manipulated
        return attackable

    # Flag the positions of y equal to each attackable class in turn
    classes = self.attack_classes
    for k in range(classes.size):
        attackable[y == classes[k]] = True

    return attackable
def __getitem__(self, i):
    """Return the (sample, label) pair stored at integer position `i`.

    The sample is passed through `self.transform` when one is set,
    converted to a tensor if it is not one already, and returned as
    float. When no labels are stored the label is a tensor holding -1.

    Raises
    ------
    ValueError
        If `i` is not an integer.

    """
    if not is_int(i):
        raise ValueError("only integer indexing is supported")

    x = CArray(self._samples[i, :]).tondarray()
    if self.transform is not None:
        x = self.transform(x)

    # The transform may or may not have produced a tensor already
    if not isinstance(x, torch.Tensor):
        x = torch.from_numpy(x)

    if self._labels is None:
        y = torch.tensor(-1)  # Tensor with null label
    elif self._labels.ndim == 1:
        # Labels of shape (num_samples, )
        y = torch.tensor(self._labels[i].item())
    else:
        # Labels of shape (num_samples, num_classes)
        y = CArray(self._labels[i, :]).tondarray()
        if not isinstance(y, torch.Tensor):
            y = torch.from_numpy(y)

    return x.float(), y
def check_binary_labels(labels):
    """Check that the input labels are binary, i.e. in {0, +1}.

    Parameters
    ----------
    labels : CArray or int
        Labels to be checked. Inputs that are neither an integer nor a
        CArray are not checked.

    Raises
    ------
    ValueError
        If `labels` is an integer different from 0 and 1, or a CArray
        with at least one element different from both 0 and 1.

    """
    error_msg = "input labels should be binary in {0, +1} interval."

    # Scalar label: must be exactly 0 or 1
    if is_int(labels) and labels not in (0, 1):
        raise ValueError(error_msg)

    # Array of labels: no element may differ from both 0 and 1
    if isinstance(labels, CArray):
        out_of_set = (labels != 0).logical_and(labels != 1)
        if out_of_set.any():
            raise ValueError(error_msg)
def tuple_atomic_tolist(idx):
    """Wrap the atomic elements of a tuple into single-element lists.

    An element is treated as atomic when `is_int` or `is_bool` is true
    for it; any other element is left untouched.

    Parameters
    ----------
    idx : tuple
        Tuple whose elements have to be converted.

    Returns
    -------
    out_tuple : tuple
        New tuple of the same length, with each atomic element `e`
        replaced by `[e]`.

    Raises
    ------
    TypeError
        If `idx` is not a tuple.

    """
    if not is_tuple(idx):
        raise TypeError("input must be a tuple")

    def _wrap_if_atomic(item):
        # Integers and booleans become one-element lists
        if is_int(item) or is_bool(item):
            return [item]
        return item

    return tuple(_wrap_if_atomic(item) for item in idx)
def _check_tondarray(array):
    """Check `array.tondarray(shape=...)` for a set of target shapes.

    For every candidate shape the result must be a `np.ndarray` with
    the same number of elements as `array` and with the expected
    shape. `self` is taken from the enclosing test method.
    """
    self.logger.info("array:\n{:}".format(array))

    candidate_shapes = [
        None,
        array.size,
        (array.size, ),
        (1, array.size),
        (array.size, 1),
        (1, 1, array.size),
    ]

    for shape in candidate_shapes:
        converted = array.tondarray(shape=shape)
        self.logger.info("array.tondarray(shape={:}):\n{:}".format(
            shape, converted))

        self.assertIsInstance(converted, np.ndarray)
        self.assertEqual(array.size, converted.size)

        if shape is None:
            # No reshape requested: the original shape must be kept
            expected_shape = array.shape
        elif is_int(shape):
            # Reshape after casting; integer means a 1-dim shape
            expected_shape = (shape, )
        else:
            expected_shape = shape

        self.assertEqual(expected_shape, converted.shape)
def compute_indices(self, dataset):
    """Compute train/test indices by thresholding sample timestamps.

    Samples whose parsed timestamp is `<= self.th_timestamp` are
    candidates for the training set; all the others are candidates for
    the test set. The requested number of samples is then taken from
    each group, at random if `self.shuffle` is True, otherwise from
    the start of each group.

    Parameters
    ----------
    dataset : CDataset
        Dataset to split. Its header must expose `timestamp` and
        `timestamp_fmt`.

    Returns
    -------
    tr_idx, ts_idx : CArray
        The `tr_idx` and `ts_idx` attributes of this object, read back
        after the computed indices are stored in `_tr_idx` / `_ts_idx`.

    Raises
    ------
    AttributeError
        If the dataset header lacks `timestamp` or `timestamp_fmt`.
    ValueError
        If no sample falls on one side of the threshold, or if an
        integer `train_size` / `test_size` is below 1 or above the
        number of candidates available on its side.

    """
    header = dataset.header
    if not (hasattr(header, 'timestamp') and
            hasattr(header, 'timestamp_fmt')):
        raise AttributeError("dataset must contain `timestamp` and "
                             "'timestamp_fmt' information")

    timestamps = header.timestamp
    fmt = header.timestamp_fmt

    # Training candidates are the samples with `timestamp <= th`
    tr_mask = CArray([
        datetime.strptime(stamp, fmt) <= self.th_timestamp
        for stamp in timestamps
    ])
    # Every other sample is a test candidate
    ts_mask = tr_mask.logical_not()

    # Number of candidates available on each side of the threshold
    max_tr = tr_mask.sum()
    max_ts = dataset.num_samples - max_tr

    if max_tr == 0:
        raise ValueError("no samples with timestamp <= {:}. "
                         "Cannot split dataset.".format(self.th_timestamp))
    if max_ts == 0:
        raise ValueError("no samples with timestamp > {:}. "
                         "Cannot split dataset.".format(self.th_timestamp))

    def _resolve_size(requested, available, range_msg):
        # Non-integer sizes are a proportion of the candidates (at least 1)
        if not is_int(requested):
            return int(max(1, round(available * requested)))
        # Integer sizes are used directly once checked against the range
        if requested < 1 or requested > available:
            raise ValueError(range_msg.format(available))
        return requested

    tr_size = _resolve_size(
        self.train_size, max_tr, "train_size should be between 1 and {:}")
    ts_size = _resolve_size(
        self.test_size, max_ts, "test_size should be between 1 and {:}")

    # Turn the boolean masks into arrays of indices
    tr_candidates = CArray(tr_mask.find(tr_mask))
    ts_candidates = CArray(ts_mask.find(ts_mask))

    if self.shuffle is True:
        # Draw the requested number of indices at random
        tr_selected = CArray.randsample(
            tr_candidates, shape=(tr_size, ),
            random_state=self.random_state)
        ts_selected = CArray.randsample(
            ts_candidates, shape=(ts_size, ),
            random_state=self.random_state)
    else:
        # Keep the leading indices of each group
        tr_selected = tr_candidates[:tr_size]
        ts_selected = ts_candidates[:ts_size]

    self._tr_idx = tr_selected
    self._ts_idx = ts_selected

    return self.tr_idx, self.ts_idx