Example #1
0
    def __init__(self, class_vector, p_vec, batch_size, verbose=False):
        """Stratified batch sampler over a vector of class labels.

        Args:
          class_vector (np.array): vector of class labels, one per sample.
            Its unique values must be exactly ``0..len(p_vec)-1``.
          p_vec (list[float]): sampling probability for each class; its
            length defines the number of classes.
          batch_size (int): total number of samples per batch.
          verbose (bool): forwarded to ``get_batch_sizes``.

        Raises:
          ValueError: if the unique labels in ``class_vector`` are not
            exactly ``0..len(p_vec)-1``.
        """
        # Number of full batches that fit into the dataset.
        self.n_splits = int(class_vector.shape[0] / batch_size)
        self.class_vector = class_vector
        self.p_vec = p_vec
        self.batch_size = batch_size

        # Per-class batch sizes derived from the class probabilities.
        self.batch_sizes = get_batch_sizes(self.p_vec,
                                           self.batch_size,
                                           verbose=verbose)

        # Warn when a class would never appear in any batch.
        # Use a distinct loop variable so the `batch_size` argument is not
        # shadowed/clobbered (the original loop reused the parameter name).
        for cls_idx, cls_batch_size in enumerate(self.batch_sizes):
            if cls_batch_size == 0:
                warnings.warn("Batch size for class {} is 0.".format(cls_idx))

        self.classes = np.arange(len(p_vec))
        # Validate explicitly instead of `assert`, which is silently
        # stripped under `python -O`.
        observed = np.sort(pd.Series(self.class_vector).unique())
        if not np.array_equal(observed, self.classes):
            raise ValueError(
                "class_vector must contain exactly the classes {}, "
                "got {}".format(self.classes.tolist(), observed.tolist()))

        # One infinite, pre-shuffled index iterator per class.
        idx_all = np.arange(len(self.class_vector))
        self.class_idx_iterators = [
            iterable_cycle(
                np.random.permutation(idx_all[self.class_vector == cls]))
            for cls in self.classes
        ]
Example #2
0
 def batch_train_iter(self, cycle=True, **kwargs):
     """Yield ``(x["inputs"], x["targets"])`` pairs for model training.

     Args:
       cycle: when True, iterate over the dataset indefinitely and
         convert each pair with `to_numpy` — suitable for Keras
         `fit_generator`.
       **kwargs: forwarded to the underlying batch iterator.
     """
     if not cycle:
         # Finite pass over the dataset; batches are yielded as-is.
         return ((batch["inputs"], batch["targets"])
                 for batch in self.batch_iter(**kwargs))
     # Endless stream of numpy-converted (inputs, targets) pairs.
     endless = iterable_cycle(self._batch_iterable(**kwargs))
     return ((to_numpy(batch["inputs"]), to_numpy(batch["targets"]))
             for batch in endless)