def __init__(self, class_vector, p_vec, batch_size, verbose=False):
    """Stratified sampler over a vector of class labels.

    Args:
        class_vector (np.array): a vector of class labels
        p_vec (list[float]): list of probabilities for each class
        batch_size (int): total batch size; split across classes
            according to `p_vec` by `get_batch_sizes`
        verbose (bool): forwarded to `get_batch_sizes`
    """
    # Number of whole batches in the dataset. Floor division is exact;
    # int(a / b) goes through float and can mis-round for large counts.
    self.n_splits = class_vector.shape[0] // batch_size
    self.class_vector = class_vector
    self.p_vec = p_vec
    self.batch_size = batch_size
    self.batch_sizes = get_batch_sizes(self.p_vec, self.batch_size,
                                       verbose=verbose)

    # Check that the individual per-class batch size will always be > 0.
    # (Loop variables renamed so they don't shadow the `batch_size` param.)
    for cls_idx, cls_batch_size in enumerate(self.batch_sizes):
        if cls_batch_size == 0:
            warnings.warn("Batch size for class {} is 0.".format(cls_idx))

    self.classes = np.arange(len(p_vec))
    # The labels present in class_vector must be exactly 0..len(p_vec)-1.
    assert np.all(
        np.sort(pd.Series(self.class_vector).unique()) == self.classes)

    idx_all = np.arange(len(self.class_vector))
    # One endless iterator of shuffled sample indices per class.
    self.class_idx_iterators = [
        iterable_cycle(
            np.random.permutation(idx_all[self.class_vector == cls]))
        for cls in self.classes
    ]
def batch_train_iter(self, cycle=True, **kwargs):
    """Yield samples directly usable for model training:
    (x["inputs"], x["targets"]) tuples.

    Args:
        cycle: when True, the returned iterator runs indefinitely
            through the dataset. Use True with `fit_generator` in Keras.
        **kwargs: Arguments passed to self.batch_iter(**kwargs)
    """
    if not cycle:
        # Finite pass over the dataset; batches are returned as-is.
        return ((batch["inputs"], batch["targets"])
                for batch in self.batch_iter(**kwargs))
    # Endless iteration; convert each side to numpy for the framework.
    endless_batches = iterable_cycle(self._batch_iterable(**kwargs))
    return ((to_numpy(batch["inputs"]), to_numpy(batch["targets"]))
            for batch in endless_batches)