Exemple #1
0
    def sample(self, n, seed=900):
        """Draw n (x, y) pairs, stratified by the values in self.pivot.

        The pairs are first permuted jointly, so row i of X still belongs
        to row i of Y. Then, for every value v in self._uniques, the
        permuted rows whose pivot lies within 1e-8 of v are located and the
        first ceil(count / sample_size * n) of them are kept, with count
        read from self._counts at the same position. The pooled rows are
        finally subsampled down to n.

        NOTE(review): presumably self._uniques and self._counts hold the
        distinct pivot values and their occurrence counts -- confirm
        against the code that sets them.

        Args:
            n: number of pairs to draw.
            seed: base seed; seed + 3 drives the permutation and seed + 5
                the final reduction.

        Returns:
            A new PairedData whose label is the source label with "_stra"
            appended (or None when the source has no label).

        Raises:
            ValueError: if n is larger than the number of available pairs.
        """
        source = self.pdata
        total = source.sample_size()
        if n > total:
            raise ValueError("Cannot subsample %d points from %d points." %
                             (n, total))

        X, Y = source.xy()
        # Shuffle X, Y and the pivot with one shared permutation so that
        # pairs (and their pivot values) stay aligned.
        shuffle = util.subsample_ind(total, total, seed=seed + 3)
        X, Y = X[shuffle, :], Y[shuffle, :]
        shuffled_pivot = self.pivot[shuffle]

        picks = []
        for k, value in enumerate(self._uniques):
            members = np.nonzero(np.abs(shuffled_pivot - value) <= 1e-8)[0]
            # Rounding up means every class with at least one member
            # contributes at least one row.
            quota = int(math.ceil(self._counts[k] / total * n))
            picks.append(members[:quota])
        pooled = np.hstack(picks)

        # Rounding up can overshoot n in total; trim back down.
        n_pooled = len(pooled)
        keep = util.subsample_ind(n_pooled, min(n, n_pooled), seed + 5)
        pooled = pooled[keep]
        assert len(pooled) == n, (
            "final_chosenI has length %d which is not n=%d" %
            (len(pooled), n))

        label = source.label
        if label is not None:
            label = label + "_stra"
        return PairedData(X[pooled, :], Y[pooled, :], label=label)
Exemple #2
0
 def subsample(self, n, seed=87):
     """Return a new PairedData holding n rows subsampled from X and Y.

     Row indices are requested from util.subsample_ind separately for X
     and for Y, both with the same seed. The original docstring describes
     this as subsampling without replacement.

     NOTE(review): pairs only stay aligned if both requests yield the same
     indices -- presumably X and Y have equal row counts and the helper is
     deterministic for a given seed; confirm.

     Raises:
         ValueError: if n exceeds the number of rows of X or of Y.
     """
     rows_x = self.X.shape[0]
     rows_y = self.Y.shape[0]
     if n > min(rows_x, rows_y):
         raise ValueError('n should not be larger than sizes of X, Y.')
     pick_x = util.subsample_ind(rows_x, n, seed)
     pick_y = util.subsample_ind(rows_y, n, seed)
     sub_x = self.X[pick_x, :]
     sub_y = self.Y[pick_y, :]
     return PairedData(sub_x, sub_y, self.label)
Exemple #3
0
 def sample(self, n, seed=981):
     """Draw n rows of X and n rows of Y with the pairing deliberately shifted.

     Row indices are drawn once via util.subsample_ind. X uses them as
     drawn, while Y uses the same indices rotated by one position
     (np.roll), so position i of the result combines X and Y from
     different drawn positions. For n == 1 a rotation would change
     nothing, so two indices are drawn instead: the first selects the X
     row and the second the Y row.

     NOTE(review): with n == 1 on a dataset of exactly one row this asks
     the helper for 2 indices out of 1 -- confirm how
     util.subsample_ind handles that.

     Returns:
         A new PairedData whose label is the source label with "_shuf"
         appended (or None when the source has no label).

     Raises:
         ValueError: if n is larger than the number of available rows.
     """
     data = self.pdata
     total = data.sample_size()
     if n > total:
         raise ValueError('cannot sample more points than what the original dataset has')

     X, Y = data.xy()
     if n != 1:
         rows = util.subsample_ind(total, n, seed=seed)
         x_rows = rows
         y_rows = np.roll(rows, 1)
     else:
         duo = util.subsample_ind(total, 2, seed=seed)
         # Wrap in lists so the indexing below keeps a 2-D result.
         x_rows = [duo[0]]
         y_rows = [duo[1]]
     nX = X[x_rows, :]
     nY = Y[y_rows, :]

     label = data.label
     if label is not None:
         label = label + '_shuf'
     return PairedData(nX, nY, label=label)