예제 #1
0
    def __init__(self,labeled_per_class=10):
        """
        Load MNIST and divide the training set into labeled and unlabeled parts.

        For every digit class, the samples of that class are shuffled with
        ``np.random`` and the first ``labeled_per_class`` of them become the
        labeled training set.  The unlabeled training set is the complete
        training split (labels included), so the labeled samples also appear
        in it.  The validation and test splits are wrapped unchanged.

        After construction the object exposes ``self.train``, ``self.valid``
        and ``self.test`` (also gathered under ``self.data``), plus the raw
        loaded dataset in ``self.raw``.  To index one of these sets, use
        regular numpy slicing, e.g. ``self.train[idx]``.

        labeled_per_class: {int} default to 10, sets the number of labeled
                                 samples per digit class in the training set.
                                 E.g. labeled_per_class=10 results in 100
                                 labeled training samples.  A class with fewer
                                 samples than requested contributes all of its
                                 samples.
        """
        self.raw = loadDataset('mnist')
        self.nclasses = 10
        self.dim_observations = 784
        data = self.raw
        X = data['train']
        Y = data['train_y'].astype('int32')

        # Draw a random subset of each class to serve as the labeled set.
        XL = []
        YL = []
        for c in range(self.nclasses):
            sel = Y == c
            Xc = X[sel]
            Yc = Y[sel]
            # Shuffle positions within the class, then keep the leading ones.
            idx = np.arange(sel.sum())
            np.random.shuffle(idx)
            keep = idx[:labeled_per_class]
            XL.append(Xc[keep])
            YL.append(Yc[keep])
        XL = np.vstack(XL)
        YL = np.hstack(YL)

        def sample_func(x):
            # Binarize: an entry becomes 1.0 when it is >= a fresh uniform
            # draw from [0, 1), otherwise 0.0.
            noise = np.random.uniform(low=0,high=1,size=x.shape)
            return (x>=noise).astype(float)

        self.train = SemiSupervisedDataTrain(
                        XU=X,
                        YU=Y,
                        XL=XL,
                        YL=YL,
                        sample_func=sample_func)
        self.valid = SemiSupervisedDataEvaluate(
                        X=data['valid'],
                        Y=data['valid_y'],
                        sample_func=sample_func)
        self.test = SemiSupervisedDataEvaluate(
                        X=data['test'],
                        Y=data['test_y'],
                        sample_func=sample_func)

        self.data = NestD({
            'train':self.train,
            'valid':self.valid,
            'test':self.test
        })
예제 #2
0
 def __init__(self,X,Y,n=None,sample_func=None):
     """
     Wrap one (X, Y) pair so that it is reachable under both the 'U' and
     the 'L' key of ``self.data``.

     X: observations, stored under data['U']['X'] and data['L']['X']
     Y: labels, stored under data['U']['Y'] and data['L']['Y']
     n: {int} number of samples; defaults to len(X) when None
     sample_func: optional callable, only stored here as self.sample_func
     """
     pair_for_U = {'X':X,'Y':Y}
     pair_for_L = {'X':X,'Y':Y}
     self.data = NestD({'U':pair_for_U,'L':pair_for_L})
     self.n = len(X) if n is None else n
     self.sample_func = sample_func
예제 #3
0
 def __init__(self,data=None,n=None,*args,**kwargs):
     """
     Converts data to a NestD object and stores it as self.data.

     data: {dict,NestD} content to wrap; when omitted (None) a fresh empty
           dict is used for this instance
     n: {int} stored unchanged as self.n
     *args, **kwargs: accepted and ignored by this constructor
     """
     # A literal `{}` default would be one dict object shared by every call
     # that omits `data`; build a new one per call instead.
     if data is None:
         data = {}
     self.n = n
     self.data = NestD(data)
예제 #4
0
 def __init__(self,XL,YL,XU,YU=None,nL=None,nU=None,sample_func=None):
     """
     Hold a labeled set ('L') and an unlabeled set ('U') in one NestD.

     XL, YL: labeled observations and their labels (data['L']['X'], data['L']['Y'])
     XU: unlabeled observations (data['U']['X'])
     YU: optional labels for the unlabeled set; data['U']['Y'] is only
         created when this is not None
     nL: {int} size of the labeled set; defaults to len(XL) when None
     nU: {int} size of the unlabeled set; defaults to len(XU) when None
     sample_func: optional callable, only stored here as self.sample_func
     """
     unlabeled = {'X':XU}
     labeled = {'X':XL,'Y':YL}
     self.data = NestD({'U':unlabeled,'L':labeled})
     if YU is not None:
         self.data['U']['Y']=YU
     self.nU = len(XU) if nU is None else nU
     self.nL = len(XL) if nL is None else nL
     self.sample_func = sample_func
예제 #5
0
 def __getitem__(self,idx):
     """
     Return a NestD batch with a 'U' part and an 'L' part.

     The 'U' entries are indexed with idx itself.  The 'L' entries are
     indexed with len(idx) positions drawn uniformly at random (with
     replacement) from [0, self.nL), so both parts have the same length.

     idx: a slice (expanded through infer_slice against self.nU) or any
          index object that supports len()

     When self.sample_func is set it is applied to the 'X' selection of the
     batch, and the 'Y' selection is merged back in afterwards.
     """
     if isinstance(idx,slice):
         idx = infer_slice(idx,self.nU)
     # One random labeled position for each requested unlabeled position.
     labeled_idx = np.random.randint(low=0,high=self.nL,size=len(idx))
     batch = NestD({
         'U':self.data['U'].apply(lambda arr:arr[idx]),
         'L':self.data['L'].apply(lambda arr:arr[labeled_idx]),
     })
     if not self.sample_func:
         return batch
     # NOTE(review): batch[:,['X']] / batch[:,['Y']] presumably pick the X and
     # Y leaves under every top-level key -- confirm against NestD.
     sampled = batch[:,['X']].apply(self.sample_func)
     labels = batch[:,['Y']]
     # Re-attach the untouched labels to the sampled observations.
     return sampled.updatepaths(*zip(*labels.walk()))