def __init__(self, X, Y, n=None, sample_func=None):
    """
    Stores the same X and Y under both the 'U' and the 'L' branch.

    X: indexable container of inputs (must support len() when n is omitted)
    Y: indexable container of targets
    n: {int} length to report; falls back to len(X) when None
    sample_func: {callable,None} kept on the instance as self.sample_func
    """
    # Each branch gets its own inner dict, so the two branches are never
    # the same dict object.
    branches = {key: {'X': X, 'Y': Y} for key in ('U', 'L')}
    self.data = NestD(branches)
    self.n = len(X) if n is None else n
    self.sample_func = sample_func
class SemiSupervisedDataEvaluate(object):
    """
    Evaluation dataset wrapper: one (X, Y) pair is exposed under both the
    'U' and the 'L' keys of a NestD.
    """

    def __init__(self, X, Y, n=None, sample_func=None):
        """
        X: indexable container of inputs (must support len() when n is omitted)
        Y: indexable container of targets
        n: {int} length to report; falls back to len(X) when None
        sample_func: {callable,None} applied to the X entries on indexing
        """
        # Each branch gets its own inner dict, so the two branches are never
        # the same dict object.
        branches = {key: {'X': X, 'Y': Y} for key in ('U', 'L')}
        self.data = NestD(branches)
        self.n = len(X) if n is None else n
        self.sample_func = sample_func

    def __len__(self):
        """Returns the stored length n."""
        return self.n

    def __getitem__(self, idx):
        """
        Indexes every stored leaf with idx.

        When sample_func is set it is applied to the result of the
        [:, ['X']] selection, and the [:, ['Y']] selection is written back
        unchanged (presumably the X and Y leaves of each branch; the
        selection semantics live in NestD -- confirm there).
        """
        picked = self.data.apply(lambda leaf: leaf[idx])
        if not self.sample_func:
            return picked
        sampled_inputs = picked[:, ['X']].apply(self.sample_func)
        targets = picked[:, ['Y']]
        return sampled_inputs.updatepaths(*zip(*targets.walk()))

    def __repr__(self):
        """Class name followed by the np.shape of every stored leaf."""
        shapes = str(self.data.apply(np.shape))
        # Continuation lines get a one-space indent.
        return self.__class__.__name__ + ': ' + shapes.replace('\n', '\n ')
def __init__(self, data=None, n=None, *args, **kwargs):
    """
    Converts data to a NestD object.

    data: {dict,NestD,None} content to wrap; None (the default) means an
          empty dict
    n: {int} stored as self.n
    *args, **kwargs: accepted but not used here
    """
    self.n = n
    # A literal `{}` default is created once at definition time and shared
    # by every call; build a fresh dict per call instead.
    self.data = NestD({} if data is None else data)
def __init__(self, XL, YL, XU, YU=None, nL=None, nU=None, sample_func=None):
    """
    Stores labeled data under 'L' and unlabeled data under 'U'.

    XL, YL: labeled inputs and their targets
    XU: unlabeled inputs
    YU: optional targets for the unlabeled inputs; added under
        ['U']['Y'] only when not None
    nL: {int} number of labeled samples; falls back to len(XL)
    nU: {int} number of unlabeled samples; falls back to len(XU)
    sample_func: {callable,None} kept on the instance as self.sample_func
    """
    unlabeled = {'X': XU}
    labeled = {'X': XL, 'Y': YL}
    self.data = NestD({'U': unlabeled, 'L': labeled})
    if YU is not None:
        self.data['U']['Y'] = YU

    self.nU = len(XU) if nU is None else nU
    self.nL = len(XL) if nL is None else nL
    self.sample_func = sample_func
def __init__(self, labeled_per_class=10):
    """
    Divides training set into labeled and unlabeled data sets.

    For valid and test sets, the unlabeled and labeled X are exactly
    the same.

    Attributes set here:
      self.train: SemiSupervisedDataTrain
      self.valid, self.test: SemiSupervisedDataEvaluate
      self.data: NestD holding the three sets under 'train', 'valid'
                 and 'test'
      self.raw, self.nclasses, self.dim_observations

    To index one of these sets, use regular numpy slicing, e.g.
    self.train[idx]

    The unlabeled training set is the full training set (the labeled
    samples are not removed from it).

    labeled_per_class: {int} default to 10, sets the number of labeled
                       samples per digit class in the training set.
                       E.g. labeled_per_class=10 will result in
                       100 labeled training samples (the original note
                       also quotes 50000 unlabeled training samples;
                       that depends on what loadDataset returns).
    """
    self.raw = loadDataset('mnist')
    self.nclasses = 10
    self.dim_observations = 784

    data = self.raw
    X = data['train']
    Y = data['train_y'].astype('int32')

    # Draw up to `labeled_per_class` random samples from every class.
    XL = []
    YL = []
    for c in range(self.nclasses):
        sel = Y == c
        idx = np.arange(sel.sum())
        np.random.shuffle(idx)
        keep = idx[:labeled_per_class]
        XL.append(X[sel][keep])
        YL.append(Y[sel][keep])
    XL = np.vstack(XL)
    YL = np.hstack(YL)

    def sample_func(x):
        # 1.0 where x >= a fresh uniform [0, 1) draw, else 0.0
        # (presumably x holds pixel intensities in [0, 1] -- confirm).
        noise = np.random.uniform(low=0, high=1, size=x.shape)
        return (x >= noise).astype(float)

    self.train = SemiSupervisedDataTrain(
        XU=X, YU=Y, XL=XL, YL=YL, sample_func=sample_func)
    self.valid = SemiSupervisedDataEvaluate(
        X=data['valid'], Y=data['valid_y'], sample_func=sample_func)
    self.test = SemiSupervisedDataEvaluate(
        X=data['test'], Y=data['test_y'], sample_func=sample_func)

    self.data = NestD({
        'train': self.train,
        'valid': self.valid,
        'test': self.test,
    })
def __getitem__(self, idx):
    """
    Returns the unlabeled samples at idx paired with randomly drawn
    labeled samples.

    idx: {int,slice,sequence of indices} index into the unlabeled set.
         A slice is first passed through infer_slice(idx, self.nU)
         (presumably expanding it to explicit indices -- confirm).
         A scalar integer selects one unlabeled sample and one random
         labeled sample.

    One labeled index is drawn uniformly from [0, nL) per requested
    unlabeled index, so the labeled part differs between calls.

    When sample_func is set it is applied to the result of the
    [:, ['X']] selection, and the [:, ['Y']] selection is written back
    unchanged.
    """
    if isinstance(idx, slice):
        idx = infer_slice(idx, self.nU)
    idx_U = idx

    if isinstance(idx, (int, np.integer)):
        # len() of a scalar raises TypeError; draw a single labeled index
        # so integer indexing works.
        idx_L = np.random.randint(low=0, high=self.nL)
    else:
        idx_L = np.random.randint(low=0, high=self.nL, size=len(idx))

    U = self.data['U'].apply(lambda x: x[idx_U])
    L = self.data['L'].apply(lambda x: x[idx_L])
    rval = NestD({'U': U, 'L': L})

    if self.sample_func:
        X = rval[:, ['X']].apply(self.sample_func)
        Y = rval[:, ['Y']]
        rval = X.updatepaths(*zip(*Y.walk()))
    return rval
class Data(object):
    """Thin wrapper that holds a NestD in self.data and a length in self.n."""

    def __init__(self, data=None, n=None, *args, **kwargs):
        """
        Converts data to a NestD object.

        data: {dict,NestD,None} content to wrap; None (the default) means
              an empty dict
        n: {int} value returned by len()
        *args, **kwargs: accepted but not used here
        """
        self.n = n
        # A literal `{}` default is created once at definition time and
        # shared by every call; build a fresh dict per call instead.
        self.data = NestD({} if data is None else data)

    def __recreate__(self, data=None, n=None, *args, **kwargs):
        """
        Builds a new instance of the same class.

        data: {dict,NestD,None} content for the new instance; None means
              an empty dict
        n: {int} length for the new instance; defaults to self.n
        """
        if data is None:
            data = {}
        if n is None:
            n = self.n
        return self.__class__(data, n, *args, **kwargs)

    def __len__(self):
        """Returns the stored length n."""
        return self.n

    def __getitem__(self, idx):
        """Indexes every stored leaf with idx."""
        return self.data.apply(lambda x: x[idx])

    def apply(self, func, *args, **kwargs):
        """
        Applies func to every stored leaf, recursing into leaves that are
        themselves Data instances.
        """
        def _apply(x):
            if isinstance(x, Data):
                return x.apply(func, *args, **kwargs)
            return func(x, *args, **kwargs)

        # NOTE(review): args/kwargs are bound inside _apply AND passed to
        # NestD.apply; if NestD.apply forwards them to the callable, _apply
        # (which takes a single argument) would fail -- confirm against NestD.
        return self.data.apply(_apply, *args, **kwargs)

    def __repr_header__(self):
        """Returns the class name followed by an opening brace."""
        return self.__class__.__name__ + '{'

    def __repr__(self):
        return self.__repr_header__() + self.data.__repr__()
class SemiSupervisedDataTrain(object):
    """
    Training dataset wrapper: unlabeled data is stored under 'U' and
    labeled data under 'L'. Its length is the number of unlabeled samples.
    """

    def __init__(self, XL, YL, XU, YU=None, nL=None, nU=None, sample_func=None):
        """
        XL, YL: labeled inputs and their targets
        XU: unlabeled inputs
        YU: optional targets for the unlabeled inputs; added under
            ['U']['Y'] only when not None
        nL: {int} number of labeled samples; falls back to len(XL)
        nU: {int} number of unlabeled samples; falls back to len(XU)
        sample_func: {callable,None} applied to the X entries on indexing
        """
        self.data = NestD({'U': {'X': XU},
                           'L': {'X': XL, 'Y': YL}})
        if YU is not None:
            self.data['U']['Y'] = YU
        self.nU = len(XU) if nU is None else nU
        self.nL = len(XL) if nL is None else nL
        self.sample_func = sample_func

    def __len__(self):
        """Returns the number of unlabeled samples."""
        return self.nU

    def __getitem__(self, idx):
        """
        Returns the unlabeled samples at idx paired with randomly drawn
        labeled samples.

        idx: {int,slice,sequence of indices} index into the unlabeled set.
             A slice is first passed through infer_slice(idx, self.nU)
             (presumably expanding it to explicit indices -- confirm).
             A scalar integer selects one unlabeled sample and one random
             labeled sample.

        One labeled index is drawn uniformly from [0, nL) per requested
        unlabeled index, so the labeled part differs between calls.

        When sample_func is set it is applied to the result of the
        [:, ['X']] selection, and the [:, ['Y']] selection is written back
        unchanged.
        """
        if isinstance(idx, slice):
            idx = infer_slice(idx, self.nU)
        idx_U = idx

        if isinstance(idx, (int, np.integer)):
            # len() of a scalar raises TypeError; draw a single labeled
            # index so integer indexing works.
            idx_L = np.random.randint(low=0, high=self.nL)
        else:
            idx_L = np.random.randint(low=0, high=self.nL, size=len(idx))

        U = self.data['U'].apply(lambda x: x[idx_U])
        L = self.data['L'].apply(lambda x: x[idx_L])
        rval = NestD({'U': U, 'L': L})

        if self.sample_func:
            X = rval[:, ['X']].apply(self.sample_func)
            Y = rval[:, ['Y']]
            rval = X.updatepaths(*zip(*Y.walk()))
        return rval

    def __repr__(self):
        """Class name followed by the np.shape of every stored leaf."""
        header = self.__class__.__name__
        subrepr = '\n '.join(str(self.data.apply(np.shape)).split('\n'))
        return header + ': ' + subrepr