def _train_cross_valid(self, _skiptrain=False):
    """Train with k-fold cross-validation over the training data.

    For each fold produced by SplitIdxIterator, fresh train/valid functions
    are built, the fold is trained, and the model is reset before the next
    fold. ``_skiptrain`` is forwarded to the training loop (presumably to
    run validation only -- confirm against ``trainloop``).

    Returns:
        (avg_train_err, avg_valid_err, per_fold_train_errs, per_fold_valid_errs)
    """
    # Fix: removed dead local ``c = 0`` that was never read or incremented.
    df = DataFeeder(*(self.traindata + [self.traingold]))
    splitter = SplitIdxIterator(df.size, split=self.validsplits,
                                random=self.validrandom,
                                folds=self.validsplits)
    err = []
    verr = []
    for splitidxs in splitter:
        trainf = self.buildtrainfun(self.model)
        validf = self.getvalidfun(self.model)
        # training feed shuffles, validation feed does not
        tf, vf = df.isplit(splitidxs, df_randoms=(True, False))
        tf.numbats(self.numbats)
        vf.batsize = tf.batsize  # keep validation batch size in sync
        serr, sverr = self.trainloop(trainf=self.getbatchloop(trainf, tf),
                                     validf=self.getbatchloop(validf, vf),
                                     _skiptrain=_skiptrain)
        err.append(serr)
        verr.append(sverr)
        self.resetmodel(self.model)  # start next fold from a fresh model
    err = np.asarray(err)
    avgerr = np.mean(err, axis=0)
    verr = np.asarray(verr)
    avgverr = np.mean(verr, axis=0)
    self.tt.tock("done")
    return avgerr, avgverr, err, verr
def _train_split(self):
    """Train on one part of the training data, validate on the held-out part."""
    train_fn = self.buildtrainfun(self.model)
    valid_fn = self.buildvalidfun(self.model)
    feeder = DataFeeder(*(self.traindata + [self.traingold]))
    train_feed, valid_feed = feeder.split(self.validsplits, self.validrandom)
    train_loop = self.getbatchloop(train_fn, train_feed.numbats(self.numbats))
    valid_loop = self.getbatchloop(valid_fn, valid_feed)
    err, verr = self.trainloop(trainf=train_loop, validf=valid_loop)
    return err, verr, None, None
def _train_split(self):
    """Split the training data into train/validation feeds and run the loop."""
    model = self.model
    trainer = self.buildtrainfun(model)
    validator = self.buildvalidfun(model)
    feeder = DataFeeder(*(self.traindata + [self.traingold]))
    trainpart, validpart = feeder.split(self.validsplits, self.validrandom)
    err, verr = self.trainloop(
        trainf=self.getbatchloop(trainer, trainpart.numbats(self.numbats)),
        validf=self.getbatchloop(validator, validpart))
    return err, verr, None, None
def _train_validdata(self):
    """Train on all training data, validating on explicitly provided validation data."""
    valid_fn = self.buildvalidfun(self.model)
    train_fn = self.buildtrainfun(self.model)
    train_feed = DataFeeder(*(self.traindata + [self.traingold]))
    valid_feed = DataFeeder(*(self.validdata + [self.validgold]))
    err, verr = self.trainloop(
        trainf=self.getbatchloop(train_fn, train_feed.numbats(self.numbats)),
        validf=self.getbatchloop(valid_fn, valid_feed))
    return err, verr, None, None
def _train_validdata(self):
    """Run training with a separate, user-supplied validation data set."""
    validator = self.buildvalidfun(self.model)
    trainer = self.buildtrainfun(self.model)
    feeder = DataFeeder(*(self.traindata + [self.traingold]))
    validfeeder = DataFeeder(*(self.validdata + [self.validgold]))
    batched_train = self.getbatchloop(trainer, feeder.numbats(self.numbats))
    batched_valid = self.getbatchloop(validator, validfeeder)
    err, verr = self.trainloop(trainf=batched_train, validf=batched_valid)
    return err, verr, None, None
def _train_split(self, _skiptrain=False):
    """Train/validate on a split of the training data.

    ``_skiptrain`` is passed through to the training loop (presumably to
    skip the training phase -- confirm against ``trainloop``).
    """
    train_fn = self.buildtrainfun(self.model)
    valid_fn = self.getvalidfun(self.model)
    feeder = DataFeeder(*(self.traindata + [self.traingold]))
    # training portion shuffles, validation portion does not
    train_feed, valid_feed = feeder.split(self.validsplits, self.validrandom,
                                          df_randoms=(True, False))
    train_feed.numbats(self.numbats)
    valid_feed.batsize = train_feed.batsize  # validation mirrors training batch size
    err, verr = self.trainloop(trainf=self.getbatchloop(train_fn, train_feed),
                               validf=self.getbatchloop(valid_fn, valid_feed),
                               _skiptrain=_skiptrain)
    return err, verr, None, None
def _train_validdata(self, _skiptrain=False):
    """Train on the full training set, validating on separate validation data."""
    valid_fn = self.getvalidfun(self.model)
    train_fn = self.buildtrainfun(self.model)
    feeder = DataFeeder(*(self.traindata + [self.traingold])).numbats(self.numbats)
    validfeeder = DataFeeder(*(self.validdata + [self.validgold]), random=False)
    validfeeder.batsize = feeder.batsize  # validation mirrors training batch size
    err, verr = self.trainloop(trainf=self.getbatchloop(train_fn, feeder),
                               validf=self.getbatchloop(valid_fn, validfeeder),
                               _skiptrain=_skiptrain)
    return err, verr, None, None
def _train_full(self):
    """Train on all available data; no validation is performed."""
    train_fn = self.buildtrainfun(self.model)
    feeder = DataFeeder(*(self.traindata + [self.traingold])).numbats(self.numbats)
    err, _ = self.trainloop(trainf=self.getbatchloop(train_fn, feeder))
    return err, None, None, None
def test_fb_datafeed_validosplit(self):
    """DataFeeder.osplit must keep feed classes/dims and trailing shape,
    scaling only the first dimension to ceil(len/splits)."""
    here = os.path.dirname(__file__)
    gd, gmaxi = getglovedict(os.path.join(here, "../data/glove/miniglove.50d.txt"))
    ed, emaxid = getentdict(os.path.join(here, "../data/freebase/entdic.small.map"),
                            top=50)
    dp = os.path.join(here, "../data/freebase/labelsrevlex.map.sample")
    f = FreebaseEntFeedsMaker(dp, gd, ed, numwords=10, numchars=30)
    self.assertEqual(f.worddic, gd)
    dfeeder = DataFeeder(*([f.trainfeed] + [f.goldfeed]))
    splits = 1
    dfsplit = dfeeder.osplit(split=splits, random=False)
    for orig, part in zip(dfeeder.feeds, dfsplit.feeds):
        self.assertEqual(orig.__class__, part.__class__)
        self.assertEqual(orig.ndim, part.ndim)
        self.assertEqual(part.shape[0],
                         int(math.ceil(1. * orig.shape[0] / splits)))
        for dim in range(1, len(orig.shape)):
            self.assertEqual(orig.shape[dim], part.shape[dim])
def _train_split(self, _lambda=False, _skiptrain=False):
    """Train/validate on a split of the training data.

    With ``_lambda=True``, return ``(trainf, validf, dftrain, dfvalid)``
    instead of running the loop, letting the caller drive training itself.
    """
    feeder = DataFeeder(*(self.traindata + [self.traingold]))
    # training portion shuffles, validation portion does not
    train_feed, valid_feed = feeder.split(self.validsplits, self.validrandom,
                                          df_randoms=(True, False))
    train_feed.numbats(self.numbats)
    valid_feed.batsize = train_feed.batsize  # validation mirrors training batch size
    train_fn = self.buildtrainfun(self.model, train_feed.batsize)
    valid_fn = self.getvalidfun(self.model, valid_feed.batsize)
    if _lambda:
        return train_fn, valid_fn, train_feed, valid_feed
    err, verr = self.trainloop(
        trainf=self.getbatchloop(train_fn, train_feed, phase="TRAIN"),
        validf=self.getbatchloop(valid_fn, valid_feed, phase="VALID"),
        _skiptrain=_skiptrain)
    return err, verr, None, None
def _train_validdata(self, _lambda=False, _skiptrain=False):
    """Train on the full training set with a separate validation data set.

    With ``_lambda=True``, return ``(trainf, validf, df, vdf)`` instead of
    running the training loop.
    """
    feeder = DataFeeder(*(self.traindata + [self.traingold])).numbats(self.numbats)
    validfeeder = DataFeeder(*(self.validdata + [self.validgold]), random=False)
    validfeeder.batsize = feeder.batsize  # validation mirrors training batch size
    train_fn = self.buildtrainfun(self.model, feeder.batsize)
    valid_fn = self.getvalidfun(self.model, validfeeder.batsize)
    if _lambda:
        return train_fn, valid_fn, feeder, validfeeder
    err, verr = self.trainloop(
        trainf=self.getbatchloop(train_fn, feeder, phase="TRAIN"),
        validf=self.getbatchloop(valid_fn, validfeeder, phase="VALID"),
        _skiptrain=_skiptrain)
    return err, verr, None, None
def _train_cross_valid(self):
    """Train with k-fold cross-validation over the training data.

    Each fold builds fresh train/valid functions, runs the training loop,
    and resets the model before the next fold.

    Returns:
        (avg_train_err, avg_valid_err, per_fold_train_errs, per_fold_valid_errs)
    """
    # Fix: removed dead local ``c = 0`` that was never read or incremented.
    df = DataFeeder(*(self.traindata + [self.traingold]))
    splitter = SplitIdxIterator(df.size, split=self.validsplits,
                                random=self.validrandom,
                                folds=self.validsplits)
    err = []
    verr = []
    for splitidxs in splitter:
        trainf = self.buildtrainfun(self.model)
        validf = self.buildvalidfun(self.model)
        tf, vf = df.isplit(splitidxs)
        serr, sverr = self.trainloop(
            trainf=self.getbatchloop(trainf, tf.numbats(self.numbats)),
            validf=self.getbatchloop(validf, vf))
        err.append(serr)
        verr.append(sverr)
        self.resetmodel(self.model)  # start next fold from a fresh model
    err = np.asarray(err)
    avgerr = np.mean(err, axis=0)
    verr = np.asarray(verr)
    avgverr = np.mean(verr, axis=0)
    self.tt.tock("done")
    return avgerr, avgverr, err, verr
def _train_full(self, _lambda=False, _skiptrain=False):
    """Train on all data; no validation.

    With ``_lambda=True``, return ``(trainf, None, df, None)`` instead of
    running the training loop.
    """
    feeder = DataFeeder(*(self.traindata + [self.traingold])).numbats(self.numbats)
    train_fn = self.buildtrainfun(self.model, feeder.batsize)
    if _lambda:
        return train_fn, None, feeder, None
    err, _ = self.trainloop(
        trainf=self.getbatchloop(train_fn, feeder, phase="TRAIN"),
        _skiptrain=_skiptrain)
    return err, None, None, None
def test_fb_datafeed_validosplit(self):
    """Sanity-check DataFeeder.osplit: feed classes and dims preserved,
    first dimension becomes ceil(len/splits), remaining dims untouched."""
    base = os.path.dirname(__file__)
    gd, gmaxi = getglovedict(
        os.path.join(base, "../data/glove/miniglove.50d.txt"))
    ed, emaxid = getentdict(
        os.path.join(base, "../data/freebase/entdic.small.map"), top=50)
    sample_path = os.path.join(base, "../data/freebase/labelsrevlex.map.sample")
    maker = FreebaseEntFeedsMaker(sample_path, gd, ed, numwords=10, numchars=30)
    self.assertEqual(maker.worddic, gd)
    feeder = DataFeeder(*([maker.trainfeed] + [maker.goldfeed]))
    splits = 1
    split_feeder = feeder.osplit(split=splits, random=False)
    for full, part in zip(feeder.feeds, split_feeder.feeds):
        self.assertEqual(full.__class__, part.__class__)
        self.assertEqual(full.ndim, part.ndim)
        expected_rows = int(math.ceil(1. * full.shape[0] / splits))
        self.assertEqual(part.shape[0], expected_rows)
        for dim in range(1, len(full.shape)):
            self.assertEqual(full.shape[dim], part.shape[dim])