Ejemplo n.º 1
0
 def _train_cross_valid(self, _skiptrain=False):
     """Train with k-fold cross-validation over the training data.

     Builds a fresh train/valid function pair per fold, trains on that
     fold's training split, and averages the per-fold errors.

     :param _skiptrain: forwarded to the train loop; when True the
         training batches are skipped (debug / validation-only runs).
     :returns: (avg train err, avg valid err,
         per-fold train errs, per-fold valid errs)
     """
     df = DataFeeder(*(self.traindata + [self.traingold]))
     splitter = SplitIdxIterator(df.size,
                                 split=self.validsplits,
                                 random=self.validrandom,
                                 folds=self.validsplits)
     err = []
     verr = []
     for splitidxs in splitter:
         trainf = self.buildtrainfun(self.model)
         validf = self.getvalidfun(self.model)
         tf, vf = df.isplit(splitidxs, df_randoms=(True, False))
         tf.numbats(self.numbats)
         # validation batches mirror the training batch size
         vf.batsize = tf.batsize
         serr, sverr = self.trainloop(trainf=self.getbatchloop(trainf, tf),
                                      validf=self.getbatchloop(validf, vf),
                                      _skiptrain=_skiptrain)
         err.append(serr)
         verr.append(sverr)
         # reset parameters so the next fold starts from scratch
         self.resetmodel(self.model)
     err = np.asarray(err)
     avgerr = np.mean(err, axis=0)
     verr = np.asarray(verr)
     avgverr = np.mean(verr, axis=0)
     self.tt.tock("done")
     return avgerr, avgverr, err, verr
Ejemplo n.º 2
0
 def _train_split(self):
     """Hold out part of the training data and validate on it while training."""
     trainf = self.buildtrainfun(self.model)
     validf = self.buildvalidfun(self.model)
     feeder = DataFeeder(*(self.traindata + [self.traingold]))
     trainfeed, validfeed = feeder.split(self.validsplits, self.validrandom)
     trainloop_f = self.getbatchloop(trainf, trainfeed.numbats(self.numbats))
     validloop_f = self.getbatchloop(validf, validfeed)
     err, verr = self.trainloop(trainf=trainloop_f, validf=validloop_f)
     return err, verr, None, None
Ejemplo n.º 3
0
 def _train_split(self):
     """Split the training data and run the train loop with a validation part."""
     train_fn = self.buildtrainfun(self.model)
     valid_fn = self.buildvalidfun(self.model)
     feeder = DataFeeder(*(self.traindata + [self.traingold]))
     fit_feed, hold_feed = feeder.split(self.validsplits, self.validrandom)
     err, verr = self.trainloop(
         trainf=self.getbatchloop(train_fn, fit_feed.numbats(self.numbats)),
         validf=self.getbatchloop(valid_fn, hold_feed))
     return err, verr, None, None
Ejemplo n.º 4
0
 def _train_validdata(self):
     """Train on all training data, validating on the separate validation set."""
     validf = self.buildvalidfun(self.model)
     trainf = self.buildtrainfun(self.model)
     train_feed = DataFeeder(*(self.traindata + [self.traingold]))
     valid_feed = DataFeeder(*(self.validdata + [self.validgold]))
     err, verr = self.trainloop(
             trainf=self.getbatchloop(trainf, train_feed.numbats(self.numbats)),
             validf=self.getbatchloop(validf, valid_feed))
     return err, verr, None, None
Ejemplo n.º 5
0
 def _train_validdata(self):
     """Run the train loop on the training set with an external validation feed."""
     validf = self.buildvalidfun(self.model)
     trainf = self.buildtrainfun(self.model)
     feed = DataFeeder(*(self.traindata + [self.traingold]))
     vfeed = DataFeeder(*(self.validdata + [self.validgold]))
     train_batches = self.getbatchloop(trainf, feed.numbats(self.numbats))
     valid_batches = self.getbatchloop(validf, vfeed)
     err, verr = self.trainloop(trainf=train_batches, validf=valid_batches)
     return err, verr, None, None
Ejemplo n.º 6
0
 def _train_split(self, _skiptrain=False):
     """Train on a split of the training data; validation shares the batch size.

     :param _skiptrain: forwarded to the train loop to skip training batches.
     """
     trainf = self.buildtrainfun(self.model)
     validf = self.getvalidfun(self.model)
     feeder = DataFeeder(*(self.traindata + [self.traingold]))
     fitfeed, holdfeed = feeder.split(self.validsplits,
                                      self.validrandom,
                                      df_randoms=(True, False))
     fitfeed.numbats(self.numbats)
     # validation batches use the batch size derived for the training feed
     holdfeed.batsize = fitfeed.batsize
     err, verr = self.trainloop(trainf=self.getbatchloop(trainf, fitfeed),
                                validf=self.getbatchloop(validf, holdfeed),
                                _skiptrain=_skiptrain)
     return err, verr, None, None
Ejemplo n.º 7
0
 def _train_validdata(self, _skiptrain=False):
     """Train on the full training set, validating on external validation data.

     :param _skiptrain: forwarded to the train loop to skip training batches.
     """
     validf = self.getvalidfun(self.model)
     trainf = self.buildtrainfun(self.model)
     feed = DataFeeder(*(self.traindata + [self.traingold])).numbats(
         self.numbats)
     vfeed = DataFeeder(*(self.validdata + [self.validgold]), random=False)
     # validation batches mirror the training batch size
     vfeed.batsize = feed.batsize
     err, verr = self.trainloop(trainf=self.getbatchloop(trainf, feed),
                                validf=self.getbatchloop(validf, vfeed),
                                _skiptrain=_skiptrain)
     return err, verr, None, None
Ejemplo n.º 8
0
 def _train_full(self):
     """Train on the entire training set without any validation."""
     trainf = self.buildtrainfun(self.model)
     feed = DataFeeder(*(self.traindata + [self.traingold]))
     batchloop = self.getbatchloop(trainf, feed.numbats(self.numbats))
     err, _ = self.trainloop(trainf=batchloop)
     return err, None, None, None
Ejemplo n.º 9
0
    def test_fb_datafeed_validosplit(self):
        """osplit keeps feed class/ndim and ceil-divides only the first axis."""
        here = os.path.dirname(__file__)
        gd, gmaxi = getglovedict(
            os.path.join(here, "../data/glove/miniglove.50d.txt"))
        ed, emaxid = getentdict(
            os.path.join(here, "../data/freebase/entdic.small.map"), top=50)
        dp = os.path.join(here, "../data/freebase/labelsrevlex.map.sample")
        f = FreebaseEntFeedsMaker(dp, gd, ed, numwords=10, numchars=30)
        self.assertEqual(f.worddic, gd)

        dfeeder = DataFeeder(*([f.trainfeed] + [f.goldfeed]))
        splits = 1
        dfsplit = dfeeder.osplit(split=splits, random=False)
        for orig_feed, split_feed in zip(dfeeder.feeds, dfsplit.feeds):
            self.assertEqual(orig_feed.__class__, split_feed.__class__)
            self.assertEqual(orig_feed.ndim, split_feed.ndim)
            expected_rows = int(math.ceil(1. * orig_feed.shape[0] / splits))
            self.assertEqual(split_feed.shape[0], expected_rows)
            for dim in range(1, len(orig_feed.shape)):
                self.assertEqual(orig_feed.shape[dim], split_feed.shape[dim])
Ejemplo n.º 10
0
 def _train_split(self, _lambda=False, _skiptrain=False):
     """Split the training data for validation and train (or return the pieces).

     :param _lambda: when True, return (trainf, validf, dftrain, dfvalid)
         instead of running the training loop.
     :param _skiptrain: forwarded to the train loop to skip training batches.
     """
     feeder = DataFeeder(*(self.traindata + [self.traingold]))
     dftrain, dfvalid = feeder.split(self.validsplits,
                                     self.validrandom,
                                     df_randoms=(True, False))
     dftrain.numbats(self.numbats)
     # validation batches reuse the training batch size
     dfvalid.batsize = dftrain.batsize
     trainf = self.buildtrainfun(self.model, dftrain.batsize)
     validf = self.getvalidfun(self.model, dfvalid.batsize)
     if _lambda:
         return trainf, validf, dftrain, dfvalid
     trainloop_f = self.getbatchloop(trainf, dftrain, phase="TRAIN")
     validloop_f = self.getbatchloop(validf, dfvalid, phase="VALID")
     err, verr = self.trainloop(trainf=trainloop_f,
                                validf=validloop_f,
                                _skiptrain=_skiptrain)
     return err, verr, None, None
Ejemplo n.º 11
0
 def _train_validdata(self, _lambda=False, _skiptrain=False):
     """Train on the training set with an external validation set.

     :param _lambda: when True, return (trainf, validf, df, vdf) instead of
         running the training loop.
     :param _skiptrain: forwarded to the train loop to skip training batches.
     """
     df = DataFeeder(*(self.traindata + [self.traingold])).numbats(
         self.numbats)
     vdf = DataFeeder(*(self.validdata + [self.validgold]), random=False)
     # validation batches mirror the training batch size
     vdf.batsize = df.batsize
     trainf = self.buildtrainfun(self.model, df.batsize)
     validf = self.getvalidfun(self.model, vdf.batsize)
     if _lambda:
         return trainf, validf, df, vdf
     err, verr = self.trainloop(
         trainf=self.getbatchloop(trainf, df, phase="TRAIN"),
         validf=self.getbatchloop(validf, vdf, phase="VALID"),
         _skiptrain=_skiptrain)
     return err, verr, None, None
Ejemplo n.º 12
0
 def _train_cross_valid(self):
     """Train with k-fold cross-validation and average the fold errors.

     :returns: (avg train err, avg valid err,
         per-fold train errs, per-fold valid errs)
     """
     df = DataFeeder(*(self.traindata + [self.traingold]))
     splitter = SplitIdxIterator(df.size, split=self.validsplits, random=self.validrandom, folds=self.validsplits)
     err = []
     verr = []
     for splitidxs in splitter:
         trainf = self.buildtrainfun(self.model)
         validf = self.buildvalidfun(self.model)
         tf, vf = df.isplit(splitidxs)
         serr, sverr = self.trainloop(
             trainf=self.getbatchloop(trainf, tf.numbats(self.numbats)),
             validf=self.getbatchloop(validf, vf))
         err.append(serr)
         verr.append(sverr)
         # reset parameters so the next fold starts from scratch
         self.resetmodel(self.model)
     err = np.asarray(err)
     avgerr = np.mean(err, axis=0)
     verr = np.asarray(verr)
     avgverr = np.mean(verr, axis=0)
     self.tt.tock("done")
     return avgerr, avgverr, err, verr
Ejemplo n.º 13
0
 def _train_full(self,
                 _lambda=False,
                 _skiptrain=False):
     """Train on the entire training set without validation.

     :param _lambda: when True, return (trainf, None, df, None) instead of
         running the training loop.
     :param _skiptrain: forwarded to the train loop to skip training batches.
     """
     df = DataFeeder(*(self.traindata + [self.traingold])).numbats(
         self.numbats)
     trainf = self.buildtrainfun(self.model, df.batsize)
     if _lambda:
         return trainf, None, df, None
     err, _ = self.trainloop(
         trainf=self.getbatchloop(trainf, df, phase="TRAIN"),
         _skiptrain=_skiptrain)
     return err, None, None, None
Ejemplo n.º 14
0
    def test_fb_datafeed_validosplit(self):
        """Verify osplit preserves feed type and non-leading dimensions."""
        base_dir = os.path.dirname(__file__)
        glove_path = os.path.join(base_dir, "../data/glove/miniglove.50d.txt")
        entdic_path = os.path.join(base_dir,
                                   "../data/freebase/entdic.small.map")
        gd, gmaxi = getglovedict(glove_path)
        ed, emaxid = getentdict(entdic_path, top=50)
        dp = os.path.join(base_dir,
                          "../data/freebase/labelsrevlex.map.sample")
        f = FreebaseEntFeedsMaker(dp, gd, ed, numwords=10, numchars=30)
        self.assertEqual(f.worddic, gd)

        dfeeder = DataFeeder(*([f.trainfeed] + [f.goldfeed]))
        splits = 1
        dfsplit = dfeeder.osplit(split=splits, random=False)
        for x, y in zip(dfeeder.feeds, dfsplit.feeds):
            self.assertEqual(x.__class__, y.__class__)
            self.assertEqual(x.ndim, y.ndim)
            # first axis is ceil-divided by the number of splits
            self.assertEqual(y.shape[0],
                             int(math.ceil(1. * x.shape[0] / splits)))
            for dim in range(1, len(x.shape)):
                self.assertEqual(x.shape[dim], y.shape[dim])