예제 #1
0
 def __init__(self, model, gold):
     """Bind the trainer to `model` and its gold-output variable `gold`,
     and initialize all fluent-configuration state to its defaults."""
     self.model = model
     self.goldvar = gold
     self.tt = TT("FluentTrainer")
     self.validsetmode = False
     self.average_err = True  # TODO: do we still need this?
     # autosave state
     self._autosave = False
     self._autosavepath = None
     self._autosaveblock = None
     # training configuration (populated by the fluent setter methods)
     self.learning_rate = None
     self.dynamic_lr = None
     self.objective = None
     self.regularizer = None
     self.optimizer = None
     self.traindata = None
     self.traingold = None
     self.gradconstraints = []
     # validation configuration
     self._validinter = 1
     self.trainstrategy = self._train_full
     self.validsplits = 0
     self.validrandom = False
     self.validata = None
     self.validgold = None
     self.validation = None
     self.validators = []
     self.external_validators = []
     # best-model tracking
     self.besttaker = None
     self.bestmodel = None
     self.savebest = None
     self.smallerbetter = True
     # results-file output
     self._writeresultspath = None
예제 #2
0
 def batchloop():
     """Run one pass over the datafeeder's batches, keeping a running
     training error in ``terr`` and printing live progress.

     Free variables from the enclosing scope: ``datafeeder``,
     ``sampletransf``, ``trainf``, ``verbose`` and ``self``.
     """
     c = 0           # batches processed so far
     prevperc = -1.  # last progress percentage shown
     terr = [0.0]    # running error(s); widened to match trainf's output
     numdigs = 2     # decimal digits in the progress display
     tt = TT("iter progress", verbose=verbose)
     tt.tick()
     while datafeeder.hasnextbatch():
         # progress percentage rounded to `numdigs` decimals
         perc = round(c * 100. * (10**numdigs) /
                      datafeeder._numbats) / (10**numdigs)
         if perc > prevperc:
             s = ("%." + str(numdigs) +
                  "f%% \t error: %.3f") % (perc, terr[0])
             tt.live(s)
             prevperc = perc
         sampleinps = datafeeder.nextbatch()
         sampleinps = sampletransf(*sampleinps)
         # (a no-op `try/except Exception, e: raise e` wrapper was removed:
         # it only truncated the traceback and is invalid Python 3 syntax)
         eterr = trainf(*sampleinps)
         # first real result: widen terr to the number of output scores
         if len(terr) != len(eterr) and terr.count(0.0) == len(terr):
             terr = [0.0] * len(eterr)
         if self.average_err is True:
             # running mean: old mean weighted c/(c+1), new value weighted 1/(c+1)
             terr = [
                 xterr * (1.0 * c / (c + 1)) + xeterr * (1.0 / (c + 1))
                 for xterr, xeterr in zip(terr, eterr)
             ]
         else:
             terr = [
                 xterr + xeterr for xterr, xeterr in zip(terr, eterr)
             ]
         c += 1
예제 #3
0
    def trainloop(self, tf, vf):
        """Run the interleaved epoch loop: call train function `tf` once per
        epoch and validation function `vf` every `self._validinter` epochs,
        track the best model of the main trainer, and update each
        sub-trainer's learning rate.

        Returns (err, verr): per-epoch training and validation error lists,
        one entry per epoch, each holding one value per sub-trainer.
        """
        self.tt.tick("training")
        stop = self.maxiter == 0  # maxiter == 0 means: do not train at all
        self.currentiter = 1
        evalinter = self._validinter
        evalcount = evalinter
        tt = TT("iter")
        err = []
        verr = []
        # seed with +inf so reporting works before the first validation
        prevverre = [[float("inf")] * len(subt.original.validators)
                     for subt in self.spts]
        while not stop:
            tt.tick("%d/%d" % (self.currentiter, int(self.maxiter)))
            erre = tf()
            if self.currentiter == self.maxiter:
                stop = True
            self.currentiter += 1
            err.append(erre)
            # print "done training"
            verre = prevverre
            if self.currentiter % evalinter == 0:  # validate and print
                verre = vf()
                prevverre = verre
                verr.append(verre)
                #embed()     # TODO
            # retaining the best of main trainer (smaller score = better here)
            if self.spts[0].original.besttaker is not None:
                modelscore = self.spts[0].original.besttaker(
                    ([erre[0]] + verre[0] + [self.currentiter]))
                if modelscore < self.spts[0].original.bestmodel[1]:
                    # tt.tock("freezing best with score %.3f (prev: %.3f)" % (modelscore, self.bestmodel[1]), prefix="-").tick()
                    self.spts[0].original.bestmodel = (
                        self.spts[0].original.model.freeze(), modelscore)

            # one report line per sub-trainer
            ttlines = []
            for i in range(len(erre)):
                if verre[i] is not None:
                    ttlines.append("\t%s:\ttraining error: %s \t validation error: %s" \
                            % (i+1, "%.4f" % erre[i][0],
                               " - ".join(map(lambda x: "%.4f" % x, verre[i]))))
                else:
                    ttlines.append("\t%s:\ttraining error: %s" %
                                   (i + 1, " - ".join(
                                       map(lambda x: "%.4f" % x, erre[i]))))
            tt.tock("\n".join(ttlines) + "\n", prefix="-")
            # let every sub-trainer adapt its learning rate from its own
            # slice of the error histories
            for i, subt in enumerate(self.spts):
                subt.original._update_lr(self.currentiter, self.maxiter,
                                         [errx[i] for errx in err],
                                         [verrx[i] for verrx in verr])
            evalcount += 1
            # embed()
            for subt in self.spts:
                if subt.original._autosave:
                    subt.original.save()
        self.tt.tock("trained").tick()
        return err, verr
예제 #4
0
 def __init__(self, dim, vocabsize=None, trainfrac=0.0):  # if dim is None, import all
     """Initialize an embedding store of `dim`-wide vectors.

     `vocabsize` caps the input vocabulary (None = unbounded) and
     `trainfrac` is the fraction kept trainable.
     """
     self.dim = dim
     self.indim = vocabsize
     self.trainfrac = trainfrac
     self.D = OrderedDict()  # word -> row index
     # first row is all zeros; presumably reserved for padding/unknown
     # tokens -- TODO confirm against loadvalue()
     self.W = [np.zeros((1, self.dim))]
     self._block = None
     self.tt = TT(self.__class__.__name__)
예제 #5
0
 def test_ticktock_duration_string(self):
     """_getdurationstr should render a seconds count as a human-readable
     duration string with correct rounding and pluralization."""
     ticker = TT()
     cases = [
         (1, "1.000 second"),
         (0.5689, "0.569 second"),
         (0.9999, "1.000 second"),
         (59, "59.000 seconds"),
         (59.00001, "59.000 seconds"),
         (59.0005, "59.001 seconds"),
         (60, "1 minute"),
         (60.005, "1 minute"),
         (61, "1 minute, 1 second"),
         (62, "1 minute, 2 seconds"),
         (121, "2 minutes, 1 second"),
         (120, "2 minutes"),
         (3656, "1 hour, 56 seconds"),
         (2 * 3600, "2 hours"),
         (24 * 3600 + 125, "1 day, 2 minutes, 5 seconds"),
         (25 * 3600 + 126, "1 day, 1 hour, 2 minutes, 6 seconds"),
         (50 * 3600, "2 days, 2 hours"),
     ]
     for seconds, expected in cases:
         self.assertEqual(expected, ticker._getdurationstr(seconds))
예제 #6
0
 def batchloop():
     """One interleaved pass: step every trainer on its own datafeeder,
     keeping a running error per trainer in ``terrs``.

     Free variables from the enclosing scope: ``trainfs``, ``datafeeders``,
     ``sampletransfs``, ``phase``, ``verbose`` and ``this``
     (NOTE(review): presumably a `this = self` alias made in the enclosing
     function for closure access -- confirm).
     """
     c = 0
     prevperc = -1.
     # one accumulator per trainer; None where there is no train function
     terrs = [[0.0] if tf is not None else None for tf in trainfs]
     numdigs = 2
     tt = TT("iter progress", verbose=verbose)
     tt.tick()
     for dataf in datafeeders:
         if dataf is not None:
             dataf.reset()
     # the first feeder drives epoch length; the others wrap around
     while datafeeders[0].hasnextbatch():
         perc = round(c * 100. * (10**numdigs) /
                      datafeeders[0].getnumbats()) / (10**numdigs)
         if perc > prevperc:
             s = ("%." + str(numdigs) + "f%% \t error: %s") \
                 % (perc, " - ".join(map(lambda x: "%.3f" % x[0], terrs)))
             tt.live(s)
             prevperc = perc
         for df in datafeeders:
             if not df.hasnextbatch():
                 df.reset()  # restart exhausted secondary feeders
         sampleinps = [df.nextbatch() for df in datafeeders]
         sampleinps = [
             stf(*si, phase=phase)
             for (stf, si) in zip(sampletransfs, sampleinps)
         ]
         # (a no-op `try/except Exception, e: raise e` wrapper was removed:
         # it only truncated the traceback and is invalid Python 3 syntax)
         eterrs = [tf(*si) for (tf, si) in zip(trainfs, sampleinps)]
         for i in range(len(terrs)):
             # widen each accumulator once its score count is known
             if len(terrs[i]) != len(eterrs[i]) \
                     and terrs[i].count(0.0) == len(terrs[i]):
                 terrs[i] = [0.0] * len(eterrs[i])
         for i, subt in enumerate(this.spts):
             if subt.original.average_err is True:
                 # running mean over batches
                 terrs[i] = [
                     xterr * (1.0 * c / (c + 1)) + xeterr * (1.0 / (c + 1))
                     for xterr, xeterr in zip(terrs[i], eterrs[i])
                 ]
             else:
                 terrs[i] = [
                     xterr + xeterr
                     for xterr, xeterr in zip(terrs[i], eterrs[i])
                 ]
         c += 1
예제 #7
0
 def loadvalue(self, path, dim, indim=None):
     """Load space-separated embedding vectors from `path`.

     Each line is "<word> <v1> <v2> ...". At most `indim` words are read
     when `indim` is given.

     Returns (W, D): W is an (n+1, dim) array whose row 0 is all zeros;
     D maps each word to its row index, starting at 1.
     """
     tt = TT(self.__class__.__name__)
     tt.tick()
     W = [np.zeros((1, dim))]  # row 0 reserved as an all-zero vector
     D = OrderedDict()
     i = 1
     # `with` guarantees the file is closed even on error (the original
     # leaked the handle)
     with open(path) as f:
         for line in f:
             if indim is not None and i >= (indim + 1):
                 break  # only load the first `indim` words
             ls = line.split(" ")
             word = ls[0]
             D[word] = i
             # list comprehension instead of map(): on Python 3, map()
             # returns an iterator, which np.asarray would not expand
             W.append(np.asarray([[float(x) for x in ls[1:]]]))
             i += 1
     W = np.concatenate(W, axis=0)
     tt.tock("loaded")
     return W, D
예제 #8
0
 def trainloop(self, trainf, validf=None):
     """Main epoch loop: run `trainf` once per epoch, validate with
     `validf` every `self._validinter` epochs, track the best model, adapt
     the learning rate, and autosave when enabled.

     Returns (err, verr): per-epoch training and validation errors.
     """
     self.tt.tick("training")
     err = []
     verr = []
     stop = self.maxiter == 0  # maxiter == 0 means: do not train at all
     self.currentiter = 1
     evalinter = self._validinter
     evalcount = evalinter
     tt = TT("iter")
     # seed with +inf so the report works before the first validation
     prevverre = [float("inf")] * len(self.validators)
     while not stop:
         tt.tick("%d/%d" % (self.currentiter, int(self.maxiter)))
         erre = trainf()
         if self.currentiter == self.maxiter:
             stop = True
         self.currentiter += 1
         err.append(erre)
         # print "done training"   -- stray debug output disabled, for
         # consistency with the sibling trainloop implementations
         verre = prevverre
         if validf is not None and self.currentiter % evalinter == 0:  # validate and print
             verre = validf()
             prevverre = verre
             verr.append(verre)
             tt.msg("training error: %s \t validation error: %s" %
                    ("%.4f" % erre[0], " - ".join(
                        map(lambda x: "%.4f" % x, verre))),
                    prefix="-")
         else:
             tt.msg("training error: %s" %
                    " - ".join(map(lambda x: "%.4f" % x, erre)),
                    prefix="-")
         # retaining the best model seen so far (smaller score = better)
         if self.besttaker is not None:
             modelscore = self.besttaker(
                 ([erre] + verre + [self.currentiter]))
             if modelscore < self.bestmodel[1]:
                 #tt.tock("freezing best with score %.3f (prev: %.3f)" % (modelscore, self.bestmodel[1]), prefix="-").tick()
                 self.bestmodel = (self.model.freeze(), modelscore)
         tt.tock("done", prefix="-")
         self._update_lr(self.currentiter, self.maxiter, err, verr)
         evalcount += 1
         #embed()
         if self._autosave:
             self.save(self.model)
     self.tt.tock("trained").tick()
     return err, verr
예제 #9
0
 def batchloop():
     """One full pass over `datafeeder`; returns the accumulated training
     error(s), averaged per example when self.average_err is True."""
     batchcount = 0
     seen = 0        # number of examples processed so far
     lastperc = -1.  # last progress percentage shown
     acc = [0.0]     # accumulated error score(s)
     ndigits = 2
     ticker = TT("iter progress", verbose=verbose)
     ticker.tick()
     while datafeeder.hasnextbatch():
         scale = 10**ndigits
         perc = round(batchcount * 100. * scale / datafeeder._numbats) / scale
         if perc > lastperc:
             # show a per-example mean even while we are summing
             shown = acc[0] * 1.0 / seen if seen > 0 else 0.0
             ticker.live(("%." + str(ndigits) + "f%% \t error: %.3f")
                         % (perc, shown))
             lastperc = perc
         sampleinps, batsize = datafeeder.nextbatch(withbatchsize=True)
         seen += batsize
         sampleinps = sampletransf(*sampleinps)
         eterr = trainf(*sampleinps)
         # widen the accumulator once the number of output scores is known
         if len(acc) != len(eterr) and acc.count(0.0) == len(acc):
             acc = [0.0] * len(
                 eterr
             )  # ensure compatible size of terr (number of output scores)
         if self.average_err is True:
             # weight each batch error by its batch size; divided by
             # the example count after the loop
             acc = [a + e * batsize for a, e in zip(acc, eterr)]
         else:
             acc = [a + e for a, e in zip(acc, eterr)]
         batchcount += 1
     ticker.stoplive()
     if self.average_err is True:
         acc = [a * 1.0 / seen for a in acc]
     return acc
예제 #10
0
 def batchloop():
     """One pass over `datafeeder`, accumulating the training error in
     `terr` weighted by batch size (presumably divided by the example
     count after the loop, as in the sibling batchloop -- this snippet is
     cut before the final averaging/return)."""
     c = 0
     numex = 0      # examples processed so far
     prevperc = -1.
     terr = [0.0]
     numdigs = 2
     tt = TT("iter progress", verbose=verbose)
     tt.tick()
     datafeeder.reset()
     while datafeeder.hasnextbatch():
         perc = round(c * 100. * (10**numdigs) /
                      datafeeder.getnumbats()) / (10**numdigs)
         if perc > prevperc:
             # show a per-example mean even while accumulating sums
             terr0 = terr[0] * 1.0 / numex if numex > 0 else 0.0
             s = ("%." + str(numdigs) + "f%% \t error: %.3f") % (perc,
                                                                 terr0)
             tt.live(s)
             prevperc = perc
         sampleinps, batsize = datafeeder.nextbatch(withbatchsize=True)
         numex += batsize
         #embed()
         sampleinps = sampletransf(*sampleinps, phase=phase)
         # (a no-op `try/except Exception, e: raise e` wrapper was removed:
         # it only truncated the traceback and is invalid Python 3 syntax)
         eterr = trainf(*sampleinps)
         # widen terr to match the number of output scores
         if len(terr) != len(eterr) and terr.count(0.0) == len(terr):
             terr = [0.0] * len(eterr)
         if self.average_err is True:
             terr = [
                 xterr + xeterr * batsize
                 for xterr, xeterr in zip(terr, eterr)
             ]
         else:
             terr = [
                 xterr + xeterr for xterr, xeterr in zip(terr, eterr)
             ]
         c += 1
예제 #11
0
파일: fb5.py 프로젝트: nilesh-c/teafacto
def loaddata(p, top=np.inf):  # np.infty alias was removed in NumPy 2.0
    """Load a dense dataset from file `p`.

    The first line is "<numsamples> <numcols>"; each following line holds
    numcols-1 tab-separated float features followed by an integer gold
    label. At most `top` samples are read.

    Returns (traindata, golddata): float32 feature matrix and int32 label
    vector.
    """
    tt = TT("Dataloader")
    traindata = None
    golddata = None
    i = 0
    tt.tick("loading")
    with open(p) as f:
        numsam = 1
        for line in f:
            if traindata is None and golddata is None:  # first line: header
                numsam, numcol = map(int, line[:-1].split(" "))
                traindata = np.zeros((min(numsam,
                                          top), numcol - 1)).astype("float32")
                golddata = np.zeros((min(numsam, top), )).astype("int32")
            else:
                ns = line[:-1].split("\t")
                # list() so the slice assignment also works on Python 3,
                # where map() returns a lazy iterator
                traindata[i, :] = list(map(float, ns[:-1]))
                golddata[i] = int(ns[-1])
                i += 1
                tt.progress(i, numsam, live=True)
            if top is not None and i >= top:
                break
    tt.tock("loaded")
    return traindata, golddata
예제 #12
0
 # merge the literals/types collected so far into the global sets
 rellits = rellits.union(trellits)
 types = types.union(ttypes)
 # register every entity, relation, literal and type as a grammar rule
 for ent in ents:
     g.add_rule_str("SENT", ent)
 for rel in rels:
     g.add_rule_str("REL", rel)
 for rellit in rellits:
     g.add_rule_str("RELLIT", rellit)
 for type in types:
     g.add_rule_str("TYPE", type)
 g.parse_info(typeinfo)
 #print g
 gen = Generator(g)
 #print "\n"
 from teafacto.util import ticktock as TT
 tt = TT()
 tt.tick()
 k = 50000  # number of samples to generate
 outp = "../../data/semparse/geoquery.lbd.abstr.autogen"
 with open(outp, "w") as f:
     parser = LambdaParser()
     for i in range(k):
         # sample a sentence from the grammar and parse it to a lambda term
         x = " ".join(map(lambda x: x.name, gen.generate()))
         y = parser.parse(x)
         if y.name == "the":
             # rewrite "the"-rooted terms into explicit lambda abstractions
             y.name = "lambda"
             y.children = (y.children[0], "e", y.children[1])
         print y.greedy_linearize()
         print y
         f.write("{}\n{}\n\n".format(y.greedy_linearize(), str(y)))
 tt.tock("generated {} samples".format(k))
예제 #13
0
    def trainloop(self, trainf, validf=None, _skiptrain=False):
        """Epoch loop with optional per-epoch result logging.

        Runs `trainf` once per epoch (skipped when `_skiptrain`),
        validates with `validf` every `self._validinter` epochs, writes
        one result line per epoch to `self._writeresultspath` when set,
        tracks the best model (direction given by `self.smallerbetter`),
        and adapts the learning rate.

        Returns (err, verr): per-epoch training and validation errors.
        """
        self.tt.tick("training")
        err = []
        verr = []
        stop = self.maxiter == 0  # maxiter == 0 means: do not train at all
        self.currentiter = 1
        evalinter = self._validinter
        evalcount = evalinter
        tt = TT("iter")
        prevverre = [float("inf")] * len(self.validators)

        writeresf = None
        if self._writeresultspath is not None:
            writeresf = open(self._writeresultspath, "w", 1)  # line-buffered

        try:
            while not stop:  # loop over epochs
                tt.tick("%d/%d" % (self.currentiter, int(self.maxiter)))
                if _skiptrain:
                    tt.msg("skipping training")
                    erre = [0.]
                else:
                    erre = trainf()
                if self.currentiter == self.maxiter:
                    stop = True
                self.currentiter += 1
                err.append(erre)
                verre = prevverre
                restowrite = ""
                if self._autosave:
                    self.save()
                if validf is not None and self.currentiter % evalinter == 0:  # validate and print
                    verre = validf()
                    prevverre = verre
                    verr.append(verre)
                    ttmsg = "training error: %s \t validation error: %s" \
                           % ("%.4f" % erre[0],
                              " - ".join(map(lambda x: "%.4f" % x, verre)))
                    restowrite = "\t".join(map(str, erre[0:1] + verre))
                else:
                    ttmsg = "training error: %s" % " - ".join(
                        map(lambda x: "%.4f" % x, erre))
                    restowrite = str(erre[0])
                if writeresf is not None:
                    writeresf.write("{}\t{}\n".format(self.currentiter - 1,
                                                      restowrite))
                # retaining the best; smallerbettermult flips the comparison
                # when larger scores are better
                if self.besttaker is not None:
                    modelscore = self.besttaker(
                        ([erre] + verre + [self.currentiter]))
                    smallerbettermult = 1 if self.smallerbetter else -1
                    if smallerbettermult * modelscore < \
                            smallerbettermult * self.bestmodel[1]:
                        if self.savebest:
                            # persist to disk instead of keeping in memory
                            self.save(suffix=".best")
                            self.bestmodel = (None, modelscore)
                        else:
                            self.bestmodel = (self.save(freeze=True,
                                                        filepath=False),
                                              modelscore)
                tt.tock(ttmsg + "\t", prefix="-")
                self._update_lr(self.currentiter, self.maxiter, err, verr)
                evalcount += 1
        finally:
            # close the results file even if training raises
            # (the original leaked it on exception)
            if writeresf is not None:
                writeresf.close()
        self.tt.tock("trained").tick()
        return err, verr
예제 #14
0
 def __init__(self, maintrainer, *othertrainers):
     """Interleave a main trainer with any number of secondary trainers;
     the main trainer comes first in `self.spts`."""
     self.spts = list((maintrainer,) + othertrainers)
     self.tt = TT("InterleavedTrainer")
     self.currentiter = 0
     # the validation interval follows the main trainer's setting
     self._validinter = self.spts[0].original._validinter