def __init__(self, model, gold):
    """Set up a fluent trainer for *model* with gold (target) variable *gold*.

    Every setting starts at a neutral default; the fluent configuration
    API is expected to populate them before training is started.
    """
    self.model = model
    self.goldvar = gold
    self.validsetmode = False
    self.average_err = True  # TODO: do we still need this?
    # autosave settings: when enabled, the model is persisted during training
    self._autosave = False
    self._autosavepath = None
    self._autosaveblock = None
    # training settings
    self.learning_rate = None
    self.dynamic_lr = None
    self.objective = None
    self.regularizer = None
    self.optimizer = None
    self.traindata = None
    self.traingold = None
    self.gradconstraints = []
    # validation settings
    self._validinter = 1  # validate every N epochs
    self.trainstrategy = self._train_full
    self.validsplits = 0
    self.validrandom = False
    self.validata = None
    self.validgold = None
    self.validation = None
    self.validators = []
    self.external_validators = []
    self.tt = TT("FluentTrainer")
    # taking best: besttaker maps (train err + valid errs + epoch) to a
    # model score; the best-scoring frozen model is kept in bestmodel
    # (lower is better unless smallerbetter is cleared — see trainloop)
    self.besttaker = None
    self.bestmodel = None
    self.savebest = None
    self.smallerbetter = True
    # writing: target path for per-epoch result lines, if any
    self._writeresultspath = None
def batchloop():
    """Run one epoch over ``datafeeder``, feeding transformed batches to
    ``trainf`` and accumulating a per-output training-error list.

    Closure over ``datafeeder``, ``sampletransf``, ``trainf``, ``verbose``
    and ``self`` from the enclosing scope. Errors are kept as a running
    mean when ``self.average_err`` is set, otherwise as a running sum.
    """
    c = 0
    prevperc = -1.
    terr = [0.0]
    numdigs = 2
    tt = TT("iter progress", verbose=verbose)
    tt.tick()
    while datafeeder.hasnextbatch():
        # live progress line, refreshed only when the rounded percentage moves
        perc = round(c * 100. * (10 ** numdigs)
                     / datafeeder._numbats) / (10 ** numdigs)
        if perc > prevperc:
            s = ("%." + str(numdigs) + "f%% \t error: %.3f") % (perc, terr[0])
            tt.live(s)
            prevperc = perc
        sampleinps = datafeeder.nextbatch()
        sampleinps = sampletransf(*sampleinps)
        # fix: removed `try: ... except Exception, e: raise e` — it was a
        # no-op that clobbered the original traceback (and is a syntax
        # error under Python 3); exceptions propagate identically without it
        eterr = trainf(*sampleinps)
        # grow the accumulator once to match the number of error outputs,
        # but only while it still holds nothing but the initial zeros
        if len(terr) != len(eterr) and terr.count(0.0) == len(terr):
            terr = [0.0] * len(eterr)
        if self.average_err is True:
            # incremental mean: old mean weighted c/(c+1), new item 1/(c+1)
            terr = [xterr * (1.0 * c / (c + 1)) + xeterr * (1.0 / (c + 1))
                    for xterr, xeterr in zip(terr, eterr)]
        else:
            terr = [xterr + xeterr for xterr, xeterr in zip(terr, eterr)]
        c += 1
def trainloop(self, tf, vf):
    """Epoch loop for the interleaved (multi-sub-trainer) setup.

    :param tf: aggregate train callable returning one error list per
        sub-trainer in ``self.spts``
    :param vf: aggregate validation callable, same per-trainer layout
    :returns: (err, verr) — per-epoch training / validation error histories
    """
    self.tt.tick("training")
    stop = self.maxiter == 0  # maxiter == 0 means: do not train at all
    self.currentiter = 1
    evalinter = self._validinter
    evalcount = evalinter
    tt = TT("iter")
    err = []
    verr = []
    # seed "previous" validation errors with +inf so the first real
    # validation always reads as an improvement
    prevverre = [[float("inf")] * len(subt.original.validators)
                 for subt in self.spts]
    while not stop:
        tt.tick("%d/%d" % (self.currentiter, int(self.maxiter)))
        erre = tf()
        if self.currentiter == self.maxiter:
            stop = True
        self.currentiter += 1
        err.append(erre)
        # carry the last validation result between validation epochs
        verre = prevverre
        if self.currentiter % evalinter == 0:
            # validate and print
            verre = vf()
            prevverre = verre
        verr.append(verre)
        # retaining the best of main trainer (index 0) only
        if self.spts[0].original.besttaker is not None:
            modelscore = self.spts[0].original.besttaker(
                ([erre[0]] + verre[0] + [self.currentiter]))
            if modelscore < self.spts[0].original.bestmodel[1]:
                # freeze a copy of the main model as the new best
                self.spts[0].original.bestmodel = (
                    self.spts[0].original.model.freeze(), modelscore)
        # build one report line per sub-trainer
        ttlines = []
        for i in range(len(erre)):
            if verre[i] is not None:
                ttlines.append("\t%s:\ttraining error: %s \t validation error: %s" \
                    % (i+1, "%.4f" % erre[i][0],
                       " - ".join(map(lambda x: "%.4f" % x, verre[i]))))
            else:
                ttlines.append("\t%s:\ttraining error: %s" % (i + 1, " - ".join(
                    map(lambda x: "%.4f" % x, erre[i]))))
        tt.tock("\n".join(ttlines) + "\n", prefix="-")
        # let every sub-trainer update its learning rate from its own slice
        # of the error histories
        for i, subt in enumerate(self.spts):
            subt.original._update_lr(self.currentiter, self.maxiter,
                                     [errx[i] for errx in err],
                                     [verrx[i] for verrx in verr])
        evalcount += 1
    for subt in self.spts:
        if subt.original._autosave:
            subt.original.save()
    self.tt.tock("trained").tick()
    return err, verr
def __init__(self, dim, vocabsize=None, trainfrac=0.0):
    """Container for embedding values.

    :param dim: embedding dimensionality
    :param vocabsize: maximum vocabulary size (stored as ``indim``)
    :param trainfrac: fraction kept trainable — exact semantics not
        visible in this block; TODO confirm against users of the class
    """
    # if dim is None, import all
    self.D = OrderedDict()  # word -> row index mapping
    self.tt = TT(self.__class__.__name__)
    self.dim = dim
    self.indim = vocabsize
    # row 0 is a reserved all-zero vector, so word indices start at 1
    self.W = [np.zeros((1, self.dim))]
    self._block = None
    self.trainfrac = trainfrac
def test_ticktock_duration_string(self):
    """_getdurationstr must render seconds/minutes/hours/days with correct
    pluralization and sub-minute millisecond rounding."""
    clock = TT()
    expectations = (
        # sub-minute durations keep millisecond precision
        (1, "1.000 second"),
        (0.5689, "0.569 second"),
        (0.9999, "1.000 second"),
        (59, "59.000 seconds"),
        (59.00001, "59.000 seconds"),
        (59.0005, "59.001 seconds"),
        # minutes: whole-second granularity, singular/plural forms
        (60, "1 minute"),
        (60.005, "1 minute"),
        (61, "1 minute, 1 second"),
        (62, "1 minute, 2 seconds"),
        (121, "2 minutes, 1 second"),
        (120, "2 minutes"),
        # hours and days compose with the smaller units
        (3656, "1 hour, 56 seconds"),
        (2 * 3600, "2 hours"),
        (24 * 3600 + 125, "1 day, 2 minutes, 5 seconds"),
        (25 * 3600 + 126, "1 day, 1 hour, 2 minutes, 6 seconds"),
        (50 * 3600, "2 days, 2 hours"),
    )
    for duration, rendered in expectations:
        self.assertEqual(rendered, clock._getdurationstr(duration))
def batchloop():
    """One interleaved pass: the primary feeder (index 0) drives the epoch
    while secondary feeders are cycled (reset) whenever they run out;
    accumulates one error list per sub-trainer in ``terrs``.

    Closure over ``datafeeders``, ``trainfs``, ``sampletransfs``, ``phase``,
    ``verbose`` and ``this`` from the enclosing scope.
    """
    c = 0
    prevperc = -1.
    terrs = [[0.0] if tf is not None else None for tf in trainfs]
    numdigs = 2
    tt = TT("iter progress", verbose=verbose)
    tt.tick()
    for dataf in datafeeders:
        if dataf is not None:
            dataf.reset()
    while datafeeders[0].hasnextbatch():
        # live progress line, refreshed only when the rounded percentage moves
        perc = round(c * 100. * (10 ** numdigs)
                     / datafeeders[0].getnumbats()) / (10 ** numdigs)
        if perc > prevperc:
            s = ("%." + str(numdigs) + "f%% \t error: %s") \
                % (perc, " - ".join(map(lambda x: "%.3f" % x[0], terrs)))
            tt.live(s)
            prevperc = perc
        # cycle any secondary feeder that is exhausted before the primary one
        for df in datafeeders:
            if not df.hasnextbatch():
                df.reset()
        sampleinps = [df.nextbatch() for df in datafeeders]
        sampleinps = [stf(*si, phase=phase)
                      for (stf, si) in zip(sampletransfs, sampleinps)]
        # fix: removed `try: ... except Exception, e: raise e` — it was a
        # no-op that clobbered the traceback (and is a syntax error under
        # Python 3); exceptions propagate identically without it
        eterrs = [tf(*si) for (tf, si) in zip(trainfs, sampleinps)]
        for i in range(len(terrs)):
            # grow each accumulator once to match its trainer's output arity
            if len(terrs[i]) != len(eterrs[i]) \
                    and terrs[i].count(0.0) == len(terrs[i]):
                terrs[i] = [0.0] * len(eterrs[i])
        # NOTE(review): `this` must be bound in the enclosing scope — the
        # sibling batchloops use `self`; verify this is not a typo
        for i, subt in enumerate(this.spts):
            if subt.original.average_err is True:
                # incremental mean over batches
                terrs[i] = [xterr * (1.0 * c / (c + 1)) + xeterr * (1.0 / (c + 1))
                            for xterr, xeterr in zip(terrs[i], eterrs[i])]
            else:
                terrs[i] = [xterr + xeterr
                            for xterr, xeterr in zip(terrs[i], eterrs[i])]
        c += 1
def loadvalue(self, path, dim, indim=None):
    """Load space-separated embedding vectors from *path*.

    Each line has the form ``<word> <v1> <v2> ...``. Row 0 of the
    returned matrix is an all-zero vector, so word indices start at 1.

    :param path: text file with one embedding per line
    :param dim: embedding dimensionality (width of the zero row)
    :param indim: optional cap on the number of words loaded
    :returns: (W, D) — W an (n+1, dim) matrix, D an OrderedDict mapping
        word -> row index
    """
    tt = TT(self.__class__.__name__)
    tt.tick()
    W = [np.zeros((1, dim))]
    D = OrderedDict()
    i = 1
    # fix: use a context manager — the original left the file handle open
    with open(path) as f:
        for line in f:
            if indim is not None and i >= (indim + 1):
                break
            ls = line.split(" ")
            word = ls[0]
            D[word] = i
            # list comprehension instead of map(lambda ...): clearer, and
            # also correct under Python 3 where map() returns an iterator
            W.append(np.asarray([[float(x) for x in ls[1:]]]))
            i += 1
    W = np.concatenate(W, axis=0)
    tt.tock("loaded")
    return W, D
def trainloop(self, trainf, validf=None):
    """Basic epoch loop: run *trainf* every epoch and *validf* every
    ``_validinter`` epochs; retain the best model via ``besttaker``.

    :param trainf: callable running one training epoch, returns error list
    :param validf: optional validation callable, returns error list
    :returns: (err, verr) — per-epoch training / validation error histories
    """
    self.tt.tick("training")
    err = []
    verr = []
    stop = self.maxiter == 0  # maxiter == 0 disables training entirely
    self.currentiter = 1
    evalinter = self._validinter
    evalcount = evalinter
    tt = TT("iter")
    # +inf seeds so the first real validation always reads as an improvement
    prevverre = [float("inf")] * len(self.validators)
    while not stop:
        tt.tick("%d/%d" % (self.currentiter, int(self.maxiter)))
        erre = trainf()
        if self.currentiter == self.maxiter:
            stop = True
        self.currentiter += 1
        err.append(erre)
        # NOTE(review): leftover debug print? the sibling trainloop has it
        # commented out — consider removing
        print "done training"
        # carry the last validation result between validation epochs
        verre = prevverre
        if validf is not None and self.currentiter % evalinter == 0:
            # validate and print
            verre = validf()
            prevverre = verre
            verr.append(verre)
            tt.msg("training error: %s \t validation error: %s"
                   % ("%.4f" % erre[0], " - ".join(
                       map(lambda x: "%.4f" % x, verre))),
                   prefix="-")
        else:
            tt.msg("training error: %s"
                   % " - ".join(map(lambda x: "%.4f" % x, erre)),
                   prefix="-")
        # retaining the best
        if self.besttaker is not None:
            # score this epoch; a smaller score means a better model here
            modelscore = self.besttaker(
                ([erre] + verre + [self.currentiter]))
            if modelscore < self.bestmodel[1]:
                # freeze a copy of the current weights as the new best
                self.bestmodel = (self.model.freeze(), modelscore)
        tt.tock("done", prefix="-")
        self._update_lr(self.currentiter, self.maxiter, err, verr)
        evalcount += 1
        if self._autosave:
            self.save(self.model)
    self.tt.tock("trained").tick()
    return err, verr
def batchloop():
    """Run one epoch over the datafeeder and return the per-output error
    list — example-averaged when ``self.average_err`` is set, summed
    otherwise. Closure over ``datafeeder``, ``sampletransf``, ``trainf``,
    ``verbose`` and ``self`` from the enclosing scope."""
    nbatches = 0
    nexamples = 0
    shownperc = -1.
    acc = [0.0]
    numdigs = 2
    progress = TT("iter progress", verbose=verbose)
    progress.tick()
    while datafeeder.hasnextbatch():
        scale = 10 ** numdigs
        pct = round(nbatches * 100. * scale / datafeeder._numbats) / scale
        if pct > shownperc:
            # display the running example-averaged first error component
            shown = acc[0] * 1.0 / nexamples if nexamples > 0 else 0.0
            progress.live(("%." + str(numdigs) + "f%% \t error: %.3f")
                          % (pct, shown))
            shownperc = pct
        batch, bsize = datafeeder.nextbatch(withbatchsize=True)
        nexamples += bsize
        batch = sampletransf(*batch)
        batcherr = trainf(*batch)
        # ensure compatible size of acc (number of output scores)
        if len(acc) != len(batcherr) and acc.count(0.0) == len(acc):
            acc = [0.0] * len(batcherr)
        if self.average_err is True:
            # weight each batch's error by its size; normalized at the end
            acc = [a + e * bsize for a, e in zip(acc, batcherr)]
        else:
            acc = [a + e for a, e in zip(acc, batcherr)]
        nbatches += 1
    progress.stoplive()
    if self.average_err is True:
        acc = [a * 1.0 / nexamples for a in acc]
    return acc
def batchloop():
    """One epoch over ``datafeeder`` (reset first): feeds transformed
    batches to ``trainf`` and accumulates per-output error, weighted by
    batch size when ``self.average_err`` is on.

    Closure over ``datafeeder``, ``sampletransf``, ``trainf``, ``phase``,
    ``verbose`` and ``self`` from the enclosing scope.
    """
    c = 0
    numex = 0
    prevperc = -1.
    terr = [0.0]
    numdigs = 2
    tt = TT("iter progress", verbose=verbose)
    tt.tick()
    datafeeder.reset()
    while datafeeder.hasnextbatch():
        perc = round(c * 100. * (10 ** numdigs)
                     / datafeeder.getnumbats()) / (10 ** numdigs)
        if perc > prevperc:
            # show the running example-averaged first error component
            terr0 = terr[0] * 1.0 / numex if numex > 0 else 0.0
            s = ("%." + str(numdigs) + "f%% \t error: %.3f") % (perc, terr0)
            tt.live(s)
            prevperc = perc
        sampleinps, batsize = datafeeder.nextbatch(withbatchsize=True)
        numex += batsize
        sampleinps = sampletransf(*sampleinps, phase=phase)
        # fix: removed `try: ... except Exception, e: raise e` — it was a
        # no-op that clobbered the traceback (and is a syntax error under
        # Python 3); exceptions propagate identically without it
        eterr = trainf(*sampleinps)
        # grow the accumulator once to match the number of error outputs
        if len(terr) != len(eterr) and terr.count(0.0) == len(terr):
            terr = [0.0] * len(eterr)
        if self.average_err is True:
            # weight by batch size; total-example normalization happens
            # after the loop in the enclosing code
            terr = [xterr + xeterr * batsize
                    for xterr, xeterr in zip(terr, eterr)]
        else:
            terr = [xterr + xeterr for xterr, xeterr in zip(terr, eterr)]
        c += 1
def loaddata(p, top=np.inf):
    """Load a dense dataset from a text file.

    The first line is a header ``"<numsamples> <numcols>"``; each
    following line holds ``numcols - 1`` tab-separated float features and
    a trailing integer label. At most *top* rows are read.

    :param p: path to the data file
    :param top: row cap (default unbounded; ``np.inf`` replaces the
        ``np.infty`` alias, which was removed in NumPy 2.0 — same value)
    :returns: (traindata, golddata) — float32 feature matrix and int32
        label vector
    """
    tt = TT("Dataloader")
    traindata = None
    golddata = None
    i = 0
    tt.tick("loading")
    with open(p) as f:
        numsam = 1
        for line in f:
            if traindata is None and golddata is None:
                # header line: declared number of samples and columns
                numsam, numcol = map(int, line[:-1].split(" "))
                traindata = np.zeros((min(numsam, top),
                                      numcol - 1)).astype("float32")
                golddata = np.zeros((min(numsam, top),)).astype("int32")
            else:
                ns = line[:-1].split("\t")
                # list comprehension instead of map(): also correct under
                # Python 3, where numpy cannot assign from a map iterator
                traindata[i, :] = [float(v) for v in ns[:-1]]
                golddata[i] = int(ns[-1])
                i += 1
                tt.progress(i, numsam, live=True)
                if top is not None and i >= top:
                    break
    tt.tock("loaded")
    return traindata, golddata
# NOTE(review): this fragment references names defined earlier in the file
# (rellits, trellits, types, ttypes, ents, rels, g, typeinfo, Generator,
# LambdaParser) that are not visible in this chunk.
rellits = rellits.union(trellits)
types = types.union(ttypes)
# register every entity / relation / relation-literal / type as a grammar rule
for ent in ents:
    g.add_rule_str("SENT", ent)
for rel in rels:
    g.add_rule_str("REL", rel)
for rellit in rellits:
    g.add_rule_str("RELLIT", rellit)
for type in types:  # NOTE: `type` shadows the builtin here
    g.add_rule_str("TYPE", type)
g.parse_info(typeinfo)
gen = Generator(g)
from teafacto.util import ticktock as TT
tt = TT()
tt.tick()
# generate k abstract logical-form samples and write them out
k = 50000
outp = "../../data/semparse/geoquery.lbd.abstr.autogen"
with open(outp, "w") as f:
    parser = LambdaParser()
    for i in range(k):
        # sample a token sequence from the grammar, then parse it into a tree
        x = " ".join(map(lambda x: x.name, gen.generate()))
        y = parser.parse(x)
        if y.name == "the":
            # rewrite "the"-rooted nodes into typed lambda abstractions
            y.name = "lambda"
            y.children = (y.children[0], "e", y.children[1])
        print y.greedy_linearize()
        print y
        # pair: linearized form, then the tree's string form, blank-line sep
        f.write("{}\n{}\n\n".format(y.greedy_linearize(), str(y)))
tt.tock("generated {} samples".format(k))
def trainloop(self, trainf, validf=None, _skiptrain=False):
    """Epoch loop with optional validation, result-file logging, autosave
    and best-model retention.

    :param trainf: callable running one training epoch, returns error list
    :param validf: optional validation callable, returns error list
    :param _skiptrain: debugging switch — skip training, only validate
    :returns: (err, verr) — per-epoch training / validation error histories
    """
    self.tt.tick("training")
    err = []
    verr = []
    stop = self.maxiter == 0  # maxiter == 0 disables training entirely
    self.currentiter = 1
    evalinter = self._validinter
    evalcount = evalinter
    tt = TT("iter")
    # +inf seeds so the first real validation always reads as an improvement
    prevverre = [float("inf")] * len(self.validators)
    writeresf = None
    if self._writeresultspath is not None:
        # line-buffered (buffering=1) so results survive a crash mid-run
        writeresf = open(self._writeresultspath, "w", 1)
    while not stop:  # loop over epochs
        tt.tick("%d/%d" % (self.currentiter, int(self.maxiter)))
        if _skiptrain:
            tt.msg("skipping training")
            erre = [0.]
        else:
            erre = trainf()
        if self.currentiter == self.maxiter:
            stop = True
        self.currentiter += 1
        err.append(erre)
        # carry the last validation result between validation epochs
        verre = prevverre
        restowrite = ""
        if self._autosave:
            self.save()
        if validf is not None and self.currentiter % evalinter == 0:
            # validate and print
            verre = validf()
            prevverre = verre
            verr.append(verre)
            ttmsg = "training error: %s \t validation error: %s" \
                % ("%.4f" % erre[0],
                   " - ".join(map(lambda x: "%.4f" % x, verre)))
            restowrite = "\t".join(map(str, erre[0:1] + verre))
        else:
            ttmsg = "training error: %s" % " - ".join(
                map(lambda x: "%.4f" % x, erre))
            restowrite = str(erre[0])
        if writeresf is not None:
            # one "<epoch>\t<errors>" line per epoch
            writeresf.write("{}\t{}\n".format(self.currentiter - 1,
                                              restowrite))
        # retaining the best
        if self.besttaker is not None:
            modelscore = self.besttaker(
                ([erre] + verre + [self.currentiter]))
            # flip the comparison when bigger scores are better
            smallerbettermult = 1 if self.smallerbetter else -1
            if smallerbettermult * modelscore < smallerbettermult * self.bestmodel[
                    1]:
                if self.savebest:
                    # persist to disk; no in-memory copy is kept
                    self.save(suffix=".best")
                    self.bestmodel = (None, modelscore)
                else:
                    # keep a frozen in-memory copy instead of writing to disk
                    self.bestmodel = (self.save(freeze=True, filepath=False),
                                      modelscore)
        tt.tock(ttmsg + "\t", prefix="-")
        self._update_lr(self.currentiter, self.maxiter, err, verr)
        evalcount += 1
    if writeresf is not None:
        writeresf.close()
    self.tt.tock("trained").tick()
    return err, verr
def __init__(self, maintrainer, *othertrainers):
    """Interleaved trainer over one main trainer plus any number of
    secondary trainers; the main trainer is always ``spts[0]``."""
    subtrainers = [maintrainer]
    subtrainers.extend(othertrainers)
    self.spts = subtrainers
    self.tt = TT("InterleavedTrainer")
    self.currentiter = 0
    # the interleaved schedule follows the main trainer's validation interval
    self._validinter = maintrainer.original._validinter