def corrupt_example(self, e):
    """
    Return a corrupted version of example e, plus the weight of this example.

    A shallow copy of ``e`` is taken and its last element is redrawn until
    it differs from the original last element; ``e`` itself is not modified.

    The draw is selected by ``HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"]``:

    * ``0``: uniform integer in ``0 .. self.parameters.vocab_size - 1``,
      with probability ``1 / vocab_size``.
    * ``1``: ``weighted_sample(noise.indexed_weights())``, whose result is
      unpacked as a ``(value, probability)`` pair.

    Once more than ten draws have been needed, every further draw is
    replaced by a uniform one (0-gram backoff).

    Returns:
        A ``(corrupted, weight)`` tuple, where ``weight`` is ``1 / pr`` and
        ``pr`` is the probability attached to the draw that produced the
        final last element.

    Raises:
        AssertionError: if the hyperparameter is neither 0 nor 1.
    """
    from hyperparameters import HYPERPARAMETERS
    import random
    import copy

    def uniform_draw():
        # 0-gram noise: every vocabulary index is equally likely.
        size = self.parameters.vocab_size
        return random.randint(0, size - 1), 1. / size

    e = copy.copy(e)
    last = e[-1]
    cnt = 0
    while e[-1] == last:
        ngram = HYPERPARAMETERS["NGRAM_FOR_TRAINING_NOISE"]
        if ngram == 0:
            e[-1], pr = uniform_draw()
        elif ngram == 1:
            import noise
            from common.myrandom import weighted_sample
            e[-1], pr = weighted_sample(noise.indexed_weights())
        else:
            # Explicit raise (rather than ``assert 0``) so the check is not
            # stripped when Python runs with -O.
            raise AssertionError(
                "unsupported NGRAM_FOR_TRAINING_NOISE: %r" % (ngram,))
        cnt += 1
        # Backoff to 0gram smoothing if we fail 10 times to get noise.
        # The draw above is still performed first so the sequence of random
        # calls is unchanged; pr must be replaced together with the value,
        # otherwise the weight would describe the discarded draw.
        if cnt > 10:
            e[-1], pr = uniform_draw()
    weight = 1. / pr
    return e, weight
logging.info(stats()) # print "FINAL VALIDATION AT TRAINING STEP %d: mean(logrank) = %.2f, stddev(logrank) = %.2f, cnt = %d" % (cnt, numpy.mean(numpy.array(logranks)), numpy.std(numpy.array(logranks)), i+1) # print stats() if __name__ == "__main__": import common.hyperparameters, common.options HYPERPARAMETERS = common.hyperparameters.read("language-model") HYPERPARAMETERS, options, args, newkeystr = common.options.reparse(HYPERPARAMETERS) import hyperparameters from common import myyaml import sys print >> sys.stderr, myyaml.dump(common.dump.vars_seq([hyperparameters, miscglobals])) import noise indexed_weights = noise.indexed_weights() rundir = common.dump.create_canonical_directory(HYPERPARAMETERS) import os.path, os logfile = os.path.join(rundir, "log") if newkeystr != "": verboselogfile = os.path.join(rundir, "log%s" % newkeystr) print >> sys.stderr, "Logging to %s, and creating link %s" % (logfile, verboselogfile) os.system("ln -s log %s " % (verboselogfile)) else: print >> sys.stderr, "Logging to %s, not creating any link because of default settings" % logfile import random, numpy random.seed(miscglobals.RANDOMSEED) numpy.random.seed(miscglobals.RANDOMSEED)
logging.info(stats()) # print "FINAL VALIDATION AT TRAINING STEP %d: mean(logrank) = %.2f, stddev(logrank) = %.2f, cnt = %d" % (cnt, numpy.mean(numpy.array(logranks)), numpy.std(numpy.array(logranks)), i+1) # print stats() if __name__ == "__main__": import common.hyperparameters, common.options HYPERPARAMETERS = common.hyperparameters.read("language-model") HYPERPARAMETERS, options, args, newkeystr = common.options.reparse(HYPERPARAMETERS) import hyperparameters from common import myyaml import sys print >> sys.stderr, myyaml.dump(common.dump.vars_seq([hyperparameters, miscglobals])) import noise indexed_weights = noise.indexed_weights() from rundir import rundir rundir = rundir() import os.path, os logfile = os.path.join(rundir, "log") if newkeystr != "": verboselogfile = os.path.join(rundir, "log%s" % newkeystr) print >> sys.stderr, "Logging to %s, and creating link %s" % (logfile, verboselogfile) os.system("ln -s log %s " % (verboselogfile)) else: print >> sys.stderr, "Logging to %s, not creating any link because of default settings" % logfile import random, numpy random.seed(miscglobals.RANDOMSEED)