def onDecodingFinish(data, output):
    if op == "gen":
        for src_id, (inp, out) in sorted(output.items(), key=lambda x: x[0]):
            print(TRG.str_rpr(out))
    elif op == "cppl":
        UF.trace("Corpus PPL:", PPL(output))
        print(PPL(output))
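# --- Sketch (hypothetical driver; not from this source) ---
# The decoding callbacks above are presumably invoked around a decoding run,
# roughly like this (decode() and its arguments are assumptions here):
#
#     onDecodingStart()
#     output = model.decode(data, **decoding_options)
#     onDecodingFinish(data, output)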
def train(self, x_data, y_data, learn=True, *args, **kwargs):
    accum_loss, output = self._train(x_data, y_data, not learn, *args, **kwargs)
    if learn:
        # Skip the update when the loss is NaN to avoid corrupting the weights.
        if not math.isnan(float(accum_loss.data)):
            self._model.zerograds()
            accum_loss.backward()
            self._opt.update()
        else:
            UF.trace("Warning: LOSS is nan, ignoring!")
    return accum_loss.data, output
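# --- Usage sketch (not part of the original source) ---
# How the train() wrapper above might be driven: learn=True scores the batch
# and applies a gradient update; learn=False only scores it, so the same
# method doubles as an evaluation step. `trainer`, `x_batch`, `y_batch`,
# `x_dev`, and `y_dev` are hypothetical names for this sketch.
#
#     loss, output = trainer.train(x_batch, y_batch, learn=True)    # update
#     dev_loss, dev_out = trainer.train(x_dev, y_dev, learn=False)  # eval only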
def main():
    args = parse_args()
    init_program_state(args)
    # Variable
    batch_size = args.batch
    epoch_total = args.epoch
    # data
    UF.trace("Loading corpus + dictionary")
    train, label, X, Y = load_data()
    # Setup model
    UF.trace("Setting up classifier")
    model = L.Classifier(load_model(args, X, Y))
    if not args.use_cpu:
        model = model.to_gpu()
    opt = optimizers.SGD()
    opt.setup(model)
    # Begin training
    UF.trace("Begin training MLP")
    for ep in range(epoch_total):
        UF.trace("Epoch %d" % (ep + 1))
        accum_loss = 0
        for i in range(0, len(train), batch_size):
            x_data = Variable(train[i:i + batch_size])
            y_data = Variable(label[i:i + batch_size])
            model.zerograds()
            loss = model(x_data, y_data)
            accum_loss += loss  # epoch total; backward() runs on the per-batch loss
            loss.backward()
            opt.update()
        print("Loss:", loss.data, file=sys.stderr)
        print("Accuracy:", model.accuracy.data, file=sys.stderr)
    # # Classifying training data
    # for i in range(0, len(train), batch_size):
    #     x_data = Variable(train[i:i + batch_size])
    #     y_data = model.predictor(x_data)
    #     UF.print_classification(y_data.data, Y)
    UF.trace("Saving model....")
    with ModelFile(open(args.model_out, "w")) as model_out:
        model.predictor.save(model_out)
def main():
    args = parse_args()
    init_program_state(args)
    # Variable
    batch_size = args.batch
    # Setup model
    UF.trace("Setting up classifier")
    model = load_model(args)
    if not args.use_cpu:
        model = model.to_gpu()
    # data
    UF.trace("Loading corpus + dictionary")
    test = load_data(sys.stdin, model._feat, model._input)
    for i in range(0, len(test), batch_size):
        x_data = Variable(test[i:i + batch_size])
        y_data = model(x_data)
        UF.print_classification(y_data.data, model._trg)
def onDecodingStart():
    if op == "gen":
        UF.trace("Sentence generation started.")
    elif op == "cppl":
        UF.trace("Corpus PPL calculation started.")
    elif op == "sppl":
        UF.trace("Sentence PPL calculation started.")
sys.exit(1)

# Loading data
x_all, y_all = load_data(sys.argv[1])
x_train, x_test = np.split(x_all, [60000])
y_train, y_test = np.split(y_all, [60000])
train_size = len(x_train)
test_size = len(x_test)

# Init model
model = L.Classifier(MLP())
optimizer = optimizers.SGD()
optimizer.setup(model)

# Training begins here
UF.trace("Begin training")
for epoch in range(EPOCH):
    UF.trace("Epoch %d" % (epoch + 1))
    indexes = np.random.permutation(train_size)
    for i in range(0, train_size, BATCH):
        x = Variable(x_train[indexes[i:i + BATCH]])
        t = Variable(y_train[indexes[i:i + BATCH]])
        model.zerograds()
        loss = model(x, t)
        loss.backward()
        optimizer.update()

# Testing begins here
UF.trace("Begin Testing")
sum_loss, sum_accuracy = 0, 0
for i in range(0, test_size, BATCH):
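# --- Sketch (not from this source): the test loop above is truncated here.
# Under the standard Chainer evaluation pattern it would continue by scoring
# each test batch and accumulating into sum_loss / sum_accuracy, e.g.:
#
#     x = Variable(x_test[i:i + BATCH])
#     t = Variable(y_test[i:i + BATCH])
#     loss = model(x, t)
#     sum_loss += float(loss.data) * BATCH
#     sum_accuracy += float(model.accuracy.data) * BATCH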
parser.add_argument("--operation", choices=["sppl", "cppl", "gen"], default="sppl",
                    help="sppl: sentence-wise PPL\ncppl: corpus-wise PPL\ngen: read input, start generating random words.")
parser.add_argument("--use_cpu", action="store_true")
parser.add_argument("--gpu", type=int, default=-1, help="Which GPU to use (negative for CPU).")
parser.add_argument("--verbose", action="store_true")
parser.add_argument("--gen_limit", type=positive, default=50)
parser.add_argument("--eos_disc", type=float, default=0.0,
                    help="Positive fractional discount on EOS, to encourage longer output sentences.")
args = parser.parse_args()
op = args.operation

if op == "sppl" and args.batch != 1:
    raise ValueError("Sentence-based perplexity does not currently support multi-batching.")
if args.use_cpu:
    args.gpu = -1

# Loading model
UF.trace("Setting up classifier")
model = LanguageModel(args, use_gpu=args.gpu, collect_output=True)
VOC, _ = model.get_vocabularies()
decoding_options = {"gen_limit": args.gen_limit, "eos_disc": args.eos_disc}

# Testing callbacks
def PPL(loss):
    # Perplexity = exp(mean cross-entropy loss); accept either a Chainer
    # Variable (with a .data field) or a plain float.
    try:
        return math.exp(loss.data)
    except AttributeError:
        return math.exp(loss)

def onDecodingStart():
    if op == "gen":
        UF.trace("Sentence generation started.")
    elif op == "cppl":
        UF.trace("Corpus PPL calculation started.")
    elif op == "sppl":
        UF.trace("Sentence PPL calculation started.")
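# --- Worked example (illustrative numbers, not from this repo) ---
# PPL() converts a mean per-token cross-entropy (in nats) into perplexity:
# a loss of 4.6 nats gives math.exp(4.6) ≈ 99.48, i.e. the model is about as
# uncertain as a uniform choice among ~100 words per token; lower is better.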
def onDecodingStart():
    UF.trace("Decoding started.")