def main():
    """Entry point: pick a device, then train, resume, or test an encoder-decoder."""
    args = get_arguments()

    print("initializing device ... ", end="", file=sys.stderr, flush=True)
    # Fall back to the CPU backend when no GPU index was given.
    Device.set_default(D.Naive() if args.gpu < 0 else D.CUDA(args.gpu))
    print("done.", file=sys.stderr)

    mode = args.mode
    prefix = args.model
    if mode == "train":
        # Fresh model: initialize parameters from the vocabulary/layer sizes.
        encdec = EncoderDecoder(args.dropout)
        encdec.init(args.src_vocab, args.trg_vocab, args.embed, args.hidden)
        optimizer = O.Adam()
        optimizer.set_weight_decay(1e-6)
        optimizer.set_gradient_clipping(5)
        # 1e10 acts as "worst possible" initial validation perplexity.
        train(encdec, optimizer, args, 1e10)
    elif mode == "resume":
        # Continue training: restore model, optimizer and best validation ppl.
        print("loading model/optimizer ... ", end="", file=sys.stderr, flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        optimizer = O.Adam()
        optimizer.load(prefix + ".optimizer")
        valid_ppl = load_ppl(prefix + ".valid_ppl")
        print("done.", file=sys.stderr)
        train(encdec, optimizer, args, valid_ppl)
    else:
        # Any other mode is treated as "test": load the model and evaluate.
        print("loading model ... ", end="", file=sys.stderr, flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        print("done.", file=sys.stderr)
        test(encdec, args)
def test_pyoptimizer_compare_with_cpp(self):
    """Python-side Adam must match the C++ Adam: configs and trained params."""
    c_optimizer = O.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
    py_params = train_func(self.t)
    c_params = train_func(c_optimizer)

    py_uint_configs, py_float_configs = Optimizer.get_configs(self.t)
    c_uint_configs, c_float_configs = c_optimizer.get_configs()

    self.assertEqual(py_uint_configs["Optimizer.epoch"],
                     c_uint_configs["Optimizer.epoch"])
    # Adam hyper-parameters are stored under different prefixes on each side.
    for hyper in ("alpha", "beta1", "beta2", "eps"):
        self.assertEqual(py_float_configs["TestAdam." + hyper],
                         c_float_configs["Adam." + hyper])
    # Shared optimizer-level settings use identical keys on both sides.
    for key in ("Optimizer.clip_threshold",
                "Optimizer.l2_strength",
                "Optimizer.lr_scale"):
        self.assertEqual(py_float_configs[key], c_float_configs[key])
    # Trained parameter tensors must agree up to floating-point tolerance.
    for i in range(4):
        self.assertTrue(np.isclose(py_params[i], c_params[i]).all())
def main():
    """Command-line entry point: train, resume training, or test a model.

    Expects two positional arguments: the mode (train|resume|test) and the
    prefix of the model files on disk.
    """
    parser = ArgumentParser()
    parser.add_argument("mode", help="(train|resume|test)")
    parser.add_argument("model_prefix", help="prefix of the model files.")
    args = parser.parse_args()

    mode = args.mode
    prefix = args.model_prefix
    print("mode:", mode, file=sys.stderr)
    print("prefix:", prefix, file=sys.stderr)
    if mode not in ("train", "resume", "test"):
        print("unknown mode:", mode, file=sys.stderr)
        return

    # flush=True replaces the former explicit sys.stderr.flush() calls,
    # matching the idiom used by the other entry points in this file.
    print("initializing device ... ", end="", file=sys.stderr, flush=True)
    dev = D.CUDA(0)
    Device.set_default(dev)
    print("done.", file=sys.stderr)

    if mode == "train":
        # Fresh model: initialize parameters from the fixed size constants.
        encdec = AttentionalEncoderDecoder()
        encdec.init(SRC_VOCAB_SIZE, TRG_VOCAB_SIZE,
                    NUM_EMBED_UNITS, NUM_HIDDEN_UNITS)
        optimizer = O.Adam()
        optimizer.set_weight_decay(1e-6)
        optimizer.set_gradient_clipping(5)
        # 1e10 acts as "worst possible" initial validation perplexity.
        train(encdec, optimizer, prefix, 1e10)
    elif mode == "resume":
        # Continue training: restore model, optimizer and best validation ppl.
        print("loading model/optimizer ... ", end="", file=sys.stderr, flush=True)
        encdec = AttentionalEncoderDecoder()
        encdec.load(prefix + ".model")
        optimizer = O.Adam()
        optimizer.load(prefix + ".optimizer")
        valid_ppl = load_ppl(prefix + ".valid_ppl")
        print("done.", file=sys.stderr)
        train(encdec, optimizer, prefix, valid_ppl)
    else:
        # mode == "test" (validated above).
        print("loading model ... ", end="", file=sys.stderr, flush=True)
        encdec = AttentionalEncoderDecoder()
        encdec.load(prefix + ".model")
        print("done.", file=sys.stderr)
        test(encdec)
def main(config):
    """Dispatch on config['mode']: preproc, train, resume, or test a Transformer."""
    mode = config['mode']
    # Preprocessing needs no device, so handle it before any device setup.
    if mode == 'preproc':
        preproc(config)
        return

    print('initializing device ...', end='', file=sys.stderr, flush=True)
    if config['gpu'] < 0:
        dev = D.Naive()
    else:
        dev = D.CUDA(config['gpu'])
    Device.set_default(dev)
    print("done.", file=sys.stderr, flush=True)

    prefix = config['model_prefix']

    def build_model():
        # Every branch constructs the Transformer with the same hyper-parameters.
        return Transformer(config['n_heads'], config['n_stacks'],
                           config['dropout'], config['generation_limit'])

    if mode == 'train':
        model = build_model()
        model.init(config['vocabulary_size'], config['d_model'], config['d_ff'])
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        # 1e10 acts as "worst possible" initial validation perplexity.
        train(model, optimizer, config, 1e10)
    elif mode == 'resume':
        print('loading model/optimizer ... ', end='', file=sys.stderr, flush=True)
        model = build_model()
        model.load(prefix + '.model')
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        optimizer.load(prefix + '.optimizer')
        # The best validation perplexity so far is kept in the ".valid" file.
        with Path(prefix).with_suffix('.valid').open() as f:
            valid_ppl = float(f.read().strip())
        print('done.', file=sys.stderr, flush=True)
        train(model, optimizer, config, valid_ppl)
    elif mode == 'test':
        model = build_model()
        model.load(prefix + '.model')
        test(model, config)
def test_optimizer_add(self):
    """add() registers both a model and a bare parameter with the optimizer."""
    model = TestModel()
    p = Parameter([5], I.Constant(0))
    p.gradient = tF.raw_input([5], [1, 2, 3, 4, 5])

    optimizer = O.Adam()
    optimizer.set_weight_decay(1e-6)
    optimizer.set_gradient_clipping(5)
    optimizer.add(model)
    optimizer.add(p)

    # Registering must not touch existing gradient values.
    initial = [1, 2, 3, 4, 5]
    for grad in (p.gradient, model.param.gradient):
        self.assertEqual(grad.to_list(), initial)

    # reset_gradients() zeroes gradients of everything that was added.
    optimizer.reset_gradients()
    zeros = [0, 0, 0, 0, 0]
    for grad in (p.gradient, model.param.gradient):
        self.assertEqual(grad.to_list(), zeros)
def test_adam_virtual(self):
    """set_configs()/get_configs() round-trip through the virtual interface."""
    opt = O.Adam()
    epoch_configs = {'Optimizer.epoch': 1}
    # NOTE: key insertion order is kept as-is in case set_configs iterates it.
    value_configs = {
        'Optimizer.lr_scale': 1.0,
        'Adam.beta2': 1.0,
        'Adam.eps': 0.0,
        'Optimizer.clip_threshold': 0.0,
        'Adam.alpha': 0.0,
        'Optimizer.l2_strength': 0.0,
        'Adam.beta1': 1.0,
    }
    opt.set_configs(epoch_configs, value_configs)
    epoch_configs, value_configs = opt.get_configs()
    self.assertEqual(epoch_configs['Optimizer.epoch'], 1)
def main():
    """Train an RNN language model on PTB and report per-epoch perplexities."""
    # Loads vocab.
    vocab = make_vocab("data/ptb.train.txt")
    print("#vocab:", len(vocab))  # maybe 10000
    eos_id = vocab["<s>"]

    # Loads all corpus.
    train_corpus = load_corpus("data/ptb.train.txt", vocab)
    valid_corpus = load_corpus("data/ptb.valid.txt", vocab)
    num_train_sents = len(train_corpus)
    num_valid_sents = len(valid_corpus)
    num_train_labels = count_labels(train_corpus)
    num_valid_labels = count_labels(valid_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    # Device and computation graph.
    dev = D.CUDA(0)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    # Our LM.
    lm = RNNLM(len(vocab), eos_id)

    # Optimizer.
    optimizer = O.Adam()
    optimizer.set_weight_decay(1e-6)
    optimizer.set_gradient_clipping(5)
    optimizer.add_model(lm)

    # Sentence IDs.
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        print("epoch", (epoch + 1), "/", MAX_EPOCH, ":")
        # Shuffles train sentence IDs so each epoch sees a new batch order.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            # Slicing clamps at the list end, so no explicit min() is needed.
            batch_ids = train_ids[ofs:ofs + BATCH_SIZE]
            batch = make_batch(train_corpus, batch_ids, eos_id)

            g.clear()
            outputs = lm.forward(batch)
            loss = lm.forward_loss(outputs, batch)
            # loss is averaged per sentence; re-weight by batch size.
            train_loss += loss.to_float() * len(batch_ids)

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

            # Progress indicator; flush=True replaces the explicit
            # sys.stdout.flush() so the carriage-return line updates live.
            print(ofs, end="\r", flush=True)

        train_ppl = math.exp(train_loss / num_train_labels)
        print(" train ppl =", train_ppl)

        # Validation (no gradient updates, same batching as training).
        valid_loss = 0
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            batch_ids = valid_ids[ofs:ofs + BATCH_SIZE]
            batch = make_batch(valid_corpus, batch_ids, eos_id)

            g.clear()
            outputs = lm.forward(batch)
            loss = lm.forward_loss(outputs, batch)
            valid_loss += loss.to_float() * len(batch_ids)

            print(ofs, end="\r", flush=True)

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print(" valid ppl =", valid_ppl)