Example #1
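# Mode-driven entry point: selects a Naive (CPU) or CUDA device, then either
# trains a fresh EncoderDecoder, resumes training from saved model/optimizer
# files, or loads a saved model for testing.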
def main():
    args = get_arguments()

    print("initializing device ... ", end="", file=sys.stderr, flush=True)
    dev = D.Naive() if args.gpu < 0 else D.CUDA(args.gpu)
    Device.set_default(dev)
    print("done.", file=sys.stderr)

    mode = args.mode
    prefix = args.model
    if mode == "train":
        encdec = EncoderDecoder(args.dropout)
        encdec.init(args.src_vocab, args.trg_vocab, args.embed, args.hidden)
        optimizer = O.Adam()
        optimizer.set_weight_decay(1e-6)
        optimizer.set_gradient_clipping(5)
        train(encdec, optimizer, args, 1e10)
    elif mode == "resume":
        print("loading model/optimizer ... ",
              end="",
              file=sys.stderr,
              flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        optimizer = O.Adam()
        optimizer.load(prefix + ".optimizer")
        valid_ppl = load_ppl(prefix + ".valid_ppl")
        print("done.", file=sys.stderr)
        train(encdec, optimizer, args, valid_ppl)
    else:
        print("loading model ... ", end="", file=sys.stderr, flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        print("done.", file=sys.stderr)
        test(encdec, args)
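The helpers get_arguments and load_ppl are not shown above. A minimal, hypothetical sketch of what they might look like, inferring flag names and defaults from the attributes main() accesses (the real helpers in the original example may differ):

# Hypothetical stand-ins for the helpers used above; the option names follow
# the attributes read in main() (gpu, mode, model, dropout, ...), and all
# default values are made up.
from argparse import ArgumentParser

def get_arguments():
    parser = ArgumentParser()
    parser.add_argument("mode", help="(train|resume|test)")
    parser.add_argument("model", help="prefix of the model files.")
    parser.add_argument("--gpu", type=int, default=-1)
    parser.add_argument("--dropout", type=float, default=0.5)
    parser.add_argument("--src-vocab", type=int, default=4000)
    parser.add_argument("--trg-vocab", type=int, default=5000)
    parser.add_argument("--embed", type=int, default=512)
    parser.add_argument("--hidden", type=int, default=512)
    return parser.parse_args()

def load_ppl(path):
    # The training loop presumably stores the best validation perplexity
    # as a plain number in a text file (compare Example #4, which reads a
    # '.valid' file the same way).
    with open(path) as f:
        return float(f.read().strip())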
Example #2
def test_pyoptimizer_compare_with_cpp(self):
    # Run the same training routine with the Python-implemented optimizer
    # under test (self.t) and with the C++-backed O.Adam, then compare
    # their configs and the resulting parameters.
    c_optimizer = O.Adam(alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8)
    py_params = train_func(self.t)
    c_params = train_func(c_optimizer)
    py_uint_configs, py_float_configs = Optimizer.get_configs(self.t)
    c_uint_configs, c_float_configs = c_optimizer.get_configs()
    self.assertEqual(py_uint_configs["Optimizer.epoch"],
                     c_uint_configs["Optimizer.epoch"])
    self.assertEqual(py_float_configs["TestAdam.alpha"],
                     c_float_configs["Adam.alpha"])
    self.assertEqual(py_float_configs["TestAdam.beta1"],
                     c_float_configs["Adam.beta1"])
    self.assertEqual(py_float_configs["TestAdam.beta2"],
                     c_float_configs["Adam.beta2"])
    self.assertEqual(py_float_configs["TestAdam.eps"],
                     c_float_configs["Adam.eps"])
    self.assertEqual(py_float_configs["Optimizer.clip_threshold"],
                     c_float_configs["Optimizer.clip_threshold"])
    self.assertEqual(py_float_configs["Optimizer.l2_strength"],
                     c_float_configs["Optimizer.l2_strength"])
    self.assertEqual(py_float_configs["Optimizer.lr_scale"],
                     c_float_configs["Optimizer.lr_scale"])
    # Both back ends should produce numerically matching parameters.
    self.assertTrue(np.isclose(py_params[0], c_params[0]).all())
    self.assertTrue(np.isclose(py_params[1], c_params[1]).all())
    self.assertTrue(np.isclose(py_params[2], c_params[2]).all())
    self.assertTrue(np.isclose(py_params[3], c_params[3]).all())
Example #3
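# Same workflow as Example #1, but with a fixed CUDA device, an attentional
# encoder-decoder, and vocabulary/unit sizes taken from module constants.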
def main():
    parser = ArgumentParser()
    parser.add_argument("mode", help="(train|resume|test)")
    parser.add_argument("model_prefix", help="prefix of the model files.")
    args = parser.parse_args()

    mode = args.mode
    prefix = args.model_prefix
    print("mode:", mode, file=sys.stderr)
    print("prefix:", prefix, file=sys.stderr)

    if mode not in ("train", "resume", "test"):
        print("unknown mode:", mode, file=sys.stderr)
        return

    print("initializing device ... ", end="", file=sys.stderr)
    sys.stderr.flush()
    dev = D.CUDA(0)
    Device.set_default(dev)
    print("done.", file=sys.stderr)

    if mode == "train":
        encdec = AttentionalEncoderDecoder()
        encdec.init(SRC_VOCAB_SIZE, TRG_VOCAB_SIZE, NUM_EMBED_UNITS,
                    NUM_HIDDEN_UNITS)
        optimizer = O.Adam()
        optimizer.set_weight_decay(1e-6)
        optimizer.set_gradient_clipping(5)
        train(encdec, optimizer, prefix, 1e10)
    elif mode == "resume":
        print("loading model/optimizer ... ", end="", file=sys.stderr)
        sys.stderr.flush()
        encdec = AttentionalEncoderDecoder()
        encdec.load(prefix + ".model")
        optimizer = O.Adam()
        optimizer.load(prefix + ".optimizer")
        valid_ppl = load_ppl(prefix + ".valid_ppl")
        print("done.", file=sys.stderr)
        train(encdec, optimizer, prefix, valid_ppl)
    else:
        print("loading model ... ", end="", file=sys.stderr)
        sys.stderr.flush()
        encdec = AttentionalEncoderDecoder()
        encdec.load(prefix + ".model")
        print("done.", file=sys.stderr)
        test(encdec)
Example #4
# Config-driven driver for a Transformer model: either preprocesses data, or
# selects a device and then trains, resumes, or tests the model.
def main(config):
    mode = config['mode']
    if mode == 'preproc':
        preproc(config)
        return

    print('initializing device ...', end='', file=sys.stderr, flush=True)
    dev = D.Naive() if config['gpu'] < 0 else D.CUDA(config['gpu'])
    Device.set_default(dev)
    print("done.", file=sys.stderr, flush=True)

    prefix = config['model_prefix']
    if mode == 'train':
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.init(config['vocabulary_size'], config['d_model'],
                   config['d_ff'])
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        train(model, optimizer, config, 1e10)
    elif mode == 'resume':
        print('loading model/optimizer ... ',
              end='',
              file=sys.stderr,
              flush=True)
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        optimizer.load(prefix + '.optimizer')
        with Path(prefix).with_suffix('.valid').open() as f:
            valid_ppl = float(f.read().strip())
        print('done.', file=sys.stderr, flush=True)
        train(model, optimizer, config, valid_ppl)
    elif mode == 'test':
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        test(model, config)
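Note that this example configures Adam with alpha=1, beta2=0.98, and eps=1e-9, the Adam settings from the original Transformer paper; presumably train() then scales the effective learning rate per step (via the 'Optimizer.lr_scale' config seen in the tests below) to implement the usual warmup schedule.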
Example #5
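# Registers both a model and a standalone Parameter with the optimizer, then
# checks that reset_gradients() zeroes every registered gradient.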
def test_optimizer_add(self):
    model = TestModel()
    p = Parameter([5], I.Constant(0))
    p.gradient = tF.raw_input([5], [1, 2, 3, 4, 5])
    optimizer = O.Adam()
    optimizer.set_weight_decay(1e-6)
    optimizer.set_gradient_clipping(5)
    optimizer.add(model)
    optimizer.add(p)
    self.assertEqual(p.gradient.to_list(), [1, 2, 3, 4, 5])
    self.assertEqual(model.param.gradient.to_list(), [1, 2, 3, 4, 5])
    optimizer.reset_gradients()
    self.assertEqual(p.gradient.to_list(), [0, 0, 0, 0, 0])
    self.assertEqual(model.param.gradient.to_list(), [0, 0, 0, 0, 0])
Example #6
def test_adam_virtual(self):
    # Push a full set of config values into the optimizer and read them
    # back, checking that the round trip preserves them.
    t = O.Adam()
    uint_configs = {'Optimizer.epoch': 1}
    float_configs = {
        'Optimizer.lr_scale': 1.0,
        'Adam.beta2': 1.0,
        'Adam.eps': 0.0,
        'Optimizer.clip_threshold': 0.0,
        'Adam.alpha': 0.0,
        'Optimizer.l2_strength': 0.0,
        'Adam.beta1': 1.0,
    }
    t.set_configs(uint_configs, float_configs)
    uint_configs, float_configs = t.get_configs()
    self.assertEqual(uint_configs['Optimizer.epoch'], 1)
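As the test suggests, get_configs()/set_configs() expose the optimizer's whole state as a pair of plain dictionaries (unsigned-integer and float configs), so individual entries can be edited by hand. A minimal sketch along those lines; the key name comes from the tests above, while the epoch value is made up:

from primitiv import optimizers as O

opt = O.Adam()
uint_configs, float_configs = opt.get_configs()
uint_configs['Optimizer.epoch'] = 10  # hypothetical value
opt.set_configs(uint_configs, float_configs)
assert opt.get_configs()[0]['Optimizer.epoch'] == 10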
Example #7
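# Full RNN language-model training loop on Penn Treebank: builds the vocab
# and batches, registers the model with Adam via add_model(), and reports
# train/valid perplexity after each epoch.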
def main():
    # Loads vocab.
    vocab = make_vocab("data/ptb.train.txt")
    print("#vocab:", len(vocab))  # maybe 10000
    eos_id = vocab["<s>"]

    # Loads all corpus.
    train_corpus = load_corpus("data/ptb.train.txt", vocab)
    valid_corpus = load_corpus("data/ptb.valid.txt", vocab)
    num_train_sents = len(train_corpus)
    num_valid_sents = len(valid_corpus)
    num_train_labels = count_labels(train_corpus)
    num_valid_labels = count_labels(valid_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    # Device and computation graph.
    dev = D.CUDA(0)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    # Our LM.
    lm = RNNLM(len(vocab), eos_id)

    # Optimizer.
    optimizer = O.Adam()
    optimizer.set_weight_decay(1e-6)
    optimizer.set_gradient_clipping(5)
    optimizer.add_model(lm)

    # Sentence IDs.
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        print("epoch", (epoch + 1), "/", MAX_EPOCH, ":")
        # Shuffles train sentence IDs.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            batch_ids = train_ids[ofs:min(ofs + BATCH_SIZE, num_train_sents)]
            batch = make_batch(train_corpus, batch_ids, eos_id)

            g.clear()

            outputs = lm.forward(batch)
            loss = lm.forward_loss(outputs, batch)
            train_loss += loss.to_float() * len(batch_ids)

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

            print("%d" % ofs, end="\r")
            sys.stdout.flush()

        train_ppl = math.exp(train_loss / num_train_labels)
        print("  train ppl =", train_ppl)

        # Validation.
        valid_loss = 0
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            batch_ids = valid_ids[ofs:min(ofs + BATCH_SIZE, num_valid_sents)]
            batch = make_batch(valid_corpus, batch_ids, eos_id)

            g.clear()

            outputs = lm.forward(batch)
            loss = lm.forward_loss(outputs, batch)
            valid_loss += loss.to_float() * len(batch_ids)
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print("  valid ppl =", valid_ppl)