def setUp(self):
     self.device = Naive()
     self.graph = Graph()
     Device.set_default(self.device)
     Graph.set_default(self.graph)
     self.a = np.array([[1, 2], [3, 4]], np.float32)
     self.b = np.array([[1, 1], [4, 8]], np.float32)
Example #2
0
def test_batch(encdec, src_vocab, trg_vocab, lines):
    g = Graph()
    Graph.set_default(g)

    src_batch = make_batch(lines, list(range(len(lines))), src_vocab)

    encdec.encode(src_batch, False)

    # Generates target words one-by-one.
    trg_ids = [np.array([trg_vocab["<bos>"]] * len(lines))]
    eos_id = trg_vocab["<eos>"]
    eos_ids = np.array([eos_id] * len(lines))
    while (trg_ids[-1] != eos_ids).any():
        if len(trg_ids) > GENERATION_LIMIT + 1:
            print("Warning: Sentence generation did not finish in",
                  GENERATION_LIMIT,
                  "iterations.",
                  file=sys.stderr)
            trg_ids.append(eos_ids)
            break
        y = encdec.decode_step(trg_ids[-1], False)
        trg_ids.append(np.array(y.argmax(0)).T)

    return [
        hyp[:np.where(hyp == eos_id)[0][0]] for hyp in np.array(trg_ids[1:]).T
    ]
Example #3
0
 def setUp(self):
     self.device = Naive()
     self.graph = Graph()
     Device.set_default(self.device)
     Graph.set_default(self.graph)
     self.ndarray_data = [
         np.array([
             [1, 2, 3],
             [4, 5, 6],
             [7, 8, 9],
             [10, 11, 12],
         ], np.float32),
         np.array([
             [13, 14, 15],
             [16, 17, 18],
             [19, 20, 21],
             [22, 23, 24],
         ], np.float32),
     ]
     self.list_data = [
         1.0,
         4.0,
         7.0,
         10.0,
         2.0,
         5.0,
         8.0,
         11.0,
         3.0,
         6.0,
         9.0,
         12.0,
         13.0,
         16.0,
         19.0,
         22.0,
         14.0,
         17.0,
         20.0,
         23.0,
         15.0,
         18.0,
         21.0,
         24.0,
     ]
Example #4
0
def train_func(trainer):
    dev = D.Naive(12345)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))

    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for i in range(10):
        g.clear()
        x = F.input(input_data, Shape([2], 4))
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        t = F.input(output_data, Shape([], 4))
        diff = t - y
        loss = F.batch.mean(diff * diff)

        trainer.reset_gradients()
        loss.backward()
        trainer.update()

    return [
        pw1.value.to_list(),
        pb1.value.to_list(),
        pw2.value.to_list(),
        pb2.value.to_list()
    ]
Example #5
0
 def setUp(self):
     self.device = Naive()
     self.graph = Graph()
     Device.set_default(self.device)
     Graph.set_default(self.graph)
     self.input_data = [
         np.array([
             [ 1, 2, 3],
             [ 4, 5, 6],
             [ 7, 8, 9],
         ], np.float32),
         np.array([
             [11,12,13],
             [14,15,16],
             [17,18,19],
         ], np.float32),
     ]
     self.list_expected = [
          1.0,  4.0,  7.0,  2.0,  5.0,  8.0,  3.0,  6.0,  9.0,
         11.0, 14.0, 17.0, 12.0, 15.0, 18.0, 13.0, 16.0, 19.0,
     ]
Example #6
0
def main():
    # Loads vocab.
    vocab = make_vocab("data/ptb.train.txt")
    print("#vocab:", len(vocab))  # maybe 10000
    eos_id = vocab["<s>"]

    # Loads all corpus.
    train_corpus = load_corpus("data/ptb.train.txt", vocab)
    valid_corpus = load_corpus("data/ptb.valid.txt", vocab)
    num_train_sents = len(train_corpus)
    num_valid_sents = len(valid_corpus)
    num_train_labels = count_labels(train_corpus)
    num_valid_labels = count_labels(valid_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    # Device and computation graph.
    dev = D.CUDA(0)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    # Our LM.
    lm = RNNLM(len(vocab), eos_id)

    # Optimizer.
    optimizer = O.SGD(1)
    #optimizer.set_weight_decay(1e-6)
    optimizer.set_gradient_clipping(5)
    optimizer.add(lm)

    # Sentence IDs.
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    best_valid_ppl = 1e10

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        print("epoch", epoch + 1, "/", MAX_EPOCH, ":")
        # Shuffles train sentence IDs.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            batch_ids = train_ids[ofs:min(ofs + BATCH_SIZE, num_train_sents)]
            batch = make_batch(train_corpus, batch_ids, eos_id)

            g.clear()

            outputs = lm.forward(batch, True)
            loss = lm.loss(outputs, batch)
            train_loss += loss.to_float() * len(batch_ids)

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

            print("%d" % ofs, end="\r")
            sys.stdout.flush()

        train_ppl = math.exp(train_loss / num_train_labels)
        print("  train ppl =", train_ppl)

        # Validation.
        valid_loss = 0
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            batch_ids = valid_ids[ofs:min(ofs + BATCH_SIZE, num_valid_sents)]
            batch = make_batch(valid_corpus, batch_ids, eos_id)

            g.clear()

            outputs = lm.forward(batch, False)
            loss = lm.loss(outputs, batch)
            valid_loss += loss.to_float() * len(batch_ids)
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print("  valid ppl =", valid_ppl)

        if valid_ppl < best_valid_ppl:
            best_valid_ppl = valid_ppl
            print("  BEST")
        else:
            old_lr = optimizer.get_learning_rate_scaling()
            new_lr = 0.5 * old_lr
            optimizer.set_learning_rate_scaling(new_lr)
            print("  learning rate scaled:", old_lr, "->", new_lr)
Example #7
0
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Initializes 2 device objects which manage different GPUs.
    dev0 = D.CUDA(0)
    dev1 = D.CUDA(1)

    # Parameters on GPU 0.
    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform(), dev0)
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0), dev0)

    # Parameters on GPU 1.
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform(), dev1)
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0), dev1)

    trainer = T.SGD(.1)
    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    def make_graph(inputs):
        # We first store input values explicitly on GPU 0.
        x = F.input(inputs, device=dev0)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        # The hidden layer is calculated and implicitly stored on GPU 0.
        h_on_gpu0 = F.relu(w1 @ x + b1)
        # `copy()` transfers the hiddne layer to GPU 1.
        h_on_gpu1 = F.copy(h_on_gpu0, dev1)
        # The output layer is calculated and implicitly stored on GPU 1.
        return w2 @ h_on_gpu1 + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            trainer.reset_gradients()
            avg_loss.backward()
            trainer.update()

        print()

        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs)
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if (v > maxval):
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
Example #8
0
def main():
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer
    optimizer = O.SGD(0.1)

    # Registers parameters.
    optimizer.add_parameter(pw1)
    optimizer.add_parameter(pb1)
    optimizer.add_parameter(pw2)
    optimizer.add_parameter(pb2)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),  # Sample 1
        np.array([1, -1], dtype=np.float32),  # Sample 2
        np.array([-1, 1], dtype=np.float32),  # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),  # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),  # Label 4
    ]

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains values.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(4):
            print("  [", j, "]: ", y_val[j])

        # Extends the computation graph to calculate loss values.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
Example #9
0
def train(model, optimizer, config, best_valid):
    max_epoch = config.get("max_epoch", int(1e9))
    max_iteration = config.get("max_iteration", int(1e9))
    max_sentences = config.get("max_sentences", 1e9)
    max_tokens = config.get("max_tokens", 1e9)
    update_freq = config.get('update_freq', 1)

    optimizer.add(model)

    corpus_prefix = Path(config['corpus_prefix']) / "subword"
    model_path = corpus_prefix / "spm.model"
    tokenizer = spm.SentencePieceProcessor()
    tokenizer.Load(str(model_path))
    train_src = load_corpus(corpus_prefix / Path(config["train_source"]).name,
                            tokenizer)
    train_trg = load_corpus(corpus_prefix / Path(config["train_target"]).name,
                            tokenizer)
    train_src, train_trg = clean_corpus(train_src, train_trg, config)
    dev_src = load_corpus(corpus_prefix / Path(config["dev_source"]).name,
                          tokenizer)
    dev_trg = load_corpus(corpus_prefix / Path(config["dev_target"]).name,
                          tokenizer)
    dev_src, dev_trg = clean_corpus(dev_src, dev_trg, config)
    num_train_sents = len(train_src)
    num_dev_sents = len(dev_src)
    eos_id = tokenizer.eos_id()

    epoch = 0
    iteration = 0
    while epoch < max_epoch and iteration < max_iteration:
        epoch += 1
        g = Graph()
        Graph.set_default(g)

        train_itr = create_batch_itr(train_src,
                                     train_trg,
                                     max_tokens,
                                     max_sentences,
                                     shuffle=True)
        train_itr = tqdm(train_itr, desc='train epoch {}'.format(epoch))

        train_loss = 0.
        itr_loss = 0.
        itr_tokens = 0
        itr_sentences = 0
        optimizer.reset_gradients()
        for step, batch_ids in enumerate(train_itr):
            src_batch = make_batch(train_src, batch_ids, eos_id)
            trg_batch = make_batch(train_trg, batch_ids, eos_id)
            src_mask = padding_mask(src_batch, eos_id)
            trg_mask = [
                x | subsequent_mask(len(trg_batch) - 1)
                for x in padding_mask(trg_batch[:-1], eos_id)
            ]
            itr_tokens += len(src_batch) * len(src_batch[0])
            itr_sentences += len(batch_ids)

            g.clear()
            loss = model.loss(src_batch, trg_batch, src_mask, trg_mask)

            loss /= update_freq
            loss.backward()
            loss_val = loss.to_float()
            train_loss += loss_val * update_freq * len(batch_ids)
            itr_loss += loss_val

            # with open('graph.dot', 'w') as f:
            #     print(g.dump("dot"), end="", file=f)

            if (step + 1) % update_freq == 0:
                step_num = optimizer.get_epoch() + 1
                new_scale = config['d_model'] ** (-0.5) * \
                    min(step_num ** (-0.5), step_num * config['warmup_steps'] ** (-1.5))
                optimizer.set_learning_rate_scaling(new_scale)

                optimizer.update()
                optimizer.reset_gradients()

                iteration += 1
                train_itr.set_postfix(itr=("%d" % (iteration)),
                                      loss=("%.3lf" % (itr_loss)),
                                      wpb=("%d" % (itr_tokens)),
                                      spb=("%d" % (itr_sentences)),
                                      lr=optimizer.get_learning_rate_scaling())
                itr_loss = 0.
                itr_tokens = 0
                itr_sentences = 0

            if iteration >= max_iteration:
                break
        print("\ttrain loss = %.4f" % (train_loss / num_train_sents))

        g.clear()
        valid_loss = 0.
        valid_itr = create_batch_itr(dev_src,
                                     dev_trg,
                                     max_tokens,
                                     max_sentences,
                                     shuffle=False)
        valid_itr = tqdm(valid_itr, desc='valid epoch {}'.format(epoch))
        for batch_ids in valid_itr:
            src_batch = make_batch(dev_src, batch_ids, eos_id)
            trg_batch = make_batch(dev_trg, batch_ids, eos_id)
            src_mask = padding_mask(src_batch, eos_id)
            trg_mask = [
                x | subsequent_mask(len(trg_batch) - 1)
                for x in padding_mask(trg_batch[:-1], eos_id)
            ]

            loss = model.loss(src_batch,
                              trg_batch,
                              src_mask,
                              trg_mask,
                              train=False)
            valid_loss += loss.to_float() * len(batch_ids)
            valid_itr.set_postfix(loss=loss.to_float())
        print("\tvalid loss = %.4f" % (valid_loss / num_dev_sents))

        if valid_loss < best_valid:
            best_valid = valid_loss
            print('\tsaving model/optimizer ... ', end="", flush=True)
            prefix = config['model_prefix']
            model.save(prefix + '.model')
            optimizer.save(prefix + '.optimizer')
            with Path(prefix).with_suffix('.valid').open('w') as f:
                f.write(str(best_valid))
            print('done.')
Example #10
0
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform())
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0))
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform())
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0))

    optimizer = O.SGD(.5)
    optimizer.add(pw1, pb1, pw2, pb2)

    def make_graph(inputs, train):
        x = F.input(inputs)

        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        h = F.relu(w1 @ x + b1)

        h = F.dropout(h, .5, train)

        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ h + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs, False)
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if (v > maxval):
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
Example #11
0
def train(encdec, optimizer, prefix, best_valid_ppl):
    # Registers all parameters to the optimizer.
    optimizer.add_model(encdec)

    # Loads vocab.
    src_vocab = make_vocab(SRC_TRAIN_FILE, SRC_VOCAB_SIZE)
    trg_vocab = make_vocab(TRG_TRAIN_FILE, TRG_VOCAB_SIZE)
    inv_trg_vocab = make_inv_vocab(trg_vocab)
    print("#src_vocab:", len(src_vocab))
    print("#trg_vocab:", len(trg_vocab))

    # Loads all corpus
    train_src_corpus = load_corpus(SRC_TRAIN_FILE, src_vocab)
    train_trg_corpus = load_corpus(TRG_TRAIN_FILE, trg_vocab)
    valid_src_corpus = load_corpus(SRC_VALID_FILE, src_vocab)
    valid_trg_corpus = load_corpus(TRG_VALID_FILE, trg_vocab)
    test_src_corpus = load_corpus(SRC_TEST_FILE, src_vocab)
    test_ref_corpus = load_corpus_ref(REF_TEST_FILE, trg_vocab)
    num_train_sents = len(train_trg_corpus)
    num_valid_sents = len(valid_trg_corpus)
    num_test_sents = len(test_ref_corpus)
    num_train_labels = count_labels(train_trg_corpus)
    num_valid_labels = count_labels(valid_trg_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    # Sentence IDs
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        # Computation graph.
        g = Graph()
        Graph.set_default(g)

        print("epoch %d/%d:" % (epoch + 1, MAX_EPOCH))
        print("  learning rate scale = %.4e" %
              optimizer.get_learning_rate_scaling())

        # Shuffles train sentence IDs.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0.
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            batch_ids = train_ids[ofs:min(ofs + BATCH_SIZE, num_train_sents)]
            src_batch = make_batch(train_src_corpus, batch_ids, src_vocab)
            trg_batch = make_batch(train_trg_corpus, batch_ids, trg_vocab)

            g.clear()
            encdec.encode(src_batch, True)
            loss = encdec.loss(trg_batch, True)
            train_loss += loss.to_float() * len(batch_ids)

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

        train_ppl = math.exp(train_loss / num_train_labels)
        print("  train PPL = %.4f" % train_ppl)

        # Validation.
        valid_loss = 0.
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            batch_ids = valid_ids[ofs:min(ofs + BATCH_SIZE, num_valid_sents)]
            src_batch = make_batch(valid_src_corpus, batch_ids, src_vocab)
            trg_batch = make_batch(valid_trg_corpus, batch_ids, trg_vocab)

            g.clear()
            encdec.encode(src_batch, False)
            loss = encdec.loss(trg_batch, False)
            valid_loss += loss.to_float() * len(batch_ids)

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print("  valid PPL = %.4f" % valid_ppl)

        # Calculates test BLEU.
        stats = defaultdict(int)
        for ofs in range(0, num_test_sents, BATCH_SIZE):
            print("%d" % ofs, end="\r")
            sys.stdout.flush()

            src_batch = test_src_corpus[ofs:min(ofs +
                                                BATCH_SIZE, num_test_sents)]
            ref_batch = test_ref_corpus[ofs:min(ofs +
                                                BATCH_SIZE, num_test_sents)]

            hyp_ids = test_batch(encdec, src_vocab, trg_vocab, src_batch)
            for hyp_line, ref_line in zip(hyp_ids, ref_batch):
                for k, v in get_bleu_stats(ref_line[1:-1], hyp_line).items():
                    stats[k] += v

        bleu = calculate_bleu(stats)
        print("  test BLEU = %.2f" % (100 * bleu))

        # Saves best model/optimizer.
        if valid_ppl < best_valid_ppl:
            best_valid_ppl = valid_ppl
            print("  saving model/optimizer ... ", end="")
            sys.stdout.flush()
            encdec.save(prefix + ".model")
            optimizer.save(prefix + ".optimizer")
            save_ppl(prefix + ".valid_ppl", best_valid_ppl)
            print("done.")
        else:
            # Learning rate decay by 1/sqrt(2)
            new_scale = .7071 * optimizer.get_learning_rate_scaling()
            optimizer.set_learning_rate_scaling(new_scale)
Example #12
0
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte",
                               NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte",
                               NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    dev = D.CUDA(0)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    # Parameters of CNNs
    # Shape: {kernel_height, kernel_width, in_channels, out_channels}
    pw_cnn1 = Parameter(Shape([KERNEL_SIZE1, KERNEL_SIZE1, 1, NUM_CHANNELS1]),
                        I.XavierUniformConv2D())
    pw_cnn2 = Parameter(
        Shape([KERNEL_SIZE2, KERNEL_SIZE2, NUM_CHANNELS1, NUM_CHANNELS2]),
        I.XavierUniformConv2D())

    # Parameters of FC layers
    pw_fc1 = Parameter(Shape([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS]),
                       I.XavierUniform())
    pw_fc2 = Parameter(Shape([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS]),
                       I.XavierUniform())
    pb_fc1 = Parameter(Shape([NUM_HIDDEN_UNITS]), I.Constant(0))
    pb_fc2 = Parameter(Shape([NUM_OUTPUT_UNITS]), I.Constant(0))

    # Optimizer
    optimizer = O.SGD(.1)
    optimizer.add(pw_cnn1, pw_cnn2, pw_fc1, pw_fc2, pb_fc1, pb_fc2)

    # Helper lambda to construct the predictor network.
    def make_graph(inputs, train):
        # Input and parameters.
        #x = F.input(Shape([IMAGE_HEIGHT, IMAGE_WIDTH], BATCH_SIZE), inputs)
        x = F.input(inputs)
        w_cnn1 = F.parameter(pw_cnn1)
        w_cnn2 = F.parameter(pw_cnn2)
        w_fc1 = F.parameter(pw_fc1)
        w_fc2 = F.parameter(pw_fc2)
        b_fc1 = F.parameter(pb_fc1)
        b_fc2 = F.parameter(pb_fc2)
        # CNNs
        h_cnn1 = F.relu(F.conv2d(x, w_cnn1, PADDING1, PADDING1, 1, 1, 1, 1))
        h_pool1 = F.max_pool2d(h_cnn1, 2, 2, 0, 0, 2, 2)
        h_cnn2 = F.relu(
            F.conv2d(h_pool1, w_cnn2, PADDING2, PADDING2, 1, 1, 1, 1))
        h_pool2 = F.max_pool2d(h_cnn2, 2, 2, 0, 0, 2, 2)
        # FC layers
        x_fc = F.dropout(F.flatten(h_pool2), .5, train)
        h_fc = F.dropout(F.relu(F.matmul(w_fc1, x_fc) + b_fc1), .5, train)
        return F.matmul(w_fc2, h_fc) + b_fc2

    # Batch randomizer
    ids = list(range(NUM_TRAIN_SAMPLES))

    for epoch in range(MAX_EPOCH):
        # Shuffles sample IDs.
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES),
                  end="")
            # Makes a minibatch for training.
            inputs = [
                train_inputs[ids[batch * BATCH_SIZE + i]]
                for i in range(BATCH_SIZE)
            ]
            labels = [
                train_labels[ids[batch * BATCH_SIZE + i]]
                for i in range(BATCH_SIZE)
            ]

            # Constructs the graph.
            g.clear()
            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            # Dump computation graph at the first time.
            # if epoch == 0 and batch == 0:
            #     print(g.dump("dot"))

            # Implicit forward, backward, and updates parameters.
            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES),
                  end="")
            # Makes a test minibatch.
            inputs = [
                test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)
            ]

            # Constructs the graph.
            g.clear()
            y = make_graph(inputs, False)

            # Gets outputs, argmax, and compares them with the label.
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if v > maxval:
                        maxval = v
                        argmax = j

                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("epoch %d: accuracy: %.2f%%" % (epoch, accuracy))

    return 0
Example #13
0
 def setUp(self):
     self.device = Naive()
     Device.set_default(self.device)
     self.graph = Graph()
     Graph.set_default(self.graph)
Example #14
0
def main():
    # Loads vocab.
    vocab = make_vocab("data/ptb.train.txt")
    print("#vocab:", len(vocab))  # maybe 10000
    eos_id = vocab["<s>"]

    # Loads all corpus.
    train_corpus = load_corpus("data/ptb.train.txt", vocab)
    valid_corpus = load_corpus("data/ptb.valid.txt", vocab)
    num_train_sents = len(train_corpus)
    num_valid_sents = len(valid_corpus)
    num_train_labels = count_labels(train_corpus)
    num_valid_labels = count_labels(valid_corpus)
    print("train:", num_train_sents, "sentences,", num_train_labels, "labels")
    print("valid:", num_valid_sents, "sentences,", num_valid_labels, "labels")

    dev = D.CUDA(0)
    Device.set_default(dev)

    # Trainer.
    trainer = T.Adam()
    trainer.set_weight_decay(1e-6)
    trainer.set_gradient_clipping(5)

    # Our LM.
    lm = RNNLM(len(vocab), eos_id, trainer)

    # Sentence IDs.
    train_ids = list(range(num_train_sents))
    valid_ids = list(range(num_valid_sents))

    g = Graph()
    Graph.set_default(g)

    # Train/valid loop.
    for epoch in range(MAX_EPOCH):
        print("epoch", (epoch + 1), "/", MAX_EPOCH, ":")
        # Shuffles train sentence IDs.
        random.shuffle(train_ids)

        # Training.
        train_loss = 0
        for ofs in range(0, num_train_sents, BATCH_SIZE):
            batch_ids = train_ids[ofs:min(ofs + BATCH_SIZE, num_train_sents)]
            batch = make_batch(train_corpus, batch_ids, eos_id)

            g.clear()

            outputs = lm.forward(batch)
            loss = lm.forward_loss(outputs, batch)
            train_loss += loss.to_float() * len(batch_ids)

            trainer.reset_gradients()
            loss.backward()
            trainer.update()

            print("\r%d" % ofs, end="")
            sys.stdout.flush()

        print()

        train_ppl = math.exp(train_loss / num_train_labels)
        print("  train ppl =", train_ppl)

        # Validation.
        valid_loss = 0
        for ofs in range(0, num_valid_sents, BATCH_SIZE):
            batch_ids = valid_ids[ofs:min(ofs + BATCH_SIZE, num_valid_sents)]
            batch = make_batch(valid_corpus, batch_ids, eos_id)

            g.clear()

            outputs = lm.forward(batch)
            loss = lm.forward_loss(outputs, batch)
            valid_loss += loss.to_float() * len(batch_ids)
            print("\r%d" % ofs, end="")
            sys.stdout.flush()

        print()

        valid_ppl = math.exp(valid_loss / num_valid_labels)
        print("  valid ppl =", valid_ppl)