Ejemplo n.º 1
0
    def _run_feg(self, X, X_val):
        """Estimate the free energy gap (FEG) between validation and
        training subsets, as proposed in [2], to monitor overfitting.

        The gap stays close to zero while the model generalizes; once it
        starts growing, its magnitude quantifies the amount of overfitting.
        """
        self._free_energy_op = tf.get_collection('free_energy_op')[0]

        def _batch_free_energies(data):
            # Free energy per batch, over a fixed number of batches.
            n_batches = self.metrics_config['n_batches_for_feg']
            energies = []
            batches = batch_iter(data, batch_size=self.batch_size)
            for _, batch in zip(range(n_batches), batches):
                fe = self._tf_session.run(
                    self._free_energy_op,
                    feed_dict=self._make_tf_feed_dict(batch))
                energies.append(fe)
            return energies

        train_fes = _batch_free_energies(X)
        val_fes = _batch_free_energies(X_val)

        feg = np.mean(val_fes) - np.mean(train_fes)
        feg_summary = summary_pb2.Summary(value=[
            summary_pb2.Summary.Value(tag=self._metrics_names_map['feg'],
                                      simple_value=feg)
        ])
        self._tf_val_writer.add_summary(feg_summary, self.iter_)
        return feg
Ejemplo n.º 2
0
    def evaluate_ppl(self, dev_data, batch_size: int=32, encoder_only=False, decoder_only=False, **kwargs):
        """Evaluate perplexity on the dev set.

        When ``encoder_only`` is set, perplexity is computed from the
        encoder loss over source words; otherwise the decoder loss
        (``decoder_only``) or the full model loss over target words is
        used.

        NOTE(review): callers may want to wrap this in a no-grad context
        (e.g. `torch.no_grad()`) since no parameters are updated here.
        """
        cum_loss = 0.

        if encoder_only:
            cum_src_words = 0.
            for src_sents, tgt_sents, key in batch_iter(dev_data, batch_size):
                cum_loss += self.encode_to_loss(src_sents, update_params=False)
                # omitting the leading `<s>` of every source sentence
                cum_src_words += sum(len(s[1:]) for s in src_sents)
            return np.exp(cum_loss / cum_src_words)

        cum_tgt_words = 0.
        for src_sents, tgt_sents, key in batch_iter(dev_data, batch_size):
            if decoder_only:
                loss = self.decode_to_loss(tgt_sents, update_params=False)
            else:
                loss = self(src_sents, tgt_sents, key=key, update_params=False)
            cum_loss += loss
            # omitting the leading `<s>` of every target sentence
            cum_tgt_words += sum(len(s[1:]) for s in tgt_sents)

        return np.exp(cum_loss / cum_tgt_words)
Ejemplo n.º 3
0
def train_per_epoch(model,
                    sess,
                    train_data,
                    train_labels,
                    epoch,
                    test_data,
                    test_labels,
                    loss,
                    batch_size=64,
                    model_type='rnn'):
    """Run one training epoch, then evaluate on the dev set.

    RNN batches carry sequence lengths; other model types do not — the
    feed dict is built accordingly. Returns
    (avg_train_loss, last_train_acc, valid_loss, valid_acc).
    """
    loss_meter = AverageMeter()
    n_minibatches = math.ceil(len(train_data) / batch_size)
    print(f'Epoch{epoch}')
    is_rnn = model_type == 'rnn'
    with tqdm(total=n_minibatches) as prog:
        for batch in batch_iter(train_data,
                                train_labels,
                                batch_size,
                                use_for=model_type):
            if is_rnn:
                sent, sent_lengths, y_true = batch
                feed = {
                    'sent:0': sent,
                    'sent_lengths:0': sent_lengths,
                    'y_true:0': y_true
                }
            else:
                sent, y_true = batch
                feed = {'sent:0': sent, 'y_true:0': y_true}
            loss_train, train_acc, _ = sess.run(
                [loss, 'accuracy:0', 'train_step'], feed_dict=feed)
            prog.update(1)
            loss_meter.update(loss_train.item())

    print("Average Train Loss: {}".format(loss_meter.avg))
    print('- train_accuracy: {:.2f}'.format(train_acc * 100.0))
    print("Evaluating on dev set", )
    # switch the model's train flag off for evaluation, then back on
    if model.train == True:
        model.train = False
    valid_acc, valid_loss = evaluate(sess,
                                     test_data,
                                     test_labels,
                                     loss,
                                     model_type=model_type)
    model.train = True
    print("- valid_accuracy: {:.2f}".format(valid_acc * 100.0))
    print("- valid_loss: {:.2f}".format(valid_loss))
    return loss_meter.avg, train_acc, valid_loss, valid_acc
Ejemplo n.º 4
0
Archivo: main.py Proyecto: zhyq/acnn
def main(_):
    """Build train/valid model graphs sharing one variable scope, then
    either run a single test pass (config.test_only) or train for
    config.num_epoches epochs, evaluating after each one."""
    embeddings, train_vec, test_vec = init()
    batch_sz = config.batch_size

    with tf.Graph().as_default():
        with tf.name_scope("Train"):
            with tf.variable_scope("Model", reuse=None):
                train_model = Model(config, embeddings, is_training=True)

        with tf.name_scope("Valid"):
            # reuse=True shares weights with the training model
            with tf.variable_scope("Model", reuse=True):
                valid_model = Model(config, embeddings, is_training=False)

        sv = tf.train.Supervisor(logdir=config.save_path,
                                 global_step=train_model.global_step)
        with sv.managed_session() as session:
            if config.test_only:
                test_iter = utils.batch_iter(list(zip(*test_vec)),
                                             batch_sz,
                                             shuffle=False)
                test_acc = run_epoch(session,
                                     valid_model,
                                     test_iter,
                                     is_training=False)
                print("test acc: %.3f" % test_acc)
            else:
                for epoch in range(config.num_epoches):
                    train_iter = utils.batch_iter(list(zip(*train_vec)),
                                                  batch_sz,
                                                  shuffle=True)
                    test_iter = utils.batch_iter(list(zip(*test_vec)),
                                                 batch_sz,
                                                 shuffle=False)
                    train_acc = run_epoch(session,
                                          train_model,
                                          train_iter,
                                          verbose=False)
                    test_acc = run_epoch(session,
                                         valid_model,
                                         test_iter,
                                         is_training=False)
                    logging.info("Epoch: %d Train: %.2f%% Test: %.2f%%" %
                                 (epoch + 1, train_acc * 100, test_acc * 100))
                if config.save_path:
                    sv.saver.save(session,
                                  config.save_path,
                                  global_step=sv.global_step)
Ejemplo n.º 5
0
def load_data(path):
    """Load (or build and cache) train/valid/test batch iterators, plus
    law-article features derived from the law corpus.

    Side effects: sets the module-level *_step_per_epoch globals and
    model_config.n_law.

    @param path: pickle cache path for the raw batch lists
    @return (batches_train, batches_val, batches_test, laws,
             laws_doc_len, laws_sent_len)
    """
    if os.path.exists(path):
        # BUG FIX: use context managers so file handles are closed
        # deterministically (previously `open(...)` was never closed).
        with open(path, 'rb') as f:
            batches_train, batches_val, batches_test = pickle.load(f)
    else:
        batches_train = process('train')
        batches_val = process('valid')
        batches_test = process('test')
        with open(path, 'wb') as f:
            pickle.dump([batches_train, batches_val, batches_test], f)
    global train_step_per_epoch, val_step_per_epoch, test_step_per_epoch
    # `int((n - .1) / bs) + 1` is a ceiling division that still yields 1
    # for empty data — kept as-is to preserve downstream expectations.
    train_step_per_epoch = int(
        (len(batches_train) - .1) / model_config.batch_size) + 1
    val_step_per_epoch = int(
        (len(batches_val) - .1) / model_config.batch_size) + 1
    test_step_per_epoch = int(
        (len(batches_test) - .1) / model_config.batch_size) + 1
    batches_train = utils.batch_iter(batches_train, model_config.batch_size,
                                     num_epochs)
    batches_val = utils.batch_iter(batches_val,
                                   model_config.batch_size,
                                   num_epochs,
                                   shuffle=False)
    batches_test = utils.batch_iter(batches_test,
                                    model_config.batch_size,
                                    num_epochs,
                                    shuffle=False)

    law_list = utils.law_to_list(law_path)
    laws = utils.cut_law(law_list, filter=law_class, cut_sentence=True)

    model_config.n_law = len(laws)
    laws = list(zip(*laws))
    # per-article document length, clipped to law_doc_len
    laws_doc_len = [
        len(i)
        if len(i) < model_config.law_doc_len else model_config.law_doc_len
        for i in laws[-1]
    ]
    laws_sent_len = utils.trun_n_words(laws[-1], model_config.law_sent_len)
    laws_sent_len = utils.align_flatten2d(laws_sent_len,
                                          model_config.law_doc_len,
                                          flatten=False)
    laws = utils.lookup_index_for_sentences(laws[-2], word2id,
                                            model_config.law_doc_len,
                                            model_config.law_sent_len)

    return batches_train, batches_val, batches_test, laws, laws_doc_len, laws_sent_len
Ejemplo n.º 6
0
 def test_batch_iter_3(self):
     """
     Two shuffled passes over the same data must yield the same set of
     elements but in a different order.
     """
     data = list(range(16))
     first_pass = []
     for batch in batch_iter(data, batch_size=4, shuffle=True):
         first_pass.extend(batch)
     second_pass = []
     for batch in batch_iter(data, batch_size=4, shuffle=True):
         second_pass.extend(batch)
     self.assertEqual(set(first_pass), set(second_pass))
     # orders should differ, so exact array equality must fail
     with self.assertRaises(AssertionError):
         np.testing.assert_array_equal(first_pass, second_pass)
def test(args):
    """Load a saved NMT sentiment model, evaluate it on the test split,
    and print dataset size, accuracy, and average loss."""
    print("load model from {}".format(args["MODEL_PATH"]), file=sys.stderr)
    model = NMT.load(args["MODEL_PATH"])

    if args["--cuda"]:
        model = model.to(torch.device("cuda:0"))

    binary = int(args["--num-classes"]) == 2
    test_data = load_test_data(binary=binary)
    batch_size = int(args["--batch-size"])

    n_correct = 0
    total_score = 0

    with torch.no_grad():
        for sentences, sentiments in batch_iter(test_data, batch_size):
            # compute_accuracy returns a fraction; scale back to a count
            n_correct += model.compute_accuracy(sentences,
                                                sentiments) * len(sentences)
            total_score += -model(sentences, sentiments).sum()

    print("test dataset size: %d" % len(test_data))
    print("accuracy: %f" % (n_correct / len(test_data)))
    print("loss: %f" % (total_score / len(test_data)))
Ejemplo n.º 8
0
def validate(model, dev_src, dev_tgt, lang, batch_size=32):
    """Compute average cross-entropy per target action on the dev set.

    The model's training mode is saved and restored around evaluation.

    @param model: model to evaluate
    @param dev_src (list(list[str])): source sentences (token lists)
    @param dev_tgt (list[str]): target sentences
    @param lang: target language
    @param batch_size: evaluation batch size
    @return dev_loss (float): cross entropy loss on dev set
    """
    was_training = model.training
    model.eval()

    total_loss = .0
    total_actions = 0

    with torch.no_grad():
        for src_sents, tgt_nodes, tgt_tokens, tgt_actions in batch_iter(
                dev_src, dev_tgt, lang, batch_size):
            total_loss += -model(src_sents, tgt_nodes, tgt_tokens,
                                 tgt_actions).sum()
            total_actions += sum(len(actions) for actions in tgt_actions)

        dev_loss = total_loss / total_actions

    if was_training:
        model.train()

    return dev_loss
Ejemplo n.º 9
0
def test(test_data, labels, model, device, batch=1, training=0, embeddings=None):
    """Evaluate per-token accuracy of `model.search` on test_data.

    @param test_data: input sequences
    @param labels: gold label sequences aligned with test_data
    @param model: model exposing .vocab, .embed_size, and .search()
    @param device: torch device inputs are moved to
    @param batch: batch size (default 1)
    @param training: if truthy, restore train mode afterwards
    @param embeddings: optional embedding module; loaded from the
        word2vec file when None
    @return correct_rate (float): fraction of correctly predicted tokens
    """
    model.eval()

    # BUG FIX: compare to None with `is`, not `==` — equality on a
    # module/tensor-like object is un-idiomatic and may not mean identity.
    if embeddings is None:
        embeddings = loadEmbeddings(model.vocab, model.embed_size,
                                './data/word2vec.6B.100d.txt')

    count, correct_count = 0, 0
    with torch.no_grad():
        for test_x, test_y in batch_iter(test_data, labels, batch):

            test_x = model.vocab.to_input_tensor(test_x)
            test_x = embeddings(test_x).to(device)

            output = model.search(test_x)

            # NOTE(review): only the first sequence of the batch is scored,
            # which is correct only for batch == 1 — confirm with callers.
            test_y = test_y[0]
            for i in range(len(test_y)):
                count += 1
                if test_y[i] == output[i]:
                    correct_count += 1

        correct_rate = 1.*correct_count/count
        print('the corrent rate is : ', correct_rate)

    if training:
        model.train()
    return correct_rate
Ejemplo n.º 10
0
def train():
    """Train the model over config.num_epochs epochs, periodically
    evaluating train accuracy/loss with dropout disabled.

    Side effects: creates model_dir if missing; advances global_steps.
    """
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    global_steps = 0
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    for epoch in range(config.num_epochs):
        print("Epoch: {}".format(epoch + 1))
        batch_data = batch_iter(x_train, y_train, config.batch_size)
        for batch_x, batch_y in batch_data:
            feed_dict = feed_data(batch_x, batch_y, config.dropout_prob)

            if global_steps % config.print_per_batch == 0:
                # disable dropout when computing metrics
                feed_dict[model.dropout] = 1.0
                train_acc, train_loss = session.run([model.acc, model.loss],
                                                    feed_dict=feed_dict)
                # BUG FIX: print only when metrics were just computed.
                # Previously this ran on every batch, printing stale
                # values (the original also swapped acc/loss unpacking
                # order vs. the run list; fixed to match [acc, loss]).
                message = "train acc: {0}"
                print(message.format(train_acc))

            # restore dropout for the actual optimization step
            feed_dict[model.dropout] = config.dropout_prob

            session.run(model.optm, feed_dict)
            global_steps += 1
Ejemplo n.º 11
0
    def _parallel_train(self, fold_n, x_train, x_test):
        """Train an autoencoder for one CV fold, keep the best model by
        score, then reload the saved best model and return its embedding."""
        print('fold_n:{}'.format(fold_n))
        optimizer = Adam(0.01)
        self.model, self.emb = self.create_model()
        self.model.compile(optimizer=optimizer, loss=self.loss)
        patient = 0
        best_score = 0

        for epoch in range(self.epoch):
            # reconstruction training: inputs double as targets
            for index in utils.batch_iter(x_train, self.batch_size, 1):
                self.model.train_on_batch(x_train[index], x_train[index])

            # persist the best reconstruction and embedding models so far
            score, best_score, patient = self.save_best_model(
                best_score, x_test, patient, fold_n)
            # early stopping: good-enough score or prolonged stagnation
            if (patient > 25 and best_score > 0.7) or patient > 50:
                break
            print(score, best_score)
        print("fold_n:{}, score:{}".format(fold_n + 1, best_score))

        self.model = load_model('dataset/output/model' + str(fold_n) + '.h5',
                                custom_objects={'loss_high_order': self.loss})
        return self.embedding(fold_n)
Ejemplo n.º 12
0
    def evaluate_ppl(self, dev_data: List[PairedData], batch_size: int=32):
        """
        Evaluate perplexity on dev sentences.

        Args:
            dev_data: a list of dev sentences
            batch_size: batch size

        Returns:
            (ppl, output, all_tgt_sents): perplexity, the model's best
            hypotheses, and the corresponding target references.
        """
        cum_loss = 0.
        cum_tgt_words = 0.
        hypotheses = []
        references = []
        with torch.no_grad():
            for src_lang, tgt_lang, src_sents, tgt_sents in batch_iter(dev_data, batch_size):
                loss, best_sents = self(src_lang, tgt_lang, src_sents, tgt_sents)
                hypotheses += best_sents
                references += tgt_sents
                cum_loss += loss.sum()
                # omitting the leading `<s>` of every target sentence
                cum_tgt_words += sum(len(s[1:]) for s in tgt_sents)

            return np.exp(cum_loss / cum_tgt_words), hypotheses, references
Ejemplo n.º 13
0
 def test_batch_iter_1(self):
     """
     batch_iter must return every element exactly once, in batches of 4,
     with only the final batch allowed to be short.
     """
     for data in [list(range(16)), list(range(15)), list(range(13))]:
         for shuffle in [True, False]:
             seen = set()
             n_batches = 0
             expected_batches = ceil(len(data) / 4)
             for batch_n, batch in enumerate(batch_iter(data,
                                                        batch_size=4,
                                                        shuffle=shuffle)):
                 if batch_n == expected_batches - 1 and len(data) % 4 != 0:
                     # final short batch holds the remainder
                     self.assertEqual(len(batch), len(data) % 4)
                 else:
                     self.assertEqual(len(batch), 4)
                 # no element may repeat across batches
                 self.assertEqual(len(seen.intersection(set(batch))), 0)
                 seen = seen.union(set(batch))
                 n_batches += 1
             self.assertEqual(n_batches, expected_batches)
             np.testing.assert_array_equal(list(seen), data)
Ejemplo n.º 14
0
def test(args):
    """Evaluate a saved paraphrase-detection model and print accuracy.

    NOTE(review): `network`/`test_data` are only defined when
    args['--data'] == 'quora'; other datasets would raise NameError
    (behavior unchanged from the original).
    """
    test_path = args['--test-src']
    model_path = args['--model-path']
    batch_size = int(args['--batch-size'])
    vocab_path = args['--vocab-src']
    total_examples = 0
    total_correct = 0
    softmax = torch.nn.Softmax(dim=1)

    if args['--data'] == 'quora':
        test_data = utils.read_data(test_path, 'quora')
        vocab_data = utils.load_vocab(vocab_path)
        network = Model(args, vocab_data, 2)
        network.model = torch.load(model_path)

    if args['--cuda'] == str(1):
        network.model = network.model.cuda()
        softmax = softmax.cuda()

    network.model.eval()
    for labels, p1, p2, idx in utils.batch_iter(test_data, batch_size):
        total_examples += len(labels)
        print(total_examples)
        logits, _ = network.forward(labels, p1, p2)
        probs = softmax(logits)
        _, pred = probs.max(dim=1)
        label = network.get_label(labels)
        total_correct += (pred == label).sum().float()
    final_acc = total_correct / total_examples
    print('Accuracy of the model is %.2f' % (final_acc), file=sys.stderr)
Ejemplo n.º 15
0
    def _train_epoch(self, X):
        """Run one epoch of training over X and return a dict mapping each
        train metric name to its mean value (None if never computed)."""
        results = [[] for _ in self._train_metrics_map]
        for X_batch in batch_iter(X, self.batch_size, verbose=self.verbose):
            self.iter_ += 1
            feed = self._make_tf_feed_dict(X_batch)

            if self.iter_ % self.metrics_config[
                    'train_metrics_every_iter'] == 0:
                # Metrics iteration: fetch metric ops together with the
                # merged summaries and the training op in one session.run.
                metric_ops = [
                    v for _, v in sorted(self._train_metrics_map.items())
                ]
                outputs = self._tf_session.run(
                    metric_ops + [self._tf_merged_summaries, self._train_op],
                    feed_dict=feed)
                n_metrics = len(self._train_metrics_map)
                for bucket, value in zip(results, outputs[:n_metrics]):
                    bucket.append(value)
                self._tf_train_writer.add_summary(outputs[n_metrics],
                                                  self.iter_)
            else:
                # plain training step, no metric evaluation
                self._tf_session.run([self._train_op], feed_dict=feed)

        # aggregate per-metric means, keeping metric-name order stable
        aggregated = map(lambda r: np.mean(r) if r else None, results)
        return dict(zip(sorted(self._train_metrics_map), aggregated))
Ejemplo n.º 16
0
def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on dev sentences.
    :param NMT model: NMT model to evaluate
    :param List[tuple(src_sent, tgt_sent)] dev_data: source/target sentence pairs
    :param int batch_size: size of the batch
    :return float: perplexity on dev sentences
    """
    was_training = model.training
    model.eval()

    total_loss = 0.
    total_tgt_words = 0.

    # gradients are not needed during evaluation
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            total_loss += (-model(src_sents, tgt_sents).sum()).item()
            # each target sentence starts with `<s>`, which is not predicted
            total_tgt_words += sum(len(s[1:]) for s in tgt_sents)

        ppl = np.exp(total_loss / total_tgt_words)

    if was_training:
        model.train()

    return ppl
Ejemplo n.º 17
0
    def evaluate_ppl(self, dev_data: List[Any], batch_size: int = 32):
        """
        Evaluate perplexity on dev sentences.

        Args:
            dev_data: a list of dev sentences
            batch_size: batch size

        Returns:
            (cum_loss, ppl): the accumulated loss and the perplexity on
            dev sentences
        """
        self.set_model_to_eval()

        total_loss = 0.
        total_tgt_words = 0.

        # gradient tracking is suppressed via keep_grad=False below;
        # callers may additionally wrap this in a no-grad context
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            loss, num_words = self.__call__(src_sents, tgt_sents, keep_grad=False)
            total_loss += loss.detach().cpu().numpy() * num_words
            # the leading `<s>` of each target sentence is not predicted
            total_tgt_words += sum(len(s[1:]) for s in tgt_sents)

        ppl = np.exp(total_loss / total_tgt_words)

        return total_loss, ppl
Ejemplo n.º 18
0
def train_step(model, loss_fn, optimizer, train_data, batch_size=32, device="cpu"):
    """
    Train the model for 1 epoch and return the average training loss.
    """
    model.train()
    running_loss = 0.0
    started_at = time.time()
    total_step = math.ceil(len(train_data) / batch_size)

    for step, batch in enumerate(batch_iter(train_data, batch_size=batch_size, shuffle=True)):
        # periodic progress logging
        if step % 250 == 0 and not step == 0:
            elapsed_since = time.time() - started_at
            logger.info("Batch {}/{}\tElapsed since: {}".format(step, total_step, 
                                                          str(datetime.timedelta(seconds=round(elapsed_since)))))
        sents, tags = batch
        optimizer.zero_grad()
        batch_loss = model.loss(sents, tags)
        running_loss += batch_loss.item()
        batch_loss.backward()
        # clip gradients to stabilize training
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

    return running_loss / total_step
Ejemplo n.º 19
0
def test(test_data, model, device, weight, training=0):
    """Evaluate the language model on test_data, using perplexity as the
    evaluation indicator; optionally restore train mode afterwards."""
    model.eval()
    cum_loss = 0.
    with torch.no_grad():
        for ndata in batch_iter(test_data, 1):
            ndata = model.vocab.to_input_tensor(ndata).to(device)
            # next-token prediction: inputs drop the last token,
            # targets drop the first
            inputs = ndata[:, :-1]
            targets = ndata[:, 1:]
            output = model(inputs)

            cum_loss += loss_function(output, targets, weight).item()

        m = len(test_data)

        # perplexity = e ** (mean loss per sentence)
        Hp = math.pow(math.e, 1. / m * cum_loss)
        print('the perplexity is : ', Hp)

    if training:
        model.train()
    return Hp
Ejemplo n.º 20
0
Archivo: train.py Proyecto: snnclsr/nmt
def evaluate_ppl(model, valid_data, batch_size=32):
    """ 
    Evaluate the perplexity on valid sentences.
    model: Seq2Seq Model
    valid_data: list of tuples containing source and target sentence
    batch_size: batch size
    @return ppl (float): perplexity on the validation sentences
    """
    was_training = model.training
    model.eval()

    cum_loss = 0.
    cum_tgt_words = 0.

    # no_grad() signals backend to throw away all gradients
    with torch.no_grad():
        # BUG FIX: iterate over `valid_data` (the actual parameter);
        # the original referenced an undefined name `dev_data`, which
        # raises NameError at runtime.
        for src_sents, tgt_sents in batch_iter(valid_data, batch_size):
            loss = -model(src_sents, tgt_sents).sum()

            cum_loss += float(loss)
            # the leading `<s>` of each target sentence is not predicted
            tgt_word_num_to_predict = sum(len(s[1:]) for s in tgt_sents)
            cum_tgt_words += tgt_word_num_to_predict

        ppl = np.exp(cum_loss / cum_tgt_words)

    if was_training:
        model.train()

    return ppl
Ejemplo n.º 21
0
def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on dev sentences.
    @param model (NMT): NMT model to evaluate
    @param dev_data (list of (src_sent, tgt_sent)): source/target sentence pairs
    @param batch_size (int): evaluation batch size
    @returns ppl (float): perplexity on dev sentences
    """
    was_training = model.training
    model.eval()

    total_loss = 0.
    total_examples = 0.
    total_tgt_words = 0.

    # disable gradient tracking during evaluation
    with torch.no_grad():
        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            total_loss += (-model(src_sents, tgt_sents).sum()).item()
            total_examples += batch_size
            # leading `<s>` tokens are not predicted
            total_tgt_words += sum(len(s[1:]) for s in tgt_sents)

        ppl = np.exp(total_loss / total_tgt_words)

    if was_training:
        model.train()

    return ppl
Ejemplo n.º 22
0
def train(model, train_data, val_data, args):
    """Train the sense-disambiguation model with SGD.

    @param model: model mapping (contexts, words) -> sense scores
    @param train_data: training examples consumed by batch_iter
    @param val_data: validation data passed to validate()
    @param args: namespace with lr, max_epochs, batch_size, clip_grad,
        print_iter, val_iter
    """
    model.train()
    optimizer = torch.optim.SGD(model.parameters(), args.lr)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(args.max_epochs):
        train_iter = 0
        for contexts, words, senses in batch_iter(train_data,
                                                  args.batch_size,
                                                  shuffle=True):
            # BUG FIX: zero gradients per batch. Previously zero_grad()
            # ran once per epoch, so gradients accumulated across all
            # batches of the epoch.
            optimizer.zero_grad()

            # forward pass
            scores = model(contexts, words)
            example_losses = loss_fn(scores, senses)

            batch_loss = example_losses.sum()
            loss = batch_loss / args.batch_size

            # backprop and weight update
            loss.backward()
            # gradient clipping
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       args.clip_grad)
            optimizer.step()

            if train_iter % args.print_iter == 0:
                print("Epoch {}, iter {}: loss {}".format(
                    epoch, train_iter, loss))

            if train_iter % args.val_iter == 0:
                validate(model, val_data, epoch, train_iter)

            train_iter += 1
Ejemplo n.º 23
0
    def evaluate_ppl(self, dev_data, batch_size: int=32):
        """
        Evaluate perplexity on dev sentences.

        Args:
            dev_data: a list of dev sentences : List[Any]
            batch_size: batch size

        Returns:
            ppl: the perplexity on dev sentences

        NOTE(review): gradients are not explicitly disabled here; callers
        may want to wrap this in a no-grad context (e.g. torch.no_grad()).
        """
        total_loss = 0.
        total_tgt_words = 0.

        for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
            src_encodings, decoder_init_state = self.encode(src_sents)
            total_loss += self.decode(src_encodings, decoder_init_state,
                                      tgt_sents).sum()
            # the leading `<s>` of each target sentence is not predicted
            total_tgt_words += sum(len(s[1:]) for s in tgt_sents)

        return np.exp(total_loss / total_tgt_words)
Ejemplo n.º 24
0
    def evaluate_ppl(self, dev_data, batch_size=32):
        """
        Evaluate perplexity on dev sentences.

        Args:
            dev_data: a list of dev sentences
            batch_size: batch size

        Returns:
            ppl: the perplexity on dev sentences
        """
        total_loss = 0.
        total_tgt_words = 0.

        # gradients are not needed for evaluation
        with torch.no_grad():

            for src_sents, tgt_sents in batch_iter(dev_data, batch_size):
                loss = self.forward(src_sents, tgt_sents, is_training = False)
                total_loss += loss.item()
                # the leading `<s>` of each target sentence is not predicted
                total_tgt_words += sum(len(s[1:]) for s in tgt_sents)

            ppl = np.exp(total_loss / total_tgt_words)

        return ppl
Ejemplo n.º 25
0
def evaluate(args, criterion, model, dev_data, vocab):
    """Evaluate the classifier on dev_data and print/return the average
    loss together with accuracy/F1 metrics from acc_and_f1."""
    model.eval()
    total_loss = 0.
    total_step = 0.
    preds = None
    out_label_ids = None
    with torch.no_grad():  # no parameter updates here, so no gradients needed
        for src_sents, labels in batch_iter(dev_data, args.train_batch_size):
            src_sents = split_sents(src_sents, vocab, args.device)
            logits = model(src_sents)
            labels = torch.tensor(labels, device=args.device)
            total_loss += criterion(logits, labels).item()
            total_step += 1

            batch_logits = logits.detach().cpu().numpy()
            batch_labels = labels.detach().cpu().numpy()
            if preds is None:
                preds = batch_logits
                out_label_ids = batch_labels
            else:
                preds = np.append(preds, batch_logits, axis=0)
                out_label_ids = np.append(out_label_ids, batch_labels, axis=0)
            # keep GPU memory in check between batches
            torch.cuda.empty_cache()

    preds = np.argmax(preds, axis=1)
    result = acc_and_f1(preds, out_label_ids)
    model.train()
    print("Evaluation loss", total_loss / total_step)
    print('Evaluation result', result)
    return total_loss / total_step, result
Ejemplo n.º 26
0
def test(args, criterion, model, te_data, vocab):
    """Run `model` over the test set and return the stacked raw logits.

    Args:
        args: namespace providing `test_batch_size` and `device`.
        criterion: unused; kept for signature parity with `evaluate`.
        model: classifier mapping batched sentences to logits.
        te_data: test examples, batched via `batch_iter(..., test_batch=True)`.
        vocab: vocabulary consumed by `split_sents`.

    Returns:
        np.ndarray of logits for the whole test set, or None if it was empty.
    """
    model.eval()
    pred_chunks = []
    # Inference only: no gradients required.
    with torch.no_grad():
        for src_sents in batch_iter(te_data,
                                    args.test_batch_size,
                                    test_batch=True):
            src_sents = split_sents(src_sents, vocab, args.device)
            logits = model(src_sents)
            # Gather per-batch arrays; a single concatenate at the end avoids
            # the O(n^2) copying of repeated np.append.
            pred_chunks.append(logits.detach().cpu().numpy())
            torch.cuda.empty_cache()

    # Preserve the original contract: None when no batches were produced.
    return np.concatenate(pred_chunks, axis=0) if pred_chunks else None
Ejemplo n.º 27
0
def evaluate_ppl(model, dev_data, batch_size=32):
    """ Evaluate perplexity on the validation set.
    @param model (NMT): NMT model
    @param dev_data (list of (src_sent, tgt_sent)): list of tuples of source and target sentences
    @param batch_size (batch size)
    @returns ppl (perplexity on the validation set)
    """
    was_training = model.training
    model.eval()

    total_loss = 0.
    total_tgt_words = 0.

    # Gradients are never needed during evaluation.
    with torch.no_grad():
        for src_batch, tgt_batch in batch_iter(dev_data, batch_size):
            batch_loss = -model(src_batch, tgt_batch).sum()
            total_loss += batch_loss.item()
            # Skip the leading `<s>` token when counting words to predict.
            total_tgt_words += sum(len(sent[1:]) for sent in tgt_batch)

        ppl = np.exp(total_loss / total_tgt_words)  # ppl = exp(cum. loss / cum. word count)

    if was_training:
        model.train()

    return ppl
Ejemplo n.º 28
0
def main():
    """Sanity-check setup: seed RNGs, load data and vocab, build an NMT model.

    Grabs a single shuffled training batch (the original loop broke after one
    iteration and contained redundant self-assignments, removed here).
    """
    # Check Python version. (A torch==1.0.0 pin existed here but was already
    # commented out; dropped as dead code.)
    assert (sys.version_info >= (3, 5)), "Please update your installation of Python to version >= 3.5"

    # Seed the random number generators for reproducibility
    seed = 1234
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed * 13 // 7)

    # Load training data & vocabulary
    train_data_src = read_corpus('/Users/Pannu/Desktop/Python/AI/NLP/CS224N-2019-master/Assignment/a4/sanity_check_en_es_data/train_sanity_check.es', 'src')
    train_data_tgt = read_corpus('/Users/Pannu/Desktop/Python/AI/NLP/CS224N-2019-master/Assignment/a4/sanity_check_en_es_data/train_sanity_check.en', 'tgt')
    train_data = list(zip(train_data_src, train_data_tgt))

    # Take exactly one shuffled batch.
    src_sents, tgt_sents = next(iter(batch_iter(train_data, batch_size=BATCH_SIZE, shuffle=True)))
    vocab = Vocab.load('/Users/Pannu/Desktop/Python/AI/NLP/CS224N-2019-master/Assignment/a4/sanity_check_en_es_data/vocab_sanity_check.json')

    # Create NMT Model
    model = NMT(
        embed_size=EMBED_SIZE,
        hidden_size=HIDDEN_SIZE,
        dropout_rate=DROPOUT_RATE,
        vocab=vocab)
Ejemplo n.º 29
0
def cal_dev_loss(model, dev_data, batch_size, sent_vocab, tag_vocab, device):
    """ Calculate loss on the development data
    Args:
        model: the model being trained
        dev_data: development data
        batch_size: batch size
        sent_vocab: sentence vocab
        tag_vocab: tag vocab
        device: torch.device on which the model is trained
    Returns:
        the average loss on the dev data
    """
    is_training = model.training
    model.eval()
    loss, n_sentences = 0, 0
    with torch.no_grad():  # evaluation only: no gradients required
        for sentences, tags in utils.batch_iter(dev_data,
                                                batch_size,
                                                shuffle=False):
            sentences, sent_lengths = utils.pad(sentences,
                                                sent_vocab[sent_vocab.PAD],
                                                device)
            # Fix: pad tags with the *tag* vocab's own PAD token. The original
            # looked up tag_vocab[sent_vocab.PAD], which is only correct when
            # both vocabs happen to use the same PAD string.
            tags, _ = utils.pad(tags, tag_vocab[tag_vocab.PAD], device)
            batch_loss = model(sentences, tags, sent_lengths)  # shape: (b,)
            loss += batch_loss.sum().item()
            n_sentences += len(sentences)
    model.train(is_training)  # restore the caller's train/eval mode
    return loss / n_sentences
Ejemplo n.º 30
0
def evaluate(model, data, batch_size):
    """
    Evaluate the model on the data
    @param model (AvgSim): AvgSim Model
    @param data (list[tuple(sent1, sent2, score)]): list of sent_pairs, sim_score
    @param batch_size (int): batch size
    @return mean_loss (float): MSE loss on the scores_pred vs scores
    @return corr (float): correlation b/w scores_pred vs scores
    """
    was_training = model.training
    model.eval()

    loss_sum = 0.0
    gold_scores = []
    pred_scores = []
    # No backprop during evaluation.
    with torch.no_grad():
        for batch_s1, batch_s2, batch_scores in batch_iter(data,
                                                           batch_size,
                                                           shuffle=False,
                                                           result=True):
            gold = torch.tensor(batch_scores, dtype=torch.float, device=device)
            pred = model(batch_s1, batch_s2)
            loss_sum += F.mse_loss(pred, gold, reduction='sum').item()

            gold_scores.extend(gold.tolist())
            pred_scores.extend(pred.tolist())

    mean_loss = loss_sum / len(data)
    corr, p_val = pearsonr(pred_scores, gold_scores)

    if was_training:
        model.train()

    return mean_loss, corr
Ejemplo n.º 31
0
 def evaluate(self, dataset, batch_size, is_devset=True):
     """Compute mean accuracy (as a percentage) of the model over `dataset`."""
     batch_accs = []
     for words, labels in batch_iter(dataset, batch_size):
         feed = self._get_feed_dict(words, labels, lr=None, is_train=False)
         batch_accs.append(self.sess.run(self.accuracy, feed_dict=feed))
     acc = np.mean(batch_accs) * 100
     dataset_name = 'DEVELOPMENT' if is_devset else 'TEST'
     self.logger.info("Testing model over {} dataset: accuracy - {:05.3f}".format(dataset_name, acc))
     return acc
Ejemplo n.º 32
0
def evaluate_ppl(model, dev_data, batch_size=32):
    """
    Evaluate perplexity on dev sentences

    Args:
        dev_data: a list of dev sentences
        batch_size: batch size

    Returns:
        ppl: the perplexity on dev sentences
    """
    was_training = model.training
    model.eval()

    loss_acc = 0.0
    word_acc = 0.0

    # Disable gradient tracking: evaluation never back-propagates.
    with torch.no_grad():
        for src_batch, tgt_batch in batch_iter(dev_data, batch_size):
            loss_acc += (-model(src_batch, tgt_batch).sum()).item()
            # omit the leading `<s>` from the prediction count
            word_acc += sum(len(t[1:]) for t in tgt_batch)

        ppl = np.exp(loss_acc / word_acc)

    if was_training:
        model.train()

    return ppl
Ejemplo n.º 33
0
 def train(self, trainset, devset, testset, batch_size=64, epochs=50, shuffle=True):
     """Run the training loop with per-epoch evaluation, LR decay,
     checkpointing, and early stopping.

     Args:
         trainset: training examples, batched via `batch_iter`.
         devset: optional dev set evaluated each epoch (logged only; it does
             not influence checkpointing).
         testset: test set whose score drives checkpointing and early stop.
         batch_size: minibatch size.
         epochs: maximum number of epochs (training may also resume from
             `self.start_epoch`).
         shuffle: whether to reshuffle `trainset` at the start of each epoch.

     NOTE(review): model selection is based on the TEST score
     (`is_devset=False`), not the dev score — confirm this is intentional,
     since it leaks test-set information into model selection.
     """
     self.logger.info('Start training...')
     init_lr = self.cfg.lr  # initial learning rate, used for decay learning rate
     best_score = 0.0  # record the best score
     best_score_epoch = 1  # record the epoch of the best score obtained
     no_imprv_epoch = 0  # no improvement patience counter
     for epoch in range(self.start_epoch, epochs + 1):
         self.logger.info('Epoch %2d/%2d:' % (epoch, epochs))
         # Ceiling division gives the number of batches for the progress bar.
         progbar = Progbar(target=(len(trainset) + batch_size - 1) // batch_size)  # number of batches
         if shuffle:
             np.random.shuffle(trainset)  # shuffle training dataset each epoch
         # training each epoch
         for i, (words, labels) in enumerate(batch_iter(trainset, batch_size)):
             feed_dict = self._get_feed_dict(words, labels, lr=self.cfg.lr, is_train=True)
             _, train_loss = self.sess.run([self.train_op, self.loss], feed_dict=feed_dict)
             progbar.update(i + 1, [("train loss", train_loss)])
         # Dev evaluation is informational only; its result is discarded.
         if devset is not None:
             self.evaluate(devset, batch_size)
         cur_score = self.evaluate(testset, batch_size, is_devset=False)
         # learning rate decay (inverse-time schedule on the initial LR)
         if self.cfg.decay_lr:
             self.cfg.lr = init_lr / (1 + self.cfg.lr_decay * epoch)
         # performs model saving and evaluating on test dataset
         if cur_score > best_score:
             no_imprv_epoch = 0
             self.save_session(epoch)  # checkpoint only on improvement
             best_score = cur_score
             best_score_epoch = epoch
             self.logger.info(' -- new BEST score on TEST dataset: {:05.3f}'.format(best_score))
         else:
             no_imprv_epoch += 1
             # Stop early once the patience budget is exhausted.
             if no_imprv_epoch >= self.cfg.no_imprv_patience:
                 self.logger.info('early stop at {}th epoch without improvement for {} epochs, BEST score: '
                                  '{:05.3f} at epoch {}'.format(epoch, no_imprv_epoch, best_score, best_score_epoch))
                 break
     self.logger.info('Training process done...')
Ejemplo n.º 34
0
def train(args: Dict):
    train_data_src = read_corpus(args['--train-src'], source='src')
    train_data_tgt = read_corpus(args['--train-tgt'], source='tgt')

    dev_data_src = read_corpus(args['--dev-src'], source='src')
    dev_data_tgt = read_corpus(args['--dev-tgt'], source='tgt')

    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    vocab = Vocab.load(args['--vocab'])

    model = NMT(embed_size=int(args['--embed-size']),
                hidden_size=int(args['--hidden-size']),
                dropout_rate=float(args['--dropout']),
                input_feed=args['--input-feed'],
                label_smoothing=float(args['--label-smoothing']),
                vocab=vocab)
    model.train()

    uniform_init = float(args['--uniform-init'])
    if np.abs(uniform_init) > 0.:
        print('uniformly initialize parameters [-%f, +%f]' % (uniform_init, uniform_init), file=sys.stderr)
        for p in model.parameters():
            p.data.uniform_(-uniform_init, uniform_init)

    vocab_mask = torch.ones(len(vocab.tgt))
    vocab_mask[vocab.tgt['<pad>']] = 0

    device = torch.device("cuda:0" if args['--cuda'] else "cpu")
    print('use device: %s' % device, file=sys.stderr)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cum_tgt_words = report_tgt_words = 0
    cum_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('begin Maximum Likelihood training')

    while True:
        epoch += 1

        for src_sents, tgt_sents in batch_iter(train_data, batch_size=train_batch_size, shuffle=True):
            train_iter += 1

            optimizer.zero_grad()

            batch_size = len(src_sents)

            # (batch_size)
            example_losses = -model(src_sents, tgt_sents)
            batch_loss = example_losses.sum()
            loss = batch_loss / batch_size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(), clip_grad)

            optimizer.step()

            batch_losses_val = batch_loss.item()
            report_loss += batch_losses_val
            cum_loss += batch_losses_val

            tgt_words_num_to_predict = sum(len(s[1:]) for s in tgt_sents)  # omitting leading `<s>`
            report_tgt_words += tgt_words_num_to_predict
            cum_tgt_words += tgt_words_num_to_predict
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, avg. ppl %.2f ' \
                      'cum. examples %d, speed %.2f words/sec, time elapsed %.2f sec' % (epoch, train_iter,
                                                                                         report_loss / report_examples,
                                                                                         math.exp(report_loss / report_tgt_words),
                                                                                         cum_examples,
                                                                                         report_tgt_words / (time.time() - train_time),
                                                                                         time.time() - begin_time), file=sys.stderr)

                train_time = time.time()
                report_loss = report_tgt_words = report_examples = 0.

            # perform validation
            if train_iter % valid_niter == 0:
                print('epoch %d, iter %d, cum. loss %.2f, cum. ppl %.2f cum. examples %d' % (epoch, train_iter,
                                                                                         cum_loss / cum_examples,
                                                                                         np.exp(cum_loss / cum_tgt_words),
                                                                                         cum_examples), file=sys.stderr)

                cum_loss = cum_examples = cum_tgt_words = 0.
                valid_num += 1

                print('begin validation ...', file=sys.stderr)

                # compute dev. ppl and bleu
                dev_ppl = evaluate_ppl(model, dev_data, batch_size=128)   # dev batch size can be a bit larger
                valid_metric = -dev_ppl

                print('validation: iter %d, dev. ppl %f' % (train_iter, dev_ppl), file=sys.stderr)

                is_better = len(hist_valid_scores) == 0 or valid_metric > max(hist_valid_scores)
                hist_valid_scores.append(valid_metric)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' % model_save_path, file=sys.stderr)
                    model.save(model_save_path)

                    # also save the optimizers' state
                    torch.save(optimizer.state_dict(), model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            exit(0)

                        # decay lr, and restore from previously best checkpoint
                        lr = optimizer.param_groups[0]['lr'] * float(args['--lr-decay'])
                        print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

                        # load model
                        params = torch.load(model_save_path, map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers', file=sys.stderr)
                        optimizer.load_state_dict(torch.load(model_save_path + '.optim'))

                        # set new lr
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # reset patience
                        patience = 0

                if epoch == int(args['--max-epoch']):
                    print('reached maximum number of epochs!', file=sys.stderr)
                    exit(0)