Example #1
    def run_evaluate(self, sess, test, tags):
        """
        Evaluates performance on test set
        Args:
            sess: tensorflow session
            test: dataset that yields tuple of sentences, tags
            tags: {tag: index} dictionary
        Returns:
            accuracy
            f1 score
        """
        accs = []
        correct_preds, total_correct, total_preds = 0., 0., 0.
        for words, labels in minibatches(test, self.config.batch_size):
            labels_pred, sequence_lengths = self.predict_batch(sess, words)

            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a == b for (a, b) in zip(lab, lab_pred)]
                lab_chunks = set(get_chunks(lab, tags, self.config.DEFAULT))
                lab_pred_chunks = set(
                    get_chunks(lab_pred, tags, self.config.DEFAULT))
                correct_preds += len(lab_chunks & lab_pred_chunks)
                total_preds += len(lab_pred_chunks)
                total_correct += len(lab_chunks)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        acc = np.mean(accs)
        return acc, f1
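
The chunk-level precision and recall above depend on a get_chunks helper that is not shown in this listing. Below is a minimal sketch of such a helper, assuming IOB-style tags ("B-TYPE"/"I-TYPE") and that the third argument is the index of the default "O" tag; the project's own implementation may differ in detail.

def get_chunks(seq, tags, default):
    """Collapse a sequence of tag indices into (chunk_type, start, end) tuples,
    e.g. [('PER', 0, 2), ('LOC', 3, 4)], so chunks can be compared as sets."""
    idx_to_tag = {idx: tag for tag, idx in tags.items()}
    chunks, chunk_type, chunk_start = [], None, None
    for i, tok in enumerate(seq):
        tag = idx_to_tag[tok]
        if tok == default:
            # outside any chunk: close the open one, if any
            if chunk_type is not None:
                chunks.append((chunk_type, chunk_start, i))
                chunk_type, chunk_start = None, None
        else:
            prefix, tok_type = tag.split('-', 1) if '-' in tag else ('B', tag)
            if chunk_type is None:
                chunk_type, chunk_start = tok_type, i
            elif tok_type != chunk_type or prefix == 'B':
                # a new chunk starts where the previous one ends
                chunks.append((chunk_type, chunk_start, i))
                chunk_type, chunk_start = tok_type, i
    if chunk_type is not None:
        chunks.append((chunk_type, chunk_start, len(seq)))
    return chunks

Comparing set(get_chunks(lab, ...)) against set(get_chunks(lab_pred, ...)) makes correct_preds, total_preds and total_correct count whole entities rather than individual tokens, which is what the precision, recall and F1 at the end of run_evaluate measure.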
Example #2
    def run_epoch(self, sess, train, dev, tags, epoch):
        """
        Performs one complete pass over the train set and evaluate on dev
        Args:
            sess: tensorflow session
            train: dataset that yields tuple of sentences, tags
            dev: dataset
            tags: {tag: index} dictionary
            epoch: (int) number of the epoch
        """
        nbatches = (
            len(train) + self.config.batch_size - 1) // self.config.batch_size
        prog = Progbar(target=nbatches)
        for i, (words, labels
                ) in enumerate(minibatches(train, self.config.batch_size)):
            fd, _ = self.get_feed_dict(words, labels, self.config.LR,
                                       self.config.dropout)

            _, train_loss, summary = sess.run(
                [self.train_op, self.loss, self.merged], feed_dict=fd)

            prog.update(i + 1, [("train loss", train_loss)])

            # tensorboard
            if i % 10 == 0:
                self.file_writer.add_summary(summary, epoch * nbatches + i)

        acc, f1 = self.run_evaluate(sess, dev, tags)
        self.logger.info(
            "- dev acc {:04.2f} - f1 {:04.2f}".format(100 * acc, 100 * f1))
        return acc, f1
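
All of these snippets lean on a minibatches utility. For the two-argument form used in Examples #1 through #4 (minibatches(dataset, batch_size) yielding a list of sentences and a list of tag sequences per batch), a minimal sketch could look like the following; the variants with extra arguments in later examples (e.g. a shuffle flag or a mode keyword) clearly have different signatures.

def minibatches(data, minibatch_size):
    """Yield (sentences, labels) lists containing up to minibatch_size pairs."""
    x_batch, y_batch = [], []
    for x, y in data:
        if len(x_batch) == minibatch_size:
            yield x_batch, y_batch
            x_batch, y_batch = [], []
        x_batch.append(x)
        y_batch.append(y)
    if x_batch:
        # flush the final, possibly smaller, batch
        yield x_batch, y_batch

With batches shaped this way, zip(labels, labels_pred, sequence_lengths) in run_evaluate lines up one gold sequence, one predicted sequence and one true length per sentence.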
Example #3
def train(model, training_data, optimizer):
    model.train()

    num_batches = math.ceil(len(training_data) / args.batch_size)
    bar = utils.Progbar(target=num_batches)
    train_loss = 0.0
    train_total_instances = 0

    for batch_id, batch in enumerate(
            utils.minibatches(training_data, args.batch_size)):
        model.zero_grad()

        for sentence, tags in batch:
            sentence_in = processor.tensor(sentence)
            targets = processor.tensor(tags)

            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()

            train_loss += loss.item()  # accumulate a plain float, not a graph-tracking tensor
            train_total_instances += 1

        optimizer.step()

        bar.update(batch_id + 1,
                   exact=[("train loss", train_loss / train_total_instances)])

    if args.save_checkpoint:
        save_model(model)
Example #4
def evaluate(model, eval_data, dataset_name):
    model.eval()

    num_batches = math.ceil(len(eval_data) / args.batch_size)
    bar = utils.Progbar(target=num_batches)
    eval_score = 0.0
    eval_total_instances = 0
    eval_total_characters = 0
    eval_correct_characters = 0

    with torch.no_grad():
        for batch_id, batch in enumerate(
                utils.minibatches(eval_data, args.batch_size)):
            for sentence, tags in batch:
                score, tag_out = model(processor.tensor(sentence))
                if len(tag_out) != len(tags):
                    raise IndexError(
                        'Size of output tag sequence differs from that of reference.'
                    )
                length = len(tags)
                correct = [
                    tag_out[i] == tags[i] for i in range(1, length - 1)
                ].count(1)
                eval_score += score
                eval_total_instances += 1
                eval_total_characters += length
                eval_correct_characters += correct
            bar.update(batch_id + 1,
                       exact=[("eval score", eval_score / eval_total_instances)
                              ])

        logger.info('{} dataset accuracy: {}'.format(
            dataset_name, eval_correct_characters / eval_total_characters))
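
Examples #3 and #4 only define the per-epoch passes. A hypothetical driver loop tying them together might look like the following; args.num_epochs and the 'dev' dataset name are assumptions for illustration, not part of the snippets above.

# assumes model, optimizer, training_data, dev_data, args and logger
# are already set up as in Examples #3 and #4
for epoch in range(args.num_epochs):
    logger.info('Epoch {} of {}'.format(epoch + 1, args.num_epochs))
    train(model, training_data, optimizer)   # one full pass over the training set
    evaluate(model, dev_data, 'dev')         # logs character-level accuracy on dev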
Example #5
 def run(self, data):
     predicts = []
     bar = ProgressBar(max_value=len(data) // 1024 + 1)
     for batch_data in bar(utils.minibatches(data, 1024, False)):
         predict = self.model.eval_step(self.sess, batch_data)
         predicts.extend(predict)
     print 'The model is finished!'
     return predicts
Example #6
def test():
    # label values below mean "human author" / "automatic summary"
    x, y = utils.read_file(is_train=True, label_list=['人类作者', '自动摘要'])
    x = utils.process(x)
    x = utils.truncation(x)
    word2id, id2word, tag2id, id2tag = utils.build_vocab(x, y, min_df=10)

    x = utils.build_x_ids(x, word2id)
    y = utils.build_y_ids(y, tag2id)

    data = zip(x, y)

    train_data, dev_data = train_test_split(data,
                                            test_size=10000,
                                            random_state=24)

    vocab_size = len(word2id)
    emb_dim = 100
    num_classes = len(tag2id)

    print "训练集数据大小:%d 验证集数据大小:%d" % (len(train_data), len(dev_data))
    print "vocab_size:%d num_classes:%d" % (vocab_size, num_classes)
    print FLAGS.model_name

    model_dir = os.path.join('temp', 'nn')
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    with tf.Session() as sess:
        model = getattr(models, FLAGS.model_name)(vocab_size, emb_dim,
                                                  num_classes)
        saver = tf.train.Saver(tf.global_variables())
        model_file = os.path.join('temp', 'nn', FLAGS.model_file)
        saver.restore(sess, model_file)
        print "Restore model from %s" % model_file

        dev_loss = []
        labels = []
        predicts = []
        bar = ProgressBar(max_value=len(dev_data) // FLAGS.batch_size + 1)
        for batch_data in bar(
                utils.minibatches(dev_data,
                                  FLAGS.batch_size,
                                  True,
                                  shuffle=False)):
            loss, predict = model.dev_step(sess, batch_data)
            dev_loss.append(loss)
            labels.extend(batch_data[1])
            predicts.extend(predict)
        dev_loss = np.mean(dev_loss)
        dev_f1 = utils.score_all(labels, predicts, tag2id)
        utils.error_print(predicts, labels, id2tag, zip(*dev_data)[0], id2word)
        print "loss:%.3f f1:%.3f" % (dev_loss, dev_f1)
Example #7
    def run(self, sess, train, dev, epoch):
        nbatches = (len(train) + self.config.batch_size -
                    1) / self.config.batch_size
        for i, (word_ids,
                labels) in enumerate(minibatches(train,
                                                 self.config.batch_size)):
            feed, sequence_lengths = self.get_feed(word_ids=word_ids,
                                                   batch_id=i *
                                                   self.config.batch_size,
                                                   labels=labels,
                                                   lr=self.config.lr,
                                                   dropout=self.config.dropout,
                                                   training=True)
            #print (np.asarray(feed[self.word_feats]))
            _, train_loss = sess.run([self.train_, self.loss], feed_dict=feed)
            self.logger.info("Train loss: %f" % train_loss)

        acc, f05 = self.performance_eval(sess, dev, is_dev=True)
        self.logger.info("dev accuracy: %f, f05: %f" % (acc, f05))
        return acc, f05
Example #8
 def run_infer(self, sess, test, tags):
     """
     Runs inference on the test set, writes predictions to config.infer_filename, and evaluates performance
     Args:
         sess: tensorflow session
         test: dataset that yields tuple of sentences, tags
         tags: {tag: index} dictionary
     Returns:
         accuracy
         f1 score
     """
     infer_res = open(self.config.infer_filename, 'w', encoding="utf-8-sig")
     accs = []
     correct_preds, total_correct, total_preds = 0., 0., 0.
     for words, labels in minibatches(test, self.config.batch_size):
         words_copy = copy.deepcopy(words)
         labels_pred, sequence_lengths = self.predict_batch(sess, words)
         # print("predict_batch", labels_pred, sequence_lengths,words_copy)
         if self.config.chars:
             _, words_res = zip(*words_copy)
         else:
             words_res = words_copy
         for word_res, lab, lab_pred, length in zip(
                 words_res, labels, labels_pred, sequence_lengths):
             lab = lab[:length]
             lab_pred = lab_pred[:length]
             # print("idx_restore", word_res, lab, lab_pred)
             infer_res.write(self.idx_restore(word_res, lab, lab_pred))
             accs += [a == b for (a, b) in zip(lab, lab_pred)]
             lab_chunks = set(get_chunks(lab, tags, self.config.DEFAULT))
             lab_pred_chunks = set(
                 get_chunks(lab_pred, tags, self.config.DEFAULT))
             correct_preds += len(lab_chunks & lab_pred_chunks)
             total_preds += len(lab_pred_chunks)
             total_correct += len(lab_chunks)
     infer_res.close()
     p = correct_preds / total_preds if correct_preds > 0 else 0
     r = correct_preds / total_correct if correct_preds > 0 else 0
     f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
     acc = np.mean(accs)
     return acc, f1
Example #9
    def performance_eval(self, sess, test, is_dev=True):
        accs, all_labels, all_labels_pred = [], [], []
        main_predicted_count, main_total_count, main_correct_count = 0., 0., 0.
        for i, (word_ids,
                labels) in enumerate(minibatches(test,
                                                 self.config.batch_size)):
            labels_pred, sequence_lengths = self.predict_batch(
                sess,
                word_ids,
                i * self.config.batch_size,
                is_dev=is_dev,
                is_training=False)

            all_labels_pred.append(labels_pred)
            all_labels.append(labels)

            for lab, lab_pred, length in zip(labels, labels_pred,
                                             sequence_lengths):
                lab = lab[:length]
                lab_pred = lab_pred[:length]
                accs += [a == b for a, b in zip(lab, lab_pred)]
                main_predicted_count += sum(
                    a == self.config.main_label for a in lab_pred)
                main_total_count += sum(
                    a == self.config.main_label for a in lab)
                main_correct_count += sum(
                    (a == self.config.main_label) and
                    (b == self.config.main_label)
                    for a, b in zip(lab_pred, lab))

        acc = np.mean(accs)
        p = (float(main_correct_count) / float(main_predicted_count)) if (
            main_predicted_count > 0) else 0.0
        r = (float(main_correct_count) /
             float(main_total_count)) if (main_total_count > 0) else 0.0
        f = (2.0 * p * r / (p + r)) if (p + r > 0.0) else 0.0
        f05 = ((1 + 0.5 * 0.5) * p * r /
               ((0.5 * 0.5 * p) + r)) if (p + r > 0.0) else 0.0

        return acc, f05
Example #10
    def cost(self, X, minibatch_size=20):
        if self.phase == 0:
            val = [0, 0]
        else:
            val = 0

        data_size = X.shape[0]
        for Xb in utils.minibatches(minibatch_size, X, shuffle_f=False):
            eb = np.asarray(np.random.randn(Xb.shape[0], self.n_hidden),
                            dtype=theano.config.floatX)
            zb = np.asarray(np.random.randn(Xb.shape[0], self.n_hidden),
                            dtype=theano.config.floatX)
            if self.phase == 0:
                c = self.early_cost_func(Xb, eb, zb)
                val[0] += c[0] * float(Xb.shape[0]) \
                          / float(data_size)
                val[1] += c[1] * float(Xb.shape[1]) \
                          / float(data_size)
            else:
                val += self.final_cost_func( Xb, eb, zb ) * float(Xb.shape[0]) \
                       / float(data_size)

        return val
Example #12
    logger.info("Number dev instances: {}".format(len(dev_instances)))
    training_total_tokens = 0
    best_f1 = 0.
    for epoch in range(int(options.num_epochs)):
        logger.info("Epoch {} out of {}".format(epoch + 1, options.num_epochs))
        random.shuffle(training_instances)
        train_loss = 0.0
        train_total_instance = 0  # size of trained instances

        if options.dropout > 0:
            model.set_dropout(options.dropout)

        nbatches = (len(training_instances) + options.batch_size - 1) // options.batch_size

        bar = utils.Progbar(target=nbatches)
        for batch_id, batch in enumerate(utils.minibatches(training_instances, options.batch_size)):
            for idx, instance in enumerate(batch):
                if len(instance.sentence) == 0: continue
                train_total_instance += 1

                loss_expr = model.neg_log_loss(instance.sentence, instance.tags)
                # Forward pass
                loss = loss_expr.scalar_value()
                # Do backward pass
                loss_expr.backward()

                # Bail if loss is NaN
                if math.isnan(loss):
                    assert False, "NaN occured"

                train_loss += loss
Example #13
def train():
    x, y = utils.load_data(True, True)

    word2id, id2word, tag2id, id2tag = utils.build_vocab(x, y, min_df=20)

    x = utils.build_x_ids(x, word2id)
    y = utils.build_y_ids(y, tag2id)
    data = zip(x, y)

    train_data, dev_data = train_test_split(data,
                                            test_size=10000,
                                            random_state=24)

    #pre_embeddings=utils.load_embeddings(word2id)

    vocab_size = len(word2id)
    emb_dim = 100
    num_classes = len(tag2id)

    print "训练集数据大小:%d 验证集数据大小:%d" % (len(train_data), len(dev_data))
    print "vocab_size:%d num_classes:%d" % (vocab_size, num_classes)
    print FLAGS.model_name

    model_dir = os.path.join('temp', 'nn')
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    with tf.Session() as sess:
        model = getattr(models, FLAGS.model_name)(vocab_size, emb_dim,
                                                  num_classes)
        saver = tf.train.Saver(tf.global_variables())
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # sess.run(model.embeddings.assign(pre_embeddings))
        print "Train start!"

        best_dev_f1 = 0
        best_dev_epoch = 0
        no_improve = 0
        for epoch in range(FLAGS.max_epoch):
            bar = ProgressBar(max_value=len(train_data) // FLAGS.batch_size +
                              1)
            train_loss = []
            labels = []
            predicts = []
            for batch_data in bar(
                    utils.minibatches(train_data, FLAGS.batch_size, True)):
                loss, predict = model.train_step(sess, batch_data)
                train_loss.append(loss)
                labels.extend(batch_data[1])
                predicts.extend(predict)
            train_loss = np.mean(train_loss)
            train_f1 = utils.score_all(labels, predicts, tag2id)
            print "Train epoch %d finished. loss:%.3f f1:%.3f" % (
                epoch, train_loss, train_f1)

            dev_loss = []
            labels = []
            predicts = []
            bar = ProgressBar(max_value=len(train_data) // FLAGS.batch_size +
                              1)
            for batch_data in bar(
                    utils.minibatches(train_data, FLAGS.batch_size, True)):
                loss, predict = model.dev_step(sess, batch_data)
                dev_loss.append(loss)
                labels.extend(batch_data[1])
                predicts.extend(predict)
            dev_loss = np.mean(dev_loss)
            dev_f1 = utils.score_all(labels, predicts, tag2id)
            print "Train epoch %d finished. loss:%.3f f1:%.3f" % (
                epoch, dev_loss, dev_f1)

            dev_loss = []
            labels = []
            predicts = []
            for batch_data in utils.minibatches(dev_data, FLAGS.batch_size,
                                                True):
                loss, predict = model.dev_step(sess, batch_data)
                dev_loss.append(loss)
                labels.extend(batch_data[1])
                predicts.extend(predict)
            dev_loss = np.mean(dev_loss)

            dev_f1 = utils.score_all(labels, predicts, tag2id)
            print "Dev epoch %d finished. loss:%.3f f1:%.3f" % (
                epoch, dev_loss, dev_f1)

            if dev_f1 > best_dev_f1:
                best_dev_f1 = dev_f1
                best_dev_epoch = epoch
                no_improve = 0
                saver.save(sess, os.path.join(model_dir, FLAGS.model_file))
                print 'Saving model!'
            else:
                no_improve += 1
                if no_improve >= 5:
                    print "停止训练!"
                    break

            print

        print "Best epoch %d  best f1: %.3f" % (best_dev_epoch, best_dev_f1)
Example #14
    best_f1 = 0.
    for epoch in range(int(options.num_epochs)):
        logger.info("Epoch {} out of {}".format(epoch + 1, options.num_epochs))
        random.shuffle(training_instances)
        train_loss = 0.0
        train_total_instance = 0  # size of trained instances

        if options.dropout > 0:
            model.set_dropout(options.dropout)

        nbatches = (len(training_instances) + options.batch_size -
                    1) // options.batch_size

        bar = utils.Progbar(target=nbatches)
        for batch_id, batch in enumerate(
                utils.minibatches(training_instances, options.batch_size)):
            for idx, instance in enumerate(batch):
                if len(instance.sentence) == 0: continue
                train_total_instance += 1

                loss_expr = model.neg_log_loss(instance.sentence,
                                               instance.tags)
                # Forward pass
                loss = loss_expr.scalar_value()
                # Do backward pass
                loss_expr.backward()

                # Bail if loss is NaN
                if math.isnan(loss):
                    assert False, "NaN occurred"
Example #15
def train():
    source_data, target_data, test_data, word2id = utils.load_data()
    embeddings = utils.load_embeddings(word2id)

    random.seed(1)
    random.shuffle(target_data)

    cv_losses = []
    for k in range(1, 11):
        train_data, dev_data = utils.train_dev_split(target_data, k)
        model_file = FLAGS.model_file + str(k)
        print model_file

        print "训练集1数据大小:%d" % len(source_data)
        print "训练集2数据大小:%d" % len(train_data)
        print "验证集数据大小:%d" % len(dev_data)
        print "embedding大小:(%d,%d)" % (embeddings.shape[0],
                                       embeddings.shape[1])

        model_dir = '../model'
        graph = tf.Graph()
        sess = tf.Session(graph=graph)
        with graph.as_default():
            model = getattr(models, FLAGS.model_name)(embeddings)
            saver = tf.train.Saver(tf.global_variables())
            if FLAGS.restore == 1:
                saver.restore(sess, os.path.join(model_dir, FLAGS.model_file))
                print "Restore from pre-trained model"
            else:
                sess.run(tf.global_variables_initializer())
            print "Train start!"

            best_loss = 1e6
            best_epoch = 0
            not_improved = 0
            for epoch in range(FLAGS.max_epoch):

                print epoch, "================================================"
                train_loss = []
                ground_trues = []
                predicts = []

                for batch_data in utils.minibatches2(source_data,
                                                     train_data,
                                                     FLAGS.batch_size,
                                                     ratio=1,
                                                     mode='train'):
                    loss, predict = model.train_step(sess, batch_data[:3],
                                                     batch_data[3])
                    train_loss.extend(loss)
                    predicts.extend(predict)
                    ground_trues.extend(batch_data[2])
                train_loss = utils.loss(ground_trues, train_loss)
                p, r, f1 = utils.score(ground_trues, predicts)
                print "%d-fold Train epoch %d finished. loss:%.4f  p:%.4f r:%.4f f1:%.4f" % (
                    k, epoch, train_loss, p, r, f1)

                valid_loss = []
                ground_trues = []
                predicts = []
                for batch_data in utils.minibatches(dev_data,
                                                    FLAGS.batch_size,
                                                    mode='dev'):
                    loss, predict = model.valid_step(sess, batch_data, 2)
                    valid_loss.extend(loss)
                    predicts.extend(predict)
                    ground_trues.extend(batch_data[2])
                valid_loss = utils.loss(ground_trues, valid_loss)
                p, r, f1 = utils.score(ground_trues, predicts)
                print "%d-fold,Valid epoch %d finished. loss:%.4f  p:%.4f r:%.4f f1:%.4f" % (
                    k, epoch, valid_loss, p, r, f1)

                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_epoch = epoch
                    not_improved = 0
                    print "save model!"
                    saver.save(sess, os.path.join(model_dir, model_file))
                else:
                    not_improved += 1
                    if not_improved > 4:
                        print "停止训练!"
                        break
                print
            print "Best epoch %d  best loss %.4f" % (best_epoch, best_loss)
            print "#########################################################"
            cv_losses.append(best_loss)
    print "final cv loss: %.4f" % (sum(cv_losses) / len(cv_losses))
Example #16
 def run(self, test_data):
     predicts = []
     for batch_data in utils.minibatches(test_data, 128, mode='test'):
         predict = self.model.infer_step(self.sess, batch_data, 2)
         predicts.extend(predict)
     return predicts
Example #17
    data_train, labels_train, _, _, _, _ = load_mnist_data(
    )  # the data is 55k samples
    widgets = [
        'Training: ',
        Percentage(), ' ',
        AnimatedMarker(markers='←↖↑↗→↘↓↙'), ' ',
        ETA()
    ]
    pbar = ProgressBar(widgets=widgets,
                       maxval=n_epochs * data_train.shape[0] // 32).start()
    i = 0

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(n_epochs):
            batches = minibatches(data_train, labels_train, batch_size=32)
            for data, _ in batches:
                pbar.update(i)
                i += 1
                random_vectors = urand_vector(shape=(32, 100))
                sess.run(train_gen,
                         feed_dict={
                             inputs: random_vectors,
                             real_images: np.expand_dims(data, 3)
                         })
                sess.run(train_dist,
                         feed_dict={
                             inputs: random_vectors,
                             real_images: np.expand_dims(data, 3)
                         })
        plt.imshow(conv3_out.eval()[0, :, :, 0])
Example #18
  def run_evaluate(self, sess, test, tags):
    """
    Evaluates performance on test set
    Args:
      sess: tensorflow session
      test: dataset that yields tuple of sentences, tags
      tags: {tag: index} dictionary
    Returns:
      accuracy
      f1 score
    """
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    output_file = codecs.open("output", 'w', 'UTF-8')
    idx_to_tag = {idx: tag for tag, idx in tags.items()}
    for words, labels, iob_gold, mention_type_gold, mentions_gold, word_features, char_features in minibatches(test, self.config.batch_size):
      iob_labels_pred, sequence_lengths= self.predict_iob_batch(sess, words, word_features, char_features)
      mentions = []
      mention_sizes = []
      count = 0
      for i in range(self.config.batch_size):
        length = sequence_lengths[i]
        mention = find_mentions(iob_labels_pred[i][:length])
        mentions.append(mention)
        mention_sizes.append(len(mention))
        if len(mention) == 0:
          count += 1
      if count != self.config.batch_size:
        mentions_pred, _ = self.predict_type_batch(sess, words, word_features, char_features, mentions)
      else:
        mentions_pred = [[]]*self.config.batch_size
   
      for lab, iob_pred, length, mention, mention_pred, mention_size in zip(labels, iob_labels_pred, sequence_lengths, mentions, mentions_pred, mention_sizes):
        lab = lab[:length]
        iob_pred = iob_pred[:length]
        mention_pred = mention_pred[:mention_size]
        
        lab_pred = find_labels(iob_pred, mention_pred, tags, self.id2type)
        accs += [a==b for (a, b) in zip(lab, lab_pred)]
        lab_chunks = set(get_chunks(lab, tags))
        lab_pred_chunks = set(get_chunks(lab_pred, tags))
        correct_preds += len(lab_chunks & lab_pred_chunks)
        total_preds += len(lab_pred_chunks)
        total_correct += len(lab_chunks)
        
        output_string = ""
        for b, c in zip(lab, lab_pred):
          split_line = []
          split_line.append(idx_to_tag[b])
          split_line.append(idx_to_tag[c])
          output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string+'\n')

    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, f1
Example #19
  def run_epoch(self, sess, train, dev, tags, epoch):
    """
    Performs one complete pass over the train set and evaluate on dev
    Args:
      sess: tensorflow session
      train: dataset that yields tuple of sentences, tags
      dev: dataset
      tags: {tag: index} dictionary
      epoch: (int) number of the epoch
    """
    nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size
    total_loss = 0.0
    count = 0
    for i, (words, labels, iob, mention_type, mentions, word_features, char_features) in enumerate(minibatches(train, self.config.batch_size)):
      if len(mentions[0]) == 0:
        fd, _, _ = self.get_feed_dict(words, word_features, char_features, self.config.lr, self.config.dropout, iob)
        logits, _, train_loss= sess.run([self.boundry_logits, self.train_op_boundry, self.loss_a], feed_dict=fd)
      else:
        fd, _, _ = self.get_feed_dict(words, word_features, char_features, self.config.lr, self.config.dropout, iob, mention_type, mentions)
        logits, _, a, b, train_loss= sess.run([self.boundry_logits, self.train_op, self.loss_a, self.loss_b, self.loss], feed_dict=fd)
      total_loss += train_loss
      count += 1
    print total_loss/count

    acc, f1 = self.run_evaluate(sess, dev, tags)
    self.logger.info("- dev acc {:04.2f} - f1 {:04.2f}".format(100*acc, 100*f1))
    return acc, f1
Example #20
    validation_acc = []

    ############################################################################
    #                              Train the net                               #
    ############################################################################

    widgets = [
        'Training: ',
        Percentage(), ' ',
        AnimatedMarker(markers='←↖↑↗→↘↓↙'), ' ',
        ETA()
    ]
    pbar = ProgressBar(widgets=widgets, maxval=n_epochs).start()
    d_train, l_train, d_test, l_test, d_val, l_val = load_svhn_data(
        normalize=True)
    batches = minibatches(d_train, l_train, batch_size=batch_size)
    training_step_accuracy = []
    val_accuracy = []
    save_file = "./exercise5.ckpt"
    plt.ion()
    plt.gca().set_ylim([0, 1])
    plt.gca().set_xlim([0, n_epochs / 30])

    with tf.Session() as sess:
        saver = tf.train.Saver()
        if os.path.exists(save_file):
            saver.restore(sess, save_file)
        else:
            sess.run(tf.initialize_all_variables())
        for i in range(n_epochs):
            pbar.update(i)