Exemple #1
0
    def _train(self, ts, dropout, batchsz, model, sess, word_keep):
        """Run one pass over the training set and print the mean loss.

        Only whole batches are used: ``floor(len(ts) / batchsz)`` steps are
        run, visited in a random order, so a trailing partial batch is
        skipped. The summed per-step loss is divided by ``len(ts)`` for the
        printed report.

        :param ts: Training examples; must support ``len`` and be accepted
            by ``batch``.
        :param dropout: ``1.0 - dropout`` is passed as the second argument
            to ``model.ex2dict``.
        :param batchsz: Number of examples per batch.
        :param model: Object providing ``ex2dict`` to build the feed dict.
        :param sess: Object whose ``run`` executes the training ops
            (presumably a TensorFlow session -- confirm).
        :param word_keep: Forwarded unchanged to ``model.ex2dict``.
        """
        start_time = time.time()

        num_examples = len(ts)
        steps = int(math.floor(num_examples / float(batchsz)))
        total_loss = 0

        # Visit the batch indices in a fresh random order on every call.
        for si in np.random.permutation(steps):
            ts_i = batch(ts, si, batchsz)
            feed_dict = model.ex2dict(ts_i, 1.0 - dropout, True, word_keep)

            _, step, summary_str, lossv = sess.run(
                [self.train_op, self.global_step, self.summary_op, self.loss],
                feed_dict=feed_dict)
            self.train_writer.add_summary(summary_str, step)

            total_loss += lossv

        duration = time.time() - start_time
        # An empty training set would otherwise divide by zero here.
        if num_examples:
            avg_loss = float(total_loss) / num_examples
        else:
            avg_loss = 0.0
        print('Train (Loss %.4f) (%.3f sec)' % (avg_loss, duration))
Exemple #2
0
    def _train(self, ts, dropout, batchsz, model, sess, word_keep):
        """Run one pass over the training set with optional word dropout.

        Only whole batches are used: ``floor(len(ts) / batchsz)`` steps are
        run, visited in a random order, so a trailing partial batch is
        skipped. When ``self.drop_words`` is positive, some non-padding
        entries of each batch's ``'x'`` array are overwritten with the
        ``<OOV>`` id before the batch is fed to the model. The summed
        per-step loss is divided by ``len(ts)`` for the printed report.

        :param ts: Training examples; must support ``len`` and be accepted
            by ``batch``.
        :param dropout: ``1.0 - dropout`` is passed as the second argument
            to ``model.ex2dict``.
        :param batchsz: Number of examples per batch.
        :param model: Object providing ``ex2dict`` and the ``loss`` that is
            fetched on every step.
        :param sess: Object whose ``run`` executes the training ops
            (presumably a TensorFlow session -- confirm).
        :param word_keep: Forwarded unchanged to ``model.ex2dict``.
        """
        start_time = time.time()

        num_examples = len(ts)
        steps = int(math.floor(num_examples / float(batchsz)))
        total_loss = 0

        # Visit the batch indices in a fresh random order on every call.
        for si in np.random.permutation(steps):
            ts_i = batch(ts, si, batchsz)

            if self.drop_words > 0.0:
                # NOTE(review): entries are replaced where the random draw
                # is *greater* than ``drop_words``, i.e. with probability
                # ``1 - drop_words``. If ``drop_words`` is meant to be the
                # drop probability this comparison is inverted -- confirm.
                # NOTE(review): the replacement is written into
                # ``ts_i['x']`` in place; if ``batch`` returns views into
                # ``ts`` this also alters the stored data -- confirm.
                word_probs = np.random.random(ts_i['x'].shape)
                drop_indices = np.where((word_probs > self.drop_words)
                                        & (ts_i['x'] != self.vocab['<PAD>']))
                ts_i['x'][drop_indices[0],
                          drop_indices[1]] = self.vocab['<OOV>']

            feed_dict = model.ex2dict(ts_i, 1.0 - dropout, True, word_keep)

            _, step, summary_str, lossv = sess.run(
                [self.train_op, self.global_step, self.summary_op, model.loss],
                feed_dict=feed_dict)
            self.train_writer.add_summary(summary_str, step)

            total_loss += lossv

        duration = time.time() - start_time
        # An empty training set would otherwise divide by zero here.
        if num_examples:
            avg_loss = float(total_loss) / num_examples
        else:
            avg_loss = 0.0
        print('Train (Loss %.4f) (%.3f sec)' % (avg_loss, duration))
Exemple #3
0
    def test(self, ts, batchsz=1, phase='Test', conll_file=None, txts=None):
        """Evaluate on ``ts`` and print accuracy (plus F-score if enabled).

        Only whole batches are evaluated: ``floor(len(ts) / batchsz)`` calls
        to ``self._batch`` are made, in order.

        :param ts: Evaluation examples; must support ``len`` and be accepted
            by ``batch``.
        :param batchsz: Number of examples per batch.
        :param phase: Label printed at the start of the report line.
        :param conll_file: Path opened for writing and handed to
            ``self._batch``; only used when ``txts`` is also given.
        :param txts: Raw texts forwarded to ``self._batch``.
        :return: ``(total_acc, fscore)``. ``fscore`` stays ``0`` unless
            ``self.fscore > 0``; ``total_acc`` is ``0.0`` when nothing was
            counted.
        """
        total_correct = total_sum = fscore = 0
        total_gold_count = total_guess_count = total_overlap_count = 0
        start_time = time.time()

        steps = int(math.floor(len(ts) / float(batchsz)))

        # Only if they provide a file and the raw txts, we can write CONLL file
        handle = None
        if conll_file is not None and txts is not None:
            handle = open(conll_file, "w")

        # Close the output file even when a batch raises.
        try:
            for i in range(steps):
                ts_i = batch(ts, i, batchsz)
                correct, count, overlaps, golds, guesses = self._batch(
                    ts_i, handle, txts)
                total_correct += correct
                total_sum += count
                total_gold_count += golds
                total_guess_count += guesses
                total_overlap_count += overlaps
        finally:
            if handle is not None:
                handle.close()

        duration = time.time() - start_time
        # With no evaluated items (e.g. fewer examples than one batch) the
        # ratio is undefined; report 0.0 instead of dividing by zero.
        if total_sum:
            total_acc = total_correct / float(total_sum)
        else:
            total_acc = 0.0

        # Only show the fscore if requested
        if self.fscore > 0:
            fscore = fScore(total_overlap_count, total_gold_count,
                            total_guess_count, self.fscore)
            print('%s (F%d = %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
                  (phase, self.fscore, float(fscore), total_correct, total_sum,
                   total_acc, duration))
            # TODO: per-label metrics from a confusion matrix were sketched
            # here but never enabled.
        else:
            print('%s (Acc %d/%d = %.4f) (%.3f sec)' %
                  (phase, total_correct, total_sum, total_acc, duration))

        return total_acc, fscore
Exemple #4
0
    def test(self, ts, batchsz=1, phase='Test', conll_file=None, txts=None):
        """Evaluate on ``ts`` and print accuracy (plus F-score if enabled).

        Only whole batches are evaluated: ``floor(len(ts) / batchsz)`` calls
        to ``self._batch`` are made, in order.

        :param ts: Evaluation examples; must support ``len`` and be accepted
            by ``batch``.
        :param batchsz: Number of examples per batch.
        :param phase: Label printed at the start of the report line.
        :param conll_file: Path opened for writing and handed to
            ``self._batch``; only used when ``txts`` is also given.
        :param txts: Raw texts forwarded to ``self._batch``.
        :return: ``(total_acc, fscore)``. ``fscore`` stays ``0`` unless
            ``self.fscore > 0``; ``total_acc`` is ``0.0`` when nothing was
            counted.
        """
        total_correct = total_sum = fscore = 0
        total_gold_count = total_guess_count = total_overlap_count = 0
        start_time = time.time()

        steps = int(math.floor(len(ts) / float(batchsz)))

        # Only if they provide a file and the raw txts, we can write CONLL file
        handle = None
        if conll_file is not None and txts is not None:
            handle = open(conll_file, "w")

        # Close the output file even when a batch raises.
        try:
            for i in range(steps):
                ts_i = batch(ts, i, batchsz)
                correct, count, overlaps, golds, guesses = self._batch(
                    ts_i, handle, txts)
                total_correct += correct
                total_sum += count
                total_gold_count += golds
                total_guess_count += guesses
                total_overlap_count += overlaps
        finally:
            if handle is not None:
                handle.close()

        duration = time.time() - start_time
        # With no evaluated items (e.g. fewer examples than one batch) the
        # ratio is undefined; report 0.0 instead of dividing by zero.
        if total_sum:
            total_acc = total_correct / float(total_sum)
        else:
            total_acc = 0.0

        # Only show the fscore if requested
        if self.fscore > 0:
            fscore = fScore(total_overlap_count,
                            total_gold_count,
                            total_guess_count,
                            self.fscore)
            print('%s (F%d = %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
                  (phase,
                   self.fscore,
                   float(fscore),
                   total_correct,
                   total_sum,
                   total_acc,
                   duration))
        else:
            print('%s (Acc %d/%d = %.4f) (%.3f sec)' %
                  (phase,
                   total_correct,
                   total_sum, total_acc,
                   duration))

        return total_acc, fscore