Beispiel #1
0
    def test_score_batch(self):
        """Exercise ``TextScorer.score_batch`` under three <unk> settings.

        Every scenario scores a fresh 3x2 batch holding the word IDs 0..5
        (class IDs are the same numbers, membership probabilities and mask
        are all ones). The result at index ``k`` is compared against
        ``log(word_id / 5)`` computed from column ``k`` of the word IDs,
        leaving out the first row.
        """

        def build_batch(unk_id=None):
            # Returns the four positional inputs of score_batch() in order:
            # word IDs, class IDs, membership probabilities, mask.
            words = numpy.arange(6).reshape((3, 2))
            if unk_id is not None:
                # Place <unk> in the middle row of the second column.
                words[1, 1] = unk_id
            classes = numpy.arange(6).reshape((3, 2))
            probs = numpy.ones_like(words).astype('float32')
            return words, classes, probs, numpy.ones_like(words)

        def expected(ids):
            # Reference log probability for the given word ID(s).
            return numpy.log(ids.astype('float32') / 5)

        # Scenario 1: default scorer - the network predicts <unk>
        # probability like that of any other word.
        text_scorer = TextScorer(self.dummy_network)
        batch = build_batch()
        result = text_scorer.score_batch(*batch)
        words = batch[0]
        assert_almost_equal(result[0], expected(words[1:, 0]))
        assert_almost_equal(result[1], expected(words[1:, 1]))

        # Scenario 2: ignore_unk=True - <unk> is removed from the resulting
        # logprobs, so only the last row remains for the second column.
        text_scorer = TextScorer(self.dummy_network, ignore_unk=True)
        batch = build_batch(self.vocabulary.word_to_id['<unk>'])
        result = text_scorer.score_batch(*batch)
        words = batch[0]
        assert_almost_equal(result[0], expected(words[1:, 0]))
        assert_almost_equal(result[1], expected(words[2:, 1]))

        # Scenario 3: unk_penalty=-5 - <unk> is assigned that constant
        # logprob instead of the network prediction.
        text_scorer = TextScorer(self.dummy_network,
                                 ignore_unk=False,
                                 unk_penalty=-5)
        batch = build_batch(self.vocabulary.word_to_id['<unk>'])
        result = text_scorer.score_batch(*batch)
        words = batch[0]
        assert_almost_equal(result[0], expected(words[1:, 0]))
        assert_almost_equal(result[1][0], -5)
        assert_almost_equal(result[1][1], expected(words[2, 1]))
    def test_score_batch(self):
        """Verify ``TextScorer.score_batch`` for three ways of treating <unk>.

        The input is always a 3x2 array of the word IDs 0..5 with identical
        class IDs, unit membership probabilities and an all-ones mask. The
        reference value for a word ID ``w`` is ``log(w / 5)``.
        """
        dims = (3, 2)
        float_type = 'float32'

        # --- Network predicts <unk> probability. ---
        text_scorer = TextScorer(self.dummy_network)
        ids = numpy.arange(6).reshape(dims)
        cls = numpy.arange(6).reshape(dims)
        weights = numpy.ones_like(ids).astype(float_type)
        ones = numpy.ones_like(ids)
        scores = text_scorer.score_batch(ids, cls, weights, ones)
        for column in (0, 1):
            actual = scores[column]
            reference = numpy.log(ids[1:, column].astype(float_type) / 5)
            assert_almost_equal(actual, reference)

        # --- <unk> is removed from the resulting logprobs. ---
        text_scorer = TextScorer(self.dummy_network, ignore_unk=True)
        ids = numpy.arange(6).reshape(dims)
        ids[1, 1] = self.vocabulary.word_to_id['<unk>']
        cls = numpy.arange(6).reshape(dims)
        weights = numpy.ones_like(ids).astype(float_type)
        ones = numpy.ones_like(ids)
        scores = text_scorer.score_batch(ids, cls, weights, ones)
        # The second column starts one row later because its <unk> entry
        # is expected to be dropped from the output.
        for column, first_row in ((0, 1), (1, 2)):
            actual = scores[column]
            reference = numpy.log(
                ids[first_row:, column].astype(float_type) / 5)
            assert_almost_equal(actual, reference)

        # --- <unk> is assigned a constant logprob. ---
        text_scorer = TextScorer(
            self.dummy_network, ignore_unk=False, unk_penalty=-5)
        ids = numpy.arange(6).reshape(dims)
        ids[1, 1] = self.vocabulary.word_to_id['<unk>']
        cls = numpy.arange(6).reshape(dims)
        weights = numpy.ones_like(ids).astype(float_type)
        ones = numpy.ones_like(ids)
        scores = text_scorer.score_batch(ids, cls, weights, ones)
        assert_almost_equal(
            scores[0], numpy.log(ids[1:, 0].astype(float_type) / 5))
        second = scores[1]
        # The <unk> position carries the penalty; the next one is scored
        # normally.
        assert_almost_equal(second[0], -5)
        assert_almost_equal(
            second[1], numpy.log(ids[2, 1].astype(float_type) / 5))
    def test_score_batch(self):
        """Check ``TextScorer.score_batch`` with and without the shortlist.

        Each scenario scores the transpose of a 3x5 array holding the word
        IDs 0..14. Class IDs are obtained from
        ``self.vocabulary.get_class_memberships()``; membership probabilities
        and the mask are all ones. The reference value for a word ID ``w``
        is ``log(w / 100)``.
        """

        def run_scorer(**options):
            # Build a scorer with the given options, score a fresh batch and
            # hand back the word IDs together with the result.
            scorer = TextScorer(self.dummy_network, **options)
            words = numpy.arange(15).reshape((3, 5)).T
            classes, _ = self.vocabulary.get_class_memberships(words)
            probs = numpy.ones_like(words).astype('float32')
            ones = numpy.ones_like(words)
            return words, scorer.score_batch(words, classes, probs, ones)

        def check_shared(words, result):
            # Expectations that are the same in every scenario: the first
            # two columns in full, and the first value of the third column.
            for column in (0, 1):
                assert_almost_equal(
                    result[column],
                    numpy.log(words[1:, column].astype('float32') / 100.0))
            self.assertAlmostEqual(
                result[2][0], numpy.log(11.0 / 100.0), places=5)  # </s>

        # Shortlist disabled: the network predicts <unk> probability, and
        # the remaining values of the third column are all expected to
        # equal log(12 / 100).
        words, result = run_scorer(use_shortlist=False)
        check_shared(words, result)
        for position in (1, 2, 3):  # position 1 is <unk>
            self.assertAlmostEqual(
                result[2][position], numpy.log(12.0 / 100.0), places=5)

        # Shortlist enabled: the <unk> probability is distributed for
        # out-of-shortlist words according to word frequency (expected
        # shares 0.3 and 0.7), while <unk> itself yields None.
        words, result = run_scorer(use_shortlist=True)
        check_shared(words, result)
        self.assertIsNone(result[2][1])  # <unk>
        self.assertAlmostEqual(
            result[2][2], numpy.log(12.0 / 100.0 * 0.3), places=5)
        self.assertAlmostEqual(
            result[2][3], numpy.log(12.0 / 100.0 * 0.7), places=5)

        # exclude_unk=True without shortlist: OOV and OOS words are
        # replaced with None.
        words, result = run_scorer(use_shortlist=False, exclude_unk=True)
        check_shared(words, result)
        self.assertIsNone(result[2][1])  # <unk>
        self.assertIsNone(result[2][2])
        self.assertIsNone(result[2][3])