コード例 #1
0
    def test_move_and_recompute(self):
        optimizer1 = NumpyBigramOptimizer(self.statistics, self.vocabulary)
        word_id = self.vocabulary.word_to_id['d']
        orig_class_id = optimizer1.get_word_class(word_id)
        new_class_id = 3 if orig_class_id != 3 else 4
        optimizer1._word_to_class[word_id] = new_class_id
        counts = optimizer1._compute_class_statistics(
            optimizer1._word_counts, optimizer1._ww_counts,
            optimizer1._word_to_class)

        class_counts = numpy.zeros(optimizer1.num_classes, 'int32')
        cc_counts = numpy.zeros(
            (optimizer1.num_classes, optimizer1.num_classes), dtype='int32')
        cw_counts = numpy.zeros(
            (optimizer1.num_classes, optimizer1.vocabulary_size),
            dtype='int32')
        wc_counts = numpy.zeros(
            (optimizer1.vocabulary_size, optimizer1.num_classes),
            dtype='int32')
        for wid, cid in enumerate(optimizer1._word_to_class):
            class_counts[cid] += optimizer1._word_counts[wid]
        for left_wid, right_wid in zip(*optimizer1._ww_counts.nonzero()):
            count = optimizer1._ww_counts[left_wid, right_wid]
            left_cid = optimizer1._word_to_class[left_wid]
            right_cid = optimizer1._word_to_class[right_wid]
            cc_counts[left_cid, right_cid] += count
            cw_counts[left_cid, right_wid] += count
            wc_counts[left_wid, right_cid] += count
        self.assertTrue(numpy.array_equal(class_counts, counts[0]))
        self.assertTrue(numpy.array_equal(cc_counts, counts[1]))
        self.assertTrue(numpy.array_equal(cw_counts, counts[2]))
        self.assertTrue(numpy.array_equal(wc_counts, counts[3]))
        optimizer1._class_counts = counts[0]
        optimizer1._cc_counts = counts[1]
        optimizer1._cw_counts = counts[2]
        optimizer1._wc_counts = counts[3]

        optimizer2 = NumpyBigramOptimizer(self.statistics, self.vocabulary)
        orig_class_id = optimizer2.get_word_class(word_id)
        optimizer2._move(word_id, new_class_id)

        self.assertEqual(
            numpy.count_nonzero(
                optimizer1._class_counts != optimizer2._class_counts), 0)
        self.assertEqual(
            numpy.count_nonzero(
                optimizer1._cc_counts != optimizer2._cc_counts), 0)
        self.assertEqual(
            numpy.count_nonzero(
                optimizer1._cw_counts != optimizer2._cw_counts), 0)
        self.assertEqual(
            numpy.count_nonzero(
                optimizer1._wc_counts != optimizer2._wc_counts), 0)

        optimizer3 = TheanoBigramOptimizer(self.statistics, self.vocabulary)
        orig_class_id = optimizer3.get_word_class(word_id)
        optimizer3._move(word_id, new_class_id)

        self.assert_optimizers_equal(optimizer2, optimizer3)
コード例 #2
0
    def test_move_and_recompute(self):
        optimizer1 = NumpyBigramOptimizer(self.statistics, self.vocabulary)
        word_id = self.vocabulary.word_to_id['d']
        orig_class_id = optimizer1.get_word_class(word_id)
        new_class_id = 3 if orig_class_id != 3 else 4
        optimizer1._word_to_class[word_id] = new_class_id
        counts = optimizer1._compute_class_statistics(optimizer1._word_counts,
                                                      optimizer1._ww_counts,
                                                      optimizer1._word_to_class)

        class_counts = numpy.zeros(optimizer1.num_classes, 'int32')
        cc_counts = numpy.zeros((optimizer1.num_classes, optimizer1.num_classes), dtype='int32')
        cw_counts = numpy.zeros((optimizer1.num_classes, optimizer1.vocabulary_size), dtype='int32')
        wc_counts = numpy.zeros((optimizer1.vocabulary_size, optimizer1.num_classes), dtype='int32')
        for wid, cid in enumerate(optimizer1._word_to_class):
            class_counts[cid] += optimizer1._word_counts[wid]
        for left_wid, right_wid in zip(*optimizer1._ww_counts.nonzero()):
            count = optimizer1._ww_counts[left_wid, right_wid]
            left_cid = optimizer1._word_to_class[left_wid]
            right_cid = optimizer1._word_to_class[right_wid]
            cc_counts[left_cid,right_cid] += count
            cw_counts[left_cid,right_wid] += count
            wc_counts[left_wid,right_cid] += count
        self.assertTrue(numpy.array_equal(class_counts, counts[0]))
        self.assertTrue(numpy.array_equal(cc_counts, counts[1]))
        self.assertTrue(numpy.array_equal(cw_counts, counts[2]))
        self.assertTrue(numpy.array_equal(wc_counts, counts[3]))
        optimizer1._class_counts = counts[0]
        optimizer1._cc_counts = counts[1]
        optimizer1._cw_counts = counts[2]
        optimizer1._wc_counts = counts[3]

        optimizer2 = NumpyBigramOptimizer(self.statistics, self.vocabulary)
        orig_class_id = optimizer2.get_word_class(word_id)
        optimizer2._move(word_id, new_class_id)

        self.assertEqual(numpy.count_nonzero(optimizer1._class_counts != optimizer2._class_counts), 0)
        self.assertEqual(numpy.count_nonzero(optimizer1._cc_counts != optimizer2._cc_counts), 0)
        self.assertEqual(numpy.count_nonzero(optimizer1._cw_counts != optimizer2._cw_counts), 0)
        self.assertEqual(numpy.count_nonzero(optimizer1._wc_counts != optimizer2._wc_counts), 0)

        optimizer3 = TheanoBigramOptimizer(self.statistics, self.vocabulary)
        orig_class_id = optimizer3.get_word_class(word_id)
        optimizer3._move(word_id, new_class_id)

        self.assert_optimizers_equal(optimizer2, optimizer3)