예제 #1
0
 def test_int_to_bit_vector_large_0(self):
     """
     Zero converts to an all-False vector, both with no width given and
     with an explicitly requested width.
     """
     # With no width given, one bit is still needed to represent 0.
     minimal = bits.int_to_bit_vector_large(0)
     numpy.testing.assert_array_equal(minimal, [False])

     # Explicitly request a 5-bit wide vector.
     padded = bits.int_to_bit_vector_large(0, 5)
     numpy.testing.assert_array_equal(padded, [False] * 5)
예제 #2
0
    def test_int_to_bit_vector_large_large(self):
        """
        Very large integers (256 bits and up) convert to the expected
        bit vectors.
        """
        # 2**256 - 1 is exactly 256 set bits.
        all_ones = (2 ** 256) - 1
        numpy.testing.assert_array_equal(
            bits.int_to_bit_vector_large(all_ones),
            [True] * 256,
        )

        # 2**512 is one set bit followed by 512 cleared bits.
        power_of_two = 2 ** 512
        numpy.testing.assert_array_equal(
            bits.int_to_bit_vector_large(power_of_two),
            [True] + [False] * 512,
        )
예제 #3
0
    def test_remove_from_index_invalid_key_single(self):
        """
        Removing a single hash that was never indexed raises KeyError and
        leaves the index data untouched.
        """
        bt = SkLearnBallTreeHashIndex(random_seed=0)
        # Index the 256-bit vectors for the integers 0..999.
        hashes = np.empty((1000, 256), dtype=bool)
        for row in range(1000):
            hashes[row] = int_to_bit_vector_large(row, 256)
        bt.build_index(hashes)
        # Snapshot the post-build data so we can verify nothing was removed.
        data_before = np.copy(bt.bt.data)

        # 1001 is outside the indexed range. The key list is built before
        # entering the assertRaises context so only the removal is guarded.
        missing = [
            int_to_bit_vector_large(1001, 256),
        ]
        with self.assertRaises(KeyError):
            bt.remove_from_index(missing)
        np.testing.assert_array_equal(data_before, np.asarray(bt.bt.data))
예제 #4
0
    def test_remove_from_index_invalid_key_multiple(self):
        """
        A removal request mixing valid and invalid keys raises KeyError and
        does not modify the index.
        """
        bt = SkLearnBallTreeHashIndex(random_seed=0)
        # Index the 256-bit vectors for the integers 0..999.
        hashes = np.empty((1000, 256), dtype=bool)
        for row in range(1000):
            hashes[row] = int_to_bit_vector_large(row, 256)
        bt.build_index(hashes)
        # Snapshot the post-build data so we can verify nothing was removed.
        data_before = np.copy(bt.bt.data)

        # 42 was indexed, 1008 was not. The key list is built before
        # entering the assertRaises context so only the removal is guarded.
        mixed_keys = [
            int_to_bit_vector_large(42, 256),
            int_to_bit_vector_large(1008, 256),
        ]
        with self.assertRaises(KeyError):
            bt.remove_from_index(mixed_keys)
        np.testing.assert_array_equal(data_before, np.asarray(bt.bt.data))
예제 #5
0
    def test_remove_from_index(self):
        """
        Removing indexed hashes actually drops them from the index data.
        """
        bt = SkLearnBallTreeHashIndex(random_seed=0)
        # Index the 256-bit vectors for the integers 0..999.
        hashes = np.empty((1000, 256), dtype=bool)
        for row in range(1000):
            hashes[row] = int_to_bit_vector_large(row, 256)
        bt.build_index(hashes)
        # The ball tree's data block should now hold all 1000 entries.
        self.assertEqual(bt.bt.data.shape, (1000, 256))

        removed_values = (42, 998)
        bt.remove_from_index(
            [int_to_bit_vector_large(v, 256) for v in removed_values]
        )
        # The data block should be exactly two rows shorter.
        remaining = np.asarray(bt.bt.data)
        self.assertEqual(remaining.shape, (998, 256))
        # Neither removed vector may still appear among the remaining rows.
        remaining_rows = {tuple(r) for r in remaining.tolist()}
        for v in removed_values:
            self.assertNotIn(tuple(int_to_bit_vector_large(v, 256)),
                             remaining_rows)
예제 #6
0
    def test_remove_from_index_last_element(self):
        """
        Test removing the only element, and removing the final batch of
        elements, from the index.
        """
        # Case 1: add a single hash, then remove it again.
        bt = SkLearnBallTreeHashIndex(random_seed=0)
        single = np.empty((1, 256), dtype=bool)
        single[0] = int_to_bit_vector_large(1, 256)
        bt.build_index(single)
        self.assertEqual(bt.count(), 1)
        bt.remove_from_index(single)
        self.assertEqual(bt.count(), 0)
        self.assertIsNone(bt.bt)

        # Case 2: add many hashes, then remove them in batches until none
        # are left.
        total = 1000
        batch = 250
        bt = SkLearnBallTreeHashIndex(random_seed=0)
        hashes = np.empty((total, 256), dtype=bool)
        for row in range(total):
            hashes[row] = int_to_bit_vector_large(row, 256)
        bt.build_index(hashes)
        for start in range(0, total, batch):
            stop = start + batch
            bt.remove_from_index(hashes[start:stop])
            left = total - stop
            self.assertEqual(bt.count(), left)
            # The tree object is expected to exist until the last batch has
            # been removed, and to be None afterwards.
            if left:
                self.assertIsNotNone(bt.bt)
            else:
                self.assertIsNone(bt.bt)
예제 #7
0
    def test_remove_from_index_last_element_with_cache(self):
        """
        Test that removing the final element also empties the cache element.
        """
        cache = DataMemoryElement()
        bt = SkLearnBallTreeHashIndex(cache_element=cache, random_seed=0)
        single = np.empty((1, 256), dtype=bool)
        single[0] = int_to_bit_vector_large(1, 256)

        # After building, the index holds one hash and the cache has content.
        bt.build_index(single)
        self.assertEqual(bt.count(), 1)
        self.assertFalse(cache.is_empty())

        # After removing that hash, both the index and the cache are empty.
        bt.remove_from_index(single)
        self.assertEqual(bt.count(), 0)
        self.assertTrue(cache.is_empty())
예제 #8
0
파일: linear.py 프로젝트: sanyarud/SMQTK
    def _nn(self, h, n=1):
        """
        Internal method returning the nearest `N` neighbor hash codes, as
        bit-vectors, to the given hash code bit-vector.

        Distances are in the range [0,1] and are the percent different each
        neighbor hash is from the query, based on the number of bits contained
        in the query (normalized hamming distance).

        When this internal method is called, we have already checked that our
        index is not empty.

        :param h: Hash code to compute the neighbors of. Should be the same bit
            length as indexed hash codes.
        :type h: numpy.ndarray[bool]

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Pair of parallel lists: the nearest N hash codes (closest
            first) and the normalized distance values to those neighbors.
        :rtype: (list[numpy.ndarray[bool]], list[float])

        """
        with self._model_lock:
            h_int = bit_vector_to_int_large(h)
            bits = len(h)
            # Pair each indexed code with its distance to the query so the
            # hamming distance is computed once per code, instead of being
            # recomputed afterwards for every selected neighbor.
            # Selection is keyed on the distance alone (not the whole pair),
            # so codes are never compared against each other and the choice
            # among equal distances follows the same rule as keying the raw
            # codes by their distance.
            #: :type: list[(int, int|long)]
            nearest = heapq.nsmallest(
                n,
                ((hamming_distance(h_int, code), code)
                 for code in self.index),
                key=lambda pair: pair[0]
            )
            # Normalize by the query's bit length to land in [0, 1].
            return [int_to_bit_vector_large(code, bits)
                    for _, code in nearest], \
                   [dist / float(bits) for dist, _ in nearest]
예제 #9
0
 def test_int_to_bit_vector_large_1(self):
     """
     One converts to a single True bit, left-padded with False when a
     wider vector is requested.
     """
     # With no width given, a single set bit is produced.
     minimal = bits.int_to_bit_vector_large(1)
     numpy.testing.assert_array_equal(minimal, [True])

     # A requested width of 7 puts six False bits ahead of the set bit.
     padded = bits.int_to_bit_vector_large(1, 7)
     numpy.testing.assert_array_equal(padded, [False] * 6 + [True])