Ejemplo n.º 1
0
 def test_pickle(self):
     """Check that a forest restored from a pickle still answers queries.

     Two MinHash sketches are added under the keys "a" and "b", the forest
     is indexed, serialized with ``pickle.dumps`` and restored with
     ``pickle.loads``. The restored copy is then queried with each sketch
     and must return the corresponding key.
     """
     forest = MinHashLSHForest()
     m1 = MinHash()
     m1.update("a".encode("utf8"))
     m2 = MinHash()
     m2.update("b".encode("utf8"))
     forest.add("a", m1)
     forest.add("b", m2)
     forest.index()
     forest2 = pickle.loads(pickle.dumps(forest))
     # Query the restored copy, not the original: otherwise the pickle
     # round trip is never actually exercised by the assertions.
     result = forest2.query(m1, 1)
     self.assertIn("a", result)
     result = forest2.query(m2, 1)
     self.assertIn("b", result)
Ejemplo n.º 2
0
    def test_query(self):
        """Query a forest built from two weighted MinHash sketches.

        Asking for the top 2 results with the first sketch must return both
        stored keys. A sketch produced by a generator constructed with
        different arguments must make ``query`` raise ``ValueError``.
        """
        forest = MinHashLSHForest()
        generator = WeightedMinHashGenerator(10)
        sketch_a = generator.minhash(np.random.uniform(1, 10, 10))
        sketch_b = generator.minhash(np.random.uniform(1, 10, 10))
        for key, sketch in (("a", sketch_a), ("b", sketch_b)):
            forest.add(key, sketch)
        forest.index()

        found = forest.query(sketch_a, 2)
        for key in ("a", "b"):
            self.assertTrue(key in found)

        # NOTE(review): presumably the second constructor argument changes
        # the sketch size so it no longer matches the forest -- confirm
        # against the WeightedMinHashGenerator signature.
        other_generator = WeightedMinHashGenerator(10, 5)
        mismatched = other_generator.minhash(np.random.uniform(1, 10, 10))
        with self.assertRaises(ValueError):
            forest.query(mismatched, 1)