def test_addDocument_existingKey(self):
    """Add a document whose signature duplicates doc1's and check the trie.

    A fresh document (id 3) is built from a random unit vector, but its
    signature is overwritten with a copy of ``self.doc1``'s bits before it
    is added to ``self.pm``. The trie entry stored under doc1's permuted
    signature is then expected to equal ``{1, 3}``.
    """
    # NOTE(review): the id 1 is presumably doc1's id, assigned by the
    # fixture outside this view -- confirm against setUp.
    randomVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=self.dimension, mu=0, sigma=1)
    duplicateDoc = Document(3, randomVector)

    # Force the new document onto exactly the same signature bits as doc1.
    copiedBits = self.doc1.signature.to01()
    duplicateDoc.signature = Signature(copiedBits)

    self.pm.addDocument(duplicateDoc)

    trie = self.pm.signatureTrie
    permutedKey = self.doc1.signature.permutate(self.pm).to01()
    self.assertEqual(trie[permutedKey], {1, 3})
def test_getNearestDocument_usingANearbyKeyInTrie(self):
    """Query with a signature one bit away from doc1's and expect doc id 1.

    A new document (id 3) is given ``self.doc1``'s signature with only the
    final bit flipped. The test first confirms the two bit strings differ,
    then checks that ``self.pm.getNearestDocuments`` returns ``{1}`` for
    the new document.
    """
    digitReplacement = {'0': '1', '1': '0'}
    newDocWithANearbySignature = Document(
        3,
        VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1))

    # Keep every bit of doc1's signature except the last, which is inverted.
    exactSignature = self.doc1.signature.to01()
    newDocWithANearbySignature.signature = Signature(
        exactSignature[:-1] + digitReplacement[exactSignature[-1]])

    # assertNotEqual replaces the deprecated assertNotEquals alias, which
    # was removed from unittest in Python 3.12.
    self.assertNotEqual(
        self.doc1.signature.to01(),
        newDocWithANearbySignature.signature.to01())

    # This assertion can sometimes fail because of randomization.
    # Run the tests again. It's OK!
    self.assertEqual(
        self.pm.getNearestDocuments(newDocWithANearbySignature),
        {1})
def test_setSignatureUsingVectors(self):
    """Check the signatures produced by ``Document.setSignatureUsingVectors``.

    Two documents are signed against the same pair of vectors and the same
    phrase-to-dimension map ('a' -> 1, 'b' -> 2). One document only uses
    mapped phrases; the other contains 'c', which is not in the map. The
    expected signatures are '01' and '10' respectively.
    """
    # NOTE(review): the expected bits are consistent with taking the sign
    # of each document/vector dot product while ignoring the unmapped
    # phrase 'c' -- presumably that is what the method does; confirm
    # against its implementation.
    phraseMap = TwoWayMap()
    for phrase, dimensionId in (('a', 1), ('b', 2)):
        phraseMap.set(TwoWayMap.MAP_FORWARD, phrase, dimensionId)

    mappedDoc = Document(1, {'a': 1, 'b': 4})
    unmappedDoc = Document(1, {'a': 1, 'c': 4})

    firstVector = Vector({1: 3 / 5.0, 2: -4 / 5.0})
    secondVector = Vector({1: -5 / 13.0, 2: 12 / 13.0})
    vectors = [firstVector, secondVector]

    for doc in (mappedDoc, unmappedDoc):
        doc.setSignatureUsingVectors(vectors, phraseMap)

    self.assertEqual(Signature('01'), mappedDoc.signature)
    self.assertEqual(Signature('10'), unmappedDoc.signature)
def test_getNearestSignature_nearbyKey(self):
    """Look up a nearby signature in the trie and expect the key '1000'.

    Calls ``SignatureTrie.getNearestSignatureKey`` on the fixture trie
    ``self.tr`` with the query signature '1100' and expects the key
    '1000' to be returned.
    """
    # NOTE(review): this presumably relies on '1100' being absent from
    # self.tr and '1000' being present; the fixture is outside this view.
    nearestKey = SignatureTrie.getNearestSignatureKey(
        self.tr, Signature('1100'))
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(nearestKey, '1000')
def test_permutate(self):
    """Check that permuting a signature leaves its ``count()`` unchanged.

    A 7-bit signature is permuted with a ``SignaturePermutationWithTrie(7)``
    and ``count()`` of the result is compared with ``count()`` of the
    original.
    """
    sgnt = Signature('1001011')
    permuted = sgnt.permutate(SignaturePermutationWithTrie(7))
    # assertEqual (rather than assertTrue(a == b)) reports both counts
    # when the check fails.
    self.assertEqual(sgnt.count(), permuted.count())
def test_initialization(self):
    """Check ``count()`` on freshly constructed signatures.

    A signature built from '1001011' is expected to report a count of 4,
    and one built with no arguments a count of 0.
    """
    fromBitString = Signature('1001011')
    self.assertEqual(fromBitString.count(), 4)

    noArguments = Signature()
    self.assertEqual(noArguments.count(), 0)
def test_permutate(self):
    """Check that permuting a signature leaves its ``count()`` unchanged.

    The signature '1001011' is permuted with a
    ``SignaturePermutationWithTrie(7)``; ``count()`` must be the same
    before and after.
    """
    sgnt = Signature('1001011')
    countBefore = sgnt.count()
    countAfter = sgnt.permutate(SignaturePermutationWithTrie(7)).count()
    # assertEqual shows both values on failure, unlike assertTrue(a == b),
    # which only reports "False is not true".
    self.assertEqual(countBefore, countAfter)