def setUp(self):
    """Prepare the fixture shared by the tests of this case.

    Sets up a 50-entry TwoWayMap in which every index is registered against
    itself, 23 random vectors, two documents (ids 1 and 2) whose signatures
    are derived from those vectors, and a SignaturePermutationWithTrie that
    already holds both documents.
    """
    self.dimension = 50
    self.signatureLength = 23

    def randomUnitVector():
        # All random vectors in this fixture share the same parameters.
        return VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1)

    self.phraseTextAndDimensionMap = dimensionMap = TwoWayMap()
    for index in range(self.dimension):
        dimensionMap.set(TwoWayMap.MAP_FORWARD, index, index)

    # One random vector per signature position.
    self.unitRandomVectors = [
        randomUnitVector() for _ in range(self.signatureLength)
    ]

    self.doc1 = Document(1, randomUnitVector())
    self.doc2 = Document(2, randomUnitVector())
    for document in (self.doc1, self.doc2):
        document.setSignatureUsingVectors(self.unitRandomVectors,
                                          self.phraseTextAndDimensionMap)

    self.pm = SignaturePermutationWithTrie(
        signatureLength=self.signatureLength)
    for document in (self.doc1, self.doc2):
        self.pm.addDocument(document)
def setUp(self):
    """Create the common fixture used by every test in this case.

    The fixture consists of: a TwoWayMap with each of the 50 indices set
    against itself, a list of 23 random vectors, documents 1 and 2 signed
    with those vectors, and a SignaturePermutationWithTrie containing both
    documents.
    """
    self.dimension = 50
    self.signatureLength = 23

    def makeVector():
        # Same generator arguments are used for every vector built here.
        return VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1)

    self.phraseTextAndDimensionMap = forwardMap = TwoWayMap()
    for position in range(self.dimension):
        forwardMap.set(TwoWayMap.MAP_FORWARD, position, position)

    self.unitRandomVectors = [
        makeVector() for _ in range(self.signatureLength)
    ]

    self.doc1 = Document(1, makeVector())
    self.doc2 = Document(2, makeVector())
    for doc in (self.doc1, self.doc2):
        doc.setSignatureUsingVectors(self.unitRandomVectors,
                                     self.phraseTextAndDimensionMap)

    self.pm = SignaturePermutationWithTrie(
        signatureLength=self.signatureLength)
    for doc in (self.doc1, self.doc2):
        self.pm.addDocument(doc)
def test_getNearestDocument_usingANearbyKeyInTrie(self):
    """A signature differing from doc1's only in its last digit should resolve to doc1.

    A third document is given doc1's signature with the final '0'/'1'
    digit flipped; getNearestDocuments is then expected to return the
    set containing only document id 1.
    """
    digitReplacement = {'0': '1', '1': '0'}
    newDocWithANearbySignature = Document(
        3,
        VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1))

    # Copy doc1's signature text and invert only the last digit.
    exactSignature = self.doc1.signature.to01()
    flippedLastDigit = digitReplacement[exactSignature[-1]]
    newDocWithANearbySignature.signature = Signature(
        exactSignature[:-1] + flippedLastDigit)

    # assertNotEqual replaces the deprecated alias assertNotEquals,
    # which no longer exists in Python 3.12+.
    self.assertNotEqual(self.doc1.signature.to01(),
                        newDocWithANearbySignature.signature.to01())
    # This assertion can sometimes fail because of randomization.
    # Run the tests again. It's OK!
    self.assertEqual(
        self.pm.getNearestDocuments(newDocWithANearbySignature),
        set([1]))
def __init__(self, vector=None, dimensions=None, mu=None, sigma=None):
    """Initialise from ``vector`` or, when it is None, from a generated one.

    vector: passed unchanged to the parent initialiser when supplied.
    dimensions, mu, sigma: used only when ``vector`` is None; they are
        forwarded positionally to
        VectorGenerator.getRandomGaussianUnitVector, and the
        getNormalizedVector() result of what it returns is passed to the
        parent initialiser.
    """
    # Identity check: ``== None`` would dispatch to the vector's own
    # __eq__, which need not return a plain boolean for a vector type.
    if vector is None:
        generated = VectorGenerator.getRandomGaussianUnitVector(
            dimensions, mu, sigma)
        vector = generated.getNormalizedVector()
    super(RandomGaussianUnitVector, self).__init__(vector)
def test_removeDocument_documents(self):
    """Removing documents shrinks, then clears, the trie entry for doc1's key.

    Document 3 is given doc1's signature and added, so the entry holds
    ids 1 and 3. Removing document 3 leaves id 1; removing doc1 as well
    makes signatureTrie.get return None for that key.
    """
    def doc1Key():
        # Recomputed on every use, exactly once per assertion.
        return self.doc1.signature.permutate(self.pm).to01()

    duplicate = Document(
        3,
        VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1))
    duplicate.signature = Signature(self.doc1.signature.to01())

    self.pm.addDocument(duplicate)
    self.assertEqual(self.pm.signatureTrie[doc1Key()], {1, 3})

    self.pm.removeDocument(duplicate)
    self.assertEqual(self.pm.signatureTrie[doc1Key()], {1})

    self.pm.removeDocument(self.doc1)
    self.assertEqual(None, self.pm.signatureTrie.get(doc1Key()))
def test_addDocument_existingKey(self):
    """Adding a document whose signature matches doc1's joins doc1's trie entry.

    Document 3 receives a copy of doc1's signature; after it is added,
    the trie entry for doc1's permuted signature must hold ids 1 and 3.
    """
    randomVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=self.dimension, mu=0, sigma=1)
    twin = Document(3, randomVector)
    twin.signature = Signature(self.doc1.signature.to01())

    self.pm.addDocument(twin)

    self.assertEqual(
        self.pm.signatureTrie[
            self.doc1.signature.permutate(self.pm).to01()],
        {1, 3})
def test_addDocument_newKey(self):
    """Adding one document to a new trie stores its id under its permuted key.

    Uses a freshly constructed SignaturePermutationWithTrie rather than
    the one from setUp, so the entry is expected to contain only id 1.
    """
    randomVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=self.dimension, mu=0, sigma=1)
    document = Document(1, randomVector)
    document.setSignatureUsingVectors(self.unitRandomVectors,
                                      self.phraseTextAndDimensionMap)

    permutation = SignaturePermutationWithTrie(
        signatureLength=self.signatureLength)
    permutation.addDocument(document)

    self.assertEqual(
        permutation.signatureTrie[
            document.signature.permutate(permutation).to01()],
        {1})
def test_getNearestDocument_usingANearbyKeyInTrie(self):
    """Looking up a signature one digit away from doc1's should return doc1.

    Document 3 gets doc1's signature text with its last '0'/'1' digit
    inverted. The test first checks the two signatures really differ,
    then expects getNearestDocuments to yield the set containing only
    document id 1.
    """
    digitReplacement = {'0': '1', '1': '0'}
    newDocWithANearbySignature = Document(
        3,
        VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1))

    # Keep everything but the last digit, then append its inverse.
    exactSignature = self.doc1.signature.to01()
    nearbySignature = (exactSignature[:-1]
                       + digitReplacement[exactSignature[-1]])
    newDocWithANearbySignature.signature = Signature(nearbySignature)

    # assertNotEquals is a deprecated alias that Python 3.12 removed;
    # assertNotEqual is the supported spelling.
    self.assertNotEqual(self.doc1.signature.to01(),
                        newDocWithANearbySignature.signature.to01())
    # This assertion can sometimes fail because of randomization.
    # Run the tests again. It's OK!
    self.assertEqual(
        self.pm.getNearestDocuments(newDocWithANearbySignature),
        set([1]))
def test_setSignatureUsingVectorPermutations(self):
    """Both signature-setting methods must produce equal signatures.

    One document is signed with setSignatureUsingVectors, given the
    explicitly permuted copies of a unit vector; the other is signed with
    setSignatureUsingVectorPermutations, given the unit vector and the
    permutations. The comparison runs twice: with a map covering all 53
    indices and with a map covering only the first 3.
    """
    dimensions = 53
    signatureLength = 13

    completeMap = TwoWayMap()
    for index in range(dimensions):
        completeMap.set(TwoWayMap.MAP_FORWARD, index, index)
    # Only dimensions - 50 (= 3) indices are registered in this map.
    truncatedMap = TwoWayMap()
    for index in range(dimensions - 50):
        truncatedMap.set(TwoWayMap.MAP_FORWARD, index, index)

    unitVector = RandomGaussianUnitVector(
        dimensions=dimensions, mu=0, sigma=1)
    vectorPermutations = VectorPermutation.getPermutations(
        signatureLength, dimensions, unitVector)
    permutatedUnitVectors = [
        unitVector.getPermutedVector(permutation)
        for permutation in vectorPermutations
    ]

    # Both documents are built from the same underlying vector.
    documentVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=dimensions, mu=0, sigma=1)
    signedByVectors = Document(1, documentVector)
    signedByPermutations = Document(2, documentVector)

    for dimensionMap in (completeMap, truncatedMap):
        signedByVectors.setSignatureUsingVectors(
            permutatedUnitVectors, dimensionMap)
        signedByPermutations.setSignatureUsingVectorPermutations(
            unitVector, vectorPermutations, dimensionMap)
        self.assertEqual(signedByVectors.signature,
                         signedByPermutations.signature)
def test_setSignatureUsingVectorPermutations(self):
    """Check that signing via permutations matches signing via permuted vectors.

    Document 1 is signed with setSignatureUsingVectors using the permuted
    copies of a unit vector; document 2 is signed with
    setSignatureUsingVectorPermutations using the unit vector plus the
    permutations themselves. Their signatures are compared for a map
    holding all 53 indices and again for a map holding only 3 of them.
    """
    dimensions = 53
    signatureLength = 13

    allDimensionsMap = TwoWayMap()
    for position in range(dimensions):
        allDimensionsMap.set(TwoWayMap.MAP_FORWARD, position, position)
    # This map registers just dimensions - 50 (= 3) indices.
    missingDimensionsMap = TwoWayMap()
    for position in range(dimensions - 50):
        missingDimensionsMap.set(TwoWayMap.MAP_FORWARD, position, position)

    unitVector = RandomGaussianUnitVector(
        dimensions=dimensions, mu=0, sigma=1)
    vectorPermutations = VectorPermutation.getPermutations(
        signatureLength, dimensions, unitVector)
    permutatedUnitVectors = [
        unitVector.getPermutedVector(perm) for perm in vectorPermutations
    ]

    # The same vector backs both documents.
    documentVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=dimensions, mu=0, sigma=1)
    viaVectors = Document(1, documentVector)
    viaPermutations = Document(2, documentVector)

    for currentMap in (allDimensionsMap, missingDimensionsMap):
        viaVectors.setSignatureUsingVectors(
            permutatedUnitVectors, currentMap)
        viaPermutations.setSignatureUsingVectorPermutations(
            unitVector, vectorPermutations, currentMap)
        self.assertEqual(viaVectors.signature, viaPermutations.signature)
def test_addDocument_newKey(self):
    """A document added to an empty trie is stored alone under its permuted key.

    The test builds its own SignaturePermutationWithTrie instead of using
    the one from setUp, adds a single document with id 1, and expects
    the matching trie entry to contain only that id.
    """
    vector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=self.dimension, mu=0, sigma=1)
    newDocument = Document(1, vector)
    newDocument.setSignatureUsingVectors(self.unitRandomVectors,
                                         self.phraseTextAndDimensionMap)

    emptyPermutation = SignaturePermutationWithTrie(
        signatureLength=self.signatureLength)
    emptyPermutation.addDocument(newDocument)

    self.assertEqual(
        emptyPermutation.signatureTrie[
            newDocument.signature.permutate(emptyPermutation).to01()],
        {1})
def __init__(self, vector=None, dimensions=None, mu=None, sigma=None):
    """Initialise with the given ``vector`` or generate one when it is None.

    vector: handed as-is to the parent initialiser when provided.
    dimensions, mu, sigma: consulted only when ``vector`` is None, in
        which case they are passed positionally to
        VectorGenerator.getRandomGaussianUnitVector and the result's
        getNormalizedVector() value goes to the parent initialiser.
    """
    # ``is None`` avoids calling the vector's own __eq__, which for a
    # vector type may not yield a plain boolean when compared with None.
    if vector is None:
        vector = VectorGenerator.getRandomGaussianUnitVector(
            dimensions, mu, sigma).getNormalizedVector()
    super(RandomGaussianUnitVector, self).__init__(vector)