Ejemplo n.º 1
0
 def test_addDocument_newKey(self):
     # A document added to a freshly created permutation must be filed
     # under the key obtained by permuting its own signature, and that
     # entry must contain only the document's id.
     randomVector = VectorGenerator.getRandomGaussianUnitVector(
         dimension=self.dimension, mu=0, sigma=1)
     freshDocument = Document(1, randomVector)
     freshDocument.setSignatureUsingVectors(
         self.unitRandomVectors, self.phraseTextAndDimensionMap)

     emptyPermutation = SignaturePermutationWithTrie(
         signatureLength=self.signatureLength)
     emptyPermutation.addDocument(freshDocument)

     trieKey = freshDocument.signature.permutate(emptyPermutation).to01()
     self.assertEqual(emptyPermutation.signatureTrie[trieKey], {1})
Ejemplo n.º 2
0
 def test_setSignatureUsingVectors(self):
     # Phrases 'a' and 'b' are registered as dimensions 1 and 2; phrase 'c'
     # is deliberately absent from the map.
     dimensionMap = TwoWayMap()
     for phrase, dimensionId in (('a', 1), ('b', 2)):
         dimensionMap.set(TwoWayMap.MAP_FORWARD, phrase, dimensionId)

     fullyMappedDocument = Document(1, {'a': 1, 'b': 4})
     partlyMappedDocument = Document(1, {'a': 1, 'c': 4})

     firstVector = Vector({1: 3 / 5., 2: -4 / 5.})
     secondVector = Vector({1: -5 / 13., 2: 12 / 13.})
     signatureVectors = [firstVector, secondVector]

     for document in (fullyMappedDocument, partlyMappedDocument):
         document.setSignatureUsingVectors(signatureVectors, dimensionMap)

     # NOTE(review): the expected bits are consistent with one bit per
     # vector taken from the sign of its dot product with the mapped
     # document, unmapped phrases being ignored -- confirm against
     # Document.setSignatureUsingVectors.
     self.assertEqual(Signature('01'), fullyMappedDocument.signature)
     self.assertEqual(Signature('10'), partlyMappedDocument.signature)
Ejemplo n.º 3
0
class SignaturePermutationTests(unittest.TestCase):
    """Exercise SignaturePermutationWithTrie: adding and removing documents,
    nearest-document lookup, and resetting the signature trie."""

    def setUp(self):
        # Shared fixture: a map sending each of the first self.dimension
        # integers to itself, one random unit vector per signature bit, and
        # two signed documents (ids 1 and 2) already stored in self.pm.
        self.dimension, self.signatureLength = 50, 23
        self.phraseTextAndDimensionMap = TwoWayMap()
        for i in range(self.dimension):
            self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, i, i)
        self.unitRandomVectors = [
            self._randomUnitVector() for _ in range(self.signatureLength)
        ]
        self.doc1 = Document(1, self._randomUnitVector())
        self.doc2 = Document(2, self._randomUnitVector())
        self.doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.doc2.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.pm = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.pm.addDocument(self.doc1)
        self.pm.addDocument(self.doc2)

    def _randomUnitVector(self):
        # Helper: a new random Gaussian unit vector of self.dimension
        # dimensions (mu=0, sigma=1), as used throughout these tests.
        return VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1)

    @staticmethod
    def _trieKey(document, permutation):
        # Helper: the signatureTrie key for `document` under `permutation`,
        # i.e. the 0/1 string of the permuted signature.
        return document.signature.permutate(permutation).to01()

    def test_addDocument_newKey(self):
        # A document added to an empty permutation is stored alone under
        # the key of its permuted signature.
        doc1 = Document(1, self._randomUnitVector())
        doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                      self.phraseTextAndDimensionMap)
        pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        pm.addDocument(doc1)
        self.assertEqual(pm.signatureTrie[self._trieKey(doc1, pm)], {1})

    def test_addDocument_existingKey(self):
        # A second document carrying doc1's exact signature must share
        # doc1's trie entry.
        newDocModifiedWithExistingSignature = Document(
            3, self._randomUnitVector())
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self._trieKey(self.doc1, self.pm)], {1, 3})

    def test_getNearestDocument_usingAKeyAlreadyInTrie(self):
        self.assertEqual(self.pm.getNearestDocuments(self.doc1), {1})

    def test_getNearestDocument_usingANearbyKeyInTrie(self):
        # Query with doc1's signature whose last bit has been flipped.
        digitReplacement = {'0': '1', '1': '0'}
        newDocWithANearbySignature = Document(3, self._randomUnitVector())
        exactSignature = self.doc1.signature.to01()
        newDocWithANearbySignature.signature = Signature(
            exactSignature[:-1] + digitReplacement[exactSignature[-1]])
        # assertNotEqual replaces the deprecated assertNotEquals alias,
        # which was removed from unittest in Python 3.12.
        self.assertNotEqual(self.doc1.signature.to01(),
                            newDocWithANearbySignature.signature.to01())
        # This assertion can sometimes fail because of randomization.
        # Run the tests again. It's OK!
        self.assertEqual(
            self.pm.getNearestDocuments(newDocWithANearbySignature), {1})

    def test_getNearestDocument_emptyTrie(self):
        permutationWithEmptyTrie = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.assertEqual(
            permutationWithEmptyTrie.getNearestDocuments(self.doc1), set())

    def test_removeDocument_documents(self):
        # Removing one of two documents sharing a key leaves the other in
        # place; removing the last one drops the key from the trie.
        newDocModifiedWithExistingSignature = Document(
            3, self._randomUnitVector())
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self._trieKey(self.doc1, self.pm)], {1, 3})
        self.pm.removeDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self._trieKey(self.doc1, self.pm)], {1})
        self.pm.removeDocument(self.doc1)
        self.assertIsNone(
            self.pm.signatureTrie.get(self._trieKey(self.doc1, self.pm)))

    def test_resetSignatureTrie(self):
        # Dedicated assertions report the actual length on failure.
        self.assertGreater(len(self.pm.signatureTrie), 0)
        self.pm.resetSignatureDataStructure()
        self.assertEqual(len(self.pm.signatureTrie), 0)
Ejemplo n.º 4
0
 def test_setSignatureUsingVectorPermutations(self):
     # Signing a document with explicitly permuted vectors and signing it
     # with the base vector plus the permutation objects must produce equal
     # signatures, both with a complete dimension map and with one that
     # registers only a few dimensions.
     dimensions, signatureLength = 53, 13

     completeMap = TwoWayMap()
     for dimensionId in range(dimensions):
         completeMap.set(TwoWayMap.MAP_FORWARD, dimensionId, dimensionId)
     # Only the first (dimensions - 50) dimensions are registered here.
     sparseMap = TwoWayMap()
     for dimensionId in range(dimensions - 50):
         sparseMap.set(TwoWayMap.MAP_FORWARD, dimensionId, dimensionId)

     unitVector = RandomGaussianUnitVector(
         dimensions=dimensions, mu=0, sigma=1)
     vectorPermutations = VectorPermutation.getPermutations(
         signatureLength, dimensions, unitVector)
     permutedVectors = []
     for permutation in vectorPermutations:
         permutedVectors.append(unitVector.getPermutedVector(permutation))

     # Both documents wrap the same vector; only their ids differ.
     sharedVector = VectorGenerator.getRandomGaussianUnitVector(
         dimension=dimensions, mu=0, sigma=1)
     signedByVectors = Document(1, sharedVector)
     signedByPermutations = Document(2, sharedVector)

     for dimensionMap in (completeMap, sparseMap):
         signedByVectors.setSignatureUsingVectors(
             permutedVectors, dimensionMap)
         signedByPermutations.setSignatureUsingVectorPermutations(
             unitVector, vectorPermutations, dimensionMap)
         self.assertEqual(signedByVectors.signature,
                          signedByPermutations.signature)
Ejemplo n.º 5
0
 def test_setSignatureUsingVectors(self):
     # Build a map that knows phrases 'a' (dimension 1) and 'b'
     # (dimension 2) but not 'c'.
     knownPhrases = TwoWayMap()
     knownPhrases.set(TwoWayMap.MAP_FORWARD, 'a', 1)
     knownPhrases.set(TwoWayMap.MAP_FORWARD, 'b', 2)

     # Same id, different phrase sets: the second one uses the unmapped 'c'.
     mappedDocument = Document(1, {'a': 1, 'b': 4})
     unmappedPhraseDocument = Document(1, {'a': 1, 'c': 4})

     vectors = [Vector({1: 3 / 5., 2: -4 / 5.}),
                Vector({1: -5 / 13., 2: 12 / 13.})]
     mappedDocument.setSignatureUsingVectors(vectors, knownPhrases)
     unmappedPhraseDocument.setSignatureUsingVectors(vectors, knownPhrases)

     # NOTE(review): the expectations match a sign-of-dot-product bit per
     # vector with the unmapped phrase dropped -- confirm against
     # Document.setSignatureUsingVectors.
     expectedMapped, expectedUnmapped = Signature('01'), Signature('10')
     self.assertEqual(expectedMapped, mappedDocument.signature)
     self.assertEqual(expectedUnmapped, unmappedPhraseDocument.signature)
Ejemplo n.º 6
0
    def test_setSignatureUsingVectorPermutations(self):
        # The two signing routes (pre-permuted vectors versus base vector
        # plus permutations) must agree, first with every dimension mapped
        # and then with most dimensions missing from the map.
        dimensions, signatureLength = 53, 13

        def buildIdentityMap(size):
            # Map each integer below `size` to itself.
            identityMap = TwoWayMap()
            for dimensionId in range(size):
                identityMap.set(TwoWayMap.MAP_FORWARD, dimensionId,
                                dimensionId)
            return identityMap

        phraseTextAndDimensionMap = buildIdentityMap(dimensions)
        phraseTextAndDimensionMapWithMissingDimensions = buildIdentityMap(
            dimensions - 50)

        unitVector = RandomGaussianUnitVector(
            dimensions=dimensions, mu=0, sigma=1)
        vectorPermutations = VectorPermutation.getPermutations(
            signatureLength, dimensions, unitVector)
        permutatedUnitVectors = [
            unitVector.getPermutedVector(permutation)
            for permutation in vectorPermutations
        ]

        # Both documents share one vector so only the signing route differs.
        documentVector = VectorGenerator.getRandomGaussianUnitVector(
            dimension=dimensions, mu=0, sigma=1)
        byVectors = Document(1, documentVector)
        byPermutations = Document(2, documentVector)

        def signBothAndCompare(dimensionMap):
            # Re-sign both documents against `dimensionMap` and require
            # identical signatures.
            byVectors.setSignatureUsingVectors(permutatedUnitVectors,
                                               dimensionMap)
            byPermutations.setSignatureUsingVectorPermutations(
                unitVector, vectorPermutations, dimensionMap)
            self.assertEqual(byVectors.signature, byPermutations.signature)

        signBothAndCompare(phraseTextAndDimensionMap)
        signBothAndCompare(phraseTextAndDimensionMapWithMissingDimensions)
Ejemplo n.º 7
0
class SignaturePermutationTests(unittest.TestCase):
    """Tests for SignaturePermutationWithTrie covering addDocument,
    removeDocument, getNearestDocuments and resetSignatureDataStructure."""

    def setUp(self):
        # Fixture: documents 1 and 2 are signed against the same set of
        # random unit vectors and stored in self.pm before every test.
        self.dimension, self.signatureLength = 50, 23
        self.phraseTextAndDimensionMap = TwoWayMap()
        for i in range(self.dimension):
            self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, i, i)
        self.unitRandomVectors = [
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1)
            for _ in range(self.signatureLength)
        ]
        self.doc1 = Document(
            1,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        self.doc2 = Document(
            2,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        self.doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.doc2.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.pm = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.pm.addDocument(self.doc1)
        self.pm.addDocument(self.doc2)

    def test_addDocument_newKey(self):
        # Uses its own empty permutation so the new key is the only entry.
        doc1 = Document(
            1,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                      self.phraseTextAndDimensionMap)
        pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        pm.addDocument(doc1)
        self.assertEqual(pm.signatureTrie[doc1.signature.permutate(pm).to01()],
                         {1})

    def test_addDocument_existingKey(self):
        # Document 3 is given doc1's exact signature, so both ids must end
        # up under the same trie key.
        newDocModifiedWithExistingSignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        doc1Key = self.doc1.signature.permutate(self.pm).to01()
        self.assertEqual(self.pm.signatureTrie[doc1Key], {1, 3})

    def test_getNearestDocument_usingAKeyAlreadyInTrie(self):
        self.assertEqual(self.pm.getNearestDocuments(self.doc1), {1})

    def test_getNearestDocument_usingANearbyKeyInTrie(self):
        # The query signature equals doc1's except for a flipped last bit.
        digitReplacement = {'0': '1', '1': '0'}
        newDocWithANearbySignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        exactSignature = self.doc1.signature.to01()
        newDocWithANearbySignature.signature = Signature(
            exactSignature[:-1] + digitReplacement[exactSignature[-1]])
        # assertNotEquals was a deprecated alias removed in Python 3.12;
        # assertNotEqual is the supported spelling.
        self.assertNotEqual(self.doc1.signature.to01(),
                            newDocWithANearbySignature.signature.to01())
        # This assertion can sometimes fail because of randomization.
        # Run the tests again. It's OK!
        self.assertEqual(
            self.pm.getNearestDocuments(newDocWithANearbySignature), {1})

    def test_getNearestDocument_emptyTrie(self):
        permutationWithEmptyTrie = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.assertEqual(
            permutationWithEmptyTrie.getNearestDocuments(self.doc1), set())

    def test_removeDocument_documents(self):
        # Add a document sharing doc1's key, then remove both one at a time
        # and check the trie entry after each step.
        newDocModifiedWithExistingSignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self.doc1.signature.permutate(
                self.pm).to01()], {1, 3})
        self.pm.removeDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self.doc1.signature.permutate(
                self.pm).to01()], {1})
        self.pm.removeDocument(self.doc1)
        # With its last document removed the key must be gone entirely.
        self.assertIsNone(
            self.pm.signatureTrie.get(
                self.doc1.signature.permutate(self.pm).to01()))

    def test_resetSignatureTrie(self):
        # assertGreater/assertEqual show the actual length when they fail,
        # unlike assertTrue on a boolean expression.
        self.assertGreater(len(self.pm.signatureTrie), 0)
        self.pm.resetSignatureDataStructure()
        self.assertEqual(len(self.pm.signatureTrie), 0)
Ejemplo n.º 8
0
 def test_addDocument_newKey(self):
     # Sign a new document, add it to a permutation that holds nothing
     # else, and check that the trie entry for its permuted signature
     # contains exactly its id.
     documentVector = VectorGenerator.getRandomGaussianUnitVector(
         dimension=self.dimension, mu=0, sigma=1)
     newDocument = Document(1, documentVector)
     newDocument.setSignatureUsingVectors(
         self.unitRandomVectors, self.phraseTextAndDimensionMap)

     permutation = SignaturePermutationWithTrie(
         signatureLength=self.signatureLength)
     permutation.addDocument(newDocument)

     permutedKey = newDocument.signature.permutate(permutation).to01()
     storedIds = permutation.signatureTrie[permutedKey]
     self.assertEqual(storedIds, {1})