예제 #1
0
 def setUp(self):
     """Prepare the fixture shared by the tests.

     Creates a map that forward-maps each dimension index to itself,
     ``signatureLength`` random Gaussian unit vectors, and two documents
     (ids 1 and 2) whose signatures are set from those vectors. Both
     documents are then added to a new SignaturePermutationWithTrie kept
     in ``self.pm``.
     """
     self.dimension = 50
     self.signatureLength = 23

     # Forward-map every dimension index to itself.
     self.phraseTextAndDimensionMap = TwoWayMap()
     for dim in range(self.dimension):
         self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, dim, dim)

     def randomUnitVector():
         # All random vectors in this fixture use the same arguments.
         return VectorGenerator.getRandomGaussianUnitVector(
             dimension=self.dimension, mu=0, sigma=1)

     # Generation order matters for reproducibility under a seeded RNG:
     # signature vectors first, then doc1's vector, then doc2's.
     self.unitRandomVectors = [
         randomUnitVector() for _ in range(self.signatureLength)
     ]
     self.doc1 = Document(1, randomUnitVector())
     self.doc2 = Document(2, randomUnitVector())
     for doc in (self.doc1, self.doc2):
         doc.setSignatureUsingVectors(self.unitRandomVectors,
                                      self.phraseTextAndDimensionMap)

     self.pm = SignaturePermutationWithTrie(
         signatureLength=self.signatureLength)
     for doc in (self.doc1, self.doc2):
         self.pm.addDocument(doc)
예제 #2
0
 def setUp(self):
     """Build two signed documents and index them in ``self.pm``.

     The fixture consists of a dimension map that forward-maps each index
     to itself, a list of ``signatureLength`` random Gaussian unit vectors,
     documents 1 and 2 signed with those vectors, and a
     SignaturePermutationWithTrie containing both documents.
     """
     self.dimension, self.signatureLength = 50, 23

     self.phraseTextAndDimensionMap = TwoWayMap()
     for index in range(self.dimension):
         self.phraseTextAndDimensionMap.set(
             TwoWayMap.MAP_FORWARD, index, index)

     # Every random vector below is drawn with the same keyword arguments.
     vectorArgs = dict(dimension=self.dimension, mu=0, sigma=1)

     self.unitRandomVectors = []
     for _ in range(self.signatureLength):
         self.unitRandomVectors.append(
             VectorGenerator.getRandomGaussianUnitVector(**vectorArgs))

     self.doc1 = Document(
         1, VectorGenerator.getRandomGaussianUnitVector(**vectorArgs))
     self.doc2 = Document(
         2, VectorGenerator.getRandomGaussianUnitVector(**vectorArgs))

     self.doc1.setSignatureUsingVectors(
         self.unitRandomVectors, self.phraseTextAndDimensionMap)
     self.doc2.setSignatureUsingVectors(
         self.unitRandomVectors, self.phraseTextAndDimensionMap)

     self.pm = SignaturePermutationWithTrie(
         signatureLength=self.signatureLength)
     self.pm.addDocument(self.doc1)
     self.pm.addDocument(self.doc2)
예제 #3
0
 def test_getNearestDocument_usingANearbyKeyInTrie(self):
     """Query with a signature that differs from doc1's only in its last bit.

     A new document (id 3) is given doc1's signature with the final bit
     flipped, and ``getNearestDocuments`` is expected to return ``set([1])``.

     NOTE: per the original author, the final assertion can occasionally
     fail because the fixture is randomized; rerunning the tests is the
     accepted workaround.
     """
     digitReplacement = {'0': '1', '1': '0'}
     newDocWithANearbySignature = Document(
         3,
         VectorGenerator.getRandomGaussianUnitVector(
             dimension=self.dimension, mu=0, sigma=1))
     exactSignature = self.doc1.signature.to01()
     # Keep every bit except the last, which is inverted.
     newDocWithANearbySignature.signature = Signature(
         exactSignature[:-1] + digitReplacement[exactSignature[-1]])
     # assertNotEqual replaces the deprecated assertNotEquals alias,
     # which no longer exists in Python 3.12+.
     self.assertNotEqual(self.doc1.signature.to01(),
                         newDocWithANearbySignature.signature.to01())
     self.assertEqual(
         self.pm.getNearestDocuments(newDocWithANearbySignature), set([1]))
예제 #4
0
 def __init__(self, vector=None, dimensions=None, mu=None, sigma=None):
     """Initialize from an existing vector or from a freshly generated one.

     Args:
         vector: Vector passed unchanged to the base-class initializer.
             When omitted (None), a vector is generated instead.
         dimensions, mu, sigma: Forwarded positionally to
             ``VectorGenerator.getRandomGaussianUnitVector`` and used only
             when ``vector`` is None.
     """
     # Identity test rather than ``== None``: equality would dispatch to the
     # vector's own __eq__, which is not guaranteed to yield a plain bool.
     if vector is None:
         vector = VectorGenerator.getRandomGaussianUnitVector(
             dimensions, mu, sigma)
         # Only the generated vector is normalized; a caller-supplied
         # vector is stored as given.
         vector = vector.getNormalizedVector()
     super(RandomGaussianUnitVector, self).__init__(vector)
예제 #5
0
 def test_removeDocument_documents(self):
     """Removing documents shrinks, then clears, their shared trie entry.

     A document with id 3 is given doc1's signature and added, so the entry
     under doc1's permuted signature holds ``set([1, 3])``. Removing
     document 3 leaves ``set([1])``; removing doc1 makes ``get`` on that
     key return None.
     """
     def permutedDoc1Key():
         # Recomputed on each use, exactly as the inline expression was.
         return self.doc1.signature.permutate(self.pm).to01()

     duplicateDoc = Document(
         3,
         VectorGenerator.getRandomGaussianUnitVector(
             dimension=self.dimension, mu=0, sigma=1))
     duplicateDoc.signature = Signature(self.doc1.signature.to01())

     self.pm.addDocument(duplicateDoc)
     self.assertEqual(self.pm.signatureTrie[permutedDoc1Key()],
                      set([1, 3]))

     self.pm.removeDocument(duplicateDoc)
     self.assertEqual(self.pm.signatureTrie[permutedDoc1Key()], set([1]))

     self.pm.removeDocument(self.doc1)
     self.assertEqual(None, self.pm.signatureTrie.get(permutedDoc1Key()))
예제 #6
0
 def test_addDocument_existingKey(self):
     """Adding a document with doc1's signature extends doc1's trie entry.

     After document 3 (carrying a copy of doc1's signature) is added, the
     entry under doc1's permuted signature must equal ``set([1, 3])``.
     """
     randomVector = VectorGenerator.getRandomGaussianUnitVector(
         dimension=self.dimension, mu=0, sigma=1)
     twinDoc = Document(3, randomVector)
     # Overwrite the signature with a copy of doc1's bit string.
     twinDoc.signature = Signature(self.doc1.signature.to01())
     self.pm.addDocument(twinDoc)

     trie = self.pm.signatureTrie
     permutedKey = self.doc1.signature.permutate(self.pm).to01()
     self.assertEqual(trie[permutedKey], set([1, 3]))
예제 #7
0
 def test_addDocument_newKey(self):
     """Adding one document to a new trie stores its id under its key.

     Uses a locally created SignaturePermutationWithTrie (not ``self.pm``)
     and expects the entry under the document's permuted signature to be
     ``set([1])``.
     """
     randomVector = VectorGenerator.getRandomGaussianUnitVector(
         dimension=self.dimension, mu=0, sigma=1)
     soleDocument = Document(1, randomVector)
     soleDocument.setSignatureUsingVectors(
         self.unitRandomVectors, self.phraseTextAndDimensionMap)

     freshIndex = SignaturePermutationWithTrie(
         signatureLength=self.signatureLength)
     freshIndex.addDocument(soleDocument)

     trie = freshIndex.signatureTrie
     permutedKey = soleDocument.signature.permutate(freshIndex).to01()
     self.assertEqual(trie[permutedKey], set([1]))
예제 #8
0
 def test_getNearestDocument_usingANearbyKeyInTrie(self):
     """Look up a document whose signature is one bit away from doc1's.

     Document 3 receives doc1's signature with only the last bit inverted;
     ``getNearestDocuments`` is then expected to return ``set([1])``.

     NOTE: the original author warns that the last assertion can sometimes
     fail because of randomization, and that rerunning the tests is OK.
     """
     digitReplacement = {'0': '1', '1': '0'}
     newDocWithANearbySignature = Document(
         3,
         VectorGenerator.getRandomGaussianUnitVector(
             dimension=self.dimension, mu=0, sigma=1))
     exactSignature = self.doc1.signature.to01()
     # Same bit string as doc1 except for the flipped final digit.
     newDocWithANearbySignature.signature = Signature(
         exactSignature[:-1] + digitReplacement[exactSignature[-1]])
     # Use assertNotEqual: the assertNotEquals alias is deprecated and was
     # removed from unittest in Python 3.12.
     self.assertNotEqual(self.doc1.signature.to01(),
                         newDocWithANearbySignature.signature.to01())
     self.assertEqual(
         self.pm.getNearestDocuments(newDocWithANearbySignature), set([1]))
예제 #9
0
 def test_setSignatureUsingVectorPermutations(self):
     """Both signature-setting routes must yield equal signatures.

     One document is signed with ``setSignatureUsingVectors`` using the
     vectors returned by ``unitVector.getPermutedVector`` for each
     permutation; the other is signed with
     ``setSignatureUsingVectorPermutations`` using the base vector and the
     permutations. The comparison is made twice: with a map covering all
     53 dimensions and with one covering only the first ``53 - 50``.
     """
     dimensions, signatureLength = 53, 13

     def identityMap(size):
         # Forward-map each index in range(size) to itself.
         mapping = TwoWayMap()
         for index in range(size):
             mapping.set(TwoWayMap.MAP_FORWARD, index, index)
         return mapping

     fullMap = identityMap(dimensions)
     partialMap = identityMap(dimensions - 50)

     unitVector = RandomGaussianUnitVector(
         dimensions=dimensions, mu=0, sigma=1)
     vectorPermutations = VectorPermutation.getPermutations(
         signatureLength, dimensions, unitVector)
     permutedVectors = [
         unitVector.getPermutedVector(permutation)
         for permutation in vectorPermutations
     ]

     # Both documents share the same underlying vector.
     documentVector = VectorGenerator.getRandomGaussianUnitVector(
         dimension=dimensions, mu=0, sigma=1)
     signedByVectors = Document(1, documentVector)
     signedByPermutations = Document(2, documentVector)

     for dimensionMap in (fullMap, partialMap):
         signedByVectors.setSignatureUsingVectors(
             permutedVectors, dimensionMap)
         signedByPermutations.setSignatureUsingVectorPermutations(
             unitVector, vectorPermutations, dimensionMap)
         self.assertEqual(signedByVectors.signature,
                          signedByPermutations.signature)
예제 #10
0
    def test_setSignatureUsingVectorPermutations(self):
        """Signatures from explicit vectors and from permutations must match.

        Document 1 is signed via ``setSignatureUsingVectors`` with one
        vector per permutation (from ``unitVector.getPermutedVector``);
        document 2 is signed via ``setSignatureUsingVectorPermutations``
        with the base vector plus the permutations. Equality is asserted
        first with a map of all 53 dimensions, then with a map holding only
        ``dimensions - 50`` of them.
        """
        dimensions = 53
        signatureLength = 13

        completeMap = TwoWayMap()
        for dim in range(dimensions):
            completeMap.set(TwoWayMap.MAP_FORWARD, dim, dim)

        # Same mapping, truncated to the first (dimensions - 50) indices.
        truncatedMap = TwoWayMap()
        for dim in range(dimensions - 50):
            truncatedMap.set(TwoWayMap.MAP_FORWARD, dim, dim)

        unitVector = RandomGaussianUnitVector(
            dimensions=dimensions, mu=0, sigma=1)
        vectorPermutations = VectorPermutation.getPermutations(
            signatureLength, dimensions, unitVector)

        explicitVectors = []
        for permutation in vectorPermutations:
            explicitVectors.append(unitVector.getPermutedVector(permutation))

        sharedVector = VectorGenerator.getRandomGaussianUnitVector(
            dimension=dimensions, mu=0, sigma=1)
        docFromVectors = Document(1, sharedVector)
        docFromPermutations = Document(2, sharedVector)

        # Round 1: every dimension is present in the map.
        docFromVectors.setSignatureUsingVectors(explicitVectors, completeMap)
        docFromPermutations.setSignatureUsingVectorPermutations(
            unitVector, vectorPermutations, completeMap)
        self.assertEqual(docFromVectors.signature,
                         docFromPermutations.signature)

        # Round 2: most dimensions are missing from the map.
        docFromVectors.setSignatureUsingVectors(explicitVectors, truncatedMap)
        docFromPermutations.setSignatureUsingVectorPermutations(
            unitVector, vectorPermutations, truncatedMap)
        self.assertEqual(docFromVectors.signature,
                         docFromPermutations.signature)
예제 #11
0
 def test_addDocument_newKey(self):
     """A document added to an empty index is stored under its own key.

     A separate SignaturePermutationWithTrie is created for this test; after
     adding document 1, the entry under its permuted signature must be
     ``set([1])``.
     """
     newDocument = Document(
         1,
         VectorGenerator.getRandomGaussianUnitVector(
             dimension=self.dimension, mu=0, sigma=1))
     newDocument.setSignatureUsingVectors(
         self.unitRandomVectors, self.phraseTextAndDimensionMap)

     localIndex = SignaturePermutationWithTrie(
         signatureLength=self.signatureLength)
     localIndex.addDocument(newDocument)

     storedIds = localIndex.signatureTrie[
         newDocument.signature.permutate(localIndex).to01()]
     self.assertEqual(storedIds, set([1]))
예제 #12
0
 def __init__(self, vector=None, dimensions=None, mu=None, sigma=None):
     """Wrap a supplied vector, or generate and normalize a random one.

     Args:
         vector: Passed straight to the base-class initializer when given.
         dimensions, mu, sigma: Used only when ``vector`` is None; they are
             forwarded positionally to
             ``VectorGenerator.getRandomGaussianUnitVector``.
     """
     # ``is None`` instead of ``== None``: the equality operator can be
     # overridden by the vector type, whereas identity cannot.
     if vector is None:
         generated = VectorGenerator.getRandomGaussianUnitVector(
             dimensions, mu, sigma)
         # The generated vector is normalized before being stored.
         vector = generated.getNormalizedVector()
     super(RandomGaussianUnitVector, self).__init__(vector)