def test_getNearestDocument_usingANearbyKeyInTrie(self):
     """Query with a signature that differs from doc1's in the last bit only.

     The nearest-document lookup is expected to return doc1's id (1).
     """
     flipBit = {'0': '1', '1': '0'}
     nearbyDoc = Document(3, VectorGenerator.getRandomGaussianUnitVector(dimension=self.dimension, mu=0, sigma=1))
     doc1Bits = self.doc1.signature.to01()
     # Same bit string as doc1 except for the final bit, which is inverted.
     nearbyDoc.signature = Signature(doc1Bits[:-1] + flipBit[doc1Bits[-1]])
     # assertNotEqual replaces the deprecated assertNotEquals alias.
     self.assertNotEqual(self.doc1.signature.to01(), nearbyDoc.signature.to01())
     # Per the original author, this assertion can occasionally fail because
     # of randomization; re-running the tests is the suggested remedy.
     self.assertEqual(self.pm.getNearestDocuments(nearbyDoc), set([1]))
 def test_removeDocument_documents(self):
     """Removing documents first shrinks, then drops, the trie entry for doc1's key."""
     duplicateSignatureDoc = Document(3, VectorGenerator.getRandomGaussianUnitVector(dimension=self.dimension, mu=0, sigma=1))
     duplicateSignatureDoc.signature = Signature(self.doc1.signature.to01())
     self.pm.addDocument(duplicateSignatureDoc)

     def doc1TrieKey():
         # Recomputed on every use, just like the inline expressions it replaces.
         return self.doc1.signature.permutate(self.pm).to01()

     # Both documents share one signature, so they share one trie entry.
     self.assertEqual(self.pm.signatureTrie[doc1TrieKey()], {1, 3})
     self.pm.removeDocument(duplicateSignatureDoc)
     self.assertEqual(self.pm.signatureTrie[doc1TrieKey()], {1})
     self.pm.removeDocument(self.doc1)
     # With the last document gone the key is expected to be absent.
     self.assertEqual(None, self.pm.signatureTrie.get(doc1TrieKey()))
 def setUp(self):
     """Build a trie-backed permutation that already holds documents 1 and 2."""
     self.dimension = 50
     self.signatureLength = 23

     def newRandomVector():
         return VectorGenerator.getRandomGaussianUnitVector(dimension=self.dimension, mu=0, sigma=1)

     # Identity mapping: dimension index i maps to i.
     self.phraseTextAndDimensionMap = TwoWayMap()
     for dim in range(self.dimension):
         self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, dim, dim)

     # One random vector per signature bit.
     self.unitRandomVectors = []
     for _ in range(self.signatureLength):
         self.unitRandomVectors.append(newRandomVector())

     self.doc1 = Document(1, newRandomVector())
     self.doc2 = Document(2, newRandomVector())
     for doc in (self.doc1, self.doc2):
         doc.setSignatureUsingVectors(self.unitRandomVectors, self.phraseTextAndDimensionMap)

     self.pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
     for doc in (self.doc1, self.doc2):
         self.pm.addDocument(doc)
Exemple #4
0
 def test_getNearestDocument_usingANearbyKeyInTrie(self):
     """Query with a signature one bit away from doc1's and expect doc1's id back."""
     invert = {'0': '1', '1': '0'}
     nearbyDoc = Document(
         3,
         VectorGenerator.getRandomGaussianUnitVector(
             dimension=self.dimension, mu=0, sigma=1))
     doc1Bits = self.doc1.signature.to01()
     # Keep every bit of doc1's signature except the last, which is flipped.
     nearbyDoc.signature = Signature(doc1Bits[:-1] + invert[doc1Bits[-1]])
     # assertNotEqual replaces the deprecated assertNotEquals alias.
     self.assertNotEqual(self.doc1.signature.to01(),
                         nearbyDoc.signature.to01())
     # Per the original author, this assertion can occasionally fail because
     # of randomization; re-running the tests is the suggested remedy.
     self.assertEqual(self.pm.getNearestDocuments(nearbyDoc), set([1]))
class SignaturePermutationTests(unittest.TestCase):
    """Tests for adding to, querying, removing from and resetting SignaturePermutationWithTrie."""

    def _randomUnitVector(self):
        """Return a fresh random Gaussian unit vector of self.dimension dimensions."""
        return VectorGenerator.getRandomGaussianUnitVector(dimension=self.dimension, mu=0, sigma=1)

    def _doc1TrieKey(self):
        """Return the trie key of doc1's signature under self.pm (recomputed each call)."""
        return self.doc1.signature.permutate(self.pm).to01()

    def setUp(self):
        """Create a permutation that already contains documents 1 and 2."""
        self.dimension, self.signatureLength = 50, 23
        # Identity mapping: dimension index i maps to i.
        self.phraseTextAndDimensionMap = TwoWayMap()
        for i in range(self.dimension):
            self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, i, i)
        # One random vector per signature bit.
        self.unitRandomVectors = [self._randomUnitVector() for _ in range(self.signatureLength)]
        self.doc1 = Document(1, self._randomUnitVector())
        self.doc2 = Document(2, self._randomUnitVector())
        self.doc1.setSignatureUsingVectors(self.unitRandomVectors, self.phraseTextAndDimensionMap)
        self.doc2.setSignatureUsingVectors(self.unitRandomVectors, self.phraseTextAndDimensionMap)
        self.pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        self.pm.addDocument(self.doc1)
        self.pm.addDocument(self.doc2)

    def test_addDocument_newKey(self):
        """Adding one document to an empty permutation stores its id under its key."""
        doc1 = Document(1, self._randomUnitVector())
        doc1.setSignatureUsingVectors(self.unitRandomVectors, self.phraseTextAndDimensionMap)
        pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        pm.addDocument(doc1)
        self.assertEqual(pm.signatureTrie[doc1.signature.permutate(pm).to01()], {1})

    def test_addDocument_existingKey(self):
        """A second document with doc1's signature joins doc1's trie entry."""
        duplicateSignatureDoc = Document(3, self._randomUnitVector())
        duplicateSignatureDoc.signature = Signature(self.doc1.signature.to01())
        self.pm.addDocument(duplicateSignatureDoc)
        self.assertEqual(self.pm.signatureTrie[self._doc1TrieKey()], {1, 3})

    def test_getNearestDocument_usingAKeyAlreadyInTrie(self):
        """Querying with a stored document returns that document's id."""
        self.assertEqual(self.pm.getNearestDocuments(self.doc1), {1})

    def test_getNearestDocument_usingANearbyKeyInTrie(self):
        """Querying with a signature one bit away from doc1's returns doc1's id."""
        flipBit = {'0': '1', '1': '0'}
        nearbyDoc = Document(3, self._randomUnitVector())
        doc1Bits = self.doc1.signature.to01()
        # Same bits as doc1 except for the last one, which is inverted.
        nearbyDoc.signature = Signature(doc1Bits[:-1] + flipBit[doc1Bits[-1]])
        # assertNotEqual replaces the deprecated assertNotEquals alias.
        self.assertNotEqual(self.doc1.signature.to01(), nearbyDoc.signature.to01())
        # Per the original author, this assertion can occasionally fail because
        # of randomization; re-running the tests is the suggested remedy.
        self.assertEqual(self.pm.getNearestDocuments(nearbyDoc), {1})

    def test_getNearestDocument_emptyTrie(self):
        """Querying a permutation with no documents returns an empty set."""
        permutationWithEmptyTrie = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        self.assertEqual(permutationWithEmptyTrie.getNearestDocuments(self.doc1), set())

    def test_removeDocument_documents(self):
        """Removing documents first shrinks, then drops, the trie entry."""
        duplicateSignatureDoc = Document(3, self._randomUnitVector())
        duplicateSignatureDoc.signature = Signature(self.doc1.signature.to01())
        self.pm.addDocument(duplicateSignatureDoc)
        self.assertEqual(self.pm.signatureTrie[self._doc1TrieKey()], {1, 3})
        self.pm.removeDocument(duplicateSignatureDoc)
        self.assertEqual(self.pm.signatureTrie[self._doc1TrieKey()], {1})
        self.pm.removeDocument(self.doc1)
        # With the last document gone the key is expected to be absent.
        self.assertEqual(None, self.pm.signatureTrie.get(self._doc1TrieKey()))

    def test_resetSignatureTrie(self):
        """Resetting the data structure leaves an empty trie."""
        self.assertGreater(len(self.pm.signatureTrie), 0)
        self.pm.resetSignatureDataStructure()
        self.assertEqual(len(self.pm.signatureTrie), 0)
Exemple #6
0
def main_app(doc_name1, doc_name2, queue=None, test_mode=False):
    """Compare two documents and report the outcome of both algorithms.

    Args:
        doc_name1: name handed to Document for the first document.
        doc_name2: name handed to Document for the second document.
        queue: optional object with a put() method; when given, it receives
            the GUI-formatted summary string.
        test_mode: forwarded to _global.init.

    Returns:
        None. A plain summary is printed to stdout.
    """
    _global.init('hebrew', test_mode=test_mode)

    # Prepare Documents, Detection & Recognition Phases
    doc1 = init_doc(Document(doc_name1))
    doc2 = init_doc(Document(doc_name2))

    # Verification Phase
    compare_docs = CompareDocuments(doc1, doc2)
    compare_docs.monkey_results()
    compare_docs.letters_autoencoder_results()
    s_count = compare_docs.ssim_count
    s_pred = s_count / compare_docs.ssim_total
    print("Ssim: count: {}, pred: {}".format(s_count, s_pred))

    # After the two calls above these attributes are read as result mappings.
    monkey = compare_docs.monkey_results
    letters_ae = compare_docs.letters_ae_results
    output = "Monkey Result:{}\nAE result: {}".format(monkey, letters_ae)

    algo1_text = "Algo1: Monkey Result:\n\t<{0}> [Confident: {1:.2f}%]\n".format(
        monkey['result'],
        monkey['precent'] * 100)
    algo2_text = "Algo2: AutoEncoder Letters Result:\n\t<{}> [Confident: {:.2f}%]\n\tResult By Predictions:\n\t<{}> [Confident: {:.2f}%]\n".format(
        letters_ae['result'],
        letters_ae['precent'] * 100,
        letters_ae['result_by_predictions'],
        letters_ae['precent_by_predictions'] * 100)

    # Final verdict: the shared result when both algorithms agree.
    if monkey['result'] == letters_ae['result']:
        conclusion = monkey['result'] + ">"
    else:
        conclusion = "Conflict>"

    # Same comparison, against the autoencoder's by-predictions result.
    conclusion2 = "\n\tWith AE by predictions:\n\t<"
    if monkey['result'] == letters_ae['result_by_predictions']:
        conclusion2 += monkey['result'] + ">"
    else:
        conclusion2 += "Conflict>"

    gui_output = "".join([
        algo1_text,
        algo2_text,
        "\n\nFinal Result:\n\t<",
        conclusion,
        conclusion2,
    ])
    if queue is not None:
        queue.put(gui_output)

    print(output)
 def test_setSignatureUsingVectorPermutations(self):
     """Signatures built from permuted vectors and from permutations must match.

     Checked twice: with a dimension map covering all 53 dimensions, and with
     one that only contains the first 3.
     """
     dimensions = 53
     signatureLength = 13
     fullMap = TwoWayMap()
     for dim in range(dimensions):
         fullMap.set(TwoWayMap.MAP_FORWARD, dim, dim)
     partialMap = TwoWayMap()
     for dim in range(dimensions - 50):
         partialMap.set(TwoWayMap.MAP_FORWARD, dim, dim)

     unitVector = RandomGaussianUnitVector(dimensions=dimensions, mu=0, sigma=1)
     vectorPermutations = VectorPermutation.getPermutations(signatureLength, dimensions, unitVector)
     permutedVectors = []
     for permutation in vectorPermutations:
         permutedVectors.append(unitVector.getPermutedVector(permutation))

     # Both documents wrap the same vector, so only the signature method differs.
     documentVector = VectorGenerator.getRandomGaussianUnitVector(dimension=dimensions, mu=0, sigma=1)
     byVectors = Document(1, documentVector)
     byPermutations = Document(2, documentVector)
     for dimensionMap in (fullMap, partialMap):
         byVectors.setSignatureUsingVectors(permutedVectors, dimensionMap)
         byPermutations.setSignatureUsingVectorPermutations(unitVector, vectorPermutations, dimensionMap)
         self.assertEqual(byVectors.signature, byPermutations.signature)
Exemple #8
0
 def createDocumentFromLine(docId, line):
     """Turn a whitespace-separated line into a Document.

     The first token becomes the clusterId; every later token is counted in
     a Vector under the dimension index assigned to that word in
     wordToDimensionMap (new words get the next free index).
     """
     vector = Vector()
     tokens = line.split()
     for token in tokens[1:]:
         if token not in wordToDimensionMap:
             # The current map size is the next unused dimension index.
             wordToDimensionMap[token] = len(wordToDimensionMap)
         dim = wordToDimensionMap[token]
         if dim in vector:
             vector[dim] += 1
         else:
             vector[dim] = 1
     return Document(docId, vector, clusterId=tokens[0])
Exemple #9
0
 def test_removeDocument_documents(self):
     """Check the trie entry for doc1's key as documents sharing it are removed."""
     twinDoc = Document(
         3,
         VectorGenerator.getRandomGaussianUnitVector(
             dimension=self.dimension, mu=0, sigma=1))
     twinDoc.signature = Signature(self.doc1.signature.to01())
     self.pm.addDocument(twinDoc)

     def currentKey():
         # Re-derived for every assertion, as in the inline form.
         return self.doc1.signature.permutate(self.pm).to01()

     # Ids 1 and 3 share a signature, hence a single trie entry.
     self.assertEqual(self.pm.signatureTrie[currentKey()], {1, 3})
     self.pm.removeDocument(twinDoc)
     self.assertEqual(self.pm.signatureTrie[currentKey()], {1})
     self.pm.removeDocument(self.doc1)
     # Nothing is left under the key, so get() is expected to yield None.
     self.assertEqual(None, self.pm.signatureTrie.get(currentKey()))
 def test_setSignatureUsingVectors(self):
     """Check the signatures produced for two small hand-built documents.

     One document only uses phrases present in the dimension map ('a', 'b');
     the other contains a phrase ('c') that the map does not know.
     """
     dimensionMap = TwoWayMap()
     for phrase, dim in (('a', 1), ('b', 2)):
         dimensionMap.set(TwoWayMap.MAP_FORWARD, phrase, dim)
     knownPhrasesDoc = Document(1, {'a': 1, 'b': 4})
     unknownPhraseDoc = Document(1, {'a': 1, 'c': 4})
     firstVector = Vector({1: 3 / 5., 2: -4 / 5.})
     secondVector = Vector({1: -5 / 13., 2: 12 / 13.})
     vectors = [firstVector, secondVector]
     knownPhrasesDoc.setSignatureUsingVectors(vectors, dimensionMap)
     unknownPhraseDoc.setSignatureUsingVectors(vectors, dimensionMap)
     self.assertEqual(Signature('01'), knownPhrasesDoc.signature)
     self.assertEqual(Signature('10'), unknownPhraseDoc.signature)
Exemple #11
0
def test_all_same(test_random_different=0):
    """Run the comparison pipeline over every a/b document pair in the data path.

    Every file whose name contains 'b' is paired with the name obtained by
    deleting all 'b' characters from it, and the pair is compared as a
    same-author test. Optionally, randomly drawn pairs are compared as
    different-author tests.

    Args:
        test_random_different: number of random pair draws to attempt. Draws
            that hit a matching a/b pair or the same name twice are skipped,
            so fewer tests than this may actually run.
    """
    b_files = []
    all_docs = []
    all_files = []
    s = Stats()

    # One traversal is enough: the original walked the tree twice and the
    # first pass was fully overwritten by the second. As before, every
    # directory visited replaces the lists, so the last one walked wins.
    for _, _, files in os.walk(_global.DATA_PATH):
        b_files = [x for x in files if 'b' in x]
        a_files = [x.replace('b', '') for x in b_files]
        all_files = a_files + b_files

    # Load every document once up front; pairs are looked up by name below.
    for file_name in all_files:
        print("Get Document obj for: {}".format(file_name))
        doc = Document(file_name)
        doc = init_doc(doc)
        all_docs.append(doc)

    # Same-author tests: each 'b' file against its counterpart.
    for file_name in b_files:
        doc1 = get_doc_by_name(all_docs, file_name)
        doc2 = get_doc_by_name(all_docs, file_name.replace('b', ''))
        print("\n---------------------")
        print("Test: {} {}".format(doc1.name, doc2.name))
        s.same_author = True
        compare_docs = CompareDocuments(doc1, doc2)
        get_ae_monkey_results(s, compare_docs)
        s.count_num_of_tests += 1

    # Different-author tests on random pairs.
    if test_random_different != 0:
        for _ in range(test_random_different):
            sampled_list = random.sample(all_files, 2)
            doc1 = get_doc_by_name(all_docs, sampled_list[0])
            doc2 = get_doc_by_name(all_docs, sampled_list[1])
            # Skip draws that are really an a/b pair or the same name twice.
            if doc1.name.replace('b', '') == doc2.name or doc2.name.replace('b', '') == doc1.name\
             or doc1.name == doc2.name:
                continue
            s.same_author = False
            print("\n---------------------")
            print("Test: {} {}".format(doc1.name, doc2.name))
            compare_docs = CompareDocuments(doc1, doc2)
            get_ae_monkey_results(s, compare_docs)
            s.count_num_of_tests += 1

    print_ae_monkey_results(s, len(b_files))
Exemple #12
0
 def test_setSignatureUsingVectors(self):
     """Verify signatures for a document with known phrases and one with an unknown phrase.

     The dimension map knows 'a' and 'b' only; the second document uses 'c'.
     """
     phraseMap = TwoWayMap()
     phraseMap.set(TwoWayMap.MAP_FORWARD, 'a', 1)
     phraseMap.set(TwoWayMap.MAP_FORWARD, 'b', 2)

     docAllMapped = Document(1, {'a': 1, 'b': 4})
     docPartlyMapped = Document(1, {'a': 1, 'c': 4})

     vectors = []
     vectors.append(Vector({1: 3 / 5., 2: -4 / 5.}))
     vectors.append(Vector({1: -5 / 13., 2: 12 / 13.}))

     docAllMapped.setSignatureUsingVectors(vectors, phraseMap)
     docPartlyMapped.setSignatureUsingVectors(vectors, phraseMap)

     self.assertEqual(Signature('01'), docAllMapped.signature)
     self.assertEqual(Signature('10'), docPartlyMapped.signature)
Exemple #13
0
def offlineLSHClusteringDemo():
    """Build an LSH model from training clusters and score it on test documents.

    Training documents are grouped by cluster id and each cluster is
    represented by the mean vector of its documents. Every test document is
    assigned the candidate cluster with the highest cosine similarity among
    those returned by the signature permutations.

    Returns:
        The value of EvaluationMetrics.purity(predicted, labels).
    """
    wordToDimensionMap = {}

    def createDocumentFromLine(docId, line):
        """Parse '<clusterId> word word ...' into a Document of word counts."""
        vector = Vector()
        words = line.split()
        for word in words[1:]:
            if word not in wordToDimensionMap:
                # The current map size is the next unused dimension index.
                wordToDimensionMap[word] = len(wordToDimensionMap)
            wordDimension = wordToDimensionMap[word]
            if wordDimension not in vector:
                vector[wordDimension] = 1
            else:
                vector[wordDimension] += 1
        return Document(docId, vector, clusterId=words[0])

    dimensions = 53
    signatureLength = 13
    numberOfPermutations = 5

    unitVector = RandomGaussianUnitVector(dimensions=dimensions, mu=0, sigma=1)
    vectorPermutations = VectorPermutation.getPermutations(
        signatureLength, dimensions, unitVector)
    signaturePermutations = [
        SignaturePermutationWithTrie(signatureLength)
        for _ in range(numberOfPermutations)
    ]

    permutatedUnitVectors = [
        unitVector.getPermutedVector(r) for r in vectorPermutations
    ]

    # Build LSH Model.
    # Read training documents.
    trainingDocumentsMap = {}
    for docId, l in enumerate(
            FileIO.iterateLinesFromFile('../data/train_offline.dat')):
        trainingDocumentsMap[docId] = createDocumentFromLine(docId, l)
    # Construct cluster vectors.
    clusterToDocumentsMap = defaultdict(list)
    for document in trainingDocumentsMap.values():
        clusterToDocumentsMap[document.clusterId].append(document)
    clusterMap = {}
    # items() instead of the Python 2-only iteritems().
    for k, v in clusterToDocumentsMap.items():
        clusterMap[k] = Document(docId=k,
                                 vector=Vector.getMeanVector(v),
                                 clusterId=k)

    # Create signatures and signaturePermutations for all the clusters.
    # Explicit loop: map() is lazy on Python 3, so using it only for its
    # side effects would silently skip the signature computation.
    for document in clusterMap.values():
        document.setSignatureUsingVectors(permutatedUnitVectors)
    for permutation in signaturePermutations:
        for document in clusterMap.values():
            permutation.addDocument(document)

    # Testing the model.
    # Read testing documents.
    testDocumentsMap = {}
    for docId, l in enumerate(
            FileIO.iterateLinesFromFile('../data/test_offline.dat')):
        testDocumentsMap[docId] = createDocumentFromLine(docId, l)
    # Create signatures for test documents (explicit loop, same reason as above).
    for document in testDocumentsMap.values():
        document.setSignatureUsingVectors(permutatedUnitVectors)

    predicted, labels = [], []
    for t in testDocumentsMap.values():
        # Union of the candidates from every permutation; set().union(*...)
        # avoids relying on the Python 2 builtin reduce.
        possibleNearestClusters = set().union(
            *[permutation.getNearestDocuments(t)
              for permutation in signaturePermutations])
        # NOTE: max() raises ValueError when no permutation yields a candidate.
        predictedClass = max(
            ((clusterId, clusterMap[clusterId].cosineSimilarity(t))
             for clusterId in possibleNearestClusters),
            key=itemgetter(1))
        predicted.append(predictedClass[0])
        labels.append(t.clusterId)
    return EvaluationMetrics.purity(predicted, labels)
Exemple #14
0
# # from gensim.models.ldamulticore import LdaMulticore
from os import getcwd

# NUM_TOPIC = 32
# NUM_WORDS = 10
# NUM_LINE = 10000
# NUM_PASS = 50

# Small settings for a quick run (the larger values are commented out above).
NUM_TOPIC = 5
NUM_WORDS = 10
NUM_LINE = 100
NUM_PASS = 10

print("start:")
docs_file = "data/doc.txt"
docs = []
line_counter = 0
# Each line must hold exactly two tab-separated fields; the second one is the
# document content, which is cleaned and collected.
with open(docs_file, 'r') as f:
    for line_counter, line in enumerate(f, 1):
        splits = line.strip().split('\t')
        assert len(splits) == 2, len(splits)
        content = splits[-1]
        docs.append(Document(content).get_string_clean())
        print("line counter:{}".format(line_counter))
        # Stop once NUM_LINE lines have been read.
        if line_counter == NUM_LINE:
            break
# Preview at most the first three documents; slicing avoids the IndexError
# the fixed range(3) raised when fewer than three lines were read.
for i, doc in enumerate(docs[:3]):
    print(i, doc)
print("=============================================")
def createDocumentFromLine(docId, line):
    """Build a Document from a whitespace-separated line.

    The first token is passed to Document as its first argument; each later
    token is counted in a Vector keyed by the token itself. The docId
    argument is not used.
    """
    vector = Vector()
    words = line.split()
    for token in words[1:]:
        if token in vector:
            vector[token] += 1
        else:
            vector[token] = 1
    return Document(words[0], vector)
Exemple #16
0
class SignaturePermutationTests(unittest.TestCase):
    """Tests covering add, nearest-lookup, remove and reset on SignaturePermutationWithTrie."""

    def _newVector(self):
        """Return a new random Gaussian unit vector with self.dimension dimensions."""
        return VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1)

    def _keyOfDoc1(self):
        """Return doc1's trie key under self.pm; computed anew on each call."""
        return self.doc1.signature.permutate(self.pm).to01()

    def setUp(self):
        """Prepare a permutation pre-loaded with documents 1 and 2."""
        self.dimension, self.signatureLength = 50, 23
        # Identity mapping: dimension index i maps to i.
        self.phraseTextAndDimensionMap = TwoWayMap()
        for i in range(self.dimension):
            self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, i, i)
        # One random vector per signature bit.
        self.unitRandomVectors = [
            self._newVector() for _ in range(self.signatureLength)
        ]
        self.doc1 = Document(1, self._newVector())
        self.doc2 = Document(2, self._newVector())
        self.doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.doc2.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.pm = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.pm.addDocument(self.doc1)
        self.pm.addDocument(self.doc2)

    def test_addDocument_newKey(self):
        """A document added to an empty permutation is stored under its own key."""
        doc1 = Document(1, self._newVector())
        doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                      self.phraseTextAndDimensionMap)
        pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        pm.addDocument(doc1)
        self.assertEqual(pm.signatureTrie[doc1.signature.permutate(pm).to01()],
                         {1})

    def test_addDocument_existingKey(self):
        """A document reusing doc1's signature is stored next to doc1."""
        twinDoc = Document(3, self._newVector())
        twinDoc.signature = Signature(self.doc1.signature.to01())
        self.pm.addDocument(twinDoc)
        self.assertEqual(self.pm.signatureTrie[self._keyOfDoc1()], {1, 3})

    def test_getNearestDocument_usingAKeyAlreadyInTrie(self):
        """Looking up a stored document yields its own id."""
        self.assertEqual(self.pm.getNearestDocuments(self.doc1), {1})

    def test_getNearestDocument_usingANearbyKeyInTrie(self):
        """Looking up a signature one bit away from doc1's yields doc1's id."""
        invert = {'0': '1', '1': '0'}
        nearbyDoc = Document(3, self._newVector())
        doc1Bits = self.doc1.signature.to01()
        # Copy doc1's bits and flip only the last one.
        nearbyDoc.signature = Signature(doc1Bits[:-1] + invert[doc1Bits[-1]])
        # assertNotEqual replaces the deprecated assertNotEquals alias.
        self.assertNotEqual(self.doc1.signature.to01(),
                            nearbyDoc.signature.to01())
        # Per the original author, this assertion can occasionally fail because
        # of randomization; re-running the tests is the suggested remedy.
        self.assertEqual(self.pm.getNearestDocuments(nearbyDoc), {1})

    def test_getNearestDocument_emptyTrie(self):
        """Looking up anything in an empty permutation yields an empty set."""
        permutationWithEmptyTrie = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.assertEqual(
            permutationWithEmptyTrie.getNearestDocuments(self.doc1), set())

    def test_removeDocument_documents(self):
        """Removing documents shrinks the shared entry and finally drops it."""
        twinDoc = Document(3, self._newVector())
        twinDoc.signature = Signature(self.doc1.signature.to01())
        self.pm.addDocument(twinDoc)
        self.assertEqual(self.pm.signatureTrie[self._keyOfDoc1()], {1, 3})
        self.pm.removeDocument(twinDoc)
        self.assertEqual(self.pm.signatureTrie[self._keyOfDoc1()], {1})
        self.pm.removeDocument(self.doc1)
        # With the last document gone the key is expected to be absent.
        self.assertEqual(None,
                         self.pm.signatureTrie.get(self._keyOfDoc1()))

    def test_resetSignatureTrie(self):
        """After a reset the trie holds no entries."""
        self.assertGreater(len(self.pm.signatureTrie), 0)
        self.pm.resetSignatureDataStructure()
        self.assertEqual(len(self.pm.signatureTrie), 0)
Exemple #17
0
    def test_setSignatureUsingVectorPermutations(self):
        """Both signature-building methods must produce equal signatures.

        The comparison runs once with a map of all 53 dimensions and once
        with a map holding only the first 3.
        """
        dimensions = 53
        signatureLength = 13

        completeMap = TwoWayMap()
        for dim in range(dimensions):
            completeMap.set(TwoWayMap.MAP_FORWARD, dim, dim)
        incompleteMap = TwoWayMap()
        for dim in range(dimensions - 50):
            incompleteMap.set(TwoWayMap.MAP_FORWARD, dim, dim)

        unitVector = RandomGaussianUnitVector(
            dimensions=dimensions, mu=0, sigma=1)
        vectorPermutations = VectorPermutation.getPermutations(
            signatureLength, dimensions, unitVector)
        permutedVectors = []
        for permutation in vectorPermutations:
            permutedVectors.append(unitVector.getPermutedVector(permutation))

        # The two documents share one vector; only the signature method differs.
        documentVector = VectorGenerator.getRandomGaussianUnitVector(
            dimension=dimensions, mu=0, sigma=1)
        viaVectors = Document(1, documentVector)
        viaPermutations = Document(2, documentVector)

        for dimensionMap in (completeMap, incompleteMap):
            viaVectors.setSignatureUsingVectors(permutedVectors, dimensionMap)
            viaPermutations.setSignatureUsingVectorPermutations(
                unitVector, vectorPermutations, dimensionMap)
            self.assertEqual(viaVectors.signature, viaPermutations.signature)
 def test_addDocument_newKey(self):
     """A document added to a fresh permutation is stored under its own trie key."""
     randomVector = VectorGenerator.getRandomGaussianUnitVector(dimension=self.dimension, mu=0, sigma=1)
     newDoc = Document(1, randomVector)
     newDoc.setSignatureUsingVectors(self.unitRandomVectors, self.phraseTextAndDimensionMap)
     freshPermutation = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
     freshPermutation.addDocument(newDoc)
     trieKey = newDoc.signature.permutate(freshPermutation).to01()
     self.assertEqual(freshPermutation.signatureTrie[trieKey], {1})