def test_getNearestDocument_usingANearbyKeyInTrie(self):
    # A document whose signature is doc1's signature with the last bit
    # flipped should still be matched to doc1 by getNearestDocuments.
    flippedBit = {'0': '1', '1': '0'}
    newDocWithANearbySignature = Document(
        3,
        VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1))
    exactSignature = self.doc1.signature.to01()
    newDocWithANearbySignature.signature = Signature(
        exactSignature[:-1] + flippedBit[exactSignature[-1]])
    # assertNotEqual is the supported spelling; the assertNotEquals alias
    # is deprecated and no longer exists in Python 3.12+.
    self.assertNotEqual(
        self.doc1.signature.to01(),
        newDocWithANearbySignature.signature.to01())
    # This assertion can sometimes fail because of randomization.
    # Run the tests again. It's OK!
    self.assertEqual(
        self.pm.getNearestDocuments(newDocWithANearbySignature), set([1]))
def test_removeDocument_documents(self):
    # Add a second document under doc1's signature, then remove both one at
    # a time and check the trie entry after every step.
    def entryKey():
        # Recomputed on every use, just like the inline expression it replaces.
        return self.doc1.signature.permutate(self.pm).to01()

    randomVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=self.dimension, mu=0, sigma=1)
    duplicate = Document(3, randomVector)
    duplicate.signature = Signature(self.doc1.signature.to01())

    self.pm.addDocument(duplicate)
    self.assertEqual(self.pm.signatureTrie[entryKey()], set([1, 3]))

    self.pm.removeDocument(duplicate)
    self.assertEqual(self.pm.signatureTrie[entryKey()], set([1]))

    # After the last document is removed the lookup must yield None.
    self.pm.removeDocument(self.doc1)
    self.assertEqual(None, self.pm.signatureTrie.get(entryKey()))
def setUp(self):
    # Fixture: an identity phrase/dimension map, one random unit vector per
    # signature bit, two random documents with signatures, and a trie-backed
    # permutation that already contains both documents.
    self.dimension = 50
    self.signatureLength = 23

    self.phraseTextAndDimensionMap = TwoWayMap()
    for dim in range(self.dimension):
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, dim, dim)

    def randomUnitVector():
        return VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1)

    self.unitRandomVectors = [
        randomUnitVector() for _ in range(self.signatureLength)
    ]
    self.doc1 = Document(1, randomUnitVector())
    self.doc2 = Document(2, randomUnitVector())
    for doc in (self.doc1, self.doc2):
        doc.setSignatureUsingVectors(
            self.unitRandomVectors, self.phraseTextAndDimensionMap)

    self.pm = SignaturePermutationWithTrie(
        signatureLength=self.signatureLength)
    for doc in (self.doc1, self.doc2):
        self.pm.addDocument(doc)
def test_getNearestDocument_usingANearbyKeyInTrie(self):
    # Flip the last bit of doc1's signature, give that signature to a new
    # document, and expect the lookup to still return doc1.
    digitReplacement = {'0': '1', '1': '0'}
    newDocWithANearbySignature = Document(
        3,
        VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1))
    exactSignature = self.doc1.signature.to01()
    nearbyBits = exactSignature[:-1] + digitReplacement[exactSignature[-1]]
    newDocWithANearbySignature.signature = Signature(nearbyBits)
    # Use assertNotEqual: the assertNotEquals alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertNotEqual(
        self.doc1.signature.to01(),
        newDocWithANearbySignature.signature.to01())
    # This assertion can sometimes fail because of randomization.
    # Run the tests again. It's OK!
    self.assertEqual(
        self.pm.getNearestDocuments(newDocWithANearbySignature), set([1]))
class SignaturePermutationTests(unittest.TestCase):
    """Tests for adding, finding, removing and resetting documents in a
    SignaturePermutationWithTrie."""

    def setUp(self):
        # Two random documents with signatures, both already added to self.pm.
        self.dimension, self.signatureLength = 50, 23
        self.phraseTextAndDimensionMap = TwoWayMap()
        for i in range(self.dimension):
            self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, i, i)
        self.unitRandomVectors = [
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1)
            for i in range(self.signatureLength)
        ]
        self.doc1 = Document(
            1,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        self.doc2 = Document(
            2,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        self.doc1.setSignatureUsingVectors(
            self.unitRandomVectors, self.phraseTextAndDimensionMap)
        self.doc2.setSignatureUsingVectors(
            self.unitRandomVectors, self.phraseTextAndDimensionMap)
        self.pm = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.pm.addDocument(self.doc1)
        self.pm.addDocument(self.doc2)

    def test_addDocument_newKey(self):
        # A document added to a fresh permutation is stored under its
        # permuted signature.
        doc1 = Document(
            1,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        doc1.setSignatureUsingVectors(
            self.unitRandomVectors, self.phraseTextAndDimensionMap)
        pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        pm.addDocument(doc1)
        self.assertEqual(
            pm.signatureTrie[doc1.signature.permutate(pm).to01()], set([1]))

    def test_addDocument_existingKey(self):
        # A second document with doc1's signature shares doc1's trie entry.
        newDocModifiedWithExistingSignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[
                self.doc1.signature.permutate(self.pm).to01()],
            set([1, 3]))

    def test_getNearestDocument_usingAKeyAlreadyInTrie(self):
        self.assertEqual(self.pm.getNearestDocuments(self.doc1), set([1]))

    def test_getNearestDocument_usingANearbyKeyInTrie(self):
        # Same signature as doc1 except for a flipped last bit.
        digitReplacement = {'0': '1', '1': '0'}
        newDocWithANearbySignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        exactSignature = self.doc1.signature.to01()
        newDocWithANearbySignature.signature = Signature(
            exactSignature[:-1] + digitReplacement[exactSignature[-1]])
        # assertNotEqual replaces the deprecated assertNotEquals alias,
        # which was removed from unittest in Python 3.12.
        self.assertNotEqual(
            self.doc1.signature.to01(),
            newDocWithANearbySignature.signature.to01())
        # This assertion can sometimes fail because of randomization.
        # Run the tests again. It's OK!
        self.assertEqual(
            self.pm.getNearestDocuments(newDocWithANearbySignature),
            set([1]))

    def test_getNearestDocument_emptyTrie(self):
        permutationWithEmptyTrie = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.assertEqual(
            permutationWithEmptyTrie.getNearestDocuments(self.doc1), set())

    def test_removeDocument_documents(self):
        # Removing documents shrinks the shared entry; once the last one is
        # gone the lookup returns None.
        newDocModifiedWithExistingSignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[
                self.doc1.signature.permutate(self.pm).to01()],
            set([1, 3]))
        self.pm.removeDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[
                self.doc1.signature.permutate(self.pm).to01()],
            set([1]))
        self.pm.removeDocument(self.doc1)
        self.assertEqual(
            None,
            self.pm.signatureTrie.get(
                self.doc1.signature.permutate(self.pm).to01()))

    def test_resetSignatureTrie(self):
        self.assertTrue(len(self.pm.signatureTrie) > 0)
        self.pm.resetSignatureDataStructure()
        self.assertTrue(len(self.pm.signatureTrie) == 0)
def main_app(doc_name1, doc_name2, queue=None, test_mode=False):
    """Compare two documents and report the verdict of both algorithms.

    Initialises ``_global`` for 'hebrew', prepares both documents with
    ``init_doc``, runs the monkey and letters-autoencoder comparisons,
    prints the SSIM count/ratio and a raw result summary, and, when a
    queue is supplied, puts a formatted report on it.

    Args:
        doc_name1: value passed to ``Document`` for the first document.
        doc_name2: value passed to ``Document`` for the second document.
        queue: optional object whose ``put`` receives the formatted report;
            nothing is sent when it is ``None``.
        test_mode: forwarded to ``_global.init``.
    """
    _global.init('hebrew', test_mode=test_mode)

    # Prepare Documents, Detection & Recognition Phases
    first_doc = init_doc(Document(doc_name1))
    second_doc = init_doc(Document(doc_name2))

    # Verification Phase
    compare_docs = CompareDocuments(first_doc, second_doc)
    # NOTE(review): monkey_results is called here and subscripted below;
    # presumably the call rebinds the attribute to a result dict -- confirm
    # against CompareDocuments.
    compare_docs.monkey_results()
    compare_docs.letters_autoencoder_results()

    s_count = compare_docs.ssim_count
    s_pred = s_count / compare_docs.ssim_total
    print("Ssim: count: {}, pred: {}".format(s_count, s_pred))

    output = "Monkey Result:{}\nAE result: {}".format(
        compare_docs.monkey_results, compare_docs.letters_ae_results)

    report = [
        "Algo1: Monkey Result:\n\t<{0}> [Confident: {1:.2f}%]\n".format(
            compare_docs.monkey_results['result'],
            compare_docs.monkey_results['precent'] * 100),
        "Algo2: AutoEncoder Letters Result:\n\t<{}> [Confident: {:.2f}%]\n\tResult By Predictions:\n\t<{}> [Confident: {:.2f}%]\n".format(
            compare_docs.letters_ae_results['result'],
            compare_docs.letters_ae_results['precent'] * 100,
            compare_docs.letters_ae_results['result_by_predictions'],
            compare_docs.letters_ae_results['precent_by_predictions'] * 100),
        "\n\nFinal Result:\n\t<",
    ]

    # The final verdict is the monkey result when both algorithms agree,
    # otherwise a conflict marker.
    if compare_docs.monkey_results['result'] == compare_docs.letters_ae_results['result']:
        report.append(compare_docs.monkey_results['result'] + ">")
    else:
        report.append("Conflict>")

    # Same comparison, against the autoencoder's by-predictions result.
    report.append("\n\tWith AE by predictions:\n\t<")
    if compare_docs.monkey_results['result'] == compare_docs.letters_ae_results['result_by_predictions']:
        report.append(compare_docs.monkey_results['result'] + ">")
    else:
        report.append("Conflict>")

    gui_output = "".join(report)
    if queue is not None:
        queue.put(gui_output)
    print(output)
def test_setSignatureUsingVectorPermutations(self):
    # Setting a signature from explicitly permuted unit vectors must give
    # the same result as setting it from the unit vector plus its
    # permutations -- with a map covering every dimension and with a map
    # covering only the first few.
    dimensions = 53
    signatureLength = 13

    fullMap = TwoWayMap()
    for dim in range(dimensions):
        fullMap.set(TwoWayMap.MAP_FORWARD, dim, dim)
    mapWithMissingDimensions = TwoWayMap()
    for dim in range(dimensions - 50):
        mapWithMissingDimensions.set(TwoWayMap.MAP_FORWARD, dim, dim)

    unitVector = RandomGaussianUnitVector(
        dimensions=dimensions, mu=0, sigma=1)
    vectorPermutations = VectorPermutation.getPermutations(
        signatureLength, dimensions, unitVector)
    permutedUnitVectors = [
        unitVector.getPermutedVector(permutation)
        for permutation in vectorPermutations
    ]

    documentVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=dimensions, mu=0, sigma=1)
    byVectors = Document(1, documentVector)
    byPermutations = Document(2, documentVector)

    for dimensionMap in (fullMap, mapWithMissingDimensions):
        byVectors.setSignatureUsingVectors(permutedUnitVectors, dimensionMap)
        byPermutations.setSignatureUsingVectorPermutations(
            unitVector, vectorPermutations, dimensionMap)
        self.assertEqual(byVectors.signature, byPermutations.signature)
def createDocumentFromLine(docId, line):
    """Build a Document from a whitespace-separated line.

    The first token becomes the document's ``clusterId``. Every later token
    is translated to an integer dimension through ``wordToDimensionMap``
    (an unseen word is assigned the map's current size) and the vector
    stores how many times each dimension occurred.
    """
    counts = Vector()
    tokens = line.split()
    for token in tokens[1:]:
        if token not in wordToDimensionMap:
            wordToDimensionMap[token] = len(wordToDimensionMap)
        dim = wordToDimensionMap[token]
        if dim in counts:
            counts[dim] += 1
        else:
            counts[dim] = 1
    return Document(docId, counts, clusterId=tokens[0])
def test_removeDocument_documents(self):
    # Two documents share doc1's signature; removing them one by one must
    # shrink the trie entry and finally make the lookup return None.
    def currentEntryKey():
        # Evaluated afresh each time, matching the original inline lookups.
        return self.doc1.signature.permutate(self.pm).to01()

    sameSignatureDoc = Document(
        3,
        VectorGenerator.getRandomGaussianUnitVector(
            dimension=self.dimension, mu=0, sigma=1))
    sameSignatureDoc.signature = Signature(self.doc1.signature.to01())
    self.pm.addDocument(sameSignatureDoc)
    self.assertEqual(self.pm.signatureTrie[currentEntryKey()], set([1, 3]))

    self.pm.removeDocument(sameSignatureDoc)
    self.assertEqual(self.pm.signatureTrie[currentEntryKey()], set([1]))

    self.pm.removeDocument(self.doc1)
    self.assertEqual(None, self.pm.signatureTrie.get(currentEntryKey()))
def test_setSignatureUsingVectors(self):
    # One document uses only phrases present in the map ('a', 'b'); the
    # other contains 'c', which the map does not know. Each must receive
    # the expected two-bit signature.
    dimensionMap = TwoWayMap()
    for phrase, dim in (('a', 1), ('b', 2)):
        dimensionMap.set(TwoWayMap.MAP_FORWARD, phrase, dim)

    mappedDoc = Document(1, {'a': 1, 'b': 4})
    partlyUnmappedDoc = Document(1, {'a': 1, 'c': 4})
    vectors = [
        Vector({1: 3 / 5., 2: -4 / 5.}),
        Vector({1: -5 / 13., 2: 12 / 13.}),
    ]
    for doc in (mappedDoc, partlyUnmappedDoc):
        doc.setSignatureUsingVectors(vectors, dimensionMap)

    self.assertEqual(Signature('01'), mappedDoc.signature)
    self.assertEqual(Signature('10'), partlyUnmappedDoc.signature)
def _compare_pair(s, doc1, doc2, same_author):
    # Run one comparison and record it in the running statistics.
    print("\n---------------------")
    print("Test: {} {}".format(doc1.name, doc2.name))
    s.same_author = same_author
    compare_docs = CompareDocuments(doc1, doc2)
    get_ae_monkey_results(s, compare_docs)
    s.count_num_of_tests += 1


def test_all_same(test_random_different=0):
    """Compare every 'b' file with its counterpart, plus random other pairs.

    Files under ``_global.DATA_PATH`` whose name contains 'b' are paired
    with the name obtained by removing every 'b'; each pair is compared as
    a same-author test. Optionally, random pairs are compared as
    different-author tests. Results are accumulated in a ``Stats`` object
    and printed at the end.

    Args:
        test_random_different: number of random pairs to draw. Draws that
            turn out to be the same file or an a/b pair are skipped, so
            fewer comparisons than this may actually run.
    """
    s = Stats()

    # The directory used to be walked twice with identical code; one pass
    # yields the same list.
    # NOTE: every directory visited overwrites b_files, so only the last
    # directory yielded by os.walk contributes (behaviour kept as-is).
    b_files = []
    for _, _, files in os.walk(_global.DATA_PATH):
        b_files = [x for x in files if 'b' in x]
    a_files = [x.replace('b', '') for x in b_files]
    all_files = a_files + b_files

    all_docs = []
    for file_name in all_files:
        print("Get Document obj for: {}".format(file_name))
        all_docs.append(init_doc(Document(file_name)))

    for file_name in b_files:
        doc1 = get_doc_by_name(all_docs, file_name)
        doc2 = get_doc_by_name(all_docs, file_name.replace('b', ''))
        _compare_pair(s, doc1, doc2, True)

    if test_random_different != 0:
        for _ in range(test_random_different):
            first_name, second_name = random.sample(all_files, 2)
            doc1 = get_doc_by_name(all_docs, first_name)
            doc2 = get_doc_by_name(all_docs, second_name)
            # Skip draws that are really the same author.
            if (doc1.name.replace('b', '') == doc2.name
                    or doc2.name.replace('b', '') == doc1.name
                    or doc1.name == doc2.name):
                continue
            _compare_pair(s, doc1, doc2, False)

    print_ae_monkey_results(s, len(b_files))
def test_setSignatureUsingVectors(self):
    # Checks the signature produced for a document whose phrases are all in
    # the phrase/dimension map and for one that also has an unmapped phrase.
    phraseMap = TwoWayMap()
    phraseMap.set(TwoWayMap.MAP_FORWARD, 'a', 1)
    phraseMap.set(TwoWayMap.MAP_FORWARD, 'b', 2)

    docAllMapped = Document(1, {'a': 1, 'b': 4})
    docWithUnmapped = Document(1, {'a': 1, 'c': 4})

    firstVector = Vector({1: 3 / 5., 2: -4 / 5.})
    secondVector = Vector({1: -5 / 13., 2: 12 / 13.})
    vectors = [firstVector, secondVector]

    docAllMapped.setSignatureUsingVectors(vectors, phraseMap)
    docWithUnmapped.setSignatureUsingVectors(vectors, phraseMap)

    expectations = (
        (Signature('01'), docAllMapped),
        (Signature('10'), docWithUnmapped),
    )
    for expected, doc in expectations:
        self.assertEqual(expected, doc.signature)
def offlineLSHClusteringDemo():
    """Build an LSH model from training clusters and score it on test data.

    Training documents are read from '../data/train_offline.dat', grouped by
    cluster id, and every cluster is represented by the mean vector of its
    documents. Cluster signatures are added to several signature
    permutations. Each test document from '../data/test_offline.dat' is then
    assigned the candidate cluster with the highest cosine similarity.

    Returns:
        The result of ``EvaluationMetrics.purity(predicted, labels)``.

    Raises:
        ValueError: from ``max`` when a test document has no candidate
            cluster in any permutation.
    """
    wordToDimensionMap = {}

    def createDocumentFromLine(docId, line):
        # First token is the cluster id; the rest are counted per dimension.
        vector = Vector()
        words = line.split()
        for word in words[1:]:
            if word not in wordToDimensionMap:
                wordToDimensionMap[word] = len(wordToDimensionMap)
            wordDimension = wordToDimensionMap[word]
            if wordDimension not in vector:
                vector[wordDimension] = 1
            else:
                vector[wordDimension] += 1
        return Document(docId, vector, clusterId=words[0])

    dimensions = 53
    signatureLength = 13
    numberOfPermutations = 5

    unitVector = RandomGaussianUnitVector(
        dimensions=dimensions, mu=0, sigma=1)
    vectorPermutations = VectorPermutation.getPermutations(
        signatureLength, dimensions, unitVector)
    signaturePermutations = [
        SignaturePermutationWithTrie(signatureLength)
        for i in range(numberOfPermutations)
    ]
    permutatedUnitVectors = [
        unitVector.getPermutedVector(r) for r in vectorPermutations
    ]

    # Build LSH Model.
    # Read training documents.
    traningDocumentsMap = {}
    for docId, line in enumerate(
            FileIO.iterateLinesFromFile('../data/train_offline.dat')):
        traningDocumentsMap[docId] = createDocumentFromLine(docId, line)

    # Construct cluster vectors.
    clusterToDocumentsMap = defaultdict(list)
    for document in traningDocumentsMap.values():
        clusterToDocumentsMap[document.clusterId].append(document)
    clusterMap = {}
    # items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for clusterId, documents in clusterToDocumentsMap.items():
        clusterMap[clusterId] = Document(
            docId=clusterId,
            vector=Vector.getMeanVector(documents),
            clusterId=clusterId)

    # Create signatures and signaturePermutations for all the clusters.
    # An explicit loop is required: map() is lazy on Python 3, so using it
    # only for its side effect would never set any signature.
    # NOTE(review): setSignatureUsingVectors is called with a single
    # argument here -- confirm this matches the method's current signature.
    for document in clusterMap.values():
        document.setSignatureUsingVectors(permutatedUnitVectors)
    for permutation in signaturePermutations:
        for document in clusterMap.values():
            permutation.addDocument(document)

    # Testing the model.
    # Read testing documents.
    testDocumentsMap = {}
    for docId, line in enumerate(
            FileIO.iterateLinesFromFile('../data/test_offline.dat')):
        testDocumentsMap[docId] = createDocumentFromLine(docId, line)

    # Create signatures for test documents
    for document in testDocumentsMap.values():
        document.setSignatureUsingVectors(permutatedUnitVectors)

    predicted, labels = [], []
    for t in testDocumentsMap.values():
        # Union of the candidates found by every permutation; set.union
        # avoids the bare reduce() builtin that only exists on Python 2.
        possibleNearestClusters = set().union(
            *(permutation.getNearestDocuments(t)
              for permutation in signaturePermutations))
        predictedClass = max(
            ((clusterId, clusterMap[clusterId].cosineSimilarity(t))
             for clusterId in possibleNearestClusters),
            key=itemgetter(1))
        predicted.append(predictedClass[0])
        labels.append(t.clusterId)
    return EvaluationMetrics.purity(predicted, labels)
#
# from gensim.models.ldamulticore import LdaMulticore
from os import getcwd

# Full-size settings, kept for reference:
# NUM_TOPIC = 32
# NUM_WORDS = 10
# NUM_LINE = 10000
# NUM_PASS = 50
NUM_TOPIC = 5
NUM_WORDS = 10
NUM_LINE = 100
NUM_PASS = 10

print("start:")
docs_file = "data/doc.txt"
docs = []
line_counter = 0
# Each input line must hold exactly two tab-separated fields; the second
# one is the document text. Reading stops after NUM_LINE lines.
with open(docs_file, 'r') as f:
    for line in f:
        splits = line.strip().split('\t')
        assert len(splits) == 2, len(splits)
        content = splits[-1]
        docs.append(Document(content).get_string_clean())
        line_counter += 1
        print("line counter:{}".format(line_counter))
        if line_counter == NUM_LINE:
            break

# Preview up to three cleaned documents. Slicing avoids the IndexError that
# indexing docs[0..2] raised when the file held fewer than three lines.
for i, doc in enumerate(docs[:3]):
    print(i, doc)
print("=============================================")
def createDocumentFromLine(docId, line):
    """Build a Document from a whitespace-separated line.

    The first token is used as the document id and the remaining tokens are
    counted per word in the document's vector. ``docId`` is accepted but
    not used by this function.
    """
    wordCounts = Vector()
    tokens = line.split()
    for token in tokens[1:]:
        if token in wordCounts:
            wordCounts[token] += 1
        else:
            wordCounts[token] = 1
    return Document(tokens[0], wordCounts)
class SignaturePermutationTests(unittest.TestCase):
    """Exercises SignaturePermutationWithTrie: adding documents, nearest
    document lookup, removal, and resetting the trie."""

    def setUp(self):
        # Identity phrase/dimension map, random unit vectors for the
        # signature bits, and two random documents pre-loaded into self.pm.
        self.dimension, self.signatureLength = 50, 23
        self.phraseTextAndDimensionMap = TwoWayMap()
        for i in range(self.dimension):
            self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, i, i)
        self.unitRandomVectors = [
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1)
            for i in range(self.signatureLength)
        ]
        self.doc1 = Document(
            1,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        self.doc2 = Document(
            2,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        self.doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.doc2.setSignatureUsingVectors(self.unitRandomVectors,
                                           self.phraseTextAndDimensionMap)
        self.pm = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.pm.addDocument(self.doc1)
        self.pm.addDocument(self.doc2)

    def test_addDocument_newKey(self):
        doc1 = Document(
            1,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        doc1.setSignatureUsingVectors(self.unitRandomVectors,
                                      self.phraseTextAndDimensionMap)
        pm = SignaturePermutationWithTrie(signatureLength=self.signatureLength)
        pm.addDocument(doc1)
        self.assertEqual(pm.signatureTrie[doc1.signature.permutate(pm).to01()],
                         set([1]))

    def test_addDocument_existingKey(self):
        # Document 3 gets doc1's signature, so both ids share one entry.
        newDocModifiedWithExistingSignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self.doc1.signature.permutate(
                self.pm).to01()], set([1, 3]))

    def test_getNearestDocument_usingAKeyAlreadyInTrie(self):
        self.assertEqual(self.pm.getNearestDocuments(self.doc1), set([1]))

    def test_getNearestDocument_usingANearbyKeyInTrie(self):
        # The probe signature differs from doc1's only in its last bit.
        digitReplacement = {'0': '1', '1': '0'}
        newDocWithANearbySignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        exactSignature = self.doc1.signature.to01()
        newDocWithANearbySignature.signature = Signature(
            exactSignature[:-1] + digitReplacement[exactSignature[-1]])
        # The assertNotEquals alias is deprecated and absent from
        # Python 3.12+; assertNotEqual is the supported name.
        self.assertNotEqual(self.doc1.signature.to01(),
                            newDocWithANearbySignature.signature.to01())
        # This assertion can sometimes fail because of randomization.
        # Run the tests again. It's OK!
        self.assertEqual(
            self.pm.getNearestDocuments(newDocWithANearbySignature), set([1]))

    def test_getNearestDocument_emptyTrie(self):
        permutationWithEmptyTrie = SignaturePermutationWithTrie(
            signatureLength=self.signatureLength)
        self.assertEqual(
            permutationWithEmptyTrie.getNearestDocuments(self.doc1), set())

    def test_removeDocument_documents(self):
        # The entry shrinks with each removal and the final lookup is None.
        newDocModifiedWithExistingSignature = Document(
            3,
            VectorGenerator.getRandomGaussianUnitVector(
                dimension=self.dimension, mu=0, sigma=1))
        newDocModifiedWithExistingSignature.signature = Signature(
            self.doc1.signature.to01())
        self.pm.addDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self.doc1.signature.permutate(
                self.pm).to01()], set([1, 3]))
        self.pm.removeDocument(newDocModifiedWithExistingSignature)
        self.assertEqual(
            self.pm.signatureTrie[self.doc1.signature.permutate(
                self.pm).to01()], set([1]))
        self.pm.removeDocument(self.doc1)
        self.assertEqual(
            None,
            self.pm.signatureTrie.get(
                self.doc1.signature.permutate(self.pm).to01()))

    def test_resetSignatureTrie(self):
        self.assertTrue(len(self.pm.signatureTrie) > 0)
        self.pm.resetSignatureDataStructure()
        self.assertTrue(len(self.pm.signatureTrie) == 0)
def test_setSignatureUsingVectorPermutations(self):
    # Both ways of setting a signature -- from pre-permuted unit vectors or
    # from the unit vector together with its permutations -- must agree,
    # first with a complete dimension map and then with a truncated one.
    dimensions, signatureLength = 53, 13

    def identityMap(size):
        # TwoWayMap that maps each of the first `size` dimensions to itself.
        mapping = TwoWayMap()
        for index in range(size):
            mapping.set(TwoWayMap.MAP_FORWARD, index, index)
        return mapping

    completeMap = identityMap(dimensions)
    truncatedMap = identityMap(dimensions - 50)

    unitVector = RandomGaussianUnitVector(
        dimensions=dimensions, mu=0, sigma=1)
    vectorPermutations = VectorPermutation.getPermutations(
        signatureLength, dimensions, unitVector)
    permutedVectors = [
        unitVector.getPermutedVector(p) for p in vectorPermutations
    ]
    documentVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=dimensions, mu=0, sigma=1)
    docByVectors = Document(1, documentVector)
    docByPermutations = Document(2, documentVector)

    docByVectors.setSignatureUsingVectors(permutedVectors, completeMap)
    docByPermutations.setSignatureUsingVectorPermutations(
        unitVector, vectorPermutations, completeMap)
    self.assertEqual(docByVectors.signature, docByPermutations.signature)

    docByVectors.setSignatureUsingVectors(permutedVectors, truncatedMap)
    docByPermutations.setSignatureUsingVectorPermutations(
        unitVector, vectorPermutations, truncatedMap)
    self.assertEqual(docByVectors.signature, docByPermutations.signature)
def test_addDocument_newKey(self):
    # Adding one document to an empty permutation must create a trie entry,
    # keyed by the permuted signature, that holds only that document's id.
    randomVector = VectorGenerator.getRandomGaussianUnitVector(
        dimension=self.dimension, mu=0, sigma=1)
    freshDoc = Document(1, randomVector)
    freshDoc.setSignatureUsingVectors(
        self.unitRandomVectors, self.phraseTextAndDimensionMap)

    freshPermutation = SignaturePermutationWithTrie(
        signatureLength=self.signatureLength)
    freshPermutation.addDocument(freshDoc)

    self.assertEqual(
        freshPermutation.signatureTrie[
            freshDoc.signature.permutate(freshPermutation).to01()],
        set([1]))