def testResolution(self):
    """Check ScalarEncoder output when it is configured via `resolution`.

    Builds an encoder over the range 0..100 with 10 active bits and a
    resolution of 0.5, encodes several pairs of values, and asserts the
    expected equality / number of shared active bits for each pair.
    """
    params = ScalarEncoderParameters()
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 100
    params.resolution = .5
    encoder = ScalarEncoder(params)
    left = SDR(encoder.parameters.size)
    right = SDR(encoder.parameters.size)

    # 0.0 and 0.1 are expected to produce identical encodings at this
    # resolution.
    encoder.encode(.0, left)
    encoder.encode(.1, right)
    assert left == right

    # Each row: (first value, second value, expected overlap in bits).
    expected_overlaps = (
        (.0, .6, 9),
        (70, 72.5, 5),
        (70, 75, 0),
        (60, 80, 0),
    )
    for first, second, shared_bits in expected_overlaps:
        encoder.encode(first, left)
        encoder.encode(second, right)
        assert left.getOverlap(right) == shared_bits
def testRadius(self):
    """Check ScalarEncoder output when it is configured via `radius`.

    Builds an encoder over the range 0..100 with 10 active bits and a
    radius of 10, encodes several pairs of values, and asserts the
    expected number of shared active bits for each pair.
    """
    params = ScalarEncoderParameters()
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 100
    params.radius = 10
    encoder = ScalarEncoder(params)
    left = SDR(encoder.parameters.size)
    right = SDR(encoder.parameters.size)

    # Each row: (first value, second value, expected overlap in bits).
    # The first row encodes the same value twice, so all 10 active bits
    # are expected to be shared.
    expected_overlaps = (
        (77, 77, 10),
        (0, 1, 9),
        (60, 69, 1),
        (45, 55, 0),
    )
    for first, second, shared_bits in expected_overlaps:
        encoder.encode(first, left)
        encoder.encode(second, right)
        assert left.getOverlap(right) == shared_bits
def testUnicode(self):
    """Check SimHashDocumentEncoder on documents made of non-ASCII tokens.

    Two three-token documents are encoded; every token of the second
    document is the matching token of the first with one extra character
    appended.  The test asserts that the overlap of the two encodings is
    above 65 bits with `tokenSimilarity` enabled and below 65 bits with it
    disabled.
    """
    base_doc = [
        "\u0395\u0396\u0397\u0398\u0399",
        "\u0400\u0401\u0402\u0403\u0404",
        "\u0405\u0406\u0407\u0408\u0409"]
    extended_doc = [
        "\u0395\u0396\u0397\u0398\u0399\u0410",
        "\u0400\u0401\u0402\u0403\u0404\u0410",
        "\u0405\u0406\u0407\u0408\u0409\u0410"]

    params = SimHashDocumentEncoderParameters()
    params.size = 400
    params.sparsity = 0.33

    # First pass: 'tokenSimilarity' switched on.
    params.tokenSimilarity = True
    similarity_encoder = SimHashDocumentEncoder(params)
    base_sdr = SDR(params.size)
    extended_sdr = SDR(params.size)
    similarity_encoder.encode(base_doc, base_sdr)
    similarity_encoder.encode(extended_doc, extended_sdr)
    assert base_sdr.getOverlap(extended_sdr) > 65

    # Second pass: 'tokenSimilarity' switched off, reusing the same SDRs
    # after clearing them.
    params.tokenSimilarity = False
    plain_encoder = SimHashDocumentEncoder(params)
    base_sdr.zero()
    extended_sdr.zero()
    plain_encoder.encode(base_doc, base_sdr)
    plain_encoder.encode(extended_doc, extended_sdr)
    assert base_sdr.getOverlap(extended_sdr) < 65
def testGetOverlap(self):
    """Check SDR.getOverlap counts and its handling of mismatched dimensions.

    Two 103-bit SDRs are filled step by step through their dense arrays and
    the overlap is asserted after each step.  Finally, calling getOverlap
    with an SDR of different dimensions is expected to raise RuntimeError.
    """
    A = SDR((103,))
    B = SDR((103,))
    assert A.getOverlap(B) == 0

    # NOTE(review): the dense array is edited in place and then assigned
    # back to itself; presumably the assignment is what makes the SDR
    # register the in-place edit -- confirm against the SDR documentation.
    A.dense[:10] = 1
    B.dense[:20] = 1
    A.dense = A.dense
    B.dense = B.dense
    assert A.getOverlap(B) == 10

    A.dense[:20] = 1
    A.dense = A.dense
    assert A.getOverlap(B) == 20

    # Bits 50..59 are set only in A and bit 0 is cleared in B, leaving
    # bits 1..19 in common.
    A.dense[50:60] = 1
    B.dense[0] = 0
    A.dense = A.dense
    B.dense = B.dense
    assert A.getOverlap(B) == 19

    # Test wrong dimensions
    C = SDR((1, 1, 1, 1, 103))
    C.randomize(.5)
    # assertRaises fails the test with a descriptive message when no
    # RuntimeError is raised; any other exception type still propagates.
    with self.assertRaises(RuntimeError):
        A.getOverlap(C)
def testAddNoise(self):
    """Check SDR.addNoise: resulting overlap, seeding, and return value.

    Asserts that adding 50% noise to a 103-bit SDR randomized at 10%
    sparsity leaves an overlap of 5 with the untouched copy, that unseeded
    noise on two identical SDRs yields different results, that noise with
    the same explicit seed yields equal results, and that addNoise returns
    the SDR it was called on.
    """
    noisy = SDR((103,))
    other = SDR((103,))

    noisy.randomize(.1)
    other.setSDR(noisy)
    noisy.addNoise(.5)
    assert noisy.getOverlap(other) == 5

    # Check different seed makes different results.
    # Both SDRs start identical (same randomize seed); addNoise is then
    # called without an explicit seed on each.
    noisy.randomize(.3, 42)
    other.randomize(.3, 42)
    noisy.addNoise(.5)
    other.addNoise(.5)
    assert noisy != other

    # Check same seed makes same results.
    noisy.randomize(.3, 42)
    other.randomize(.3, 42)
    noisy.addNoise(.5, 42)
    other.addNoise(.5, 42)
    assert noisy == other

    # Check that it returns itself.
    assert noisy.addNoise(.5) is noisy
def testTokenSimilarity(self):
    """Check how `tokenSimilarity` changes the overlap ordering of documents.

    Encodes the module-level documents testDoc1..testDoc4 with a
    case-sensitive SimHashDocumentEncoder, once with `tokenSimilarity`
    enabled and once with it disabled, and asserts a specific ordering of
    pairwise overlaps in each mode:

      ON:  (3,4) > (2,3) > (1,3) > (1,4)
      OFF: (1,2) > (2,3) > (3,4) > (1,3)
    """
    params = SimHashDocumentEncoderParameters()
    params.size = 400
    params.sparsity = 0.33
    params.caseSensitivity = True

    # tokenSimilarity ON
    params.tokenSimilarity = True
    similarity_encoder = SimHashDocumentEncoder(params)
    encodings = [SDR(params.size) for _ in range(4)]
    documents = (testDoc1, testDoc2, testDoc3, testDoc4)
    for document, encoding in zip(documents, encodings):
        similarity_encoder.encode(document, encoding)
    sdr1, sdr2, sdr3, sdr4 = encodings
    assert sdr3.getOverlap(sdr4) > sdr2.getOverlap(sdr3)
    assert sdr2.getOverlap(sdr3) > sdr1.getOverlap(sdr3)
    assert sdr1.getOverlap(sdr3) > sdr1.getOverlap(sdr4)

    # tokenSimilarity OFF
    # The same four SDRs are cleared and reused for the second encoder.
    params.tokenSimilarity = False
    plain_encoder = SimHashDocumentEncoder(params)
    for encoding in encodings:
        encoding.zero()
    for document, encoding in zip(documents, encodings):
        plain_encoder.encode(document, encoding)
    assert sdr1.getOverlap(sdr2) > sdr2.getOverlap(sdr3)
    assert sdr2.getOverlap(sdr3) > sdr3.getOverlap(sdr4)
    assert sdr3.getOverlap(sdr4) > sdr1.getOverlap(sdr3)