def testReadWrite(self):
    """Serialize a LogEncoder to its proto, read it back, and compare.

    The encoder is written into a ``LogEncoderProto`` message, the message is
    pushed through a temporary file, and a second encoder is rebuilt from the
    message that was read back.  The rebuilt encoder must expose the same
    attribute values as the source encoder and must produce identical
    encodings and decodings.
    """
    source = LogEncoder(w=5, resolution=0.1, minval=1, maxval=10000,
                        name="amount", forced=True)
    baselineEncoding = source.encode(1.0)

    outgoing = LogEncoderProto.new_message()
    source.write(outgoing)

    # Round-trip the message through a temp file so that the restored encoder
    # is built from what was actually serialized, not from the in-memory proto.
    with tempfile.TemporaryFile() as handle:
        outgoing.write(handle)
        handle.seek(0)
        incoming = LogEncoderProto.read(handle)

    restored = LogEncoder.read(incoming)

    self.assertIsInstance(restored, LogEncoder)

    # Attributes that must survive the round trip unchanged, checked in order.
    preservedAttributes = (
        "minScaledValue",
        "maxScaledValue",
        "minval",
        "maxval",
        "name",
        "verbosity",
        "clipInput",
        "width",
        "description",
    )
    for attribute in preservedAttributes:
        self.assertEqual(getattr(restored, attribute),
                         getattr(source, attribute))

    self.assertIsInstance(restored.encoder, ScalarEncoder)

    # The restored encoder reproduces the encoding captured before writing.
    self.assertTrue(numpy.array_equal(restored.encode(1), baselineEncoding))

    # Each encoder must be able to decode the other's output identically.
    decodedBySource = source.decode(restored.encode(1))
    decodedByRestored = restored.decode(source.encode(1))
    self.assertEqual(decodedBySource, decodedByRestored)

    # Feed in a new value and ensure the encodings match
    fromSource = source.encode(10)
    fromRestored = restored.encode(10)
    self.assertTrue(numpy.array_equal(fromSource, fromRestored))
def testLogEncoder(self):
    """Exercise LogEncoder metadata, encoding, decoding and bucket lookups.

    Covers, in order: description and output field type, total width and
    bucket count, closeness scores, the encoding/decoding of 1, the
    missing-data sentinel, a sweep over the whole [minval, maxval] range
    checking top-down and bucket reconstruction, and the encoding/decoding
    of 100 and 10000.
    """
    # NOTE(review): a second definition named testLogEncoder appears later in
    # this file; if both live in the same class the later one shadows this
    # one -- confirm which is intended.

    # Create the encoder
    # use of forced=True is not recommended, but is used in the example for
    # readability, see scalar.py
    le = LogEncoder(w=5, resolution=0.1, minval=1, maxval=10000,
                    name="amount", forced=True)

    # Verify we're setting the description properly
    self.assertEqual(le.getDescription(), [("amount", 0)])

    # Verify we're getting the correct field types
    types = le.getDecoderOutputFieldTypes()
    self.assertEqual(types[0], FieldMetaType.float)

    # Verify the encoder ends up with the correct width
    #
    # 10^0 -> 10^4 => 0 -> 4; With a resolution of 0.1
    # 41 possible values plus 4 bits of padding = width 45
    self.assertEqual(le.getWidth(), 45)

    # Verify we have the correct number of possible values
    self.assertEqual(len(le.getBucketValues()), 41)

    # Verify closeness calculations
    testTuples = [([1], [10000], 0.0),
                  ([1], [1000], 0.25),
                  ([1], [1], 1.0),
                  ([1], [-200], 1.0)]
    for tm in testTuples:
        expected = tm[0]
        actual = tm[1]
        expectedResult = tm[2]
        self.assertEqual(le.closenessScores(expected, actual),
                         expectedResult,
                         "exp: %s act: %s expR: %s" % (str(expected),
                                                       str(actual),
                                                       str(expectedResult)))

    # Verify a value of 1.0 is encoded as expected
    value = 1.0
    output = le.encode(value)

    # Our expected encoded representation of the value 1 is the first
    # w bits on in an array of len width.
    expected = [1, 1, 1, 1, 1] + 40 * [0]
    # Convert to numpy array
    expected = numpy.array(expected, dtype="uint8")
    self.assertTrue(numpy.array_equal(output, expected))

    # Test reverse lookup
    decoded = le.decode(output)
    (fieldsDict, _) = decoded
    self.assertEqual(len(fieldsDict), 1)
    # dict.values() is a non-subscriptable view on Python 3, so materialize
    # it before taking the first (and only) entry.
    (ranges, _) = list(fieldsDict.values())[0]
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [1, 1]))

    # Verify an input representing a missing value is handled properly
    mvOutput = le.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    self.assertEqual(sum(mvOutput), 0)

    # Test top-down for all values
    value = le.minval
    while value <= le.maxval:
        output = le.encode(value)
        topDown = le.topDownCompute(output)

        # Do the scaling by hand here.
        scaledVal = math.log10(value)

        # Find the range of values that would also produce this top down
        # output.
        minTopDown = math.pow(10, (scaledVal - le.encoder.resolution))
        maxTopDown = math.pow(10, (scaledVal + le.encoder.resolution))

        # Verify the range surrounds this scaled val
        self.assertGreaterEqual(topDown.value, minTopDown)
        self.assertLessEqual(topDown.value, maxTopDown)

        # Test bucket support
        bucketIndices = le.getBucketIndices(value)
        topDown = le.getBucketInfo(bucketIndices)[0]

        # Verify our reconstructed value is in the valid range
        self.assertGreaterEqual(topDown.value, minTopDown)
        self.assertLessEqual(topDown.value, maxTopDown)

        # Same for the scalar value
        self.assertGreaterEqual(topDown.scalar, minTopDown)
        self.assertLessEqual(topDown.scalar, maxTopDown)

        # That the encoding portion of our EncoderResult matched the result
        # of encode()
        self.assertTrue(numpy.array_equal(topDown.encoding, output))

        # Verify our reconstructed value is the same as the bucket value.
        # Coerce the index to a plain int first: sequence indices must be
        # integers, and this keeps the lookup valid whatever numeric type
        # the encoder hands back.
        bucketValues = le.getBucketValues()
        self.assertEqual(topDown.value,
                         bucketValues[int(bucketIndices[0])])

        # Next value: advance by a quarter of the resolution in log space
        scaledVal += le.encoder.resolution / 4.0
        value = math.pow(10, scaledVal)

    # Verify next power of 10 encoding
    output = le.encode(100)
    # An increase of 2 decades is 20 resolution steps, so the run of w
    # on-bits starts 20 positions further right (bits 20..24).
    expected = 20 * [0] + [1, 1, 1, 1, 1] + 20 * [0]
    expected = numpy.array(expected, dtype="uint8")
    self.assertTrue(numpy.array_equal(output, expected))

    # Test reverse lookup
    decoded = le.decode(output)
    (fieldsDict, _) = decoded
    self.assertEqual(len(fieldsDict), 1)
    (ranges, _) = list(fieldsDict.values())[0]
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [100, 100]))

    # Verify next power of 10 encoding
    output = le.encode(10000)
    expected = 40 * [0] + [1, 1, 1, 1, 1]
    expected = numpy.array(expected, dtype="uint8")
    self.assertTrue(numpy.array_equal(output, expected))

    # Test reverse lookup
    decoded = le.decode(output)
    (fieldsDict, _) = decoded
    self.assertEqual(len(fieldsDict), 1)
    (ranges, _) = list(fieldsDict.values())[0]
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [10000, 10000]))
def testLogEncoder(self):
    """Check LogEncoder metadata, encodings, decodings and bucket support.

    The test builds one encoder (w=5, resolution=0.1, range 1..10000) and
    then verifies its description, output field type, width, bucket count,
    closeness scores, the exact bit patterns for 1, 100 and 10000 together
    with their decoded ranges, the all-zero encoding of the missing-data
    sentinel, and top-down / bucket reconstruction across the whole range.
    """
    # forced=True is not recommended in general; it is used here only to keep
    # the example readable (see scalar.py).
    le = LogEncoder(w=5, resolution=0.1, minval=1, maxval=10000,
                    name="amount", forced=True)

    def checkEncodeDecode(inputValue, zerosBefore, zerosAfter, decodedRange):
        # Encode inputValue, compare against a run of five on-bits framed by
        # the given numbers of zeros, then decode and compare the single
        # reported range with decodedRange.
        encoding = le.encode(inputValue)
        pattern = numpy.array(
            zerosBefore * [0] + [1, 1, 1, 1, 1] + zerosAfter * [0],
            dtype="uint8")
        self.assertTrue(numpy.array_equal(encoding, pattern))

        # Reverse lookup: exactly one field holding exactly one range.
        fields, _ = le.decode(encoding)
        self.assertEqual(len(fields), 1)
        fieldRanges, _ = list(fields.values())[0]
        self.assertEqual(len(fieldRanges), 1)
        self.assertTrue(numpy.array_equal(fieldRanges[0], decodedRange))

    # Description and decoder output field type
    self.assertEqual(le.getDescription(), [("amount", 0)])
    fieldTypes = le.getDecoderOutputFieldTypes()
    self.assertEqual(fieldTypes[0], FieldMetaType.float)

    # 10^0 -> 10^4 maps to 0 -> 4; at a resolution of 0.1 that is 41 possible
    # values, and 4 further bits of padding bring the width to 45.
    self.assertEqual(le.getWidth(), 45)
    self.assertEqual(len(le.getBucketValues()), 41)

    # Closeness calculations: (first input, second input, expected score)
    closenessCases = (
        ([1], [10000], 0.0),
        ([1], [1000], 0.25),
        ([1], [1], 1.0),
        ([1], [-200], 1.0),
    )
    for first, second, wantedScore in closenessCases:
        self.assertEqual(
            le.closenessScores(first, second),
            wantedScore,
            "exp: %s act: %s expR: %s" % (str(first), str(second),
                                          str(wantedScore)))

    # A value of 1.0 turns on the first w bits of the width-long array and
    # decodes back to the range [1, 1].
    checkEncodeDecode(1.0, 0, 40, [1, 1])

    # The missing-data sentinel must encode to all zeros.
    missingEncoding = le.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    self.assertEqual(sum(missingEncoding), 0)

    # Sweep the whole input range in log space, a quarter resolution per step.
    current = le.minval
    while current <= le.maxval:
        encoding = le.encode(current)
        result = le.topDownCompute(encoding)

        # Do the scaling by hand and derive the band of values that would
        # also produce this top-down output.
        logValue = math.log10(current)
        lowerBound = math.pow(10, (logValue - le.encoder.resolution))
        upperBound = math.pow(10, (logValue + le.encoder.resolution))

        # The top-down value must fall inside that band.
        self.assertGreaterEqual(result.value, lowerBound)
        self.assertLessEqual(result.value, upperBound)

        # Bucket support: both the reconstructed value and its scalar form
        # must fall inside the same band.
        indices = le.getBucketIndices(current)
        result = le.getBucketInfo(indices)[0]
        for reconstructed in (result.value, result.scalar):
            self.assertGreaterEqual(reconstructed, lowerBound)
            self.assertLessEqual(reconstructed, upperBound)

        # The encoding carried by the EncoderResult matches encode().
        self.assertTrue(numpy.array_equal(result.encoding, encoding))

        # The reconstructed value is exactly the bucket's value.
        allBucketValues = le.getBucketValues()
        self.assertEqual(result.value, allBucketValues[int(indices[0])])

        # Next value
        logValue += le.encoder.resolution / 4.0
        current = math.pow(10, logValue)

    # Two decades up is 20 resolution steps, so the on-bits start at bit 20.
    checkEncodeDecode(100, 20, 20, [100, 100])

    # The top of the range occupies the last w bits.
    checkEncodeDecode(10000, 40, 0, [10000, 10000])