def testDecodeAndResolution(self):
  """Test the input description generation, top-down compute, and bucket
  support on a periodic encoder.

  The fixture encoder ``self._l`` is swept from ``minval`` up to (but not
  including) ``maxval`` in quarter-resolution steps. For every value the
  encoding must decode to exactly one field with one point range, and both
  ``topDownCompute`` and ``getBucketInfo`` must reproduce the encoding.
  Afterwards a table of hand-written bit patterns is decoded on an encoder
  constructed from ``radius`` instead of ``n``.
  """
  encoder = self._l
  v = encoder.minval

  while v < encoder.maxval:
    output = encoder.encode(v)
    decoded = encoder.decode(output)

    (fieldsDict, fieldNames) = decoded
    self.assertEqual(len(fieldsDict), 1)
    self.assertEqual(len(fieldNames), 1)
    # list(...) keeps the comparison and the indexing valid when keys() and
    # values() return views rather than lists.
    self.assertEqual(fieldNames, list(fieldsDict.keys()))
    (ranges, _) = list(fieldsDict.values())[0]
    self.assertEqual(len(ranges), 1)
    (rangeMin, rangeMax) = ranges[0]
    self.assertEqual(rangeMin, rangeMax)
    self.assertLess(abs(rangeMin - v), encoder.resolution)

    topDown = encoder.topDownCompute(output)[0]
    self.assertTrue(numpy.array_equal(topDown.encoding, output))
    self.assertLessEqual(abs(topDown.value - v), encoder.resolution / 2)

    # Test bucket support
    bucketIndices = encoder.getBucketIndices(v)
    topDown = encoder.getBucketInfo(bucketIndices)[0]
    self.assertLessEqual(abs(topDown.value - v), encoder.resolution / 2)
    self.assertEqual(topDown.value,
                     encoder.getBucketValues()[bucketIndices[0]])
    self.assertEqual(topDown.scalar, topDown.value)
    self.assertTrue(numpy.array_equal(topDown.encoding, output))

    # Next value
    v += encoder.resolution / 4

  # ---------------------------------------------------------------------
  # Test the input description generation on hand-written bit patterns.
  # None of these checks depend on v, so they run once, after the sweep.
  encoder = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)

  # Each entry is (input bits, expected decoded ranges).
  cases = (
      # Test with a "hole"
      ([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [[7.5, 7.5]]),
      # Test with something wider than w, and with a hole, and wrapped
      ([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [[7.5, 8], [1, 1]]),
      # Test with something wider than w, no hole
      ([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [[1.5, 2.5]]),
      # Test with 2 ranges
      ([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
       [[1.5, 1.5], [5.5, 6.0]]),
      # Test with 2 ranges, 1 of which is narrower than w
      ([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
       [[1.5, 1.5], [5.5, 6.0]]),
  )
  for bits, expectedRanges in cases:
    (fieldsDict, fieldNames) = encoder.decode(numpy.array(bits))
    self.assertEqual(len(fieldsDict), 1)
    (ranges, _) = list(fieldsDict.values())[0]
    # assertEqual, not assertTrue: assertTrue(len(ranges), 2) would treat 2
    # as the failure message and accept any non-empty list.
    self.assertEqual(len(ranges), len(expectedRanges))
    for actualRange, expectedRange in zip(ranges, expectedRanges):
      self.assertTrue(numpy.array_equal(actualRange, expectedRange))
def testNonPeriodicBottomUp(self):
  """Test Non-periodic encoder bottom-up

  Covers, in order: explicit encodings of a few values, construction from
  ``resolution`` or ``radius`` instead of ``n``, a decode / top-down /
  bucket sweep across the value range, decoding of bit patterns that
  contain holes, top-down values for the first and last encodings, and
  decode sweeps on a very small and a very large value range.
  """
  encoder = ScalarEncoder(name='scalar', n=14, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
  # Single-argument print(...) emits the same text whether print is a
  # statement or a function.
  print("\nTesting non-periodic encoder encoding, resolution of %f..." %
        encoder.resolution)

  expectedEncodings = (
      (1, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
      (2, [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
      (10, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]),
  )
  for value, bits in expectedEncodings:
    self.assertTrue(numpy.array_equal(
        encoder.encode(value), numpy.array(bits, dtype=defaultDtype)))

  # Test that we get the same encoder when we construct it using resolution
  # instead of n
  expectedState = encoder.__dict__
  encoder = ScalarEncoder(name='scalar', resolution=1, w=5, minval=1,
                          maxval=10, periodic=False, forced=True)
  self.assertEqual(encoder.__dict__, expectedState)

  # Test that we get the same encoder when we construct it using radius
  # instead of n
  encoder = ScalarEncoder(name='scalar', radius=5, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
  self.assertEqual(encoder.__dict__, expectedState)

  # -------------------------------------------------------------------------
  # Test the input description generation and topDown decoding of a
  # non-periodic encoder
  v = encoder.minval
  print("\nTesting non-periodic encoder decoding, resolution of %f..." %
        encoder.resolution)
  while v < encoder.maxval:
    output = encoder.encode(v)
    decoded = encoder.decode(output)
    print("decoding %s (%f)=> %s" %
          (output, v, encoder.decodedToStr(decoded)))

    (fieldsDict, fieldNames) = decoded
    self.assertEqual(len(fieldsDict), 1)
    # list(...) keeps the indexing valid when values() returns a view.
    (ranges, _) = list(fieldsDict.values())[0]
    self.assertEqual(len(ranges), 1)
    (rangeMin, rangeMax) = ranges[0]
    self.assertEqual(rangeMin, rangeMax)
    self.assertLess(abs(rangeMin - v), encoder.resolution)

    topDown = encoder.topDownCompute(output)[0]
    # Wrap in a 1-tuple so a tuple-like result is formatted as one value.
    print("topdown => %s" % (topDown,))
    self.assertTrue(numpy.array_equal(topDown.encoding, output))
    self.assertLessEqual(abs(topDown.value - v), encoder.resolution)

    # Test bucket support
    bucketIndices = encoder.getBucketIndices(v)
    print("bucket index => %s" % (bucketIndices[0],))
    topDown = encoder.getBucketInfo(bucketIndices)[0]
    self.assertLessEqual(abs(topDown.value - v), encoder.resolution / 2)
    self.assertEqual(topDown.scalar, topDown.value)
    self.assertTrue(numpy.array_equal(topDown.encoding, output))

    # Next value
    v += encoder.resolution / 4

  # Make sure we can fill in holes
  for bits in ([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]):
    decoded = encoder.decode(numpy.array(bits))
    (fieldsDict, fieldNames) = decoded
    self.assertEqual(len(fieldsDict), 1)
    (ranges, _) = list(fieldsDict.values())[0]
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))
    print("decodedToStr of %s => %s" %
          (ranges, encoder.decodedToStr(decoded)))

  # Test min and max
  encoder = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=10,
                          periodic=False, forced=True)
  decoded = encoder.topDownCompute(
      numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
  self.assertEqual(decoded.value, 10)
  decoded = encoder.topDownCompute(
      numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
  self.assertEqual(decoded.value, 1)

  # Make sure only the last and first encoding encodes to max and min, and
  # there is no value greater than max or min
  encoder = ScalarEncoder(name='scalar', n=140, w=3, minval=1, maxval=141,
                          periodic=False, forced=True)
  # NOTE(review): range(137) stops at i == 136, so the `i == 137` escape
  # below never applies and the window starting at bit 137 is not
  # exercised -- confirm whether range(138) was intended.
  for i in range(137):
    bits = numpy.zeros(140, dtype=int)
    bits[i:i + 3] = 1
    decoded = encoder.topDownCompute(bits)[0]
    self.assertLessEqual(decoded.value, 141)
    self.assertGreaterEqual(decoded.value, 1)
    self.assertTrue(decoded.value < 141 or i == 137)
    self.assertTrue(decoded.value > 1 or i == 0)

  # -------------------------------------------------------------------------
  # Test the input description generation and top-down compute on a small
  # number and then on a large number non-periodic encoder
  for minval, maxval in ((.001, .002), (1, 1000000000)):
    encoder = ScalarEncoder(name='scalar', n=15, w=3, minval=minval,
                            maxval=maxval, periodic=False, forced=True)
    print("\nTesting non-periodic encoder decoding, resolution of %f..." %
          encoder.resolution)
    v = encoder.minval
    while v < encoder.maxval:
      output = encoder.encode(v)
      decoded = encoder.decode(output)
      print("decoding %s (%f)=> %s" %
            (output, v, encoder.decodedToStr(decoded)))

      (fieldsDict, fieldNames) = decoded
      self.assertEqual(len(fieldsDict), 1)
      (ranges, _) = list(fieldsDict.values())[0]
      self.assertEqual(len(ranges), 1)
      (rangeMin, rangeMax) = ranges[0]
      self.assertEqual(rangeMin, rangeMax)
      self.assertLess(abs(rangeMin - v), encoder.resolution)

      topDownValue = encoder.topDownCompute(output)[0].value
      print("topdown => %s" % (topDownValue,))
      self.assertLessEqual(abs(topDownValue - v), encoder.resolution / 2)
      v += encoder.resolution / 4
class ScalarEncoderTest(unittest.TestCase):
  """Unit tests for ScalarEncoder class"""


  def setUp(self):
    # use of forced is not recommended, but used here for readability, see
    # scalar.py
    self._l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                            periodic=True, forced=True)


  def testScalarEncoder(self):
    """Testing ScalarEncoder..."""
    # test missing values: the sentinel must encode to all zeros
    mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                       periodic=False, forced=True)
    empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    # Single-argument print(...) emits the same text whether print is a
    # statement or a function.
    print("\nEncoded missing data \'None\' as %s" % empty)
    self.assertEqual(empty.sum(), 0)


  def testNaNs(self):
    """test NaNs"""
    mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                       periodic=False, forced=True)
    empty = mv.encode(float("nan"))
    print("\nEncoded missing data \'None\' as %s" % empty)
    self.assertEqual(empty.sum(), 0)


  def testBottomUpEncodingPeriodicEncoder(self):
    """Test bottom-up encoding for a Periodic encoder"""
    # Without an explicit name the description is derived from the range.
    l = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True,
                      forced=True)
    self.assertEqual(l.getDescription(), [("[1:8]", 0)])
    l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)
    self.assertEqual(l.getDescription(), [("scalar", 0)])

    expectedEncodings = (
        (3, [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
        (3.5, [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]),
        (4, [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0]),
        (1, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
        (1.5, [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
        (7, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]),
        (7.5, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]),
    )
    for value, bits in expectedEncodings:
      self.assertTrue(numpy.array_equal(
          l.encode(value), numpy.array(bits, dtype=defaultDtype)))

    # Pairs of values that must produce the same encoding
    for value, sameAs in ((3.1, 3), (3.6, 3.5), (3.7, 3.5)):
      self.assertTrue(numpy.array_equal(l.encode(value), l.encode(sameAs)))

    self.assertEqual(l.resolution, 0.5)
    self.assertEqual(l.radius, 1.5)


  def testCreateResolution(self):
    """Test that we get the same encoder when we construct it using
    resolution instead of n
    """
    l = self._l
    d = l.__dict__
    l = ScalarEncoder(name='scalar', resolution=0.5, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)
    self.assertEqual(l.__dict__, d)

    # Test that we get the same encoder when we construct it using radius
    # instead of n
    l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)
    self.assertEqual(l.__dict__, d)


  def testDecodeAndResolution(self):
    """Test the input description generation, top-down compute, and bucket
    support on a periodic encoder.

    Sweeps the fixture encoder from ``minval`` up to (but not including)
    ``maxval`` in quarter-resolution steps, then decodes a table of
    hand-written bit patterns on an encoder constructed from ``radius``.
    """
    l = self._l
    print(l.resolution)
    v = l.minval
    while v < l.maxval:
      output = l.encode(v)
      decoded = l.decode(output)
      print("decoding %s (%f)=> %s" % (output, v, l.decodedToStr(decoded)))

      (fieldsDict, fieldNames) = decoded
      self.assertEqual(len(fieldsDict), 1)
      # list(...) keeps the indexing valid when values() returns a view.
      (ranges, _) = list(fieldsDict.values())[0]
      self.assertEqual(len(ranges), 1)
      (rangeMin, rangeMax) = ranges[0]
      self.assertEqual(rangeMin, rangeMax)
      self.assertLess(abs(rangeMin - v), l.resolution)

      topDown = l.topDownCompute(output)[0]
      # Wrap in a 1-tuple so a tuple-like result is formatted as one value.
      print("topdown => %s" % (topDown,))
      self.assertTrue(numpy.array_equal(topDown.encoding, output))
      self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)

      # Test bucket support
      bucketIndices = l.getBucketIndices(v)
      print("bucket index => %s" % (bucketIndices[0],))
      topDown = l.getBucketInfo(bucketIndices)[0]
      self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
      self.assertEqual(topDown.value, l.getBucketValues()[bucketIndices[0]])
      self.assertEqual(topDown.scalar, topDown.value)
      self.assertTrue(numpy.array_equal(topDown.encoding, output))

      # Next value
      v += l.resolution / 4

    # -----------------------------------------------------------------------
    # Test the input description generation on hand-written bit patterns.
    # None of these checks depend on v, so they run once, after the sweep.
    l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)
    print("\nTesting periodic encoder decoding, resolution of %f..." %
          l.resolution)

    # Each entry is (input bits, expected decoded ranges).
    cases = (
        # Test with a "hole"
        ([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
         [[7.5, 7.5]]),
        # Test with something wider than w, and with a hole, and wrapped
        ([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
         [[7.5, 8], [1, 1]]),
        # Test with something wider than w, no hole
        ([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [[1.5, 2.5]]),
        # Test with 2 ranges
        ([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
         [[1.5, 1.5], [5.5, 6.0]]),
        # Test with 2 ranges, 1 of which is narrower than w
        ([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
         [[1.5, 1.5], [5.5, 6.0]]),
    )
    for bits, expectedRanges in cases:
      decoded = l.decode(numpy.array(bits))
      (fieldsDict, fieldNames) = decoded
      self.assertEqual(len(fieldsDict), 1)
      (ranges, _) = list(fieldsDict.values())[0]
      self.assertEqual(len(ranges), len(expectedRanges))
      for actualRange, expectedRange in zip(ranges, expectedRanges):
        self.assertTrue(numpy.array_equal(actualRange, expectedRange))
      print("decodedToStr of %s => %s" % (ranges, l.decodedToStr(decoded)))


  def testCloseness(self):
    """Test closenessScores for a periodic encoder"""
    encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1, periodic=True,
                            name="day of week", forced=True)
    scores = encoder.closenessScores((2, 4, 7), (4, 2, 1), fractional=False)
    # Builtin zip replaces itertools.izip, which Python 3 does not provide.
    for expected, score in zip((2, 2, 1), scores):
      self.assertEqual(expected, score)


  def testNonPeriodicBottomUp(self):
    """Test Non-periodic encoder bottom-up

    Covers, in order: explicit encodings of a few values, construction from
    ``resolution`` or ``radius`` instead of ``n``, a decode / top-down /
    bucket sweep across the value range, decoding of bit patterns that
    contain holes, top-down values for the first and last encodings, and
    decode sweeps on a very small and a very large value range.
    """
    l = ScalarEncoder(name='scalar', n=14, w=5, minval=1, maxval=10,
                      periodic=False, forced=True)
    print("\nTesting non-periodic encoder encoding, resolution of %f..." %
          l.resolution)

    expectedEncodings = (
        (1, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
        (2, [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
        (10, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]),
    )
    for value, bits in expectedEncodings:
      self.assertTrue(numpy.array_equal(
          l.encode(value), numpy.array(bits, dtype=defaultDtype)))

    # Test that we get the same encoder when we construct it using resolution
    # instead of n
    d = l.__dict__
    l = ScalarEncoder(name='scalar', resolution=1, w=5, minval=1, maxval=10,
                      periodic=False, forced=True)
    self.assertEqual(l.__dict__, d)

    # Test that we get the same encoder when we construct it using radius
    # instead of n
    l = ScalarEncoder(name='scalar', radius=5, w=5, minval=1, maxval=10,
                      periodic=False, forced=True)
    self.assertEqual(l.__dict__, d)

    # -----------------------------------------------------------------------
    # Test the input description generation and topDown decoding of a
    # non-periodic encoder
    v = l.minval
    print("\nTesting non-periodic encoder decoding, resolution of %f..." %
          l.resolution)
    while v < l.maxval:
      output = l.encode(v)
      decoded = l.decode(output)
      print("decoding %s (%f)=> %s" % (output, v, l.decodedToStr(decoded)))

      (fieldsDict, fieldNames) = decoded
      self.assertEqual(len(fieldsDict), 1)
      (ranges, _) = list(fieldsDict.values())[0]
      self.assertEqual(len(ranges), 1)
      (rangeMin, rangeMax) = ranges[0]
      self.assertEqual(rangeMin, rangeMax)
      self.assertLess(abs(rangeMin - v), l.resolution)

      topDown = l.topDownCompute(output)[0]
      print("topdown => %s" % (topDown,))
      self.assertTrue(numpy.array_equal(topDown.encoding, output))
      self.assertLessEqual(abs(topDown.value - v), l.resolution)

      # Test bucket support
      bucketIndices = l.getBucketIndices(v)
      print("bucket index => %s" % (bucketIndices[0],))
      topDown = l.getBucketInfo(bucketIndices)[0]
      self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
      self.assertEqual(topDown.scalar, topDown.value)
      self.assertTrue(numpy.array_equal(topDown.encoding, output))

      # Next value
      v += l.resolution / 4

    # Make sure we can fill in holes
    for bits in ([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]):
      decoded = l.decode(numpy.array(bits))
      (fieldsDict, fieldNames) = decoded
      self.assertEqual(len(fieldsDict), 1)
      (ranges, _) = list(fieldsDict.values())[0]
      self.assertEqual(len(ranges), 1)
      self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))
      print("decodedToStr of %s => %s" % (ranges, l.decodedToStr(decoded)))

    # Test min and max
    l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=10,
                      periodic=False, forced=True)
    decoded = l.topDownCompute(
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
    self.assertEqual(decoded.value, 10)
    decoded = l.topDownCompute(
        numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
    self.assertEqual(decoded.value, 1)

    # Make sure only the last and first encoding encodes to max and min, and
    # there is no value greater than max or min
    l = ScalarEncoder(name='scalar', n=140, w=3, minval=1, maxval=141,
                      periodic=False, forced=True)
    # NOTE(review): range(137) stops at i == 136, so the `i == 137` escape
    # below never applies and the window starting at bit 137 is not
    # exercised -- confirm whether range(138) was intended.
    for i in range(137):
      bits = numpy.zeros(140, dtype=int)
      bits[i:i + 3] = 1
      decoded = l.topDownCompute(bits)[0]
      self.assertLessEqual(decoded.value, 141)
      self.assertGreaterEqual(decoded.value, 1)
      self.assertTrue(decoded.value < 141 or i == 137)
      self.assertTrue(decoded.value > 1 or i == 0)

    # -----------------------------------------------------------------------
    # Test the input description generation and top-down compute on a small
    # number and then on a large number non-periodic encoder
    for minval, maxval in ((.001, .002), (1, 1000000000)):
      l = ScalarEncoder(name='scalar', n=15, w=3, minval=minval,
                        maxval=maxval, periodic=False, forced=True)
      print("\nTesting non-periodic encoder decoding, resolution of %f..." %
            l.resolution)
      v = l.minval
      while v < l.maxval:
        output = l.encode(v)
        decoded = l.decode(output)
        print("decoding %s (%f)=> %s" % (output, v, l.decodedToStr(decoded)))

        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - v), l.resolution)

        topDownValue = l.topDownCompute(output)[0].value
        print("topdown => %s" % (topDownValue,))
        self.assertLessEqual(abs(topDownValue - v), l.resolution / 2)
        v += l.resolution / 4


  def testEncodeInvalidInputType(self):
    """Encoding a string must raise TypeError."""
    encoder = ScalarEncoder(name='enc', n=14, w=3, minval=1, maxval=8,
                            periodic=False, forced=True)
    with self.assertRaises(TypeError):
      encoder.encode("String")


  def testGetBucketInfoIntResolution(self):
    """Ensures that passing resolution as an int doesn't truncate values."""
    encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8,
                            periodic=True, forced=True)
    self.assertEqual(4.5,
                     encoder.topDownCompute(encoder.encode(4.5))[0].scalar)


  def testReadWrite(self):
    """Test ScalarEncoder Cap'n Proto serialization implementation."""
    originalValue = self._l.encode(1)

    proto1 = ScalarEncoderProto.new_message()
    self._l.write(proto1)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = ScalarEncoderProto.read(f)
      # Rebuild the encoder while the backing file is still open.
      encoder = ScalarEncoder.read(proto2)

    self.assertIsInstance(encoder, ScalarEncoder)
    # Every serialized attribute must survive the round trip.
    for attr in ("w", "minval", "maxval", "periodic", "n", "radius",
                 "resolution", "name", "verbosity", "clipInput"):
      self.assertEqual(getattr(encoder, attr), getattr(self._l, attr))
    self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue))
    self.assertEqual(self._l.decode(encoder.encode(1)),
                     encoder.decode(self._l.encode(1)))

    # Feed in a new value and ensure the encodings match
    result1 = self._l.encode(7)
    result2 = encoder.encode(7)
    self.assertTrue(numpy.array_equal(result1, result2))
class ScalarEncoderTest(unittest.TestCase):
    """Unit tests for ScalarEncoder class"""

    def setUp(self):
        """Build the shared periodic encoder (n=14, w=3, range [1, 8])."""
        # use of forced is not recommended, but used here for readability, see
        # scalar.py
        self._l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                                periodic=True, forced=True)

    # -------------------------------------------------------------------------
    # Private assertion helpers shared by the tests below
    # -------------------------------------------------------------------------
    def _assertEncodes(self, encoder, value, expectedBits):
        """Assert that ``encoder.encode(value)`` equals ``expectedBits``."""
        expected = numpy.array(expectedBits, dtype=defaultDtype)
        self.assertTrue(numpy.array_equal(encoder.encode(value), expected))

    def _assertDecodedRanges(self, encoder, bits, expectedRanges):
        """Decode a raw bit pattern and compare the decoded ranges.

        Asserts that exactly one field is decoded, that the number of ranges
        equals ``len(expectedRanges)``, and that every range matches its
        expected ``[min, max]`` pair.
        """
        (fieldsDict, _) = encoder.decode(numpy.array(bits))
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), len(expectedRanges))
        for actual, expected in zip(ranges, expectedRanges):
            self.assertTrue(numpy.array_equal(actual, expected))

    def _assertPointDecode(self, encoder, value, checkFieldNames=False):
        """Encode ``value`` and assert it decodes back to a single point.

        The decoded result must hold one field with one range whose min and
        max coincide and lie within one resolution step of ``value``. When
        ``checkFieldNames`` is true the returned field-name list is also
        checked against the keys of the fields dict.

        Returns the encoded output so callers can run further checks on it.
        """
        output = encoder.encode(value)
        (fieldsDict, fieldNames) = encoder.decode(output)
        self.assertEqual(len(fieldsDict), 1)
        if checkFieldNames:
            self.assertEqual(len(fieldNames), 1)
            self.assertEqual(fieldNames, list(fieldsDict.keys()))
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - value), encoder.resolution)
        return output

    def _assertBucketRoundTrip(self, encoder, value, output,
                               checkBucketValues=False):
        """Assert that the bucket of ``value`` maps back to ``output``.

        ``checkBucketValues`` additionally compares the bucket's value with
        the matching entry of ``encoder.getBucketValues()``.
        """
        bucketIndices = encoder.getBucketIndices(value)
        topDown = encoder.getBucketInfo(bucketIndices)[0]
        self.assertLessEqual(abs(topDown.value - value),
                             encoder.resolution / 2)
        if checkBucketValues:
            self.assertEqual(topDown.value,
                             encoder.getBucketValues()[bucketIndices[0]])
        self.assertEqual(topDown.scalar, topDown.value)
        self.assertTrue(numpy.array_equal(topDown.encoding, output))

    def _assertSweepRoundTrip(self, encoder):
        """Sweep [minval, maxval) in quarter-resolution steps.

        Every value must decode to a single point and its top-down value must
        be within half a resolution step of the input.
        """
        v = encoder.minval
        while v < encoder.maxval:
            output = self._assertPointDecode(encoder, v)
            topDown = encoder.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), encoder.resolution / 2)
            v += encoder.resolution / 4

    # -------------------------------------------------------------------------
    # Tests
    # -------------------------------------------------------------------------
    def testScalarEncoder(self):
        """Testing ScalarEncoder..."""
        # Missing values must encode to an all-zero output.
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(empty.sum(), 0)

    def testNaNs(self):
        """test NaNs"""
        # NaN input must encode to an all-zero output as well.
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(float("nan"))
        self.assertEqual(empty.sum(), 0)

    def testBottomUpEncodingPeriodicEncoder(self):
        """Test bottom-up encoding for a Periodic encoder"""
        # Without an explicit name the description is derived from the range.
        l = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True,
                          forced=True)
        self.assertEqual(l.getDescription(), [("[1:8]", 0)])

        l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.getDescription(), [("scalar", 0)])

        self._assertEncodes(l, 3, [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
        self.assertTrue(numpy.array_equal(l.encode(3.1), l.encode(3)))
        self._assertEncodes(l, 3.5,
                            [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
        self.assertTrue(numpy.array_equal(l.encode(3.6), l.encode(3.5)))
        self.assertTrue(numpy.array_equal(l.encode(3.7), l.encode(3.5)))
        self._assertEncodes(l, 4, [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0])

        # Values near the ends of the range wrap around the output.
        self._assertEncodes(l, 1, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
        self._assertEncodes(l, 1.5,
                            [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncodes(l, 7, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
        self._assertEncodes(l, 7.5,
                            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])

        self.assertEqual(l.resolution, 0.5)
        self.assertEqual(l.radius, 1.5)

    def testCreateResolution(self):
        """Test that we get the same encoder when we construct it using
        resolution instead of n
        """
        l = self._l
        d = l.__dict__
        l = ScalarEncoder(name="scalar", resolution=0.5, w=3, minval=1,
                          maxval=8, periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name="scalar", radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

    def testDecodeAndResolution(self):
        """Test the input description generation, top-down compute, and bucket
        support on a periodic encoder
        """
        l = self._l
        v = l.minval
        while v < l.maxval:
            output = self._assertPointDecode(l, v, checkFieldNames=True)

            topDown = l.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)

            # Test bucket support
            self._assertBucketRoundTrip(l, v, output, checkBucketValues=True)

            # Next value
            v += l.resolution / 4

        # ---------------------------------------------------------------------
        # Test the input description generation on a large number, periodic
        # encoder
        l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)

        # Test with a "hole"
        self._assertDecodedRanges(
            l, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [[7.5, 7.5]])

        # Test with something wider than w, and with a hole, and wrapped
        self._assertDecodedRanges(
            l, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [[7.5, 8], [1, 1]])

        # Test with something wider than w, no hole
        self._assertDecodedRanges(
            l, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [[1.5, 2.5]])

        # Test with 2 ranges
        self._assertDecodedRanges(
            l, [1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
            [[1.5, 1.5], [5.5, 6.0]])

        # Test with 2 ranges, 1 of which is narrower than w.
        # The range count is compared for equality here; the previous
        # assertTrue(len(ranges), 2) treated 2 as the failure message and so
        # never checked the count.
        self._assertDecodedRanges(
            l, [0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
            [[1.5, 1.5], [5.5, 6.0]])

    def testCloseness(self):
        """Test closenessScores for a periodic encoder"""
        encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1,
                                periodic=True, name="day of week",
                                forced=True)
        scores = encoder.closenessScores((2, 4, 7), (4, 2, 1),
                                         fractional=False)
        for actual, score in zip((2, 2, 1), scores):
            self.assertEqual(actual, score)

    def testNonPeriodicBottomUp(self):
        """Test Non-periodic encoder bottom-up"""
        l = ScalarEncoder(name="scalar", n=14, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self._assertEncodes(l, 1, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncodes(l, 2, [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncodes(l, 10,
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        # Test that we get the same encoder when we construct it using
        # resolution instead of n
        d = l.__dict__
        l = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1,
                          maxval=10, periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name="scalar", radius=5, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # ---------------------------------------------------------------------
        # Test the input description generation and topDown decoding of a
        # non-periodic encoder
        v = l.minval
        while v < l.maxval:
            output = self._assertPointDecode(l, v)

            topDown = l.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            # Top-down compute is only held to a full resolution step here;
            # the bucket check below uses half a step.
            self.assertLessEqual(abs(topDown.value - v), l.resolution)

            # Test bucket support
            self._assertBucketRoundTrip(l, v, output)

            # Next value
            v += l.resolution / 4

        # Make sure we can fill in holes
        self._assertDecodedRanges(
            l, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1], [[10, 10]])
        self._assertDecodedRanges(
            l, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1], [[10, 10]])

        # Test min and max
        l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=10,
                          periodic=False, forced=True)
        decoded = l.topDownCompute(
            numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
        self.assertEqual(decoded.value, 10)
        decoded = l.topDownCompute(
            numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
        self.assertEqual(decoded.value, 1)

        # Make sure only the last and first encoding encodes to max and min,
        # and there is no value greater than max or min
        l = ScalarEncoder(name="scalar", n=140, w=3, minval=1, maxval=141,
                          periodic=False, forced=True)
        # NOTE(review): range(137) stops at i == 136, so the "or i == 137"
        # clause below is never exercised; the last 3-bit window starts at
        # 137. Confirm whether range(138) was intended before changing it.
        for i in range(137):
            iterlist = [0] * 140
            iterlist[i:i + 3] = [1, 1, 1]
            npar = numpy.array(iterlist)
            decoded = l.topDownCompute(npar)[0]
            self.assertLessEqual(decoded.value, 141)
            self.assertGreaterEqual(decoded.value, 1)
            self.assertTrue(decoded.value < 141 or i == 137)
            self.assertTrue(decoded.value > 1 or i == 0)

        # ---------------------------------------------------------------------
        # Test the input description generation and top-down compute on a
        # small number non-periodic encoder
        l = ScalarEncoder(name="scalar", n=15, w=3, minval=.001, maxval=.002,
                          periodic=False, forced=True)
        self._assertSweepRoundTrip(l)

        # ---------------------------------------------------------------------
        # Test the input description generation on a large number,
        # non-periodic encoder
        l = ScalarEncoder(name="scalar", n=15, w=3, minval=1,
                          maxval=1000000000, periodic=False, forced=True)
        self._assertSweepRoundTrip(l)

    def testEncodeInvalidInputType(self):
        """Encoding a string raises TypeError."""
        encoder = ScalarEncoder(name="enc", n=14, w=3, minval=1, maxval=8,
                                periodic=False, forced=True)
        with self.assertRaises(TypeError):
            encoder.encode("String")

    def testGetBucketInfoIntResolution(self):
        """Ensures that passing resolution as an int doesn't truncate values."""
        encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8,
                                periodic=True, forced=True)
        self.assertEqual(
            4.5, encoder.topDownCompute(encoder.encode(4.5))[0].scalar)

    @unittest.skipUnless(
        capnp, "pycapnp is not installed, skipping serialization test.")
    def testReadWrite(self):
        """Test ScalarEncoder Cap'n Proto serialization implementation."""
        originalValue = self._l.encode(1)

        proto1 = ScalarEncoderProto.new_message()
        self._l.write(proto1)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = ScalarEncoderProto.read(f)

        encoder = ScalarEncoder.read(proto2)

        self.assertIsInstance(encoder, ScalarEncoder)
        # Every serialized parameter must survive the round trip unchanged.
        for attr in ("w", "minval", "maxval", "periodic", "n", "radius",
                     "resolution", "name", "verbosity", "clipInput"):
            self.assertEqual(getattr(encoder, attr), getattr(self._l, attr))
        self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue))
        self.assertEqual(self._l.decode(encoder.encode(1)),
                         encoder.decode(self._l.encode(1)))

        # Feed in a new value and ensure the encodings match
        result1 = self._l.encode(7)
        result2 = encoder.encode(7)
        self.assertTrue(numpy.array_equal(result1, result2))

    def testSettingNWithMaxvalMinvalNone(self):
        """Setting n when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=14, radius=0,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)

    def testSettingScalarAndResolution(self):
        """Constructing with n=0, radius=None and resolution=0.5 (no
        minval/maxval) raises ValueError.
        """
        with self.assertRaises(ValueError):
            ScalarEncoder(3, None, None, name="scalar", n=0, radius=None,
                          resolution=0.5, forced=True)

    def testSettingRadiusWithMaxvalMinvalNone(self):
        """Setting radius when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=0, radius=1.5,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)
def testDecodeAndResolution(self):
    """Test decoding, top-down compute, and bucket support on a periodic
    encoder.

    The first part sweeps the shared encoder ``self._l`` from minval to
    maxval in quarter-resolution steps and checks that every value
    round-trips. The second part decodes hand-built bit patterns (holes,
    over-wide runs, wrapped runs) and checks the resulting ranges.
    """
    l = self._l
    v = l.minval
    while v < l.maxval:
        output = l.encode(v)
        (fieldsDict, _) = l.decode(output)
        self.assertEqual(len(fieldsDict), 1)
        # list(...) keeps the indexing valid when dict.values() is a view.
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - v), l.resolution)

        topDown = l.topDownCompute(output)[0]
        self.assertTrue(numpy.array_equal(topDown.encoding, output))
        self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)

        # Test bucket support
        bucketIndices = l.getBucketIndices(v)
        topDown = l.getBucketInfo(bucketIndices)[0]
        self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
        self.assertEqual(topDown.value,
                         l.getBucketValues()[bucketIndices[0]])
        self.assertEqual(topDown.scalar, topDown.value)
        self.assertTrue(numpy.array_equal(topDown.encoding, output))

        # Next value
        v += l.resolution / 4

    # -------------------------------------------------------------------------
    # Test the input description generation on a large number, periodic
    # encoder
    l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)

    def assertDecodedRanges(bits, expectedRanges):
        """Decode ``bits`` and compare the single field's ranges.

        Each condition is asserted separately so a failure reports which
        one did not hold.
        """
        (decodedFields, _) = l.decode(numpy.array(bits))
        self.assertEqual(len(decodedFields), 1)
        (decodedRanges, _) = list(decodedFields.values())[0]
        self.assertEqual(len(decodedRanges), len(expectedRanges))
        for actual, expected in zip(decodedRanges, expectedRanges):
            self.assertTrue(numpy.array_equal(actual, expected))

    # Test with a "hole"
    assertDecodedRanges([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
                        [[7.5, 7.5]])

    # Test with something wider than w, and with a hole, and wrapped
    assertDecodedRanges([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
                        [[7.5, 8], [1, 1]])

    # Test with something wider than w, no hole
    assertDecodedRanges([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                        [[1.5, 2.5]])

    # Test with 2 ranges
    assertDecodedRanges([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
                        [[1.5, 1.5], [5.5, 6.0]])

    # Test with 2 ranges, 1 of which is narrower than w
    assertDecodedRanges([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
                        [[1.5, 1.5], [5.5, 6.0]])
class CategoryEncoder(Encoder):
    """Encodes a list of discrete categories (described by strings), that
    aren't related to each other, so we never emit a mixture of categories.

    The value of zero is reserved for "unknown category"

    Internally we use a ScalarEncoder with a radius of 1, but since we only
    encode integers, we never get mixture outputs.

    The SDRCategoryEncoder uses a different method to encode categories"""

    def __init__(self, w, categoryList, name="category", verbosity=0,
                 forced=False):
        """params:
          w            : passed to the underlying ScalarEncoder; the total
                         output width is w * (number of categories + 1)
          categoryList : sequence of category values; index 0 is reserved for
                         the unknown category, so categories map to 1..N
          name         : field name used in the description and in decode()
          verbosity    : values >= 2 make encodeIntoArray print debug output
          forced (default False) : if True, skip checks for parameters'
                         settings; see encoders/scalar.py for details
        """
        self.encoders = None
        self.verbosity = verbosity

        # number of categories includes "unknown"
        self.ncategories = len(categoryList) + 1

        # Two-way mapping between category values and the integer index that
        # is handed to the scalar encoder.
        self.categoryToIndex = dict()
        self.indexToCategory = dict()
        self.indexToCategory[0] = UNKNOWN
        for index, category in enumerate(categoryList, 1):
            self.categoryToIndex[category] = index
            self.indexToCategory[index] = category

        self.encoder = ScalarEncoder(w, minval=0, maxval=self.ncategories - 1,
                                     radius=1, periodic=False, forced=forced)
        self.width = w * self.ncategories
        assert self.encoder.getWidth() == self.width

        self.description = [(name, 0)]
        self.name = name

        # These are used to support the topDownCompute method
        self._topDownMappingM = None

        # This gets filled in by getBucketValues
        self._bucketValues = None

    def getDecoderOutputFieldTypes(self):
        """ [Encoder class virtual method override]
        """
        # TODO: change back to string meta-type (FieldMetaType.string) after
        # the decoding logic is fixed to output strings instead of internal
        # index values.
        return (FieldMetaType.integer,)

    def getWidth(self):
        """Return the total output width, w * number of categories."""
        return self.width

    def getDescription(self):
        """Return the description list, [(name, 0)]."""
        return self.description

    def getScalars(self, input):
        """ See method description in base.py

        Returns a one-element numpy array holding the category's index, 0 if
        the input is not a known category, or None for missing data.
        """
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            return numpy.array([None])
        return numpy.array([self.categoryToIndex.get(input, 0)])

    def getBucketIndices(self, input):
        """ See method description in base.py

        Returns [None] for missing data; otherwise the bucket indices the
        underlying scalar encoder reports for the category's index (index 0
        for an unknown category).
        """
        # Original author's note: treating categories as a rigid scalar
        # encoding is fast, but it might make learning harder.
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            return [None]
        # Get the bucket index from the underlying scalar encoder
        return self.encoder.getBucketIndices(self.categoryToIndex.get(input, 0))

    def encodeIntoArray(self, input, output):
        """Encode ``input`` into ``output`` in place.

        Missing data zeroes the whole output; a category that is not in the
        category list is encoded as index 0 (unknown).
        """
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            output[0:] = 0
            val = "<missing>"
        else:
            # if not found, we encode category 0
            val = self.categoryToIndex.get(input, 0)
            self.encoder.encodeIntoArray(val, output)

        if self.verbosity >= 2:
            # Single pre-formatted argument, so the call parses and prints
            # the same text as a print statement or a print function.
            print("input: %s va: %s output: %s" % (input, val, output))
            print("decoded: %s" % (self.decodedToStr(self.decode(output)),))

    def decode(self, encoded, parentFieldName=''):
        """ See the function description in base.py

        Returns ({fieldName: (ranges, desc)}, [fieldName]) where ranges are
        (minIndex, maxIndex) integer pairs and desc is a comma-separated list
        of the category names those index ranges cover. If the underlying
        scalar encoder decodes no fields, its empty result is returned as is.
        """
        # Get the scalar values from the underlying scalar encoder
        (fieldsDict, fieldNames) = self.encoder.decode(encoded)
        if len(fieldsDict) == 0:
            return (fieldsDict, fieldNames)

        # Expect only 1 field
        assert(len(fieldsDict) == 1)

        # Get the list of categories the scalar values correspond to and
        # generate the description from the category name(s).
        # list(...) keeps the indexing valid when dict.values() is a view.
        (inRanges, _) = list(fieldsDict.values())[0]
        outRanges = []
        categories = []
        for (minV, maxV) in inRanges:
            minV = int(round(minV))
            maxV = int(round(maxV))
            outRanges.append((minV, maxV))
            categories.extend(self.indexToCategory[index]
                              for index in range(minV, maxV + 1))
        desc = ", ".join(categories)

        # Original author's note (problem with this approach): decoding is
        # unstable for noisy encodings, which are likely when the encoding
        # comes from a prediction. With a 20-bit output where bits 5-9 are on
        # plus stray bits 11, 14 and 17, the expected answer would be 'cat',
        # but 'cat', 'dog', 'fish' is produced. Improving this may require
        # replacing the hole-filling process.

        # Return result
        if parentFieldName != '':
            fieldName = "%s.%s" % (parentFieldName, self.name)
        else:
            fieldName = self.name
        return ({fieldName: (outRanges, desc)}, [fieldName])

    def closenessScores(self, expValues, actValues, fractional=True,):
        """ See the function description in base.py

        Only the first element of each argument is compared. With
        fractional=True the score is 1.0 on an exact match and 0.0 otherwise;
        with fractional=False the score is inverted (0.0 on a match, 1.0
        otherwise). Returns a one-element numpy array.
        """
        closeness = 1.0 if expValues[0] == actValues[0] else 0.0
        if not fractional:
            closeness = 1.0 - closeness
        return numpy.array([closeness])

    def getBucketValues(self):
        """ See the function description in base.py

        Returns the category for every bucket of the underlying scalar
        encoder, computed once and cached.
        """
        if self._bucketValues is None:
            numBuckets = len(self.encoder.getBucketValues())
            # Build the list fully before caching it, so a failure part-way
            # through never leaves a partial list in the cache.
            self._bucketValues = [
                self.getBucketInfo([bucketIndex])[0].value
                for bucketIndex in range(numBuckets)
            ]
        return self._bucketValues

    def getBucketInfo(self, buckets):
        """ See the function description in base.py

        Returns a one-element list holding an EncoderResult whose value is
        the category and whose scalar is the category index.
        """
        # For the category encoder, the bucket index is the category index
        bucketInfo = self.encoder.getBucketInfo(buckets)[0]
        categoryIndex = int(round(bucketInfo.value))
        category = self.indexToCategory[categoryIndex]
        return [EncoderResult(value=category, scalar=categoryIndex,
                              encoding=bucketInfo.encoding)]

    def topDownCompute(self, encoded):
        """ See the function description in base.py

        Note that this returns a bare EncoderResult, not a one-element list
        as getBucketInfo does; the return shape is kept for existing callers.
        """
        encoderResult = self.encoder.topDownCompute(encoded)[0]
        categoryIndex = int(round(encoderResult.value))
        category = self.indexToCategory[categoryIndex]
        return EncoderResult(value=category, scalar=categoryIndex,
                             encoding=encoderResult.encoding)

    @classmethod
    def read(cls, proto):
        """Build an encoder from ``proto`` without running __init__.

        Sets every attribute that __init__ sets, so a deserialized encoder
        carries the same fields as a freshly constructed one.
        """
        encoder = object.__new__(cls)
        encoder.encoders = None
        encoder.verbosity = proto.verbosity
        encoder.encoder = ScalarEncoder.read(proto.encoder)
        encoder.width = proto.width
        encoder.description = [(proto.name, 0)]
        encoder.name = proto.name
        encoder.indexToCategory = {x.index: x.category
                                   for x in proto.indexToCategory}
        encoder.categoryToIndex = {category: index
                                   for index, category
                                   in encoder.indexToCategory.items()
                                   if category != UNKNOWN}
        # indexToCategory holds the unknown entry plus one entry per
        # category, which is exactly what __init__ counts as ncategories.
        encoder.ncategories = len(encoder.indexToCategory)
        encoder._topDownMappingM = None
        encoder._bucketValues = None
        return encoder

    def write(self, proto):
        """Serialize this encoder's state into ``proto``."""
        proto.width = self.width
        proto.indexToCategory = [
            {"index": index, "category": category}
            for index, category in self.indexToCategory.items()
        ]
        proto.name = self.name
        proto.verbosity = self.verbosity
        self.encoder.write(proto.encoder)
def testNonPeriodicBottomUp(self):
    """Test Non-periodic encoder bottom-up

    Covers fixed encodings, equivalence of the n / resolution / radius
    constructors, decode and top-down round trips over the whole range,
    hole filling, and the min/max boundaries.
    """

    def expectEncoding(enc, value, pattern):
        # The encoding of `value` must match `pattern` bit for bit.
        self.assertTrue(
            numpy.array_equal(enc.encode(value),
                              numpy.array(pattern, dtype=defaultDtype)))

    def expectPoint(enc, value):
        # Encode `value`; it must decode to one field holding one degenerate
        # range within a resolution step of `value`. Returns the encoding.
        encoding = enc.encode(value)
        (fields, _) = enc.decode(encoding)
        self.assertEqual(len(fields), 1)
        (spans, _) = list(fields.values())[0]
        self.assertEqual(len(spans), 1)
        (low, high) = spans[0]
        self.assertEqual(low, high)
        self.assertLess(abs(low - value), enc.resolution)
        return encoding

    def expectHoleFilled(enc, pattern):
        # A pattern with a hole must still decode to the single range [10, 10].
        (fields, _) = enc.decode(numpy.array(pattern))
        self.assertEqual(len(fields), 1)
        (spans, _) = list(fields.values())[0]
        self.assertEqual(len(spans), 1)
        self.assertTrue(numpy.array_equal(spans[0], [10, 10]))

    def sweep(enc):
        # Walk [minval, maxval) in quarter-resolution steps, checking the
        # decode and the top-down value at each step.
        value = enc.minval
        while value < enc.maxval:
            encoding = expectPoint(enc, value)
            estimate = enc.topDownCompute(encoding)[0].value
            self.assertLessEqual(abs(estimate - value), enc.resolution / 2)
            value += enc.resolution / 4

    enc = ScalarEncoder(name="scalar", n=14, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    expectEncoding(enc, 1, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    expectEncoding(enc, 2, [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
    expectEncoding(enc, 10, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

    # Building from resolution instead of n must give the same encoder.
    reference = enc.__dict__
    enc = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1,
                        maxval=10, periodic=False, forced=True)
    self.assertEqual(enc.__dict__, reference)

    # Building from radius instead of n must give the same encoder too.
    enc = ScalarEncoder(name="scalar", radius=5, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    self.assertEqual(enc.__dict__, reference)

    # Input description generation and top-down decoding across the range.
    value = enc.minval
    while value < enc.maxval:
        encoding = expectPoint(enc, value)

        result = enc.topDownCompute(encoding)[0]
        self.assertTrue(numpy.array_equal(result.encoding, encoding))
        self.assertLessEqual(abs(result.value - value), enc.resolution)

        # Bucket support
        buckets = enc.getBucketIndices(value)
        result = enc.getBucketInfo(buckets)[0]
        self.assertLessEqual(abs(result.value - value), enc.resolution / 2)
        self.assertEqual(result.scalar, result.value)
        self.assertTrue(numpy.array_equal(result.encoding, encoding))

        value += enc.resolution / 4

    # Holes in the bit pattern get filled in.
    expectHoleFilled(enc, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1])
    expectHoleFilled(enc, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1])

    # Min and max: partial windows at either edge decode to the bounds.
    enc = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=10,
                        periodic=False, forced=True)
    upper = enc.topDownCompute(
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
    self.assertEqual(upper.value, 10)
    lower = enc.topDownCompute(
        numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
    self.assertEqual(lower.value, 1)

    # Only the first and last encodings may decode to min and max, and no
    # decoded value may fall outside [min, max].
    enc = ScalarEncoder(name="scalar", n=140, w=3, minval=1, maxval=141,
                        periodic=False, forced=True)
    # NOTE(review): range(137) stops at i == 136, so the "or i == 137"
    # clause below never applies; confirm whether 138 was intended.
    for i in range(137):
        window = [0] * 140
        window[i:i + 3] = [1, 1, 1]
        decoded = enc.topDownCompute(numpy.array(window))[0]
        self.assertLessEqual(decoded.value, 141)
        self.assertGreaterEqual(decoded.value, 1)
        self.assertTrue(decoded.value < 141 or i == 137)
        self.assertTrue(decoded.value > 1 or i == 0)

    # Small-number non-periodic encoder.
    sweep(ScalarEncoder(name="scalar", n=15, w=3, minval=.001, maxval=.002,
                        periodic=False, forced=True))

    # Large-number non-periodic encoder.
    sweep(ScalarEncoder(name="scalar", n=15, w=3, minval=1,
                        maxval=1000000000, periodic=False, forced=True))
class ScalarEncoderTest(unittest.TestCase):
  """Unit tests for the ScalarEncoder class.

  The tests avoid Python-2-only syntax (print statements, ``izip``, indexing
  of ``dict.values()``) so the module can be imported by either interpreter.
  """


  def setUp(self):
    # use of forced is not recommended, but used here for readability, see
    # scalar.py
    self._l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                            periodic=True, forced=True)


  # ---------------------------------------------------------------------------
  # Private helpers shared by several tests

  def _assertEncoding(self, encoder, value, expectedBits):
    """Assert that ``encoder.encode(value)`` equals ``expectedBits``."""
    expected = numpy.array(expectedBits, dtype=defaultDtype)
    self.assertTrue(numpy.array_equal(encoder.encode(value), expected))


  def _decodeRanges(self, encoder, encoded):
    """Decode ``encoded``, check it describes one field, return its ranges."""
    (fieldsDict, _) = encoder.decode(encoded)
    self.assertEqual(len(fieldsDict), 1)
    # list(...) keeps this working when values() is a view (Python 3)
    (ranges, _) = list(fieldsDict.values())[0]
    return ranges


  def _assertDecodesToSingleValue(self, encoder, encoded, v):
    """Assert ``encoded`` decodes to one point range within resolution of v."""
    ranges = self._decodeRanges(encoder, encoded)
    self.assertEqual(len(ranges), 1)
    (rangeMin, rangeMax) = ranges[0]
    self.assertEqual(rangeMin, rangeMax)
    self.assertLess(abs(rangeMin - v), encoder.resolution)


  # ---------------------------------------------------------------------------
  def testScalarEncoder(self):
    """Missing-data sentinel must encode to an all-zero output."""
    mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                       periodic=False, forced=True)
    empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    self.assertEqual(empty.sum(), 0)


  def testNaNs(self):
    """NaN input must encode to an all-zero output."""
    mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                       periodic=False, forced=True)
    empty = mv.encode(float("nan"))
    self.assertEqual(empty.sum(), 0)


  def testBottomUpEncodingPeriodicEncoder(self):
    """Test bottom-up encoding for a Periodic encoder"""
    l = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True,
                      forced=True)
    self.assertEqual(l.getDescription(), [("[1:8]", 0)])
    l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)
    self.assertEqual(l.getDescription(), [("scalar", 0)])

    self._assertEncoding(l, 3, [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
    self.assertTrue(numpy.array_equal(l.encode(3.1), l.encode(3)))
    self._assertEncoding(l, 3.5, [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
    self.assertTrue(numpy.array_equal(l.encode(3.6), l.encode(3.5)))
    self.assertTrue(numpy.array_equal(l.encode(3.7), l.encode(3.5)))
    self._assertEncoding(l, 4, [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0])

    # Values near the ends of the range wrap around for a periodic encoder
    self._assertEncoding(l, 1, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
    self._assertEncoding(l, 1.5, [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    self._assertEncoding(l, 7, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
    self._assertEncoding(l, 7.5, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])

    self.assertEqual(l.resolution, 0.5)
    self.assertEqual(l.radius, 1.5)


  def testCreateResolution(self):
    """Test that we get the same encoder when we construct it using
    resolution instead of n"""
    l = self._l
    d = l.__dict__
    l = ScalarEncoder(name='scalar', resolution=0.5, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)
    self.assertEqual(l.__dict__, d)

    # Test that we get the same encoder when we construct it using radius
    # instead of n
    l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)
    self.assertEqual(l.__dict__, d)


  def testDecodeAndResolution(self):
    """Test the input description generation, top-down compute, and bucket
    support on a periodic encoder"""
    l = self._l
    v = l.minval
    while v < l.maxval:
      output = l.encode(v)
      self._assertDecodesToSingleValue(l, output, v)

      topDown = l.topDownCompute(output)[0]
      self.assertTrue(numpy.array_equal(topDown.encoding, output))
      self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)

      # Test bucket support
      bucketIndices = l.getBucketIndices(v)
      topDown = l.getBucketInfo(bucketIndices)[0]
      self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
      self.assertEqual(topDown.value, l.getBucketValues()[bucketIndices[0]])
      self.assertEqual(topDown.scalar, topDown.value)
      self.assertTrue(numpy.array_equal(topDown.encoding, output))

      # Next value
      v += l.resolution / 4

    # -----------------------------------------------------------------------
    # Test the input description generation on a periodic encoder built from
    # a radius
    l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                      periodic=True, forced=True)

    # Test with a "hole"
    ranges = self._decodeRanges(
        l, numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [7.5, 7.5]))

    # Test with something wider than w, and with a hole, and wrapped
    ranges = self._decodeRanges(
        l, numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
    self.assertEqual(len(ranges), 2)
    self.assertTrue(numpy.array_equal(ranges[0], [7.5, 8]))
    self.assertTrue(numpy.array_equal(ranges[1], [1, 1]))

    # Test with something wider than w, no hole
    ranges = self._decodeRanges(
        l, numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [1.5, 2.5]))

    # Test with 2 ranges
    ranges = self._decodeRanges(
        l, numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
    self.assertEqual(len(ranges), 2)
    self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
    self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))

    # Test with 2 ranges, 1 of which is narrower than w
    ranges = self._decodeRanges(
        l, numpy.array([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
    self.assertEqual(len(ranges), 2)
    self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
    self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))


  def testCloseness(self):
    """Test closenessScores for a periodic encoder"""
    encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1, periodic=True,
                            name="day of week", forced=True)
    scores = encoder.closenessScores((2, 4, 7), (4, 2, 1), fractional=False)
    # zip() is available on both Python 2 and 3 (izip is Python 2 only)
    for actual, score in zip((2, 2, 1), scores):
      self.assertEqual(actual, score)


  def testNonPeriodicBottomUp(self):
    """Test Non-periodic encoder bottom-up"""
    l = ScalarEncoder(name='scalar', n=14, w=5, minval=1, maxval=10,
                      periodic=False, forced=True)
    self._assertEncoding(l, 1, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    self._assertEncoding(l, 2, [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
    self._assertEncoding(l, 10, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

    # Test that we get the same encoder when we construct it using resolution
    # instead of n
    d = l.__dict__
    l = ScalarEncoder(name='scalar', resolution=1, w=5, minval=1, maxval=10,
                      periodic=False, forced=True)
    self.assertEqual(l.__dict__, d)

    # Test that we get the same encoder when we construct it using radius
    # instead of n
    l = ScalarEncoder(name='scalar', radius=5, w=5, minval=1, maxval=10,
                      periodic=False, forced=True)
    self.assertEqual(l.__dict__, d)

    # -------------------------------------------------------------------------
    # Test the input description generation and topDown decoding of a
    # non-periodic encoder
    v = l.minval
    while v < l.maxval:
      output = l.encode(v)
      self._assertDecodesToSingleValue(l, output, v)

      topDown = l.topDownCompute(output)[0]
      self.assertTrue(numpy.array_equal(topDown.encoding, output))
      # NOTE: the top-down tolerance here is a full resolution step, unlike
      # the half step used for the bucket check below.
      self.assertLessEqual(abs(topDown.value - v), l.resolution)

      # Test bucket support
      bucketIndices = l.getBucketIndices(v)
      topDown = l.getBucketInfo(bucketIndices)[0]
      self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
      self.assertEqual(topDown.scalar, topDown.value)
      self.assertTrue(numpy.array_equal(topDown.encoding, output))

      # Next value
      v += l.resolution / 4

    # Make sure we can fill in holes
    ranges = self._decodeRanges(
        l, numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1]))
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

    ranges = self._decodeRanges(
        l, numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]))
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

    # Test min and max
    l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=10,
                      periodic=False, forced=True)
    decoded = l.topDownCompute(
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
    self.assertEqual(decoded.value, 10)
    decoded = l.topDownCompute(
        numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
    self.assertEqual(decoded.value, 1)

    # Make sure only the last and first encoding encodes to max and min, and
    # there is no value greater than max or min
    l = ScalarEncoder(name='scalar', n=140, w=3, minval=1, maxval=141,
                      periodic=False, forced=True)
    for i in range(137):
      # A window of three consecutive on-bits starting at position i
      iterlist = [0] * 140
      iterlist[i:i + 3] = [1, 1, 1]
      npar = numpy.array(iterlist)
      decoded = l.topDownCompute(npar)[0]
      self.assertLessEqual(decoded.value, 141)
      self.assertGreaterEqual(decoded.value, 1)
      self.assertTrue(decoded.value < 141 or i == 137)
      self.assertTrue(decoded.value > 1 or i == 0)

    # -------------------------------------------------------------------------
    # Test the input description generation and top-down compute on a small
    # number non-periodic encoder
    l = ScalarEncoder(name='scalar', n=15, w=3, minval=.001, maxval=.002,
                      periodic=False, forced=True)
    v = l.minval
    while v < l.maxval:
      output = l.encode(v)
      self._assertDecodesToSingleValue(l, output, v)
      topDown = l.topDownCompute(output)[0].value
      self.assertLessEqual(abs(topDown - v), l.resolution / 2)
      v += l.resolution / 4

    # -------------------------------------------------------------------------
    # Test the input description generation on a large number, non-periodic
    # encoder
    l = ScalarEncoder(name='scalar', n=15, w=3, minval=1, maxval=1000000000,
                      periodic=False, forced=True)
    v = l.minval
    while v < l.maxval:
      output = l.encode(v)
      self._assertDecodesToSingleValue(l, output, v)
      topDown = l.topDownCompute(output)[0].value
      self.assertLessEqual(abs(topDown - v), l.resolution / 2)
      v += l.resolution / 4


  def testEncodeInvalidInputType(self):
    """Encoding a non-numeric input must raise TypeError."""
    encoder = ScalarEncoder(name='enc', n=14, w=3, minval=1, maxval=8,
                            periodic=False, forced=True)
    with self.assertRaises(TypeError):
      encoder.encode("String")


  def testGetBucketInfoIntResolution(self):
    """Ensures that passing resolution as an int doesn't truncate values."""
    encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8,
                            periodic=True, forced=True)
    self.assertEqual(4.5,
                     encoder.topDownCompute(encoder.encode(4.5))[0].scalar)


  def testReadWrite(self):
    """Test ScalarEncoder Cap'n Proto serialization implementation."""
    originalValue = self._l.encode(1)

    proto1 = ScalarEncoderProto.new_message()
    self._l.write(proto1)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = ScalarEncoderProto.read(f)

    encoder = ScalarEncoder.read(proto2)

    self.assertIsInstance(encoder, ScalarEncoder)
    self.assertEqual(encoder.w, self._l.w)
    self.assertEqual(encoder.minval, self._l.minval)
    self.assertEqual(encoder.maxval, self._l.maxval)
    self.assertEqual(encoder.periodic, self._l.periodic)
    self.assertEqual(encoder.n, self._l.n)
    self.assertEqual(encoder.radius, self._l.radius)
    self.assertEqual(encoder.resolution, self._l.resolution)
    self.assertEqual(encoder.name, self._l.name)
    self.assertEqual(encoder.verbosity, self._l.verbosity)
    self.assertEqual(encoder.clipInput, self._l.clipInput)
    self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue))
    self.assertEqual(self._l.decode(encoder.encode(1)),
                     encoder.decode(self._l.encode(1)))

    # Feed in a new value and ensure the encodings match
    result1 = self._l.encode(7)
    result2 = encoder.encode(7)
    self.assertTrue(numpy.array_equal(result1, result2))


  # ---------------------------------------------------------------------------
  # Tests for #1966
  def testSettingNWithMaxvalMinvalNone(self):
    """Setting n when maxval/minval = None creates instance."""
    encoder = ScalarEncoder(3, None, None, name='scalar', n=14, radius=0,
                            resolution=0, forced=True)
    self.assertIsInstance(encoder, ScalarEncoder)


  def testSettingScalarAndResolution(self):
    """Setting both scalar and resolution not allowed."""
    with self.assertRaises(ValueError):
      ScalarEncoder(3, None, None, name='scalar', n=0, radius=None,
                    resolution=0.5, forced=True)


  def testSettingRadiusWithMaxvalMinvalNone(self):
    """If radius when maxval/minval = None creates instance."""
    encoder = ScalarEncoder(3, None, None, name='scalar', n=0, radius=1.5,
                            resolution=0, forced=True)
    self.assertIsInstance(encoder, ScalarEncoder)
class LogEncoder(Encoder):
  """
  This class wraps the ScalarEncoder class.

  A Log encoder represents a floating point value on a logarithmic scale.

  valueToEncode = log10(input)

    w -- number of bits to set in output
    minval -- minimum input value. must be greater than 0. Lower values are
              reset to this value
    maxval -- maximum input value
    periodic -- accepted for signature compatibility only; this constructor
              does not use it and always builds the wrapped ScalarEncoder
              with periodic=False

    Exactly one of n, radius, resolution must be set. "0" is a special
    value that means "not set".

    n -- number of bits in the representation (must be > w)
    radius -- inputs separated by more than this distance in log space will
              have non-overlapping representations
    resolution -- The minimum change in scaled value needed to produce a
              change in encoding. This should be specified in log space. For
              example, with a resolution of 0.1 the scaled values 1.0 and 1.1
              will be distinguishable in the output. In terms of the original
              input values, this means 10^1.0 (10) and 10^1.1 (about 12.59)
              will be distinguishable.
    name -- an optional string which will become part of the description
    verbosity -- level of debugging output you want the encoder to provide.
    clipInput -- if true, non-periodic inputs smaller than minval or greater
              than maxval will be clipped to minval/maxval
    forced -- (default False), if True, skip some safety checks
  """

  def __init__(self,
               w=5,
               minval=1e-07,
               maxval=10000,
               periodic=False,
               n=0,
               radius=0,
               resolution=0,
               name="log",
               verbosity=0,
               clipInput=True,
               forced=False):

    # Lower bound for log encoding near machine precision limit
    lowLimit = 1e-07

    # Limit minval as log10(0) is undefined.
    if minval < lowLimit:
      minval = lowLimit

    # Check that minval is still lower than maxval
    if not minval < maxval:
      raise ValueError(
          "Max val must be larger than min val or the lower limit "
          "for this encoder %.7f" % lowLimit)

    self.encoders = None
    self.verbosity = verbosity

    # Scale values for calculations within the class
    self.minScaledValue = math.log10(minval)
    self.maxScaledValue = math.log10(maxval)

    if not self.maxScaledValue > self.minScaledValue:
      raise ValueError(
          "Max val must be larger, in log space, than min val.")

    self.clipInput = clipInput
    self.minval = minval
    self.maxval = maxval

    # The wrapped encoder works entirely in log10 space.
    self.encoder = ScalarEncoder(w=w,
                                 minval=self.minScaledValue,
                                 maxval=self.maxScaledValue,
                                 periodic=False,
                                 n=n,
                                 radius=radius,
                                 resolution=resolution,
                                 verbosity=self.verbosity,
                                 clipInput=self.clipInput,
                                 forced=forced)
    self.width = self.encoder.getWidth()
    self.description = [(name, 0)]
    self.name = name

    # This list is created by getBucketValues() the first time it is called,
    # and re-created whenever our buckets would be re-arranged.
    self._bucketValues = None


  def getWidth(self):
    """ Return the output width (in bits) of the wrapped scalar encoder. """
    return self.width


  def getDescription(self):
    """ Return the [(name, offset)] description list of this encoder. """
    return self.description


  def getDecoderOutputFieldTypes(self):
    """
    Encoder class virtual method override
    """
    return (FieldMetaType.float, )


  def _getScaledValue(self, inpt):
    """
    Convert the input, which is in normal space, into log space.

    Returns None for the missing-data sentinel. Any other input is clamped
    to [minval, maxval] before taking log10 (the clamp is applied here
    regardless of clipInput).
    """
    if inpt == SENTINEL_VALUE_FOR_MISSING_DATA:
      return None

    val = inpt
    if val < self.minval:
      val = self.minval
    elif val > self.maxval:
      val = self.maxval

    return math.log10(val)


  def getBucketIndices(self, inpt):
    """
    See the function description in base.py
    """
    # Get the scaled value
    scaledVal = self._getScaledValue(inpt)

    if scaledVal is None:
      return [None]
    return self.encoder.getBucketIndices(scaledVal)


  def encodeIntoArray(self, inpt, output):
    """
    See the function description in base.py
    """
    # Get the scaled value
    scaledVal = self._getScaledValue(inpt)

    if scaledVal is None:
      # Missing data is represented by an all-zero output
      output[0:] = 0
    else:
      self.encoder.encodeIntoArray(scaledVal, output)

      if self.verbosity >= 2:
        # Single-argument print(...) emits the same text on Python 2 and 3
        print("input: %s scaledVal: %s output: %s"
              % (inpt, scaledVal, output))
        print("decoded: %s" % (self.decodedToStr(self.decode(output)),))


  def decode(self, encoded, parentFieldName=''):
    """
    See the function description in base.py
    """
    # Get the scalar values from the underlying scalar encoder
    (fieldsDict, fieldNames) = self.encoder.decode(encoded)
    if len(fieldsDict) == 0:
      return (fieldsDict, fieldNames)

    # Expect only 1 field
    assert (len(fieldsDict) == 1)

    # Convert each range into normal space.
    # list(...) keeps this working when values() is a view (Python 3).
    (inRanges, _) = list(fieldsDict.values())[0]
    outRanges = [(math.pow(10, minV), math.pow(10, maxV))
                 for (minV, maxV) in inRanges]

    # Generate a text description of the ranges; a degenerate range (both
    # ends equal) is shown as a single value.
    desc = ", ".join(
        ("%.2f-%.2f" % (lo, hi)) if lo != hi else ("%.2f" % lo)
        for (lo, hi) in outRanges)

    # Return result
    if parentFieldName != '':
      fieldName = "%s.%s" % (parentFieldName, self.name)
    else:
      fieldName = self.name
    return ({fieldName: (outRanges, desc)}, [fieldName])


  def getBucketValues(self):
    """
    See the function description in base.py
    """
    # Need to re-create?
    if self._bucketValues is None:
      # Map the wrapped encoder's log-space bucket values to normal space
      self._bucketValues = [math.pow(10, scaledValue)
                            for scaledValue in self.encoder.getBucketValues()]

    return self._bucketValues


  def getBucketInfo(self, buckets):
    """
    See the function description in base.py
    """
    scaledResult = self.encoder.getBucketInfo(buckets)[0]
    value = math.pow(10, scaledResult.value)

    return [
        EncoderResult(value=value, scalar=value,
                      encoding=scaledResult.encoding)
    ]


  def topDownCompute(self, encoded):
    """
    See the function description in base.py

    Note that, unlike getBucketInfo(), this returns a bare EncoderResult
    rather than a one-element list.
    """
    scaledResult = self.encoder.topDownCompute(encoded)[0]
    value = math.pow(10, scaledResult.value)

    return EncoderResult(value=value, scalar=value,
                         encoding=scaledResult.encoding)


  def closenessScores(self, expValues, actValues, fractional=True):
    """
    See the function description in base.py

    Only the first element of expValues/actValues is used. Non-positive
    values are treated as minScaledValue since log10 is undefined for them.
    """
    # Compute the percent error in log space
    if expValues[0] > 0:
      expValue = math.log10(expValues[0])
    else:
      expValue = self.minScaledValue

    if actValues[0] > 0:
      actValue = math.log10(actValues[0])
    else:
      actValue = self.minScaledValue

    err = abs(expValue - actValue)
    if fractional:
      pctErr = err / (self.maxScaledValue - self.minScaledValue)
      pctErr = min(1.0, pctErr)
      closeness = 1.0 - pctErr
    else:
      # Non-fractional mode reports the absolute log-space error itself
      closeness = err

    return numpy.array([closeness])


  @classmethod
  def read(cls, proto):
    """ Build a LogEncoder from its serialized proto, bypassing __init__. """
    encoder = object.__new__(cls)
    # __init__ always sets this attribute; restore it so a deserialized
    # instance has the same attributes as a constructed one.
    encoder.encoders = None
    encoder.verbosity = proto.verbosity
    encoder.minScaledValue = proto.minScaledValue
    encoder.maxScaledValue = proto.maxScaledValue
    encoder.clipInput = proto.clipInput
    encoder.minval = proto.minval
    encoder.maxval = proto.maxval
    encoder.encoder = ScalarEncoder.read(proto.encoder)
    encoder.name = proto.name
    encoder.width = encoder.encoder.getWidth()
    encoder.description = [(encoder.name, 0)]
    encoder._bucketValues = None
    return encoder


  def write(self, proto):
    """ Serialize this encoder (and the wrapped scalar encoder) to proto. """
    proto.verbosity = self.verbosity
    proto.minScaledValue = self.minScaledValue
    proto.maxScaledValue = self.maxScaledValue
    proto.clipInput = self.clipInput
    proto.minval = self.minval
    proto.maxval = self.maxval
    self.encoder.write(proto.encoder)
    proto.name = self.name
class CategoryEncoder(Encoder):
  """Encodes a list of discrete categories (described by strings), that aren't
  related to each other, so we never emit a mixture of categories.

  The value of zero is reserved for "unknown category"

  Internally we use a ScalarEncoder with a radius of 1, but since we only
  encode integers, we never get mixture outputs.

  The SDRCategoryEncoder uses a different method to encode categories"""

  def __init__(self, w, categoryList, name="category", verbosity=0,
               forced=False):
    """params:
      w -- number of bits per category, passed to the internal ScalarEncoder
      categoryList -- list of discrete categories; category i is assigned
                      index i + 1 (index 0 is the unknown category)
      name -- name used in the encoder description
      verbosity -- debugging output level (>= 2 prints every encoding)
      forced (default False) : if True, skip checks for parameters'
                      settings; see encoders/scalar.py for details
    """
    self.encoders = None
    self.verbosity = verbosity

    # number of categories includes "unknown"
    self.ncategories = len(categoryList) + 1

    self.categoryToIndex = dict()
    self.indexToCategory = dict()
    self.indexToCategory[0] = UNKNOWN
    for index, category in enumerate(categoryList, 1):
      self.categoryToIndex[category] = index
      self.indexToCategory[index] = category

    self.encoder = ScalarEncoder(w, minval=0, maxval=self.ncategories - 1,
                                 radius=1, periodic=False, forced=forced)
    self.width = w * self.ncategories
    assert self.encoder.getWidth() == self.width

    self.description = [(name, 0)]
    self.name = name

    # These are used to support the topDownCompute method
    self._topDownMappingM = None

    # This gets filled in by getBucketValues
    self._bucketValues = None


  def getDecoderOutputFieldTypes(self):
    """ [Encoder class virtual method override]
    """
    # TODO: change back to string meta-type after the decoding logic is fixed
    #       to output strings instead of internal index values.
    #return (FieldMetaType.string,)
    return (FieldMetaType.integer, )


  def getWidth(self):
    """ Return the output width in bits (w * number of categories). """
    return self.width


  def getDescription(self):
    """ Return the [(name, offset)] description list of this encoder. """
    return self.description


  def getScalars(self, input):
    """ See method description in base.py """
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return numpy.array([None])
    # Categories that were not in categoryList map to index 0 (unknown)
    return numpy.array([self.categoryToIndex.get(input, 0)])


  def getBucketIndices(self, input):
    """ See method description in base.py """
    # Get the bucket index from the underlying scalar encoder
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return [None]
    return self.encoder.getBucketIndices(self.categoryToIndex.get(input, 0))


  def encodeIntoArray(self, input, output):
    """ See method description in base.py """
    # if not found, we encode category 0
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      output[0:] = 0
      val = "<missing>"
    else:
      val = self.categoryToIndex.get(input, 0)
      self.encoder.encodeIntoArray(val, output)

    if self.verbosity >= 2:
      # Single-argument print(...) emits the same text on Python 2 and 3
      print("input: %s va: %s output: %s" % (input, val, output))
      print("decoded: %s" % (self.decodedToStr(self.decode(output)),))


  def decode(self, encoded, parentFieldName=''):
    """ See the function description in base.py
    """
    # Get the scalar values from the underlying scalar encoder
    (fieldsDict, fieldNames) = self.encoder.decode(encoded)
    if len(fieldsDict) == 0:
      return (fieldsDict, fieldNames)

    # Expect only 1 field
    assert (len(fieldsDict) == 1)

    # Get the list of categories the scalar values correspond to and
    #  generate the description from the category name(s).
    # list(...) keeps this working when values() is a view (Python 3).
    (inRanges, _) = list(fieldsDict.values())[0]
    outRanges = []
    names = []
    for (minV, maxV) in inRanges:
      minV = int(round(minV))
      maxV = int(round(maxV))
      outRanges.append((minV, maxV))
      # Every index in the closed range contributes its category name
      for index in range(minV, maxV + 1):
        names.append(self.indexToCategory[index])
    desc = ", ".join(names)

    # Return result
    if parentFieldName != '':
      fieldName = "%s.%s" % (parentFieldName, self.name)
    else:
      fieldName = self.name
    return ({fieldName: (outRanges, desc)}, [fieldName])


  def closenessScores(self, expValues, actValues, fractional=True):
    """ See the function description in base.py

    Only the first element of expValues/actValues is compared. With
    fractional=True the score is 1.0 on an exact match and 0.0 otherwise;
    with fractional=False the score is inverted (0.0 on a match, 1.0
    otherwise).
    """
    if expValues[0] == actValues[0]:
      closeness = 1.0
    else:
      closeness = 0.0

    if not fractional:
      closeness = 1.0 - closeness

    return numpy.array([closeness])


  def getBucketValues(self):
    """ See the function description in base.py """
    if self._bucketValues is None:
      numBuckets = len(self.encoder.getBucketValues())
      self._bucketValues = [self.getBucketInfo([bucketIndex])[0].value
                            for bucketIndex in range(numBuckets)]

    return self._bucketValues


  def getBucketInfo(self, buckets):
    """ See the function description in base.py
    """
    # For the category encoder, the bucket index is the category index
    bucketInfo = self.encoder.getBucketInfo(buckets)[0]

    categoryIndex = int(round(bucketInfo.value))
    category = self.indexToCategory[categoryIndex]

    return [
        EncoderResult(value=category, scalar=categoryIndex,
                      encoding=bucketInfo.encoding)
    ]


  def topDownCompute(self, encoded):
    """ See the function description in base.py

    Note that, unlike getBucketInfo(), this returns a bare EncoderResult
    rather than a one-element list.
    """
    encoderResult = self.encoder.topDownCompute(encoded)[0]
    categoryIndex = int(round(encoderResult.value))
    category = self.indexToCategory[categoryIndex]

    return EncoderResult(value=category, scalar=categoryIndex,
                         encoding=encoderResult.encoding)


  @classmethod
  def read(cls, proto):
    """ Build a CategoryEncoder from its serialized proto, bypassing
    __init__. """
    encoder = object.__new__(cls)

    # __init__ always sets this attribute; restore it so a deserialized
    # instance has the same attributes as a constructed one.
    encoder.encoders = None
    encoder.verbosity = proto.verbosity
    encoder.encoder = ScalarEncoder.read(proto.encoder)
    encoder.width = proto.width
    encoder.description = [(proto.name, 0)]
    encoder.name = proto.name
    encoder.indexToCategory = {
        x.index: x.category for x in proto.indexToCategory
    }
    # The unknown category (index 0) has no reverse mapping, as in __init__
    encoder.categoryToIndex = {
        category: index
        for index, category in encoder.indexToCategory.items()
        if category != UNKNOWN
    }
    # indexToCategory includes the unknown entry, so its size matches the
    # len(categoryList) + 1 computed by __init__.
    encoder.ncategories = len(encoder.indexToCategory)
    encoder._topDownMappingM = None
    encoder._bucketValues = None

    return encoder


  def write(self, proto):
    """ Serialize this encoder (and the wrapped scalar encoder) to proto. """
    proto.width = self.width
    proto.indexToCategory = [{
        "index": index,
        "category": category
    } for index, category in self.indexToCategory.items()]
    proto.name = self.name
    proto.verbosity = self.verbosity
    self.encoder.write(proto.encoder)
class CategoryEncoder(Encoder):
    """
    Encodes a list of discrete categories (described by strings), that aren't
    related to each other, so we never emit a mixture of categories.

    The value of zero is reserved for "unknown category"

    Internally we use a :class:`.ScalarEncoder` with a radius of 1, but since
    we only encode integers, we never get mixture outputs.

    The :class:`.SDRCategoryEncoder` uses a different method to encode
    categories.

    :param w: passed as the first argument to the underlying
              :class:`.ScalarEncoder`; the total output width is
              ``w * (len(categoryList) + 1)``
    :param categoryList: list of discrete string categories
    :param name: field name used in the description and in decode() results
    :param verbosity: when >= 2, encodeIntoArray() prints debug output
    :param forced: if True, skip checks for parameters' settings; see
                   :class:`.ScalarEncoder` for details. (default False)
    """

    def __init__(self, w, categoryList, name="category", verbosity=0,
                 forced=False):
        self.encoders = None
        self.verbosity = verbosity

        # number of categories includes "unknown"
        self.ncategories = len(categoryList) + 1

        # Index 0 is reserved for the unknown category; real categories are
        # numbered from 1 in the order they appear in categoryList.
        self.categoryToIndex = dict()
        self.indexToCategory = dict()
        self.indexToCategory[0] = UNKNOWN
        for index, category in enumerate(categoryList, 1):
            self.categoryToIndex[category] = index
            self.indexToCategory[index] = category

        self.encoder = ScalarEncoder(w, minval=0, maxval=self.ncategories - 1,
                                     radius=1, periodic=False, forced=forced)
        self.width = w * self.ncategories
        assert self.encoder.getWidth() == self.width

        self.description = [(name, 0)]
        self.name = name

        # These are used to support the topDownCompute method
        self._topDownMappingM = None

        # This gets filled in by getBucketValues
        self._bucketValues = None

    def getDecoderOutputFieldTypes(self):
        """ [Encoder class virtual method override] """
        # TODO: change back to string meta-type after the decoding logic is
        # fixed to output strings instead of internal index values.
        return (FieldMetaType.integer,)

    def getWidth(self):
        """Return the total number of output bits."""
        return self.width

    def getDescription(self):
        """Return the ``[(name, offset)]`` description list."""
        return self.description

    def getScalars(self, input):
        """ See method description in base.py """
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            return numpy.array([None])
        # Categories that were never registered map to index 0 (unknown).
        return numpy.array([self.categoryToIndex.get(input, 0)])

    def getBucketIndices(self, input):
        """ See method description in base.py """
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            return [None]
        # Get the bucket index from the underlying scalar encoder
        return self.encoder.getBucketIndices(
            self.categoryToIndex.get(input, 0))

    def encodeIntoArray(self, input, output):
        """
        Encode the category ``input`` into ``output`` in place.

        Missing data yields an all-zero output; a category that is not known
        is encoded as category 0.
        """
        if input == SENTINEL_VALUE_FOR_MISSING_DATA:
            output[0:] = 0
            val = "<missing>"
        else:
            # if not found, we encode category 0
            val = self.categoryToIndex.get(input, 0)
            self.encoder.encodeIntoArray(val, output)

        if self.verbosity >= 2:
            # Single pre-formatted string so the text is the same whether
            # print is a statement (Python 2) or a function (Python 3).
            print("input: %s va: %s output: %s" % (input, val, output))
            print("decoded: %s" % (self.decodedToStr(self.decode(output)),))

    def decode(self, encoded, parentFieldName=''):
        """ See the function description in base.py """
        # Get the scalar values from the underlying scalar encoder
        (fieldsDict, fieldNames) = self.encoder.decode(encoded)
        if len(fieldsDict) == 0:
            return (fieldsDict, fieldNames)

        # Expect only 1 field
        assert len(fieldsDict) == 1

        # Get the list of categories the scalar values correspond to and
        # generate the description from the category name(s).
        (inRanges, inDesc) = list(fieldsDict.values())[0]
        outRanges = []
        desc = ""
        for (minV, maxV) in inRanges:
            minV = int(round(minV))
            maxV = int(round(maxV))
            outRanges.append((minV, maxV))
            for categoryIndex in range(minV, maxV + 1):
                if len(desc) > 0:
                    desc += ", "
                desc += self.indexToCategory[categoryIndex]

        # Return result
        if parentFieldName != '':
            fieldName = "%s.%s" % (parentFieldName, self.name)
        else:
            fieldName = self.name
        return ({fieldName: (outRanges, desc)}, [fieldName])

    def closenessScores(self, expValues, actValues, fractional=True,):
        """
        See the function description in base.py

        Only the first element of ``expValues`` and ``actValues`` is compared.
        With ``fractional=True`` the score is 1.0 on an exact match and 0.0
        otherwise; with ``fractional=False`` the score is inverted (0.0 on a
        match, 1.0 otherwise).
        """
        closeness = 1.0 if expValues[0] == actValues[0] else 0.0
        if not fractional:
            closeness = 1.0 - closeness
        return numpy.array([closeness])

    def getBucketValues(self):
        """ See the function description in base.py """
        if self._bucketValues is None:
            numBuckets = len(self.encoder.getBucketValues())
            self._bucketValues = [
                self.getBucketInfo([bucketIndex])[0].value
                for bucketIndex in range(numBuckets)
            ]
        return self._bucketValues

    def getBucketInfo(self, buckets):
        """ See the function description in base.py """
        # For the category encoder, the bucket index is the category index
        bucketInfo = self.encoder.getBucketInfo(buckets)[0]
        categoryIndex = int(round(bucketInfo.value))
        category = self.indexToCategory[categoryIndex]
        return [EncoderResult(value=category, scalar=categoryIndex,
                              encoding=bucketInfo.encoding)]

    def topDownCompute(self, encoded):
        """
        See the function description in base.py

        Note that this returns a single EncoderResult, not a list.
        """
        encoderResult = self.encoder.topDownCompute(encoded)[0]
        categoryIndex = int(round(encoderResult.value))
        category = self.indexToCategory[categoryIndex]
        return EncoderResult(value=category, scalar=categoryIndex,
                             encoding=encoderResult.encoding)

    @classmethod
    def getSchema(cls):
        """Return the proto schema class used by read() and write()."""
        return CategoryEncoderProto

    @classmethod
    def read(cls, proto):
        """Rebuild an encoder from ``proto`` without running ``__init__``."""
        encoder = object.__new__(cls)
        encoder.verbosity = proto.verbosity
        encoder.encoder = ScalarEncoder.read(proto.encoder)
        encoder.width = proto.width
        encoder.description = [(proto.name, 0)]
        encoder.name = proto.name
        encoder.indexToCategory = {x.index: x.category
                                   for x in proto.indexToCategory}
        # The unknown entry is kept out of the reverse mapping so that
        # lookups of unregistered categories fall back to index 0.
        encoder.categoryToIndex = {
            category: index
            for index, category in encoder.indexToCategory.items()
            if category != UNKNOWN
        }
        encoder._topDownMappingM = None
        encoder.ncategories = len(proto.indexToCategory)
        encoder._bucketValues = None
        encoder.encoders = None
        return encoder

    def write(self, proto):
        """Store this encoder's state into ``proto``."""
        proto.width = self.width
        proto.indexToCategory = [
            {"index": index, "category": category}
            for index, category in self.indexToCategory.items()
        ]
        proto.name = self.name
        proto.verbosity = self.verbosity
        self.encoder.write(proto.encoder)
class LogEncoder(Encoder):
    """
    This class wraps the ScalarEncoder class.

    A Log encoder represents a floating point value on a logarithmic scale.

    valueToEncode = log10(input)

      w -- number of bits to set in output
      minval -- minimum input value. Must be greater than 0; values below
                1e-07 are raised to 1e-07. Inputs lower than minval are reset
                to minval before encoding.
      maxval -- maximum input value. Inputs greater than maxval are reset to
                maxval before encoding.
      periodic -- accepted but not used by this class: the wrapped
                ScalarEncoder is always created with periodic=False.

      Exactly one of n, radius, resolution must be set. "0" is a special
      value that means "not set".

      n -- number of bits in the representation (must be > w)
      radius -- inputs separated by more than this distance in log space will
                have non-overlapping representations
      resolution -- The minimum change in scaled value needed to produce a
                change in encoding. This should be specified in log space.
                For example, with a resolution of 0.1 the scaled values 1.0
                and 1.1 will be distinguishable in the output. In terms of
                the original input values, this means 10^1.0 (10) and
                10^1.1 (about 12.6) will be distinguishable.
      name -- an optional string which will become part of the description
      verbosity -- level of debugging output you want the encoder to provide.
      clipInput -- forwarded to the wrapped ScalarEncoder. Note that
                _getScaledValue() already clamps every input to
                [minval, maxval] before it reaches the scalar encoder.
      forced -- (default False), if True, skip some safety checks
    """

    def __init__(
        self,
        w=5,
        minval=1e-07,
        maxval=10000,
        periodic=False,
        n=0,
        radius=0,
        resolution=0,
        name="log",
        verbosity=0,
        clipInput=True,
        forced=False,
    ):
        # Lower bound for log encoding near machine precision limit
        lowLimit = 1e-07

        # Limit minval as log10(0) is undefined.
        if minval < lowLimit:
            minval = lowLimit

        # Check that minval is still lower than maxval
        if not minval < maxval:
            raise ValueError(
                "Max val must be larger than min val or the lower limit "
                "for this encoder %.7f" % lowLimit
            )

        self.encoders = None
        self.verbosity = verbosity

        # Scale values for calculations within the class
        self.minScaledValue = math.log10(minval)
        self.maxScaledValue = math.log10(maxval)

        if not self.maxScaledValue > self.minScaledValue:
            raise ValueError(
                "Max val must be larger, in log space, than min val.")

        self.clipInput = clipInput
        self.minval = minval
        self.maxval = maxval

        # All encoding work is delegated to a scalar encoder operating on
        # log10 values.
        self.encoder = ScalarEncoder(
            w=w,
            minval=self.minScaledValue,
            maxval=self.maxScaledValue,
            periodic=False,
            n=n,
            radius=radius,
            resolution=resolution,
            verbosity=self.verbosity,
            clipInput=self.clipInput,
            forced=forced,
        )
        self.width = self.encoder.getWidth()
        self.description = [(name, 0)]
        self.name = name

        # This list is created by getBucketValues() the first time it is
        # called, and re-created whenever our buckets would be re-arranged.
        self._bucketValues = None

    def getWidth(self):
        """Return the width reported by the wrapped scalar encoder."""
        return self.width

    def getDescription(self):
        """Return the ``[(name, offset)]`` description list."""
        return self.description

    def getDecoderOutputFieldTypes(self):
        """
        Encoder class virtual method override
        """
        return (FieldMetaType.float,)

    def _getScaledValue(self, inpt):
        """
        Convert the input, which is in normal space, into log space.

        Returns None for missing data; otherwise the input is clamped to
        [minval, maxval] and its log10 is returned.
        """
        if inpt == SENTINEL_VALUE_FOR_MISSING_DATA:
            return None

        val = inpt
        if val < self.minval:
            val = self.minval
        elif val > self.maxval:
            val = self.maxval
        return math.log10(val)

    def getBucketIndices(self, inpt):
        """
        See the function description in base.py
        """
        scaledVal = self._getScaledValue(inpt)
        if scaledVal is None:
            return [None]
        return self.encoder.getBucketIndices(scaledVal)

    def encodeIntoArray(self, inpt, output):
        """
        See the function description in base.py
        """
        scaledVal = self._getScaledValue(inpt)

        if scaledVal is None:
            # Missing data encodes as all zeros.
            output[0:] = 0
        else:
            self.encoder.encodeIntoArray(scaledVal, output)

            if self.verbosity >= 2:
                # Single pre-formatted string so the text is the same whether
                # print is a statement (Python 2) or a function (Python 3).
                print("input: %s scaledVal: %s output: %s"
                      % (inpt, scaledVal, output))
                print("decoded: %s"
                      % (self.decodedToStr(self.decode(output)),))

    def decode(self, encoded, parentFieldName=""):
        """
        See the function description in base.py
        """
        # Get the scalar values from the underlying scalar encoder
        (fieldsDict, fieldNames) = self.encoder.decode(encoded)
        if len(fieldsDict) == 0:
            return (fieldsDict, fieldNames)

        # Expect only 1 field
        assert len(fieldsDict) == 1

        # Convert each range into normal space
        (inRanges, inDesc) = list(fieldsDict.values())[0]
        outRanges = [(math.pow(10, minV), math.pow(10, maxV))
                     for (minV, maxV) in inRanges]

        # Generate a text description of the ranges
        parts = []
        for (low, high) in outRanges:
            if low != high:
                parts.append("%.2f-%.2f" % (low, high))
            else:
                parts.append("%.2f" % low)
        desc = ", ".join(parts)

        # Return result
        if parentFieldName != "":
            fieldName = "%s.%s" % (parentFieldName, self.name)
        else:
            fieldName = self.name
        return ({fieldName: (outRanges, desc)}, [fieldName])

    def getBucketValues(self):
        """
        See the function description in base.py
        """
        # Need to re-create?
        if self._bucketValues is None:
            self._bucketValues = [
                math.pow(10, scaledValue)
                for scaledValue in self.encoder.getBucketValues()
            ]
        return self._bucketValues

    def getBucketInfo(self, buckets):
        """
        See the function description in base.py
        """
        scaledResult = self.encoder.getBucketInfo(buckets)[0]
        # Map the bucket's log-space value back to normal space.
        value = math.pow(10, scaledResult.value)
        return [EncoderResult(value=value, scalar=value,
                              encoding=scaledResult.encoding)]

    def topDownCompute(self, encoded):
        """
        See the function description in base.py

        Note that this returns a single EncoderResult, not a list.
        """
        scaledResult = self.encoder.topDownCompute(encoded)[0]
        value = math.pow(10, scaledResult.value)
        return EncoderResult(value=value, scalar=value,
                             encoding=scaledResult.encoding)

    def closenessScores(self, expValues, actValues, fractional=True):
        """
        See the function description in base.py

        Only the first element of each argument is used. Non-positive values
        are treated as minScaledValue in log space. With ``fractional=True``
        the result is 1 minus the log-space error as a fraction of the
        encoder's log-space range (capped at 1); otherwise the raw log-space
        error is returned.
        """
        # Compute the percent error in log space
        if expValues[0] > 0:
            expValue = math.log10(expValues[0])
        else:
            expValue = self.minScaledValue

        if actValues[0] > 0:
            actValue = math.log10(actValues[0])
        else:
            actValue = self.minScaledValue

        err = abs(expValue - actValue)
        if fractional:
            pctErr = err / (self.maxScaledValue - self.minScaledValue)
            pctErr = min(1.0, pctErr)
            closeness = 1.0 - pctErr
        else:
            closeness = err

        return numpy.array([closeness])
def testNonPeriodicBottomUp(self):
    """Test Non-periodic encoder bottom-up"""

    def decodeRanges(enc, encoded):
        # Decode, check that exactly one field came back, return its ranges.
        # list(...) keeps the indexing valid when values() is a view.
        (fieldsDict, _) = enc.decode(encoded)
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        return ranges

    def assertSingleValueRange(enc, output, v):
        # The decoded description must be one degenerate range near v.
        ranges = decodeRanges(enc, output)
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - v), enc.resolution)

    def sweepDecodeAndTopDown(enc):
        # Walk [minval, maxval) in quarter-resolution steps and check both
        # the decoded range and the top-down value at every step.
        v = enc.minval
        while v < enc.maxval:
            output = enc.encode(v)
            assertSingleValueRange(enc, output, v)
            topDown = enc.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), enc.resolution / 2)
            v += enc.resolution / 4

    encoder = ScalarEncoder(name="scalar", n=14, w=5, minval=1, maxval=10,
                            periodic=False, forced=True)

    self.assertTrue(numpy.array_equal(
        encoder.encode(1),
        numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    dtype=defaultDtype)))
    self.assertTrue(numpy.array_equal(
        encoder.encode(2),
        numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                    dtype=defaultDtype)))
    self.assertTrue(numpy.array_equal(
        encoder.encode(10),
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                    dtype=defaultDtype)))

    # Test that we get the same encoder when we construct it using
    # resolution instead of n
    d = encoder.__dict__
    encoder = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1,
                            maxval=10, periodic=False, forced=True)
    self.assertEqual(encoder.__dict__, d)

    # Test that we get the same encoder when we construct it using radius
    # instead of n
    encoder = ScalarEncoder(name="scalar", radius=5, w=5, minval=1,
                            maxval=10, periodic=False, forced=True)
    self.assertEqual(encoder.__dict__, d)

    # ----------------------------------------------------------------------
    # Test the input description generation and topDown decoding of a
    # non-periodic encoder
    v = encoder.minval
    while v < encoder.maxval:
        output = encoder.encode(v)
        assertSingleValueRange(encoder, output, v)

        topDown = encoder.topDownCompute(output)[0]
        self.assertTrue(numpy.array_equal(topDown.encoding, output))
        self.assertLessEqual(abs(topDown.value - v), encoder.resolution)

        # Test bucket support
        bucketIndices = encoder.getBucketIndices(v)
        topDown = encoder.getBucketInfo(bucketIndices)[0]
        self.assertLessEqual(abs(topDown.value - v), encoder.resolution / 2)
        self.assertEqual(topDown.scalar, topDown.value)
        self.assertTrue(numpy.array_equal(topDown.encoding, output))

        # Next value
        v += encoder.resolution / 4

    # Make sure we can fill in holes
    for bits in ([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]):
        ranges = decodeRanges(encoder, numpy.array(bits))
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

    # Test min and max
    encoder = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=10,
                            periodic=False, forced=True)
    decoded = encoder.topDownCompute(
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
    self.assertEqual(decoded.value, 10)
    decoded = encoder.topDownCompute(
        numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
    self.assertEqual(decoded.value, 1)

    # Make sure only the last and first encoding encodes to max and min, and
    # there is no value greater than max or min
    encoder = ScalarEncoder(name="scalar", n=140, w=3, minval=1, maxval=141,
                            periodic=False, forced=True)
    # NOTE(review): with n=140 and w=3 there are 138 start offsets (0..137),
    # but range(137) stops at 136, so the `i == 137` escape below never
    # triggers and the top-most encoding is not exercised. Confirm whether
    # range(138) was intended before changing the bound.
    for i in range(137):
        bits = [0] * 140
        bits[i:i + 3] = [1, 1, 1]
        decoded = encoder.topDownCompute(numpy.array(bits))[0]
        self.assertLessEqual(decoded.value, 141)
        self.assertGreaterEqual(decoded.value, 1)
        self.assertTrue(decoded.value < 141 or i == 137)
        self.assertTrue(decoded.value > 1 or i == 0)

    # ----------------------------------------------------------------------
    # Test the input description generation and top-down compute on a small
    # number non-periodic encoder
    sweepDecodeAndTopDown(
        ScalarEncoder(name="scalar", n=15, w=3, minval=.001, maxval=.002,
                      periodic=False, forced=True))

    # ----------------------------------------------------------------------
    # Test the input description generation on a large number, non-periodic
    # encoder
    sweepDecodeAndTopDown(
        ScalarEncoder(name="scalar", n=15, w=3, minval=1, maxval=1000000000,
                      periodic=False, forced=True))
def testDecodeAndResolution(self):
    """Test the input description generation, top-down compute, and bucket
    support on a periodic encoder
    """

    def decodeRanges(enc, bits):
        # Decode a literal bit pattern, check that exactly one field came
        # back, and return that field's list of ranges.
        (fieldsDict, _) = enc.decode(numpy.array(bits))
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        return ranges

    encoder = self._l
    v = encoder.minval
    while v < encoder.maxval:
        output = encoder.encode(v)
        (fieldsDict, fieldNames) = encoder.decode(output)

        self.assertEqual(len(fieldsDict), 1)
        self.assertEqual(len(fieldNames), 1)
        self.assertEqual(fieldNames, list(fieldsDict.keys()))
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - v), encoder.resolution)

        topDown = encoder.topDownCompute(output)[0]
        self.assertTrue(numpy.array_equal(topDown.encoding, output))
        self.assertLessEqual(abs(topDown.value - v), encoder.resolution / 2)

        # Test bucket support
        bucketIndices = encoder.getBucketIndices(v)
        topDown = encoder.getBucketInfo(bucketIndices)[0]
        self.assertLessEqual(abs(topDown.value - v), encoder.resolution / 2)
        self.assertEqual(topDown.value,
                         encoder.getBucketValues()[bucketIndices[0]])
        self.assertEqual(topDown.scalar, topDown.value)
        self.assertTrue(numpy.array_equal(topDown.encoding, output))

        # Next value
        v += encoder.resolution / 4

    # -----------------------------------------------------------------------
    # Test the input description generation on a large number, periodic
    # encoder
    encoder = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1,
                            maxval=8, periodic=True, forced=True)

    # Test with a "hole"
    ranges = decodeRanges(encoder,
                          [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [7.5, 7.5]))

    # Test with something wider than w, and with a hole, and wrapped
    ranges = decodeRanges(encoder,
                          [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])
    self.assertEqual(len(ranges), 2)
    self.assertTrue(numpy.array_equal(ranges[0], [7.5, 8]))
    self.assertTrue(numpy.array_equal(ranges[1], [1, 1]))

    # Test with something wider than w, no hole
    ranges = decodeRanges(encoder,
                          [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    self.assertEqual(len(ranges), 1)
    self.assertTrue(numpy.array_equal(ranges[0], [1.5, 2.5]))

    # Test with 2 ranges
    ranges = decodeRanges(encoder,
                          [1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0])
    self.assertEqual(len(ranges), 2)
    self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
    self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))

    # Test with 2 ranges, 1 of which is narrower than w
    ranges = decodeRanges(encoder,
                          [0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0])
    # assertEqual, not assertTrue: assertTrue(len(ranges), 2) would treat 2
    # as the failure message and pass for any non-empty list.
    self.assertEqual(len(ranges), 2)
    self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
    self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))
class ScalarEncoderTest(unittest.TestCase):
    """Unit tests for ScalarEncoder class"""

    def setUp(self):
        # use of forced is not recommended, but used here for readability,
        # see scalar.py
        self._l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                                periodic=True, forced=True)

    # ------------------------------------------------------------------
    # Private helpers (leading underscore keeps them out of test discovery)

    def _assertEncoding(self, encoder, value, bits):
        """Assert that ``encoder.encode(value)`` equals the bit list."""
        self.assertTrue(numpy.array_equal(
            encoder.encode(value), numpy.array(bits, dtype=defaultDtype)))

    def _decodeRanges(self, encoder, encoded):
        """Decode, assert exactly one field came back, return its ranges."""
        (fieldsDict, _) = encoder.decode(encoded)
        self.assertEqual(len(fieldsDict), 1)
        # list(...) keeps the indexing valid when values() is a view.
        (ranges, _) = list(fieldsDict.values())[0]
        return ranges

    def _assertSingleValueRange(self, encoder, output, v):
        """Assert the decoded output is one degenerate range close to v."""
        ranges = self._decodeRanges(encoder, output)
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - v), encoder.resolution)

    def _sweepDecodeAndTopDown(self, encoder):
        """Walk [minval, maxval) in quarter-resolution steps, checking the
        decoded range and the top-down value at every step."""
        v = encoder.minval
        while v < encoder.maxval:
            output = encoder.encode(v)
            self._assertSingleValueRange(encoder, output, v)
            topDown = encoder.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), encoder.resolution / 2)
            v += encoder.resolution / 4

    # ------------------------------------------------------------------
    # Tests

    def testScalarEncoder(self):
        """Testing ScalarEncoder..."""
        # test missing values
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(empty.sum(), 0)

    def testNaNs(self):
        """test NaNs"""
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(float("nan"))
        self.assertEqual(empty.sum(), 0)

    def testBottomUpEncodingPeriodicEncoder(self):
        """Test bottom-up encoding for a Periodic encoder"""
        encoder = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True,
                                forced=True)
        self.assertEqual(encoder.getDescription(), [("[1:8]", 0)])
        encoder = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                                periodic=True, forced=True)
        self.assertEqual(encoder.getDescription(), [("scalar", 0)])

        self._assertEncoding(encoder, 3,
                             [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
        self.assertTrue(numpy.array_equal(encoder.encode(3.1),
                                          encoder.encode(3)))
        self._assertEncoding(encoder, 3.5,
                             [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
        self.assertTrue(numpy.array_equal(encoder.encode(3.6),
                                          encoder.encode(3.5)))
        self.assertTrue(numpy.array_equal(encoder.encode(3.7),
                                          encoder.encode(3.5)))
        self._assertEncoding(encoder, 4,
                             [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0])

        # Values near the ends wrap around the periodic range.
        self._assertEncoding(encoder, 1,
                             [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
        self._assertEncoding(encoder, 1.5,
                             [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncoding(encoder, 7,
                             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
        self._assertEncoding(encoder, 7.5,
                             [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])

        self.assertEqual(encoder.resolution, 0.5)
        self.assertEqual(encoder.radius, 1.5)

    def testCreateResolution(self):
        """Test that we get the same encoder when we construct it using
        resolution instead of n
        """
        encoder = self._l
        d = encoder.__dict__
        encoder = ScalarEncoder(name="scalar", resolution=0.5, w=3, minval=1,
                                maxval=8, periodic=True, forced=True)
        self.assertEqual(encoder.__dict__, d)

        # Test that we get the same encoder when we construct it using
        # radius instead of n
        encoder = ScalarEncoder(name="scalar", radius=1.5, w=3, minval=1,
                                maxval=8, periodic=True, forced=True)
        self.assertEqual(encoder.__dict__, d)

    def testDecodeAndResolution(self):
        """Test the input description generation, top-down compute, and
        bucket support on a periodic encoder
        """
        encoder = self._l
        v = encoder.minval
        while v < encoder.maxval:
            output = encoder.encode(v)
            (fieldsDict, fieldNames) = encoder.decode(output)

            self.assertEqual(len(fieldsDict), 1)
            self.assertEqual(len(fieldNames), 1)
            self.assertEqual(fieldNames, list(fieldsDict.keys()))
            (ranges, _) = list(fieldsDict.values())[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), encoder.resolution)

            topDown = encoder.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertLessEqual(abs(topDown.value - v),
                                 encoder.resolution / 2)

            # Test bucket support
            bucketIndices = encoder.getBucketIndices(v)
            topDown = encoder.getBucketInfo(bucketIndices)[0]
            self.assertLessEqual(abs(topDown.value - v),
                                 encoder.resolution / 2)
            self.assertEqual(topDown.value,
                             encoder.getBucketValues()[bucketIndices[0]])
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue(numpy.array_equal(topDown.encoding, output))

            # Next value
            v += encoder.resolution / 4

        # -------------------------------------------------------------------
        # Test the input description generation on a large number, periodic
        # encoder
        encoder = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1,
                                maxval=8, periodic=True, forced=True)

        # Test with a "hole"
        ranges = self._decodeRanges(
            encoder,
            numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [7.5, 7.5]))

        # Test with something wider than w, and with a hole, and wrapped
        ranges = self._decodeRanges(
            encoder,
            numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
        self.assertEqual(len(ranges), 2)
        self.assertTrue(numpy.array_equal(ranges[0], [7.5, 8]))
        self.assertTrue(numpy.array_equal(ranges[1], [1, 1]))

        # Test with something wider than w, no hole
        ranges = self._decodeRanges(
            encoder,
            numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [1.5, 2.5]))

        # Test with 2 ranges
        ranges = self._decodeRanges(
            encoder,
            numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
        self.assertEqual(len(ranges), 2)
        self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
        self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))

        # Test with 2 ranges, 1 of which is narrower than w
        ranges = self._decodeRanges(
            encoder,
            numpy.array([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
        # assertEqual, not assertTrue: assertTrue(len(ranges), 2) would treat
        # 2 as the failure message and pass for any non-empty list.
        self.assertEqual(len(ranges), 2)
        self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
        self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))

    def testCloseness(self):
        """Test closenessScores for a periodic encoder"""
        encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1,
                                periodic=True, name="day of week",
                                forced=True)
        scores = encoder.closenessScores((2, 4, 7), (4, 2, 1),
                                         fractional=False)
        # Built-in zip instead of itertools.izip, which only exists on
        # Python 2.
        for actual, score in zip((2, 2, 1), scores):
            self.assertEqual(actual, score)

    def testNonPeriodicBottomUp(self):
        """Test Non-periodic encoder bottom-up"""
        encoder = ScalarEncoder(name="scalar", n=14, w=5, minval=1,
                                maxval=10, periodic=False, forced=True)
        self._assertEncoding(encoder, 1,
                             [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncoding(encoder, 2,
                             [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncoding(encoder, 10,
                             [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        # Test that we get the same encoder when we construct it using
        # resolution instead of n
        d = encoder.__dict__
        encoder = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1,
                                maxval=10, periodic=False, forced=True)
        self.assertEqual(encoder.__dict__, d)

        # Test that we get the same encoder when we construct it using
        # radius instead of n
        encoder = ScalarEncoder(name="scalar", radius=5, w=5, minval=1,
                                maxval=10, periodic=False, forced=True)
        self.assertEqual(encoder.__dict__, d)

        # -------------------------------------------------------------------
        # Test the input description generation and topDown decoding of a
        # non-periodic encoder
        v = encoder.minval
        while v < encoder.maxval:
            output = encoder.encode(v)
            self._assertSingleValueRange(encoder, output, v)

            topDown = encoder.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertLessEqual(abs(topDown.value - v), encoder.resolution)

            # Test bucket support
            bucketIndices = encoder.getBucketIndices(v)
            topDown = encoder.getBucketInfo(bucketIndices)[0]
            self.assertLessEqual(abs(topDown.value - v),
                                 encoder.resolution / 2)
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue(numpy.array_equal(topDown.encoding, output))

            # Next value
            v += encoder.resolution / 4

        # Make sure we can fill in holes
        for bits in ([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]):
            ranges = self._decodeRanges(encoder, numpy.array(bits))
            self.assertEqual(len(ranges), 1)
            self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

        # Test min and max
        encoder = ScalarEncoder(name="scalar", n=14, w=3, minval=1,
                                maxval=10, periodic=False, forced=True)
        decoded = encoder.topDownCompute(
            numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
        self.assertEqual(decoded.value, 10)
        decoded = encoder.topDownCompute(
            numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
        self.assertEqual(decoded.value, 1)

        # Make sure only the last and first encoding encodes to max and min,
        # and there is no value greater than max or min
        encoder = ScalarEncoder(name="scalar", n=140, w=3, minval=1,
                                maxval=141, periodic=False, forced=True)
        # NOTE(review): with n=140 and w=3 there are 138 start offsets
        # (0..137), but range(137) stops at 136, so the `i == 137` escape
        # below never triggers and the top-most encoding is not exercised.
        # Confirm whether range(138) was intended before changing the bound.
        for i in range(137):
            bits = [0] * 140
            bits[i:i + 3] = [1, 1, 1]
            decoded = encoder.topDownCompute(numpy.array(bits))[0]
            self.assertLessEqual(decoded.value, 141)
            self.assertGreaterEqual(decoded.value, 1)
            self.assertTrue(decoded.value < 141 or i == 137)
            self.assertTrue(decoded.value > 1 or i == 0)

        # -------------------------------------------------------------------
        # Test the input description generation and top-down compute on a
        # small number non-periodic encoder
        self._sweepDecodeAndTopDown(
            ScalarEncoder(name="scalar", n=15, w=3, minval=.001,
                          maxval=.002, periodic=False, forced=True))

        # -------------------------------------------------------------------
        # Test the input description generation on a large number,
        # non-periodic encoder
        self._sweepDecodeAndTopDown(
            ScalarEncoder(name="scalar", n=15, w=3, minval=1,
                          maxval=1000000000, periodic=False, forced=True))

    def testEncodeInvalidInputType(self):
        """Encoding a string must raise TypeError."""
        encoder = ScalarEncoder(name="enc", n=14, w=3, minval=1, maxval=8,
                                periodic=False, forced=True)
        with self.assertRaises(TypeError):
            encoder.encode("String")

    def testGetBucketInfoIntResolution(self):
        """Ensures that passing resolution as an int doesn't truncate
        values."""
        encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8,
                                periodic=True, forced=True)
        self.assertEqual(
            4.5, encoder.topDownCompute(encoder.encode(4.5))[0].scalar)

    @unittest.skipUnless(
        capnp, "pycapnp is not installed, skipping serialization test.")
    def testReadWrite(self):
        """Test ScalarEncoder Cap'n Proto serialization implementation."""
        originalValue = self._l.encode(1)

        proto1 = ScalarEncoderProto.new_message()
        self._l.write(proto1)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = ScalarEncoderProto.read(f)

        encoder = ScalarEncoder.read(proto2)

        self.assertIsInstance(encoder, ScalarEncoder)
        # Every constructor-level attribute must survive the round trip.
        for attr in ("w", "minval", "maxval", "periodic", "n", "radius",
                     "resolution", "name", "verbosity", "clipInput"):
            self.assertEqual(getattr(encoder, attr), getattr(self._l, attr))
        self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue))
        self.assertEqual(self._l.decode(encoder.encode(1)),
                         encoder.decode(self._l.encode(1)))

        # Feed in a new value and ensure the encodings match
        result1 = self._l.encode(7)
        result2 = encoder.encode(7)
        self.assertTrue(numpy.array_equal(result1, result2))

    def testSettingNWithMaxvalMinvalNone(self):
        """Setting n when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=14, radius=0,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)

    def testSettingScalarAndResolution(self):
        """Setting both scalar and resolution not allowed."""
        with self.assertRaises(ValueError):
            ScalarEncoder(3, None, None, name="scalar", n=0, radius=None,
                          resolution=0.5, forced=True)

    def testSettingRadiusWithMaxvalMinvalNone(self):
        """If radius when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=0, radius=1.5,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)
class LogEncoder(Encoder):
    """
    This class wraps the :class:`.ScalarEncoder`.

    A Log encoder represents a floating point value on a logarithmic scale.

    .. code-block:: python

       valueToEncode = log10(input)

    :param w: passed through unchanged to the wrapped scalar encoder.
    :param minval: smallest input value, in normal (non-log) space. Values
                   below ``1e-07`` are raised to ``1e-07`` because log10(0)
                   is undefined.
    :param maxval: largest input value, in normal (non-log) space. Must be
                   strictly greater than the (possibly raised) ``minval``.
    :param periodic: accepted for signature compatibility only; the wrapped
                     scalar encoder is always created with
                     ``periodic=False``.
    :param n: passed through unchanged to the wrapped scalar encoder.
    :param resolution: The minimum change in scaled value needed to produce
                       a change in encoding. This should be specified in log
                       space. For example, with a resolution of 0.1 the
                       scaled values 1.0 and 1.1 will be distinguishable in
                       the output. In terms of the original input values,
                       this means 10^1 (10) and 10^1.1 (about 12.59) will be
                       distinguishable.
    :param radius: inputs separated by more than this distance in log space
                   will have non-overlapping representations
    :param name: field name used in the description and in decode output.
    :param verbosity: values >= 2 make :meth:`encodeIntoArray` print
                      debugging output.
    :param clipInput: stored and forwarded to the wrapped scalar encoder.
                      Inputs are clamped to ``[minval, maxval]`` by this
                      class regardless of this flag.
    :param forced: passed through unchanged to the wrapped scalar encoder.

    :raises ValueError: if ``maxval`` is not larger than ``minval`` (after
                        the lower limit has been applied).
    """

    def __init__(self, w=5, minval=1e-07, maxval=10000, periodic=False, n=0,
                 radius=0, resolution=0, name="log", verbosity=0,
                 clipInput=True, forced=False):
        # Lower bound for log encoding near machine precision limit
        lowLimit = 1e-07

        # Limit minval as log10(0) is undefined.
        if minval < lowLimit:
            minval = lowLimit

        # Check that minval is still lower than maxval
        if not minval < maxval:
            raise ValueError("Max val must be larger than min val or the lower limit "
                             "for this encoder %.7f" % lowLimit)

        self.encoders = None
        self.verbosity = verbosity

        # Scale values for calculations within the class
        self.minScaledValue = math.log10(minval)
        self.maxScaledValue = math.log10(maxval)

        if not self.maxScaledValue > self.minScaledValue:
            raise ValueError("Max val must be larger, in log space, than min val.")

        self.clipInput = clipInput
        self.minval = minval
        self.maxval = maxval

        # All real encoding work is delegated to a non-periodic scalar
        # encoder that operates on log10 of the input.
        self.encoder = ScalarEncoder(w=w,
                                     minval=self.minScaledValue,
                                     maxval=self.maxScaledValue,
                                     periodic=False,
                                     n=n,
                                     radius=radius,
                                     resolution=resolution,
                                     verbosity=self.verbosity,
                                     clipInput=self.clipInput,
                                     forced=forced)
        self.width = self.encoder.getWidth()
        self.description = [(name, 0)]
        self.name = name

        # This list is created by getBucketValues() the first time it is
        # called, and re-created whenever our buckets would be re-arranged.
        self._bucketValues = None

    def getWidth(self):
        """Return the output width taken from the wrapped scalar encoder."""
        return self.width

    def getDescription(self):
        """Return the ``[(name, 0)]`` description built at construction."""
        return self.description

    def getDecoderOutputFieldTypes(self):
        """
        Encoder class virtual method override
        """
        return (FieldMetaType.float, )

    def _getScaledValue(self, inpt):
        """
        Convert the input, which is in normal space, into log space.

        :param inpt: value in normal space, or the missing-data sentinel.
        :returns: ``None`` for the missing-data sentinel, otherwise log10 of
                  the input after clamping it to ``[minval, maxval]``.
        """
        if inpt == SENTINEL_VALUE_FOR_MISSING_DATA:
            return None

        # Clamp into the supported range before taking the logarithm.
        val = inpt
        if val < self.minval:
            val = self.minval
        elif val > self.maxval:
            val = self.maxval

        return math.log10(val)

    def getBucketIndices(self, inpt):
        """
        See the function description in base.py

        Returns ``[None]`` for missing data, otherwise the bucket indices of
        the wrapped scalar encoder for the log-scaled input.
        """
        scaledVal = self._getScaledValue(inpt)

        if scaledVal is None:
            return [None]
        return self.encoder.getBucketIndices(scaledVal)

    def encodeIntoArray(self, inpt, output):
        """
        See the function description in base.py

        Writes the encoding into ``output`` in place; missing data yields
        all zeros.
        """
        scaledVal = self._getScaledValue(inpt)

        if scaledVal is None:
            output[0:] = 0
        else:
            self.encoder.encodeIntoArray(scaledVal, output)

            if self.verbosity >= 2:
                # Single pre-formatted strings print identically whether
                # `print` is a statement (Python 2) or a function.
                print("input: %s scaledVal: %s output: %s"
                      % (inpt, scaledVal, output))
                print("decoded: %s"
                      % (self.decodedToStr(self.decode(output)),))

    def decode(self, encoded, parentFieldName=''):
        """
        See the function description in base.py

        :returns: ``(fieldsDict, fieldNames)`` where the single entry of
                  ``fieldsDict`` maps the field name to
                  ``(ranges, description)`` with ranges converted back to
                  normal space. If the wrapped encoder decodes nothing, its
                  empty result is returned unchanged.
        """
        # Get the scalar values from the underlying scalar encoder
        (fieldsDict, fieldNames) = self.encoder.decode(encoded)
        if len(fieldsDict) == 0:
            return (fieldsDict, fieldNames)

        # Expect only 1 field
        assert(len(fieldsDict) == 1)

        # Convert each range into normal space
        (inRanges, inDesc) = list(fieldsDict.values())[0]
        outRanges = [(math.pow(10, minV), math.pow(10, maxV))
                     for (minV, maxV) in inRanges]

        # Generate a text description of the ranges; a degenerate range is
        # shown as a single value.
        parts = []
        for (low, high) in outRanges:
            if low != high:
                parts.append("%.2f-%.2f" % (low, high))
            else:
                parts.append("%.2f" % low)
        desc = ", ".join(parts)

        # Return result
        if parentFieldName != '':
            fieldName = "%s.%s" % (parentFieldName, self.name)
        else:
            fieldName = self.name
        return ({fieldName: (outRanges, desc)}, [fieldName])

    def getBucketValues(self):
        """
        See the function description in base.py

        The list is computed once from the wrapped encoder's bucket values
        (converted to normal space) and cached.
        """
        # Need to re-create?
        if self._bucketValues is None:
            self._bucketValues = [math.pow(10, scaledValue)
                                  for scaledValue
                                  in self.encoder.getBucketValues()]

        return self._bucketValues

    def getBucketInfo(self, buckets):
        """
        See the function description in base.py

        :returns: a one-element list holding an ``EncoderResult`` whose
                  value and scalar are in normal space.
        """
        scaledResult = self.encoder.getBucketInfo(buckets)[0]
        value = math.pow(10, scaledResult.value)

        return [EncoderResult(value=value, scalar=value,
                              encoding=scaledResult.encoding)]

    def topDownCompute(self, encoded):
        """
        See the function description in base.py

        NOTE(review): unlike :meth:`getBucketInfo`, this returns a bare
        ``EncoderResult`` rather than a one-element list. Left unchanged
        because existing callers may depend on it -- confirm against the
        base class contract.
        """
        scaledResult = self.encoder.topDownCompute(encoded)[0]
        value = math.pow(10, scaledResult.value)

        return EncoderResult(value=value, scalar=value,
                             encoding=scaledResult.encoding)

    def closenessScores(self, expValues, actValues, fractional=True):
        """
        See the function description in base.py

        The error is measured in log space. Non-positive values are treated
        as ``minScaledValue`` since their logarithm is undefined.

        :returns: a one-element numpy array. With ``fractional=True`` it is
                  ``1 - min(1, err / (maxScaledValue - minScaledValue))``;
                  otherwise it is the absolute log-space error itself.
        """
        if expValues[0] > 0:
            expValue = math.log10(expValues[0])
        else:
            expValue = self.minScaledValue

        if actValues[0] > 0:
            actValue = math.log10(actValues[0])
        else:
            actValue = self.minScaledValue

        err = abs(expValue - actValue)
        if fractional:
            pctErr = err / (self.maxScaledValue - self.minScaledValue)
            pctErr = min(1.0, pctErr)
            closeness = 1.0 - pctErr
        else:
            closeness = err

        return numpy.array([closeness])

    @classmethod
    def read(cls, proto):
        """
        Build an encoder from a serialized ``proto`` without running
        ``__init__``.

        :param proto: message carrying the fields written by :meth:`write`.
        :returns: a new instance of ``cls``.
        """
        encoder = object.__new__(cls)
        encoder.verbosity = proto.verbosity
        encoder.minScaledValue = proto.minScaledValue
        encoder.maxScaledValue = proto.maxScaledValue
        encoder.clipInput = proto.clipInput
        encoder.minval = proto.minval
        encoder.maxval = proto.maxval
        encoder.encoder = ScalarEncoder.read(proto.encoder)
        encoder.name = proto.name
        encoder.width = encoder.encoder.getWidth()
        encoder.description = [(encoder.name, 0)]
        # __init__ is bypassed, so mirror the attribute it sets; otherwise a
        # deserialized instance would lack `encoders` entirely.
        encoder.encoders = None
        encoder._bucketValues = None
        return encoder

    def write(self, proto):
        """
        Copy this encoder's state, including the wrapped scalar encoder,
        into ``proto``.
        """
        proto.verbosity = self.verbosity
        proto.minScaledValue = self.minScaledValue
        proto.maxScaledValue = self.maxScaledValue
        proto.clipInput = self.clipInput
        proto.minval = self.minval
        proto.maxval = self.maxval
        self.encoder.write(proto.encoder)
        proto.name = self.name