def testResolution(self):
    """
    Check the resolution contract of the encoder.

    Inputs that fall within one resolution step of each other must produce
    the same encoding; inputs further apart must produce different ones.
    """
    rdse = RandomDistributedScalarEncoder(name="encoder", resolution=1.0)

    # 23.0 is the first value encoded, so it becomes the offset. With a
    # resolution of 1, values such as 22.9 and 23.4 should share the bucket
    # index (and therefore the encoding) of 23.0.
    atOffset = rdse.encode(23.0)
    slightlyAbove = rdse.encode(23.1)
    slightlyBelow = rdse.encode(22.9)
    oneStepAbove = rdse.encode(24.0)

    sameBucketMsg = "Numbers within resolution don't have the same encoding"
    otherBucketMsg = "Numbers outside resolution have the same encoding"

    self.assertEqual(atOffset.sum(), rdse.w)

    # Two encodings are identical when every one of their bits matches, i.e.
    # the count of matching positions equals the encoder width.
    self.assertEqual((atOffset == slightlyAbove).sum(), rdse.getWidth(),
                     sameBucketMsg)
    self.assertEqual((atOffset == slightlyBelow).sum(), rdse.getWidth(),
                     sameBucketMsg)
    self.assertNotEqual((atOffset == oneStepAbove).sum(), rdse.getWidth(),
                        otherBucketMsg)

    halfStepBelow = rdse.encode(22.5)
    self.assertNotEqual((atOffset == halfStepBelow).sum(), rdse.getWidth(),
                        otherBucketMsg)
def testSeed(self):
    """
    Check how the seed affects the generated encodings.

    Two encoders created with the same explicit seed must encode a value
    identically. An encoder created with seed=-1 (the "not specified" case
    according to the original test description) must differ both from a
    seeded encoder and from another seed=-1 encoder.
    """
    def makeEncoder(name, seed):
        # All four encoders differ only by name and seed.
        return RandomDistributedScalarEncoder(name=name, resolution=1.0,
                                              seed=seed)

    seededA = makeEncoder("encoder1", 42)
    seededB = makeEncoder("encoder2", 42)
    unseededA = makeEncoder("encoder3", -1)
    unseededB = makeEncoder("encoder4", -1)

    encSeededA = seededA.encode(23.0)
    encSeededB = seededB.encode(23.0)
    encUnseededA = unseededA.encode(23.0)
    encUnseededB = unseededB.encode(23.0)

    self.assertEqual((encSeededA == encSeededB).sum(), seededA.getWidth(),
                     "Same seed gives rise to different encodings")

    self.assertNotEqual((encSeededA == encUnseededA).sum(),
                        seededA.getWidth(),
                        "Different seeds gives rise to same encodings")

    self.assertNotEqual((encUnseededA == encUnseededB).sum(),
                        seededA.getWidth(),
                        "seeds of -1 give rise to same encodings")
def testMapBucketIndexToNonZeroBits(self):
    """
    Test that mapBucketIndexToNonZeroBits works and that max buckets and
    clipping are handled properly.
    """
    enc = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150)

    # Set a low number of max buckets
    enc._initializeBucketMap(10, None)
    enc.encode(0.0)
    enc.encode(-7.0)
    enc.encode(7.0)

    self.assertEqual(len(enc.bucketMap), enc._maxBuckets,
                     "_maxBuckets exceeded")

    # An index below the valid range must map to the first bucket.
    self.assertTrue(
        (enc.mapBucketIndexToNonZeroBits(-1) == enc.bucketMap[0]).all(),
        "mapBucketIndexToNonZeroBits did not handle negative index")

    # An index above the valid range must map to the last bucket (index 9
    # with 10 max buckets). The failure message previously said "negative
    # index" here, which pointed at the wrong case.
    self.assertTrue(
        (enc.mapBucketIndexToNonZeroBits(1000) == enc.bucketMap[9]).all(),
        "mapBucketIndexToNonZeroBits did not handle index that is too large")

    # Values beyond either end of the bucket range must encode like the
    # nearest in-range value.
    e23 = enc.encode(23.0)
    e6 = enc.encode(6)
    self.assertEqual((e23 == e6).sum(), enc.getWidth(),
                     "Values not clipped correctly during encoding")

    e_8 = enc.encode(-8)
    e_7 = enc.encode(-7)
    self.assertEqual((e_8 == e_7).sum(), enc.getWidth(),
                     "Values not clipped correctly during encoding")

    self.assertEqual(enc.getBucketIndices(-8)[0], 0,
                     "getBucketIndices returned negative bucket index")
    self.assertEqual(enc.getBucketIndices(23)[0], enc._maxBuckets-1,
                     "getBucketIndices returned bucket index that is too large")
def testWriteRead(self):
    """
    Round-trip an encoder through its proto serialization.

    The encoder is written to a proto message, the message is written to a
    temporary file and read back, and a new encoder is built from it. The
    restored encoder must match the source in its parameters, seed, bucket
    map and the encodings/decodings it produces.
    """
    source = RandomDistributedScalarEncoder(
        name="encoder", resolution=1.0, w=23, n=500, offset=0.0)
    encodingBeforeWrite = source.encode(1)

    outgoingProto = RandomDistributedScalarEncoderProto.new_message()
    source.write(outgoingProto)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as tmp:
        outgoingProto.write(tmp)
        tmp.seek(0)
        incomingProto = RandomDistributedScalarEncoderProto.read(tmp)

    restored = RandomDistributedScalarEncoder.read(incomingProto)

    self.assertIsInstance(restored, RandomDistributedScalarEncoder)

    # Scalar parameters must survive the round trip unchanged.
    for attr in ("resolution", "w", "n", "name", "verbosity", "minIndex",
                 "maxIndex"):
        self.assertEqual(getattr(restored, attr), getattr(source, attr))

    fromSource = source.encode(1)
    fromRestored = restored.encode(1)
    self.assertTrue(numpy.array_equal(fromRestored, encodingBeforeWrite))
    self.assertEqual(source.decode(fromRestored),
                     restored.decode(fromSource))
    self.assertEqual(source.random.getSeed(), restored.random.getSeed())

    # Every bucket known to the source must exist, bit for bit, in the
    # restored encoder.
    for key, value in source.bucketMap.items():
        self.assertTrue(numpy.array_equal(value, restored.bucketMap[key]))
class RDSEEncoder():
    """
    Small wrapper pairing a RandomDistributedScalarEncoder with a
    MultiEncoder that contains it under the field name "series".

    Methods prefixed with ``m_`` go through the MultiEncoder; methods
    prefixed with ``r_`` use the RandomDistributedScalarEncoder directly.

    NOTE(review): this class refers to numpy both as ``np`` and as
    ``numpy``; confirm that the module imports both names.
    """

    def __init__(self, resolution=.5):
        """Create the encoder instance for our test and return it."""
        self.resolution = resolution

        encoderName = "RDSE-(res={})".format(self.resolution)
        self.series_encoder = RandomDistributedScalarEncoder(
            self.resolution, name=encoderName)

        self.encoder = MultiEncoder()
        self.encoder.addEncoder("series", self.series_encoder)

        # Placeholder "previous encoding" until m_encode/m_overlap is called.
        self.last_m_encode = np.zeros(1)

    def get_encoder(self):
        """Return the MultiEncoder instance."""
        return self.encoder

    def get_resolution(self):
        """Return the resolution this wrapper was created with."""
        return self.resolution

    def m_encode(self, inputData):
        """Encode inputData with the MultiEncoder and remember the result."""
        encoding = self.encoder.encode(inputData)
        self.last_m_encode = encoding
        return encoding

    def m_overlap(self, inputData):
        """
        Encode inputData with the MultiEncoder and return the sum of the
        element-wise product of the new encoding and the previously
        remembered one. The new encoding becomes the remembered one.
        """
        previous = self.last_m_encode
        current = self.encoder.encode(inputData)
        self.last_m_encode = current
        return numpy.sum(numpy.multiply(current, previous))

    def r_encode(self, inputData):
        """Encode inputData with the RandomDistributedScalarEncoder."""
        return self.series_encoder.encode(inputData)

    def r_overlap(self, inputA, inputB):
        """
        Return the sum of the element-wise product of the encodings of
        inputA and inputB, both produced by the scalar encoder.
        """
        encodingA = self.series_encoder.encode(inputA)
        encodingB = self.series_encoder.encode(inputB)
        return numpy.sum(numpy.multiply(encodingA, encodingB))
def testMissingValues(self):
    """
    Check that the missing-data sentinel and NaN both encode to a vector
    with no active bits.
    """
    rdse = RandomDistributedScalarEncoder(name="encoder", resolution=1.0)

    for missing in (SENTINEL_VALUE_FOR_MISSING_DATA, float("nan")):
        encoding = rdse.encode(missing)
        self.assertEqual(encoding.sum(), 0)
def testOffset(self):
    """
    Check how the encoder's offset is chosen.

    Without an explicit offset, the first encoded value becomes the offset;
    with an explicit offset, encoding a value must not change it.
    """
    implicitOffset = RandomDistributedScalarEncoder(name="encoder",
                                                    resolution=1.0)
    implicitOffset.encode(23.0)
    self.assertEqual(
        implicitOffset._offset, 23.0,
        "Offset not specified and not initialized to first input")

    explicitOffset = RandomDistributedScalarEncoder(name="encoder",
                                                    resolution=1.0,
                                                    offset=25.0)
    explicitOffset.encode(23.0)
    self.assertEqual(
        explicitOffset._offset, 25.0,
        "Offset not initialized to specified constructor parameter")
def testEncoding(self):
    """
    Test basic encoding functionality. Create encodings without crashing and
    check they contain the correct number of on and off bits. Check some
    encodings for expected overlap. Test that encodings for old values don't
    change once we generate new buckets.
    """
    # Initialize with non-default parameters and encode with a number close
    # to the offset
    encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0,
                                             w=23, n=500, offset=0.0)
    e0 = encoder.encode(-0.1)

    self.assertEqual(e0.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e0.size, 500, "Width of the vector is incorrect")
    self.assertEqual(
        encoder.getBucketIndices(0.0)[0], encoder._maxBuckets / 2,
        "Offset doesn't correspond to middle bucket")
    self.assertEqual(len(encoder.bucketMap), 1, "Number of buckets is not 1")

    # Encode with a number that is resolution away from offset. Now we
    # should have two buckets and this encoding should be one bit away
    # from e0
    e1 = encoder.encode(1.0)
    self.assertEqual(len(encoder.bucketMap), 2, "Number of buckets is not 2")
    self.assertEqual(e1.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e1.size, 500, "Width of the vector is incorrect")
    self.assertEqual(computeOverlap(e0, e1), 22,
                     "Overlap is not equal to w-1")

    # Encode with a number that is resolution*w away from offset. Now we
    # should have many buckets and this encoding should have very little
    # overlap with e0
    e25 = encoder.encode(25.0)
    # The message now describes the condition actually being checked (it
    # used to read "is not 2", copied from the assertion above).
    self.assertGreater(len(encoder.bucketMap), 23,
                       "Number of buckets is not greater than 23")
    self.assertEqual(e25.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e25.size, 500, "Width of the vector is incorrect")
    self.assertLess(computeOverlap(e0, e25), 4, "Overlap is too high")

    # Test encoding consistency. The encodings for previous numbers
    # shouldn't change even though we have added additional buckets
    self.assertTrue(
        numpy.array_equal(e0, encoder.encode(-0.1)),
        "Encodings are not consistent - they have changed after new buckets "
        "have been created")
    self.assertTrue(
        numpy.array_equal(e1, encoder.encode(1.0)),
        "Encodings are not consistent - they have changed after new buckets "
        "have been created")
def profileEnc(maxValue, nRuns):
    """
    Feed the same random integer inputs to a ScalarEncoder and an RDSE and
    print the resulting output width (``n``) of each.

    :param maxValue: inclusive upper bound of the generated inputs; also used
                     as ``maxval`` of the ScalarEncoder. (Previously this
                     argument was ignored and ``nRuns`` was used instead.)
    :param nRuns: number of random inputs to generate and encode
    """
    minV = 0
    maxV = maxValue

    # generate input data; randint's upper bound is exclusive, hence the +1
    data = numpy.random.randint(minV, maxV + 1, nRuns)

    # instantiate measured encoders
    encScalar = ScalarEncoder(w=21, minval=minV, maxval=maxV, resolution=1)
    encRDSE = RDSE(resolution=1)

    # profile!
    for d in data:
        encScalar.encode(d)
        encRDSE.encode(d)

    # Single-argument form so the output is the same on Python 2 and 3.
    print("Scalar n= {0}  RDSE n= {1}".format(encScalar.n, encRDSE.n))
def profileEnc(maxValue, nRuns):
    """
    Feed the same random integer inputs to a ScalarEncoder and an RDSE and
    print the resulting output width (``n``) of each.

    :param maxValue: inclusive upper bound of the generated inputs; also used
                     as ``maxval`` of the ScalarEncoder. (Previously this
                     argument was ignored and ``nRuns`` was used instead.)
    :param nRuns: number of random inputs to generate and encode
    """
    minV = 0
    maxV = maxValue

    # generate input data; randint's upper bound is exclusive, hence the +1
    data = numpy.random.randint(minV, maxV + 1, nRuns)

    # instantiate measured encoders
    encScalar = ScalarEncoder(w=21, minval=minV, maxval=maxV, resolution=1)
    encRDSE = RDSE(resolution=1)

    # profile!
    for d in data:
        encScalar.encode(d)
        encRDSE.encode(d)

    # Replaces the Python 2-only print statement; this single-argument form
    # produces the same text on Python 2 and is valid syntax on Python 3.
    print("Scalar n= {0}  RDSE n= {1}".format(encScalar.n, encRDSE.n))
def testEncoding(self):
    """
    Test basic encoding functionality. Create encodings without crashing and
    check they contain the correct number of on and off bits. Check some
    encodings for expected overlap. Test that encodings for old values don't
    change once we generate new buckets.
    """
    # Initialize with non-default parameters and encode with a number close
    # to the offset
    enc = RandomDistributedScalarEncoder(name='enc', resolution=1.0, w=23,
                                         n=500, offset=0.0)
    e0 = enc.encode(-0.1)

    self.assertEqual(e0.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e0.size, 500, "Width of the vector is incorrect")
    self.assertEqual(enc.getBucketIndices(0.0)[0], enc._maxBuckets / 2,
                     "Offset doesn't correspond to middle bucket")
    self.assertEqual(len(enc.bucketMap), 1, "Number of buckets is not 1")

    # Encode with a number that is resolution away from offset. Now we
    # should have two buckets and this encoding should be one bit away
    # from e0
    e1 = enc.encode(1.0)
    self.assertEqual(len(enc.bucketMap), 2, "Number of buckets is not 2")
    self.assertEqual(e1.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e1.size, 500, "Width of the vector is incorrect")
    self.assertEqual(computeOverlap(e0, e1), 22,
                     "Overlap is not equal to w-1")

    # Encode with a number that is resolution*w away from offset. Now we
    # should have many buckets and this encoding should have very little
    # overlap with e0
    e25 = enc.encode(25.0)
    # The message now describes the condition actually being checked (it
    # used to read "is not 2", copied from the assertion above).
    self.assertGreater(len(enc.bucketMap), 23,
                       "Number of buckets is not greater than 23")
    self.assertEqual(e25.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e25.size, 500, "Width of the vector is incorrect")
    self.assertLess(computeOverlap(e0, e25), 4, "Overlap is too high")

    # Test encoding consistency. The encodings for previous numbers
    # shouldn't change even though we have added additional buckets.
    # All 500 positions must match for two encodings to be identical.
    self.assertEqual(
        (e0 == enc.encode(-0.1)).sum(), 500,
        "Encodings are not consistent - they have changed after new buckets "
        "have been created")
    self.assertEqual(
        (e1 == enc.encode(1.0)).sum(), 500,
        "Encodings are not consistent - they have changed after new buckets "
        "have been created")
def testMapBucketIndexToNonZeroBits(self):
    """
    Test that mapBucketIndexToNonZeroBits works and that max buckets and
    clipping are handled properly.
    """
    encoder = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150)

    # Set a low number of max buckets
    encoder._initializeBucketMap(10, None)
    encoder.encode(0.0)
    encoder.encode(-7.0)
    encoder.encode(7.0)

    self.assertEqual(len(encoder.bucketMap), encoder._maxBuckets,
                     "_maxBuckets exceeded")

    # An index below the valid range must map to the first bucket.
    self.assertTrue(
        numpy.array_equal(encoder.mapBucketIndexToNonZeroBits(-1),
                          encoder.bucketMap[0]),
        "mapBucketIndexToNonZeroBits did not handle negative"
        " index")

    # An index above the valid range must map to the last bucket (index 9
    # with 10 max buckets). The failure message previously said "negative
    # index" here, which pointed at the wrong case.
    self.assertTrue(
        numpy.array_equal(encoder.mapBucketIndexToNonZeroBits(1000),
                          encoder.bucketMap[9]),
        "mapBucketIndexToNonZeroBits did not handle index that is too large")

    # Values beyond either end of the bucket range must encode like the
    # nearest in-range value.
    e23 = encoder.encode(23.0)
    e6 = encoder.encode(6)
    self.assertEqual((e23 == e6).sum(), encoder.getWidth(),
                     "Values not clipped correctly during encoding")

    ep8 = encoder.encode(-8)
    ep7 = encoder.encode(-7)
    self.assertEqual((ep8 == ep7).sum(), encoder.getWidth(),
                     "Values not clipped correctly during encoding")

    self.assertEqual(encoder.getBucketIndices(-8)[0], 0,
                     "getBucketIndices returned negative bucket index")
    self.assertEqual(encoder.getBucketIndices(23)[0], encoder._maxBuckets-1,
                     "getBucketIndices returned bucket index that is too"
                     " large")
def _generateSequence():
    """
    Build a list of NUM_PATTERNS patterns from the CSV at _INPUT_FILE_PATH.

    The first three rows of the file are skipped. For each following row the
    value in column 1 is converted to float, encoded with a
    RandomDistributedScalarEncoder (resolution 0.88), and the set of indices
    of the non-zero bits of the encoding is appended to the result.

    @return (list) one set of active bit indices per pattern
    """
    encoder = RandomDistributedScalarEncoder(0.88)
    patterns = []

    with open(_INPUT_FILE_PATH) as csvFile:
        rows = csv.reader(csvFile)

        # Skip the three leading rows (presumably header rows - confirm
        # against the input file format).
        for _ in xrange(3):
            next(rows)

        for _ in xrange(NUM_PATTERNS):
            row = next(rows)
            encoding = encoder.encode(float(row[1]))
            patterns.append(set(encoding.nonzero()[0]))

    return patterns
def _generateSequence():
    """
    Build a list of NUM_PATTERNS patterns from the CSV at _INPUT_FILE_PATH.

    The first three rows of the file are skipped. For each following row the
    value in column 1 is converted to float, encoded with a
    RandomDistributedScalarEncoder (resolution 0.88), and the set of indices
    of the non-zero bits of the encoding is appended to the result.

    @return (list) one set of active bit indices per pattern
    """
    encoder = RandomDistributedScalarEncoder(0.88)
    patterns = []

    with open(_INPUT_FILE_PATH) as csvFile:
        rows = csv.reader(csvFile)

        # Skip the three leading rows (presumably header rows - confirm
        # against the input file format).
        for _ in xrange(3):
            next(rows)

        for _ in xrange(NUM_PATTERNS):
            row = next(rows)
            encoding = encoder.encode(float(row[1]))
            patterns.append(set(encoding.nonzero()[0]))

    return patterns
def testOverlapStatistics(self):
    """
    Verify that the overlaps between generated encodings stay within the
    expected range.

    The encoder is asked to build a large number of representations under
    somewhat stressful conditions, and the result is then validated. This
    test relies on the _overlapOK and _countOverlapIndices methods working
    correctly.
    """
    # Generate about 600 encodings. n is set relatively low to increase the
    # chance of false overlaps.
    rdse = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150,
                                          seed=getSeed())
    for value in (0.0, -300.0, 300.0):
        rdse.encode(value)

    overlapsAreLegal = validateEncoder(rdse, subsampling=3)
    self.assertTrue(overlapsAreLegal,
                    "Illegal overlap encountered in encoder")
ScalarEncoder? enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=False, forced=True) print "3 =", enc.encode(3) print "4 =", enc.encode(4) print "5 =", enc.encode(5) print "1000 =", enc.encode(1000) from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder RandomDistributedScalarEncoder? rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5) print "3 = ", rdse.encode(3) print "4 = ", rdse.encode(4) print "5 = ", rdse.encode(5) print print "100 = ", rdse.encode(100) print "100000 =", rdse.encode(1000) import datetime from nupic.encoders.date import DateEncoder DateEncoder? de = DateEncoder(season=5)
class TemporalMemoryPerformanceTest(unittest.TestCase):
    """
    Times four implementations (TemporalMemoryPy, TemporalMemoryCPP, TP and
    TP10X2) on the same encoded input sequence, and asserts that the second
    of each pair (TemporalMemoryCPP, TP10X2) is faster than the first.
    """

    def setUp(self):
        """Create the four model instances and the scalar encoder."""
        self.tmPy = TemporalMemoryPy(columnDimensions=[2048],
                                     cellsPerColumn=32,
                                     initialPermanence=0.5,
                                     connectedPermanence=0.8,
                                     minThreshold=10,
                                     maxNewSynapseCount=12,
                                     permanenceIncrement=0.1,
                                     permanenceDecrement=0.05,
                                     activationThreshold=15)

        self.tmCPP = TemporalMemoryCPP(columnDimensions=[2048],
                                       cellsPerColumn=32,
                                       initialPermanence=0.5,
                                       connectedPermanence=0.8,
                                       minThreshold=10,
                                       maxNewSynapseCount=12,
                                       permanenceIncrement=0.1,
                                       permanenceDecrement=0.05,
                                       activationThreshold=15)

        self.tp = TP(numberOfCols=2048,
                     cellsPerColumn=32,
                     initialPerm=0.5,
                     connectedPerm=0.8,
                     minThreshold=10,
                     newSynapseCount=12,
                     permanenceInc=0.1,
                     permanenceDec=0.05,
                     activationThreshold=15,
                     globalDecay=0,
                     burnIn=1,
                     checkSynapseConsistency=False,
                     pamLength=1)

        self.tp10x2 = TP10X2(numberOfCols=2048,
                             cellsPerColumn=32,
                             initialPerm=0.5,
                             connectedPerm=0.8,
                             minThreshold=10,
                             newSynapseCount=12,
                             permanenceInc=0.1,
                             permanenceDec=0.05,
                             activationThreshold=15,
                             globalDecay=0,
                             burnIn=1,
                             checkSynapseConsistency=False,
                             pamLength=1)

        self.scalarEncoder = RandomDistributedScalarEncoder(0.88)

    def testSingleSequence(self):
        """Feed one sequence to all models and compare the elapsed times."""
        print("Test: Single sequence")
        sequence = self._generateSequence()
        times = self._feedAll(sequence)

        # assertLess reports both values on failure, unlike assertTrue(a < b).
        self.assertLess(times[1], times[0])
        self.assertLess(times[3], times[2])

    # ==============================
    # Helper functions
    # ==============================

    def _generateSequence(self):
        """
        Read NUM_PATTERNS rows from the CSV at _INPUT_FILE_PATH (after
        skipping its first three rows), encode column 1 of each row, and
        return a list with the set of active bit indices of each encoding.
        """
        sequence = []
        with open(_INPUT_FILE_PATH) as fin:
            reader = csv.reader(fin)

            # Skip the three leading rows (presumably header rows - confirm
            # against the input file format). The builtin next() is used
            # because reader.next() exists only on Python 2.
            next(reader)
            next(reader)
            next(reader)

            for _ in range(NUM_PATTERNS):
                record = next(reader)
                value = float(record[1])
                encodedValue = self.scalarEncoder.encode(value)
                activeBits = set(encodedValue.nonzero()[0])
                sequence.append(activeBits)
        return sequence

    def _feedAll(self, sequence, learn=True, num=1):
        """
        Feed ``sequence`` (repeated ``num`` times) to every model, print the
        accumulated compute time per model, and return those times.

        @param sequence (list) patterns, each a set of active bit indices
        @param learn    (bool) passed through to each model's compute call
        @param num      (int)  how many times the sequence is repeated
        @return (list) accumulated seconds, in the order
                       [TM (py), TM (C++), TP, TP10X2]
        """
        repeatedSequence = sequence * num

        def tmComputeFn(pattern, instance):
            instance.compute(pattern, learn)

        def tpComputeFn(pattern, instance):
            # The TP models take a dense array rather than a set of indices.
            array = self._patternToNumpyArray(pattern)
            instance.compute(array, enableLearn=learn, computeInfOutput=True)

        modelParams = [(self.tmPy, tmComputeFn),
                       (self.tmCPP, tmComputeFn),
                       (self.tp, tpComputeFn),
                       (self.tp10x2, tpComputeFn)]
        times = [0] * len(modelParams)

        for patNum, pattern in enumerate(repeatedSequence):
            for ix, params in enumerate(modelParams):
                times[ix] += self._feedOne(pattern, *params)
            self._printProgressBar(patNum, len(repeatedSequence), 50)

        # Single-argument parenthesized prints produce the same output on
        # Python 2 and are valid syntax on Python 3.
        print("")
        print("TM (py):\t{0}s".format(times[0]))
        print("TM (C++):\t{0}s".format(times[1]))
        print("TP:\t\t{0}s".format(times[2]))
        print("TP10X2:\t\t{0}s".format(times[3]))

        return times

    @staticmethod
    def _feedOne(pattern, instance, computeFn):
        """
        Time one step on ``instance``: a reset when ``pattern`` is None,
        otherwise ``computeFn(pattern, instance)``. Returns elapsed seconds.

        NOTE(review): time.clock() was removed in Python 3.8; it is kept
        here so the measured quantity does not change.
        """
        start = time.clock()

        # Identity check: `== None` would be evaluated element-wise if a
        # pattern were ever a numpy array.
        if pattern is None:
            instance.reset()
        else:
            computeFn(pattern, instance)

        elapsed = time.clock() - start
        return elapsed

    @staticmethod
    def _patternToNumpyArray(pattern):
        """
        Return an int32 array of length 2048 with ones at the indices in
        ``pattern`` and zeros elsewhere.
        """
        array = numpy.zeros(2048, dtype='int32')
        array[list(pattern)] = 1
        return array

    @staticmethod
    def _printProgressBar(completed, total, nDots):
        """
        Redraw a text progress bar of ``nDots`` characters on the current
        line, but only when the number of dots has changed since the
        previous step.
        """
        def numberOfDots(n):
            return (n * nDots) // total

        completedDots = numberOfDots(completed)
        if completedDots != numberOfDots(completed - 1):
            # Write without a trailing newline so "\r" keeps redrawing the
            # same terminal line.
            sys.stdout.write("\r|" + ("." * completedDots) +
                             (" " * (nDots - completedDots)) + "|")
            sys.stdout.flush()
def testEncodeInvalidInputType(self):
    """Check that encoding a string input raises TypeError."""
    rdse = RandomDistributedScalarEncoder(name="encoder", resolution=1.0,
                                          verbosity=0)
    self.assertRaises(TypeError, rdse.encode, "String")
class TemporalMemoryPerformanceTest(unittest.TestCase):
    """
    Times four implementations (TemporalMemoryPy, TemporalMemoryCPP, TP and
    TP10X2) on the same encoded input sequence, and asserts that the second
    of each pair (TemporalMemoryCPP, TP10X2) is faster than the first.
    """

    def setUp(self):
        """Create the four model instances and the scalar encoder."""
        self.tmPy = TemporalMemoryPy(columnDimensions=[2048],
                                     cellsPerColumn=32,
                                     initialPermanence=0.5,
                                     connectedPermanence=0.8,
                                     minThreshold=10,
                                     maxNewSynapseCount=12,
                                     permanenceIncrement=0.1,
                                     permanenceDecrement=0.05,
                                     activationThreshold=15)

        self.tmCPP = TemporalMemoryCPP(columnDimensions=[2048],
                                       cellsPerColumn=32,
                                       initialPermanence=0.5,
                                       connectedPermanence=0.8,
                                       minThreshold=10,
                                       maxNewSynapseCount=12,
                                       permanenceIncrement=0.1,
                                       permanenceDecrement=0.05,
                                       activationThreshold=15)

        self.tp = TP(numberOfCols=2048,
                     cellsPerColumn=32,
                     initialPerm=0.5,
                     connectedPerm=0.8,
                     minThreshold=10,
                     newSynapseCount=12,
                     permanenceInc=0.1,
                     permanenceDec=0.05,
                     activationThreshold=15,
                     globalDecay=0,
                     burnIn=1,
                     checkSynapseConsistency=False,
                     pamLength=1)

        self.tp10x2 = TP10X2(numberOfCols=2048,
                             cellsPerColumn=32,
                             initialPerm=0.5,
                             connectedPerm=0.8,
                             minThreshold=10,
                             newSynapseCount=12,
                             permanenceInc=0.1,
                             permanenceDec=0.05,
                             activationThreshold=15,
                             globalDecay=0,
                             burnIn=1,
                             checkSynapseConsistency=False,
                             pamLength=1)

        self.scalarEncoder = RandomDistributedScalarEncoder(0.88)

    def testSingleSequence(self):
        """Feed one sequence to all models and compare the elapsed times."""
        print("Test: Single sequence")
        sequence = self._generateSequence()
        times = self._feedAll(sequence)

        # assertLess reports both values on failure, unlike assertTrue(a < b).
        self.assertLess(times[1], times[0])
        self.assertLess(times[3], times[2])

    # ==============================
    # Helper functions
    # ==============================

    def _generateSequence(self):
        """
        Read NUM_PATTERNS rows from the CSV at _INPUT_FILE_PATH (after
        skipping its first three rows), encode column 1 of each row, and
        return a list with the set of active bit indices of each encoding.
        """
        sequence = []
        with open(_INPUT_FILE_PATH) as fin:
            reader = csv.reader(fin)

            # Skip the three leading rows (presumably header rows - confirm
            # against the input file format). The builtin next() is used
            # because reader.next() exists only on Python 2.
            next(reader)
            next(reader)
            next(reader)

            for _ in range(NUM_PATTERNS):
                record = next(reader)
                value = float(record[1])
                encodedValue = self.scalarEncoder.encode(value)
                activeBits = set(encodedValue.nonzero()[0])
                sequence.append(activeBits)
        return sequence

    def _feedAll(self, sequence, learn=True, num=1):
        """
        Feed ``sequence`` (repeated ``num`` times) to every model, print the
        accumulated compute time per model, and return those times.

        @param sequence (list) patterns, each a set of active bit indices
        @param learn    (bool) passed through to each model's compute call
        @param num      (int)  how many times the sequence is repeated
        @return (list) accumulated seconds, in the order
                       [TM (py), TM (C++), TP, TP10X2]
        """
        repeatedSequence = sequence * num

        def tmComputeFn(pattern, instance):
            instance.compute(pattern, learn)

        def tpComputeFn(pattern, instance):
            # The TP models take a dense array rather than a set of indices.
            array = self._patternToNumpyArray(pattern)
            instance.compute(array, enableLearn=learn, computeInfOutput=True)

        modelParams = [
            (self.tmPy, tmComputeFn),
            (self.tmCPP, tmComputeFn),
            (self.tp, tpComputeFn),
            (self.tp10x2, tpComputeFn)
        ]
        times = [0] * len(modelParams)

        for patNum, pattern in enumerate(repeatedSequence):
            for ix, params in enumerate(modelParams):
                times[ix] += self._feedOne(pattern, *params)
            self._printProgressBar(patNum, len(repeatedSequence), 50)

        # Single-argument parenthesized prints produce the same output on
        # Python 2 and are valid syntax on Python 3.
        print("")
        print("TM (py):\t{0}s".format(times[0]))
        print("TM (C++):\t{0}s".format(times[1]))
        print("TP:\t\t{0}s".format(times[2]))
        print("TP10X2:\t\t{0}s".format(times[3]))

        return times

    @staticmethod
    def _feedOne(pattern, instance, computeFn):
        """
        Time one step on ``instance``: a reset when ``pattern`` is None,
        otherwise ``computeFn(pattern, instance)``. Returns elapsed seconds.

        NOTE(review): time.clock() was removed in Python 3.8; it is kept
        here so the measured quantity does not change.
        """
        start = time.clock()

        # Identity check: `== None` would be evaluated element-wise if a
        # pattern were ever a numpy array.
        if pattern is None:
            instance.reset()
        else:
            computeFn(pattern, instance)

        elapsed = time.clock() - start
        return elapsed

    @staticmethod
    def _patternToNumpyArray(pattern):
        """
        Return an int32 array of length 2048 with ones at the indices in
        ``pattern`` and zeros elsewhere.
        """
        array = numpy.zeros(2048, dtype='int32')
        array[list(pattern)] = 1
        return array

    @staticmethod
    def _printProgressBar(completed, total, nDots):
        """
        Redraw a text progress bar of ``nDots`` characters on the current
        line, but only when the number of dots has changed since the
        previous step.
        """
        def numberOfDots(n):
            return (n * nDots) // total

        completedDots = numberOfDots(completed)
        if completedDots != numberOfDots(completed - 1):
            # Write without a trailing newline so "\r" keeps redrawing the
            # same terminal line.
            sys.stdout.write("\r|" + ("." * completedDots) +
                             (" " * (nDots - completedDots)) + "|")
            sys.stdout.flush()