def testBottomUpEncodingPeriodicEncoder(self):
    """Check the bottom-up encodings produced by a periodic ScalarEncoder."""
    # An encoder built without a name is expected to describe itself by range.
    unnamed = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True,
                            forced=True)
    self.assertEqual(unnamed.getDescription(), [("[1:8]", 0)])

    enc = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                        periodic=True, forced=True)
    self.assertEqual(enc.getDescription(), [("scalar", 0)])

    def expectBits(value, bits):
        # Encoding of `value` must match `bits` element for element.
        wanted = numpy.array(bits, dtype=defaultDtype)
        self.assertTrue((enc.encode(value) == wanted).all())

    def expectSame(value, reference):
        # `value` and `reference` must land on the same encoding.
        self.assertTrue((enc.encode(value) == enc.encode(reference)).all())

    expectBits(3, [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
    expectSame(3.1, 3)
    expectBits(3.5, [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
    expectSame(3.6, 3.5)
    expectSame(3.7, 3.5)
    expectBits(4, [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    # The expected patterns for 1 and 7.5 wrap around the ends of the array.
    expectBits(1, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
    expectBits(1.5, [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    expectBits(7, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
    expectBits(7.5, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])

    self.assertEqual(enc.resolution, 0.5)
    self.assertEqual(enc.radius, 1.5)
def testBottomUpEncodingPeriodicEncoder(self):
    """Check periodic-encoder bottom-up encodings using bare asserts."""
    # NOTE(review): other copies of this test in the file pass forced=True to
    # ScalarEncoder; confirm whether this encoder version needs it for w=3.
    unnamed = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True)
    assert unnamed.getDescription() == [("[1:8]", 0)]

    enc = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                        periodic=True)
    assert enc.getDescription() == [("scalar", 0)]

    # Each entry is (input, expectation). A list expectation is the literal bit
    # pattern; a numeric expectation names another input that must encode to
    # the same pattern.
    checks = (
        (3, [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
        (3.1, 3),
        (3.5, [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]),
        (3.6, 3.5),
        (3.7, 3.5),
        (4, [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0]),
        (1, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
        (1.5, [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
        (7, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]),
        (7.5, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]),
    )
    for value, expectation in checks:
        actual = enc.encode(value)
        if isinstance(expectation, list):
            wanted = numpy.array(expectation, dtype=defaultDtype)
        else:
            wanted = enc.encode(expectation)
        assert (actual == wanted).all()

    assert enc.resolution == 0.5
    assert enc.radius == 1.5
def testEncodeInvalidInputType(self):
    """Encoding a string input is expected to raise TypeError."""
    scalarEnc = ScalarEncoder(name="enc", n=14, w=3, minval=1, maxval=8,
                              periodic=False, forced=True)
    # Callable form of assertRaises: same check as the context-manager form.
    self.assertRaises(TypeError, scalarEnc.encode, "String")
def profileEnc(maxValue, nRuns):
    """Encode the same random integers with ScalarEncoder and RDSE.

    Intended to be run under a profiler; the only output is the resulting
    width (n) of each encoder.

    @param maxValue (int) Largest input value generated (inclusive); also used
                          as the ScalarEncoder's maxval
    @param nRuns    (int) Number of random inputs fed to each encoder
    """
    minV = 0
    # The value range comes from maxValue. It used to be taken from nRuns,
    # which left the maxValue argument silently ignored.
    maxV = maxValue
    # generate input data (randint excludes its upper bound, hence the +1)
    data = numpy.random.randint(minV, maxV + 1, nRuns)
    # instantiate measured encoders
    encScalar = ScalarEncoder(w=21, minval=minV, maxval=maxV, resolution=1)
    encRDSE = RDSE(resolution=1)
    # profile!
    for d in data:
        encScalar.encode(d)
        encRDSE.encode(d)
    # One pre-formatted string prints identically on Python 2 and Python 3.
    print("Scalar n= %s  RDSE n= %s" % (encScalar.n, encRDSE.n))
def profileEnc(maxValue, nRuns):
    """Encode the same random integers with ScalarEncoder and RDSE.

    Intended to be run under a profiler; the only output is the resulting
    width (n) of each encoder.

    @param maxValue (int) Largest input value generated (inclusive); also used
                          as the ScalarEncoder's maxval
    @param nRuns    (int) Number of random inputs fed to each encoder
    """
    minV = 0
    # The value range comes from maxValue. It used to be taken from nRuns,
    # which left the maxValue argument silently ignored.
    maxV = maxValue
    # generate input data (randint excludes its upper bound, hence the +1)
    data = numpy.random.randint(minV, maxV + 1, nRuns)
    # instantiate measured encoders
    encScalar = ScalarEncoder(w=21, minval=minV, maxval=maxV, resolution=1)
    encRDSE = RDSE(resolution=1)
    # profile!
    for d in data:
        encScalar.encode(d)
        encRDSE.encode(d)
    # One pre-formatted string: same text as the multi-argument print on
    # Python 3, and does not degrade to a tuple repr on Python 2.
    print("Scalar n= %s  RDSE n= %s" % (encScalar.n, encRDSE.n))
class ScalarBucketEncoder(Encoder):
    """Encoder that delegates to a fixed-configuration NupicScalarEncoder."""

    def __init__(self):
        # One active bit (w=1) in a 22-bit output covering [0, 40000].
        self.encoder = NupicScalarEncoder(n=22, w=1, minval=0, maxval=40000,
                                          forced=True)

    def encode(self, symbol):
        """Return the wrapped encoder's encoding of `symbol`."""
        return self.encoder.encode(symbol)
def testScalarEncoder(self):
    """The missing-data sentinel must encode to an output with no active bits."""
    # -------------------------------------------------------------------------
    # missing values
    encoder = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                            periodic=False, forced=True)
    encoded = encoder.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    activeBits = encoded.sum()
    self.assertEqual(activeBits, 0)
class Agent(object):
    """Feeds encoded location and steering data into a temporal memory.

    Each call to `sync` encodes the reported (x, z) location with a
    CoordinateEncoder and the steering value with a ScalarEncoder, runs one
    temporal memory compute step, and plots the overlap between the current
    and the previous location encodings.
    """

    def __init__(self):
        self.encoder = CoordinateEncoder(n=1024, w=21)
        # Positional arguments are (w, minval, maxval).
        self.motorEncoder = ScalarEncoder(21, -1, 1, n=1024)
        self.tm = MonitoredGeneralTemporalMemory(columnDimensions=[2048],
                                                 cellsPerColumn=1,
                                                 initialPermanence=0.5,
                                                 connectedPermanence=0.6,
                                                 permanenceIncrement=0.1,
                                                 permanenceDecrement=0.02,
                                                 minThreshold=35,
                                                 activationThreshold=35,
                                                 maxNewSynapseCount=40)
        self.plotter = Plotter(self.tm)
        self.lastState = None   # location encoding from the previous sync
        self.lastAction = None  # steering value from the previous sync

    def sync(self, outputData):
        """Process one update and advance the temporal memory.

        @param outputData (dict) Must contain "location" (a mapping with "x"
                                 and "z") and "steer"; an optional truthy
                                 "reset" resets the temporal memory first and
                                 renders the plot at the end of the step
        """
        if "location" not in outputData or "steer" not in outputData:
            # print() with one pre-formatted string works on Python 2 and 3.
            print("Warning: Missing data: %s" % (outputData,))
            return

        if outputData.get("reset"):
            print("Reset.")
            self.tm.reset()

        location = outputData["location"]
        steer = outputData["steer"]

        # Scale the position and truncate it to integer coordinates.
        x = int(location["x"] * SCALE)
        z = int(location["z"] * SCALE)
        coordinate = numpy.array([x, z])
        encoding = self.encoder.encode((coordinate, RADIUS))

        motorEncoding = self.motorEncoder.encode(steer)

        # compute() is given the indices of the active bits, not dense arrays.
        sensorPattern = set(encoding.nonzero()[0])
        motorPattern = set(motorEncoding.nonzero()[0])

        self.tm.compute(sensorPattern,
                        activeExternalCells=motorPattern,
                        formInternalConnections=True)

        print(self.tm.mmPrettyPrintMetrics(self.tm.mmGetDefaultMetrics()))

        # Overlap is 0 on the first step, when there is no previous encoding.
        overlap = 0
        if self.lastState is not None:
            overlap = (self.lastState & encoding).sum()

        self.plotter.update(overlap)

        if outputData.get("reset"):
            self.plotter.render()

        self.lastState = encoding
        self.lastAction = steer
def testScalarEncoder(self):
    """Testing ScalarEncoder..."""
    # -------------------------------------------------------------------------
    # test missing values: the sentinel must encode to an all-zero output
    mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                       periodic=False, forced=True)
    empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    # print() with one pre-formatted string works on Python 2 and Python 3;
    # the 1-tuple keeps the array from being treated as format arguments.
    print("\nEncoded missing data 'None' as %s" % (empty,))
    self.assertEqual(empty.sum(), 0)
def testGetBucketInfoIntResolution(self):
    """An integer `resolution` must not truncate the decoded value."""
    enc = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8, periodic=True,
                        forced=True)
    # Round-trip 4.5 through encode / topDownCompute.
    encoded = enc.encode(4.5)
    firstResult = enc.topDownCompute(encoded)[0]
    self.assertEqual(4.5, firstResult.scalar)
def testNaNs(self):
    """test NaNs: a NaN input must encode to an all-zero output"""
    # NOTE(review): other copies of this test in the file pass forced=True to
    # ScalarEncoder; confirm whether this encoder version needs it for w=3.
    mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                       periodic=False)
    empty = mv.encode(float("nan"))
    # print() with one pre-formatted string works on Python 2 and Python 3.
    # The message names NaN, which is what this test actually encodes.
    print("\nEncoded NaN as %s" % (empty,))
    self.assertEqual(empty.sum(), 0)
def testNaNs(self):
    """A NaN input must encode to an output with no active bits."""
    encoder = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                            periodic=False, forced=True)
    nanValue = float("nan")
    encoded = encoder.encode(nanValue)
    self.assertEqual(encoded.sum(), 0)
def generateInputVectors(self, params):
    """Populate self._inputVectors according to params['dataType'].

    Handled data types: 'randomSDR', 'randomSDRVaryingSparsity',
    'denseVectors', 'randomBarPairs', 'randomBarSets', 'randomCross',
    'correlatedSDRPairs' (also sets self._additionalInfo), 'nyc_taxi' and
    'mnist'. Any other dataType leaves the instance untouched.

    @param params (dict) Experiment parameters; 'dataType' selects the
                         generator and the remaining keys read depend on it
    """
    dataType = params['dataType']

    if dataType == 'randomSDR':
        self._inputVectors = generateRandomSDR(
            params['numInputVectors'],
            params['inputSize'],
            params['numActiveInputBits'],
            params['seed'])

    elif dataType == 'randomSDRVaryingSparsity':
        self._inputVectors = generateRandomSDRVaryingSparsity(
            params['numInputVectors'],
            params['inputSize'],
            params['minSparsity'],
            params['maxSparsity'],
            params['seed'])

    elif dataType == 'denseVectors':
        self._inputVectors = generateDenseVectors(
            params['numInputVectors'],
            params['inputSize'],
            params['seed'])

    elif dataType == 'randomBarPairs':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            # Two consecutive seeds per vector: one for each bar.
            seed = (params['seed'] * numInputVectors + i) * 2
            bar1 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], seed, False,
                                'horizontal')
            bar2 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], seed + 1, False,
                                'vertical')
            data = bar1 + bar2
            data[data > 0] = 1  # clamp overlapping pixels back to binary
            # Shape is passed positionally; the `newshape` keyword is
            # deprecated in recent NumPy releases.
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomBarSets':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            data = 0
            seed = ((params['seed'] * numInputVectors + i) *
                    params['numBarsPerInput'])
            for barI in range(params['numBarsPerInput']):
                bar = getRandomBar((params['nX'], params['nY']),
                                   params['barHalfLength'], seed + barI, True)
                data += bar
            data[data > 0] = 1
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomCross':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            seed = ((params['seed'] * numInputVectors + i) *
                    params['numCrossPerInput'])
            data = 0
            for j in range(params['numCrossPerInput']):
                data += getCross(params['nX'], params['nY'],
                                 params['barHalfLength'], seed + j)
            data[data > 0] = 1
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'correlatedSDRPairs':
        (inputVectors, inputVectors1, inputVectors2, corrPairs) = \
            generateCorrelatedSDRPairs(
                params['numInputVectors'],
                params['inputSize'],
                params['numInputVectorPerSensor'],
                params['numActiveInputBits'],
                params['corrStrength'],
                params['seed'])
        self._inputVectors = inputVectors
        self._additionalInfo = {"inputVectors1": inputVectors1,
                                "inputVectors2": inputVectors2,
                                "corrPairs": corrPairs}

    elif dataType == 'nyc_taxi':
        from nupic.encoders.scalar import ScalarEncoder
        df = pd.read_csv('./data/nyc_taxi.csv', header=0, skiprows=[1, 2])
        # Only the first 5000 records are used.
        numRecords = 5000
        # The encoder configuration is the same for every record, so it is
        # built once instead of once per row.
        enc = ScalarEncoder(w=params['w'],
                            minval=params['minval'],
                            maxval=params['maxval'],
                            n=params['n'])
        passengerCounts = df["passenger_count"]
        inputVectors = np.zeros((numRecords, params['n']))
        for i in range(numRecords):
            # Only passenger_count is encoded.
            inputVectors[i, :] = enc.encode(float(passengerCounts[i]))
        self._inputVectors = inputVectors

    elif dataType == 'mnist':
        imagePath = os.path.abspath('data/mnist/training/')
        # One category per non-hidden sub-directory, in sorted order.
        categoryList = [c for c in sorted(os.listdir(imagePath))
                        if c[0] != "." and
                        os.path.isdir(os.path.join(imagePath, c))]

        fileList = {}
        numImages = 0
        for category in categoryList:
            categoryFilenames = []
            walkPath = os.path.join(imagePath, category)
            # Iterating os.walk directly works on Python 2 and Python 3
            # (generator.next() exists only on Python 2).
            for dirpath, dirnames, filenames in os.walk(walkPath):
                # Prune in place so os.walk skips directories that begin
                # with '.', and visits the rest in sorted order.
                dirnames[:] = sorted(d for d in dirnames
                                     if not d.startswith("."))
                # Ignore files that begin with "."
                imageFilenames = [os.path.join(dirpath, f)
                                  for f in sorted(filenames)
                                  if not f.startswith(".")]
                # Add our new images to the list for this category
                categoryFilenames.extend(imageFilenames)

            numImages += len(categoryFilenames)
            fileList[category] = categoryFilenames

        inputVectors = np.zeros((numImages, 1024))
        counter = 0
        for category in categoryList:
            for filename in fileList[category]:
                # Binarize: scale to [0, 1] and round to 0/1.
                image = misc.imread(filename).astype('float32')
                image /= 255
                image = image.round()
                # Centre the image in a zero-padded 32x32 frame; the slice
                # assumes 28x28 input images -- TODO confirm.
                paddedImage = np.zeros((32, 32))
                paddedImage[2:30, 2:30] = image
                inputVectors[counter, :] = np.reshape(paddedImage, (1, 1024))
                counter += 1
        self._inputVectors = inputVectors
def testNonPeriodicBottomUp(self):
    """Test Non-periodic encoder bottom-up"""

    def firstField(decoded):
        # Unpack decode() output and return the ranges of its single field.
        (fieldsDict, _) = decoded
        self.assertEqual(len(fieldsDict), 1)
        # list(...) makes the indexing valid for Python 3 dict views too.
        (ranges, _) = list(fieldsDict.values())[0]
        return ranges

    def sweepDecoding(encoder):
        # Walk [minval, maxval) in quarter-resolution steps and check that
        # decode() and topDownCompute() stay close to the encoded value.
        print("\nTesting non-periodic encoder decoding, resolution of %f..." %
              encoder.resolution)
        v = encoder.minval
        while v < encoder.maxval:
            output = encoder.encode(v)
            decoded = encoder.decode(output)
            print("decoding %s (%f)=> %s" %
                  (output, v, encoder.decodedToStr(decoded)))

            ranges = firstField(decoded)
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), encoder.resolution)

            topDown = encoder.topDownCompute(output)[0].value
            print("topdown => %s" % (topDown,))
            self.assertLessEqual(abs(topDown - v), encoder.resolution / 2)
            v += encoder.resolution / 4

    enc = ScalarEncoder(name='scalar', n=14, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)

    # print() with one pre-formatted string works on Python 2 and Python 3.
    print("\nTesting non-periodic encoder encoding, resolution of %f..." %
          enc.resolution)
    self.assertTrue((enc.encode(1) ==
                     numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                 dtype=defaultDtype)).all())
    self.assertTrue((enc.encode(2) ==
                     numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                                 dtype=defaultDtype)).all())
    self.assertTrue((enc.encode(10) ==
                     numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                                 dtype=defaultDtype)).all())

    # Test that we get the same encoder when we construct it using resolution
    # instead of n
    d = enc.__dict__
    enc = ScalarEncoder(name='scalar', resolution=1, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    self.assertEqual(enc.__dict__, d)

    # Test that we get the same encoder when we construct it using radius
    # instead of n
    enc = ScalarEncoder(name='scalar', radius=5, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    self.assertEqual(enc.__dict__, d)

    # -------------------------------------------------------------------------
    # Test the input description generation and topDown decoding of a
    # non-periodic encoder
    v = enc.minval
    print("\nTesting non-periodic encoder decoding, resolution of %f..." %
          enc.resolution)
    while v < enc.maxval:
        output = enc.encode(v)
        decoded = enc.decode(output)
        print("decoding %s (%f)=> %s" % (output, v, enc.decodedToStr(decoded)))

        ranges = firstField(decoded)
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - v), enc.resolution)

        topDown = enc.topDownCompute(output)[0]
        # The 1-tuple keeps a tuple-like result from being unpacked by %.
        print("topdown => %s" % (topDown,))
        self.assertTrue((topDown.encoding == output).all())
        self.assertLessEqual(abs(topDown.value - v), enc.resolution)

        # Test bucket support
        bucketIndices = enc.getBucketIndices(v)
        print("bucket index => %s" % (bucketIndices[0],))
        topDown = enc.getBucketInfo(bucketIndices)[0]
        self.assertLessEqual(abs(topDown.value - v), enc.resolution / 2)
        self.assertEqual(topDown.scalar, topDown.value)
        self.assertTrue((topDown.encoding == output).all())

        # Next value
        v += enc.resolution / 4

    # Make sure we can fill in holes
    for holed in ([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]):
        decoded = enc.decode(numpy.array(holed))
        ranges = firstField(decoded)
        self.assertTrue(len(ranges) == 1 and
                        numpy.array_equal(ranges[0], [10, 10]))
        print("decodedToStr of %s => %s" %
              (ranges, enc.decodedToStr(decoded)))

    # Test min and max
    enc = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=10,
                        periodic=False, forced=True)
    decoded = enc.topDownCompute(
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
    self.assertEqual(decoded.value, 10)
    decoded = enc.topDownCompute(
        numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
    self.assertEqual(decoded.value, 1)

    # Make sure only the last and first encoding encodes to max and min, and
    # there is no value greater than max or min
    enc = ScalarEncoder(name='scalar', n=140, w=3, minval=1, maxval=141,
                        periodic=False, forced=True)
    # NOTE(review): range(137) stops at i == 136, so the `i == 137` escape
    # below is never exercised -- confirm whether range(138) was intended.
    for i in range(137):
        bits = [0] * 140
        bits[i:i + 3] = [1, 1, 1]
        decoded = enc.topDownCompute(numpy.array(bits))[0]
        self.assertLessEqual(decoded.value, 141)
        self.assertGreaterEqual(decoded.value, 1)
        self.assertTrue(decoded.value < 141 or i == 137)
        self.assertTrue(decoded.value > 1 or i == 0)

    # -------------------------------------------------------------------------
    # Test the input description generation and top-down compute on a small
    # number non-periodic encoder
    sweepDecoding(ScalarEncoder(name='scalar', n=15, w=3, minval=.001,
                                maxval=.002, periodic=False, forced=True))

    # -------------------------------------------------------------------------
    # Test the input description generation on a large number, non-periodic
    # encoder
    sweepDecoding(ScalarEncoder(name='scalar', n=15, w=3, minval=1,
                                maxval=1000000000, periodic=False,
                                forced=True))

    # The setFieldStats checks that used to follow were guarded by `if False:`
    # and never ran (setFieldStats is not applicable to ScalarEncoder).
def testNonPeriodicBottomUp(self):
    """Test Non-periodic encoder bottom-up"""

    def firstField(decoded):
        # Unpack decode() output and return the ranges of its single field.
        (fieldsDict, _) = decoded
        self.assertEqual(len(fieldsDict), 1)
        # list(...) makes the indexing valid for Python 3 dict views too.
        (ranges, _) = list(fieldsDict.values())[0]
        return ranges

    def sweepDecoding(encoder):
        # Walk [minval, maxval) in quarter-resolution steps and check that
        # decode() and topDownCompute() stay close to the encoded value.
        v = encoder.minval
        while v < encoder.maxval:
            output = encoder.encode(v)
            ranges = firstField(encoder.decode(output))
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), encoder.resolution)

            topDown = encoder.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), encoder.resolution / 2)
            v += encoder.resolution / 4

    enc = ScalarEncoder(name="scalar", n=14, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)

    self.assertTrue(numpy.array_equal(
        enc.encode(1),
        numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    dtype=defaultDtype)))
    self.assertTrue(numpy.array_equal(
        enc.encode(2),
        numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                    dtype=defaultDtype)))
    self.assertTrue(numpy.array_equal(
        enc.encode(10),
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                    dtype=defaultDtype)))

    # Test that we get the same encoder when we construct it using resolution
    # instead of n
    d = enc.__dict__
    enc = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    self.assertEqual(enc.__dict__, d)

    # Test that we get the same encoder when we construct it using radius
    # instead of n
    enc = ScalarEncoder(name="scalar", radius=5, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    self.assertEqual(enc.__dict__, d)

    # -------------------------------------------------------------------------
    # Test the input description generation and topDown decoding of a
    # non-periodic encoder
    v = enc.minval
    while v < enc.maxval:
        output = enc.encode(v)
        ranges = firstField(enc.decode(output))
        self.assertEqual(len(ranges), 1)
        (rangeMin, rangeMax) = ranges[0]
        self.assertEqual(rangeMin, rangeMax)
        self.assertLess(abs(rangeMin - v), enc.resolution)

        topDown = enc.topDownCompute(output)[0]
        self.assertTrue(numpy.array_equal(topDown.encoding, output))
        self.assertLessEqual(abs(topDown.value - v), enc.resolution)

        # Test bucket support
        bucketIndices = enc.getBucketIndices(v)
        topDown = enc.getBucketInfo(bucketIndices)[0]
        self.assertLessEqual(abs(topDown.value - v), enc.resolution / 2)
        self.assertEqual(topDown.scalar, topDown.value)
        self.assertTrue(numpy.array_equal(topDown.encoding, output))

        # Next value
        v += enc.resolution / 4

    # Make sure we can fill in holes
    for holed in ([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]):
        ranges = firstField(enc.decode(numpy.array(holed)))
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

    # Test min and max
    enc = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=10,
                        periodic=False, forced=True)
    decoded = enc.topDownCompute(
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
    self.assertEqual(decoded.value, 10)
    decoded = enc.topDownCompute(
        numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
    self.assertEqual(decoded.value, 1)

    # Make sure only the last and first encoding encodes to max and min, and
    # there is no value greater than max or min
    enc = ScalarEncoder(name="scalar", n=140, w=3, minval=1, maxval=141,
                        periodic=False, forced=True)
    # NOTE(review): range(137) stops at i == 136, so the `i == 137` escape
    # below is never exercised -- confirm whether range(138) was intended.
    for i in range(137):
        bits = [0] * 140
        bits[i:i + 3] = [1, 1, 1]
        decoded = enc.topDownCompute(numpy.array(bits))[0]
        self.assertLessEqual(decoded.value, 141)
        self.assertGreaterEqual(decoded.value, 1)
        self.assertTrue(decoded.value < 141 or i == 137)
        self.assertTrue(decoded.value > 1 or i == 0)

    # -------------------------------------------------------------------------
    # Test the input description generation and top-down compute on a small
    # number non-periodic encoder
    sweepDecoding(ScalarEncoder(name="scalar", n=15, w=3, minval=.001,
                                maxval=.002, periodic=False, forced=True))

    # -------------------------------------------------------------------------
    # Test the input description generation on a large number, non-periodic
    # encoder
    sweepDecoding(ScalarEncoder(name="scalar", n=15, w=3, minval=1,
                                maxval=1000000000, periodic=False,
                                forced=True))
class OneDDepthEncoder(Encoder):
    """
    Given an array of numbers, each representing distance to the closest
    object, returns an SDR representation of that depth data.

    At each given position, takes the largest input value within `radius`
    positions on either side, and encodes that value with a scalar encoder.
    The concatenation of all these scalar encodings is the final encoding.

    NOTE(review): this text previously described the per-position value as the
    "closest distance", while the code takes max() of the window -- confirm
    which is intended.
    """

    def __init__(self,
                 positions=range(36),
                 radius=3,
                 wrapAround=False,
                 nPerPosition=57,
                 wPerPosition=3,
                 minVal=0,
                 maxVal=1,
                 name=None,
                 verbosity=0):
        """
        See `nupic.encoders.base.Encoder` for more information.

        @param positions    (list) Positions at which to encode distance
        @param radius       (int)  Radius of positions over which to consider
                                   to get closest distance for encoding
        @param wrapAround   (bool) Whether radius should wrap around the sides
                                   of the input array
        @param nPerPosition (int)  Number of bits available for scalar encoder
                                   when encoding each position
        @param wPerPosition (int)  Number of bits active for scalar encoder
                                   when encoding each position
        @param minVal       (int)  Minimum distance that can be encoded
        @param maxVal       (int)  Maximum distance that can be encoded
        @param name         (str)  Encoder name; defaults to "[n:w]"
        @param verbosity    (int)  Stored on the instance as-is
        """
        # Copy: on Python 2 the range(36) default is one list shared by every
        # instance, so keeping a reference would let instances alias it.
        self.positions = list(positions)
        self.radius = radius
        self.wrapAround = wrapAround
        self.scalarEncoder = ScalarEncoder(wPerPosition, minVal, maxVal,
                                           n=nPerPosition,
                                           forced=True)
        self.verbosity = verbosity
        self.encoders = None

        # Totals over all positions.
        self.n = len(self.positions) * nPerPosition
        self.w = len(self.positions) * wPerPosition

        if name is None:
            name = "[%s:%s]" % (self.n, self.w)
        self.name = name

    def getWidth(self):
        """See `nupic.encoders.base.Encoder` for more information."""
        return self.n

    def getDescription(self):
        """See `nupic.encoders.base.Encoder` for more information."""
        return [('data', 0)]

    def getScalars(self, inputData):
        """See `nupic.encoders.base.Encoder` for more information."""
        return numpy.array([0] * len(inputData))

    def encodeIntoArray(self, inputData, output):
        """
        See `nupic.encoders.base.Encoder` for more information.

        @param inputData (numpy.array) Depth data; must support `.take`
        @param output    (numpy.array) Stores encoded SDR in this numpy array
        """
        output[:] = 0

        # Neither of these changes between positions.
        mode = 'wrap' if self.wrapAround else 'clip'
        width = self.scalarEncoder.getWidth()

        for i, position in enumerate(self.positions):
            indices = range(position - self.radius,
                            position + self.radius + 1)
            # Out-of-range indices wrap around or clip to the array ends.
            values = inputData.take(indices, mode=mode)
            start = i * width
            output[start:start + width] = \
                self.scalarEncoder.encode(max(values))

    def dump(self):
        """Print the encoder's total w and n."""
        # print() with a single string works on Python 2 and Python 3.
        print("OneDDepthEncoder:")
        print(" w: %d" % self.w)
        print(" n: %d" % self.n)

    @classmethod
    def read(cls, proto):
        """Build an encoder from `proto` without running __init__."""
        # NOTE(review): only w, n, radius, verbosity and name are restored;
        # positions, wrapAround and scalarEncoder (all read by
        # encodeIntoArray) are not set here -- confirm whether the schema
        # should carry them.
        encoder = object.__new__(cls)
        encoder.w = proto.w
        encoder.n = proto.n
        encoder.radius = proto.radius
        encoder.verbosity = proto.verbosity
        encoder.name = proto.name
        return encoder

    def write(self, proto):
        """Copy w, n, radius, verbosity and name onto `proto`."""
        proto.w = self.w
        proto.n = self.n
        proto.radius = self.radius
        proto.verbosity = self.verbosity
        proto.name = self.name
class OneDDepthEncoder(Encoder):
    """
    Given an array of numbers, each representing distance to the closest
    object, returns an SDR representation of that depth data.

    At each given position, takes the largest input value within `radius`
    positions on either side, and encodes that value with a scalar encoder.
    The concatenation of all these scalar encodings is the final encoding.

    NOTE(review): this text previously described the per-position value as the
    "closest distance", while the code takes max() of the window -- confirm
    which is intended.
    """

    def __init__(self,
                 positions=range(36),
                 radius=3,
                 wrapAround=False,
                 nPerPosition=57,
                 wPerPosition=3,
                 minVal=0,
                 maxVal=1,
                 name=None,
                 verbosity=0):
        """
        See `nupic.encoders.base.Encoder` for more information.

        @param positions    (list) Positions at which to encode distance
        @param radius       (int)  Radius of positions over which to consider
                                   to get closest distance for encoding
        @param wrapAround   (bool) Whether radius should wrap around the sides
                                   of the input array
        @param nPerPosition (int)  Number of bits available for scalar encoder
                                   when encoding each position
        @param wPerPosition (int)  Number of bits active for scalar encoder
                                   when encoding each position
        @param minVal       (int)  Minimum distance that can be encoded
        @param maxVal       (int)  Maximum distance that can be encoded
        @param name         (str)  Encoder name; defaults to "[n:w]"
        @param verbosity    (int)  Stored on the instance as-is
        """
        # Copy: on Python 2 the range(36) default is one list shared by every
        # instance, so keeping a reference would let instances alias it.
        self.positions = list(positions)
        self.radius = radius
        self.wrapAround = wrapAround
        self.scalarEncoder = ScalarEncoder(wPerPosition, minVal, maxVal,
                                           n=nPerPosition,
                                           forced=True)
        self.verbosity = verbosity
        self.encoders = None

        # Totals over all positions.
        self.n = len(self.positions) * nPerPosition
        self.w = len(self.positions) * wPerPosition

        if name is None:
            name = "[%s:%s]" % (self.n, self.w)
        self.name = name

    def getWidth(self):
        """See `nupic.encoders.base.Encoder` for more information."""
        return self.n

    def getDescription(self):
        """See `nupic.encoders.base.Encoder` for more information."""
        return [('data', 0)]

    def getScalars(self, inputData):
        """See `nupic.encoders.base.Encoder` for more information."""
        return numpy.array([0] * len(inputData))

    def encodeIntoArray(self, inputData, output):
        """
        See `nupic.encoders.base.Encoder` for more information.

        @param inputData (numpy.array) Depth data; must support `.take`
        @param output    (numpy.array) Stores encoded SDR in this numpy array
        """
        output[:] = 0

        # Neither of these changes between positions.
        mode = 'wrap' if self.wrapAround else 'clip'
        width = self.scalarEncoder.getWidth()

        for i, position in enumerate(self.positions):
            indices = range(position - self.radius,
                            position + self.radius + 1)
            # Out-of-range indices wrap around or clip to the array ends.
            values = inputData.take(indices, mode=mode)
            start = i * width
            output[start:start + width] = \
                self.scalarEncoder.encode(max(values))

    def dump(self):
        """Print the encoder's total w and n."""
        # print() with a single string works on Python 2 and Python 3.
        print("OneDDepthEncoder:")
        print(" w: %d" % self.w)
        print(" n: %d" % self.n)

    @classmethod
    def read(cls, proto):
        """Build an encoder from `proto` without running __init__."""
        # NOTE(review): only w, n, radius, verbosity and name are restored;
        # positions, wrapAround and scalarEncoder (all read by
        # encodeIntoArray) are not set here -- confirm whether the schema
        # should carry them.
        encoder = object.__new__(cls)
        encoder.w = proto.w
        encoder.n = proto.n
        encoder.radius = proto.radius
        encoder.verbosity = proto.verbosity
        encoder.name = proto.name
        return encoder

    def write(self, proto):
        """Copy w, n, radius, verbosity and name onto `proto`."""
        proto.w = self.w
        proto.n = self.n
        proto.radius = self.radius
        proto.verbosity = self.verbosity
        proto.name = self.name
def generateInputVectors(self, params):
    """Populate self._inputVectors according to params['dataType'].

    Handled data types: 'randomSDR', 'randomSDRVaryingSparsity',
    'denseVectors', 'randomBarPairs', 'randomBarSets', 'randomCross',
    'correlatedSDRPairs' (also sets self._additionalInfo) and 'nyc_taxi'.
    Any other dataType leaves the instance untouched.

    @param params (dict) Experiment parameters; 'dataType' selects the
                         generator and the remaining keys read depend on it
    """
    dataType = params['dataType']

    if dataType == 'randomSDR':
        self._inputVectors = generateRandomSDR(
            params['numInputVectors'],
            params['inputSize'],
            params['numActiveInputBits'],
            params['seed'])

    elif dataType == 'randomSDRVaryingSparsity':
        self._inputVectors = generateRandomSDRVaryingSparsity(
            params['numInputVectors'],
            params['inputSize'],
            params['minSparsity'],
            params['maxSparsity'],
            params['seed'])

    elif dataType == 'denseVectors':
        self._inputVectors = generateDenseVectors(
            params['numInputVectors'],
            params['inputSize'],
            params['seed'])

    elif dataType == 'randomBarPairs':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            # Two consecutive seeds per vector: one for each bar.
            seed = (params['seed'] * numInputVectors + i) * 2
            bar1 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], seed, False,
                                'horizontal')
            bar2 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], seed + 1, False,
                                'vertical')
            data = bar1 + bar2
            data[data > 0] = 1  # clamp overlapping pixels back to binary
            # Shape is passed positionally; the `newshape` keyword is
            # deprecated in recent NumPy releases.
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomBarSets':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            data = 0
            seed = ((params['seed'] * numInputVectors + i) *
                    params['numBarsPerInput'])
            for barI in range(params['numBarsPerInput']):
                bar = getRandomBar((params['nX'], params['nY']),
                                   params['barHalfLength'], seed + barI, True)
                data += bar
            data[data > 0] = 1
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomCross':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            seed = ((params['seed'] * numInputVectors + i) *
                    params['numCrossPerInput'])
            data = 0
            for j in range(params['numCrossPerInput']):
                data += getCross(params['nX'], params['nY'],
                                 params['barHalfLength'], seed + j)
            data[data > 0] = 1
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'correlatedSDRPairs':
        (inputVectors, inputVectors1, inputVectors2, corrPairs) = \
            generateCorrelatedSDRPairs(
                params['numInputVectors'],
                params['inputSize'],
                params['numInputVectorPerSensor'],
                params['numActiveInputBits'],
                params['corrStrength'],
                params['seed'])
        self._inputVectors = inputVectors
        self._additionalInfo = {
            "inputVectors1": inputVectors1,
            "inputVectors2": inputVectors2,
            "corrPairs": corrPairs
        }

    elif dataType == 'nyc_taxi':
        from nupic.encoders.scalar import ScalarEncoder
        df = pd.read_csv('./data/nyc_taxi.csv', header=0, skiprows=[1, 2])
        # Only the first 5000 records are used.
        numRecords = 5000
        # The encoder configuration is the same for every record, so it is
        # built once instead of once per row.
        enc = ScalarEncoder(w=params['w'],
                            minval=params['minval'],
                            maxval=params['maxval'],
                            n=params['n'])
        passengerCounts = df["passenger_count"]
        inputVectors = np.zeros((numRecords, params['n']))
        for i in range(numRecords):
            # Only passenger_count is encoded.
            inputVectors[i, :] = enc.encode(float(passengerCounts[i]))
        self._inputVectors = inputVectors
def testNaNs(self):
    """Encoding NaN must produce an output with no active bits."""
    mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                       periodic=False, forced=True)
    empty = mv.encode(float("nan"))
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("\nEncoded NaN as %s" % empty)
    self.assertEqual(empty.sum(), 0)
class NIK(object): """Class implementing NIK""" def __init__( self, minDx=-2.0, maxDx=2.0, minDy=-2.0, maxDy=2.0, minTheta1=0.0, maxTheta1=85.0, minTheta2=0.0, maxTheta2=360.0, ): self.dxEncoder = ScalarEncoder(5, minDx, maxDx, n=75, forced=True) self.dyEncoder = ScalarEncoder(5, minDy, maxDy, n=75, forced=True) self.externalSize = self.dxEncoder.getWidth()**2 self.externalOnBits = self.dxEncoder.w**2 self.theta1Encoder = ScalarEncoder(5, minTheta1, maxTheta1, n=75, forced=True) self.theta2Encoder = ScalarEncoder(5, minTheta2, maxTheta2, n=75, forced=True) self.bottomUpInputSize = self.theta1Encoder.getWidth( ) * self.theta2Encoder.getWidth() self.bottomUpOnBits = self.theta1Encoder.w * self.theta2Encoder.w self.minDx = 100.0 self.maxDx = -100.0 self.minTheta1 = minTheta1 self.minTheta2 = minTheta2 self.maxTheta1 = maxTheta1 self.maxTheta2 = maxTheta2 self.trainingIterations = 0 self.testIterations = 0 self.maxPredictionError = 0 self.totalPredictionError = 0 self.numMissedPredictions = 0 self.tm = TM(columnDimensions=(self.bottomUpInputSize, ), basalInputDimensions=(self.externalSize, ), cellsPerColumn=1, initialPermanence=0.4, connectedPermanence=0.5, minThreshold=self.externalOnBits, maxNewSynapseCount=40, permanenceIncrement=0.1, permanenceDecrement=0.00, activationThreshold=int( 0.75 * (self.externalOnBits + self.bottomUpOnBits)), predictedSegmentDecrement=0.00, checkInputs=False) print >> sys.stderr, "TM parameters:" print >> sys.stderr, " num columns=", self.tm.getColumnDimensions() print >> sys.stderr, " activation threshold=", self.tm.getActivationThreshold( ) print >> sys.stderr, " min threshold=", self.tm.getMinThreshold() print >> sys.stderr, " basal input dimensions=", self.tm.getBasalInputDimensions( ) print >> sys.stderr print >> sys.stderr def compute(self, xt1, yt1, xt, yt, theta1t1, theta2t1, theta1, theta2, learn): """ The main function to call. 
If learn is False, it will print a prediction: (theta1, theta2) """ dx = xt - xt1 dy = yt - yt1 self.minDx = min(self.minDx, dx) self.maxDx = max(self.maxDx, dx) print >> sys.stderr, "Learn: ", learn print >> sys.stderr, "Training iterations: ", self.trainingIterations print >> sys.stderr, "Test iterations: ", self.testIterations print >> sys.stderr, "Xt's: ", xt1, yt1, xt, yt, "Delta's: ", dx, dy print >> sys.stderr, "Theta t-1: ", theta1t1, theta2t1, "t:", theta1, theta2 bottomUpSDR = self.encodeThetas(theta1, theta2) self.decodeThetas(bottomUpSDR) # Encode the inputs appropriately and train the HTM externalSDR = self.encodeDeltas(dx, dy) if learn: # During learning we provide the current pose angle as bottom up input bottomUpSDR = self.encodeThetas(theta1, theta2) self.trainTM(bottomUpSDR, externalSDR) self.trainingIterations += 1 else: # During inference we provide the previous pose angle as bottom up input # If we don't get a prediction, we keep trying random shifts until we get # something. 
predictedCells = [] newt1 = theta1t1 newt2 = theta2t1 newdx = dx newdy = dy angleRange = 10 numAttempts = 1 while len(predictedCells) == 0 and numAttempts < 3: print >> sys.stderr, "Attempt:", numAttempts, print >> sys.stderr, "Trying to predict using thetas:", newt1, newt2, print >> sys.stderr, "and deltas:", newdx, newdy externalSDR = self.encodeDeltas(newdx, newdy) bottomUpSDR = self.encodeThetas(newt1, newt2) predictedCells = self.inferTM(bottomUpSDR, externalSDR) predictedValues = self.decodeThetas(predictedCells) print >> sys.stderr, "Predicted values", predictedValues newt1 = theta1t1 + random.randrange(-angleRange, angleRange) newt2 = theta2t1 + random.randrange(-angleRange, angleRange) newdx = dx + (random.random() / 2.0 - 0.25) newdy = dy + (random.random() / 2.0 - 0.25) # Ensure we are in bounds otherwise we get an exception newt1 = min(self.maxTheta1, max(self.minTheta1, newt1)) newt2 = min(self.maxTheta2, max(self.minTheta2, newt2)) newdx = min(2.0, max(-2.0, newdx)) newdy = min(2.0, max(-2.0, newdy)) numAttempts += 1 if numAttempts % 10 == 0: angleRange += 2 print predictedValues # Accumulate errors for our metrics if len(predictedCells) == 0: self.numMissedPredictions += 1 self.testIterations += 1 error = abs(predictedValues[0] - theta1) + abs(predictedValues[1] - theta2) self.totalPredictionError += error if self.maxPredictionError < error: self.maxPredictionError = error print >> sys.stderr, "Error: ", error print >> sys.stderr def reset(self): self.tm.reset() def encodeDeltas(self, dx, dy): """Return the SDR for dx,dy""" dxe = self.dxEncoder.encode(dx) dye = self.dyEncoder.encode(dy) ex = numpy.outer(dxe, dye) return ex.flatten().nonzero()[0] def encodeThetas(self, theta1, theta2): """Return the SDR for theta1 and theta2""" # print >> sys.stderr, "encoded theta1 value = ", theta1 # print >> sys.stderr, "encoded theta2 value = ", theta2 t1e = self.theta1Encoder.encode(theta1) t2e = self.theta2Encoder.encode(theta2) # print >> sys.stderr, "encoded 
theta1 = ", t1e.nonzero()[0] # print >> sys.stderr, "encoded theta2 = ", t2e.nonzero()[0] ex = numpy.outer(t2e, t1e) return ex.flatten().nonzero()[0] def decodeThetas(self, predictedCells): """ Given the set of predicted cells, return the predicted theta1 and theta2 """ a = numpy.zeros(self.bottomUpInputSize) a[predictedCells] = 1 a = a.reshape( (self.theta1Encoder.getWidth(), self.theta1Encoder.getWidth())) theta1PredictedBits = a.mean(axis=0).nonzero()[0] theta2PredictedBits = a.mean(axis=1).nonzero()[0] # To decode it we need to create a flattened array again and pass it # to encoder. # TODO: We use encoder's topDownCompute method - not sure if that is best. t1 = numpy.zeros(self.theta1Encoder.getWidth()) t1[theta1PredictedBits] = 1 t1Prediction = self.theta1Encoder.topDownCompute(t1)[0].value t2 = numpy.zeros(self.theta2Encoder.getWidth()) t2[theta2PredictedBits] = 1 t2Prediction = self.theta2Encoder.topDownCompute(t2)[0].value # print >> sys.stderr, "predicted cells = ", predictedCells # print >> sys.stderr, "decoded theta1 bits = ", theta1PredictedBits # print >> sys.stderr, "decoded theta2 bits = ", theta2PredictedBits # print >> sys.stderr, "decoded theta1 value = ", t1Prediction # print >> sys.stderr, "decoded theta2 value = ", t2Prediction return t1Prediction, t2Prediction def printStats(self): print >> sys.stderr, "min/max dx=", self.minDx, self.maxDx print >> sys.stderr, "Total number of segments=", numSegments(self.tm) if self.testIterations > 0: print >> sys.stderr, "Maximum prediction error: ", self.maxPredictionError print >> sys.stderr, "Mean prediction error: ", self.totalPredictionError / self.testIterations print >> sys.stderr, "Num missed predictions: ", self.numMissedPredictions def trainTM(self, bottomUp, externalInput): # print >> sys.stderr, "Bottom up: ", bottomUp # print >> sys.stderr, "ExternalInput: ",externalInput self.tm.depolarizeCells(externalInput, learn=True) self.tm.activateCells(bottomUp, 
reinforceCandidatesExternalBasal=externalInput, growthCandidatesExternalBasal=externalInput, learn=True) # print >> sys.stderr, ("new active cells " + str(self.tm.getActiveCells())) print >> sys.stderr, "Total number of segments=", numSegments(self.tm) def inferTM(self, bottomUp, externalInput): """ Run inference and return the set of predicted cells """ self.reset() # print >> sys.stderr, "Bottom up: ", bottomUp # print >> sys.stderr, "ExternalInput: ",externalInput self.tm.compute(bottomUp, activeCellsExternalBasal=externalInput, learn=False) # print >> sys.stderr, ("new active cells " + str(self.tm.getActiveCells())) # print >> sys.stderr, ("new predictive cells " + str(self.tm.getPredictiveCells())) return self.tm.getPredictiveCells() def save(self, filename="temp.pkl"): """ Save TM in the filename specified above """ output = open(filename, 'wb') cPickle.dump(self.tm, output, protocol=cPickle.HIGHEST_PROTOCOL) def load(self, filename="temp.pkl"): """ Save TM in the filename specified above """ inputFile = open(filename, 'rb') self.tm = cPickle.load(inputFile)
predictions = np.transpose(likelihoodsVecAll) truth = np.roll(actual_data, -5) from nupic.encoders.scalar import ScalarEncoder as NupicScalarEncoder encoder = NupicScalarEncoder(w=1, minval=0, maxval=40000, n=22, forced=True) from plot import computeLikelihood, plotAccuracy bucketIndex2 = [] negLL = [] minProb = 0.0001 for i in xrange(len(truth)): bucketIndex2.append(np.where(encoder.encode(truth[i]))[0]) outOfBucketProb = 1 - sum(predictions[i, :]) prob = predictions[i, bucketIndex2[i]] if prob == 0: prob = outOfBucketProb if prob < minProb: prob = minProb negLL.append(-np.log(prob)) negLL = computeLikelihood(predictions, truth, encoder) negLL[:5000] = np.nan x = range(len(negLL)) plt.figure() plotAccuracy((negLL, x), truth, window=480, errorType='negLL') np.save('./result/' + dataSet + classifierType + 'TMprediction.npy',
class ScalarEncoderTest(unittest.TestCase):
    """Unit tests for ScalarEncoder class"""

    def setUp(self):
        """Create the periodic encoder shared by several tests."""
        # use of forced is not recommended, but used here for readability,
        # see scalar.py
        self._l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                                periodic=True, forced=True)

    def testScalarEncoder(self):
        """The missing-data sentinel encodes to an all-zero output."""
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(empty.sum(), 0)

    def testNaNs(self):
        """NaN encodes to an all-zero output."""
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(float("nan"))
        self.assertEqual(empty.sum(), 0)

    def testBottomUpEncodingPeriodicEncoder(self):
        """Test bottom-up encoding for a Periodic encoder"""
        l = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True,
                          forced=True)
        # Without a name the description falls back to the value range.
        self.assertEqual(l.getDescription(), [("[1:8]", 0)])
        l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.getDescription(), [("scalar", 0)])
        self.assertTrue(
            numpy.array_equal(
                l.encode(3),
                numpy.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                            dtype=defaultDtype)))
        self.assertTrue(numpy.array_equal(l.encode(3.1), l.encode(3)))
        self.assertTrue(
            numpy.array_equal(
                l.encode(3.5),
                numpy.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
                            dtype=defaultDtype)))
        self.assertTrue(numpy.array_equal(l.encode(3.6), l.encode(3.5)))
        self.assertTrue(numpy.array_equal(l.encode(3.7), l.encode(3.5)))
        self.assertTrue(
            numpy.array_equal(
                l.encode(4),
                numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
                            dtype=defaultDtype)))

        # Values near the ends of the range wrap around the output.
        self.assertTrue(
            numpy.array_equal(
                l.encode(1),
                numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                            dtype=defaultDtype)))
        self.assertTrue(
            numpy.array_equal(
                l.encode(1.5),
                numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            dtype=defaultDtype)))
        self.assertTrue(
            numpy.array_equal(
                l.encode(7),
                numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
                            dtype=defaultDtype)))
        self.assertTrue(
            numpy.array_equal(
                l.encode(7.5),
                numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
                            dtype=defaultDtype)))

        self.assertEqual(l.resolution, 0.5)
        self.assertEqual(l.radius, 1.5)

    def testCreateResolution(self):
        """Test that we get the same encoder when we construct it using
        resolution or radius instead of n
        """
        l = self._l
        d = l.__dict__
        l = ScalarEncoder(name="scalar", resolution=0.5, w=3, minval=1,
                          maxval=8, periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name="scalar", radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

    def testDecodeAndResolution(self):
        """Test the input description generation, top-down compute, and
        bucket support on a periodic encoder
        """
        l = self._l
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, fieldNames) = decoded
            self.assertEqual(len(fieldsDict), 1)
            self.assertEqual(len(fieldNames), 1)
            self.assertEqual(fieldNames, list(fieldsDict.keys()))
            (ranges, _) = list(fieldsDict.values())[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)

            # Test bucket support
            bucketIndices = l.getBucketIndices(v)
            topDown = l.getBucketInfo(bucketIndices)[0]
            self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
            self.assertEqual(topDown.value,
                             l.getBucketValues()[bucketIndices[0]])
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue(numpy.array_equal(topDown.encoding, output))

            # Next value
            v += l.resolution / 4

        # -------------------------------------------------------------------
        # Test the input description generation on a large number, periodic
        # encoder
        l = ScalarEncoder(name="scalar", radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)

        # Test with a "hole"
        decoded = l.decode(
            numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [7.5, 7.5]))

        # Test with something wider than w, and with a hole, and wrapped
        decoded = l.decode(
            numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 2)
        self.assertTrue(numpy.array_equal(ranges[0], [7.5, 8]))
        self.assertTrue(numpy.array_equal(ranges[1], [1, 1]))

        # Test with something wider than w, no hole
        decoded = l.decode(
            numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [1.5, 2.5]))

        # Test with 2 ranges
        decoded = l.decode(
            numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 2)
        self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
        self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))

        # Test with 2 ranges, 1 of which is narrower than w
        decoded = l.decode(
            numpy.array([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        # assertEqual, not assertTrue: assertTrue(x, 2) treats 2 as the
        # failure message and passes for any non-empty result.
        self.assertEqual(len(ranges), 2)
        self.assertTrue(numpy.array_equal(ranges[0], [1.5, 1.5]))
        self.assertTrue(numpy.array_equal(ranges[1], [5.5, 6.0]))

    def testCloseness(self):
        """Test closenessScores for a periodic encoder"""
        encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1,
                                periodic=True, name="day of week",
                                forced=True)
        scores = encoder.closenessScores((2, 4, 7), (4, 2, 1),
                                         fractional=False)
        for actual, score in zip((2, 2, 1), scores):
            self.assertEqual(actual, score)

    def testNonPeriodicBottomUp(self):
        """Test Non-periodic encoder bottom-up"""
        l = ScalarEncoder(name="scalar", n=14, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self.assertTrue(
            numpy.array_equal(
                l.encode(1),
                numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            dtype=defaultDtype)))
        self.assertTrue(
            numpy.array_equal(
                l.encode(2),
                numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                            dtype=defaultDtype)))
        self.assertTrue(
            numpy.array_equal(
                l.encode(10),
                numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                            dtype=defaultDtype)))

        # Test that we get the same encoder when we construct it using
        # resolution instead of n
        d = l.__dict__
        l = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1,
                          maxval=10, periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name="scalar", radius=5, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # -------------------------------------------------------------------
        # Test the input description generation and topDown decoding of a
        # non-periodic encoder
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, _) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, _) = list(fieldsDict.values())[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertLessEqual(abs(topDown.value - v), l.resolution)

            # Test bucket support
            bucketIndices = l.getBucketIndices(v)
            topDown = l.getBucketInfo(bucketIndices)[0]
            self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue(numpy.array_equal(topDown.encoding, output))

            # Next value
            v += l.resolution / 4

        # Make sure we can fill in holes
        decoded = l.decode(
            numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1]))
        (fieldsDict, _) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

        decoded = l.decode(
            numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]))
        (fieldsDict, _) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = list(fieldsDict.values())[0]
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

        # Test min and max
        l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=10,
                          periodic=False, forced=True)
        decoded = l.topDownCompute(
            numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
        self.assertEqual(decoded.value, 10)
        decoded = l.topDownCompute(
            numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
        self.assertEqual(decoded.value, 1)

        # Make sure only the last and first encoding encodes to max and min,
        # and there is no value greater than max or min
        # NOTE(review): range(137) stops at i == 136, so the `i == 137`
        # clause below never applies - confirm whether the last encoding
        # was meant to be covered.
        l = ScalarEncoder(name="scalar", n=140, w=3, minval=1, maxval=141,
                          periodic=False, forced=True)
        for i in range(137):
            iterlist = [0 for _ in range(140)]
            for j in range(i, i + 3):
                iterlist[j] = 1
            npar = numpy.array(iterlist)
            decoded = l.topDownCompute(npar)[0]
            self.assertLessEqual(decoded.value, 141)
            self.assertGreaterEqual(decoded.value, 1)
            self.assertTrue(decoded.value < 141 or i == 137)
            self.assertTrue(decoded.value > 1 or i == 0)

        # -------------------------------------------------------------------
        # Test the input description generation and top-down compute on a
        # small number non-periodic encoder
        l = ScalarEncoder(name="scalar", n=15, w=3, minval=.001, maxval=.002,
                          periodic=False, forced=True)
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, _) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, _) = list(fieldsDict.values())[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), l.resolution / 2)
            v += l.resolution / 4

        # -------------------------------------------------------------------
        # Test the input description generation on a large number,
        # non-periodic encoder
        l = ScalarEncoder(name="scalar", n=15, w=3, minval=1,
                          maxval=1000000000, periodic=False, forced=True)
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, _) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, _) = list(fieldsDict.values())[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), l.resolution / 2)
            v += l.resolution / 4

    def testEncodeInvalidInputType(self):
        """Encoding a string raises TypeError."""
        encoder = ScalarEncoder(name="enc", n=14, w=3, minval=1, maxval=8,
                                periodic=False, forced=True)
        with self.assertRaises(TypeError):
            encoder.encode("String")

    def testGetBucketInfoIntResolution(self):
        """Ensures that passing resolution as an int doesn't truncate
        values."""
        encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8,
                                periodic=True, forced=True)
        self.assertEqual(
            4.5, encoder.topDownCompute(encoder.encode(4.5))[0].scalar)

    @unittest.skipUnless(
        capnp, "pycapnp is not installed, skipping serialization test.")
    def testReadWrite(self):
        """Test ScalarEncoder Cap'n Proto serialization implementation."""
        originalValue = self._l.encode(1)

        proto1 = ScalarEncoderProto.new_message()
        self._l.write(proto1)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = ScalarEncoderProto.read(f)

        encoder = ScalarEncoder.read(proto2)

        self.assertIsInstance(encoder, ScalarEncoder)
        self.assertEqual(encoder.w, self._l.w)
        self.assertEqual(encoder.minval, self._l.minval)
        self.assertEqual(encoder.maxval, self._l.maxval)
        self.assertEqual(encoder.periodic, self._l.periodic)
        self.assertEqual(encoder.n, self._l.n)
        self.assertEqual(encoder.radius, self._l.radius)
        self.assertEqual(encoder.resolution, self._l.resolution)
        self.assertEqual(encoder.name, self._l.name)
        self.assertEqual(encoder.verbosity, self._l.verbosity)
        self.assertEqual(encoder.clipInput, self._l.clipInput)
        self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue))
        self.assertEqual(self._l.decode(encoder.encode(1)),
                         encoder.decode(self._l.encode(1)))

        # Feed in a new value and ensure the encodings match
        result1 = self._l.encode(7)
        result2 = encoder.encode(7)
        self.assertTrue(numpy.array_equal(result1, result2))

    def testSettingNWithMaxvalMinvalNone(self):
        """Setting n when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=14, radius=0,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)

    def testSettingScalarAndResolution(self):
        """Constructing with n=0, radius=None and only a resolution, while
        minval/maxval are None, raises ValueError."""
        with self.assertRaises(ValueError):
            ScalarEncoder(3, None, None, name="scalar", n=0, radius=None,
                          resolution=0.5, forced=True)

    def testSettingRadiusWithMaxvalMinvalNone(self):
        """Setting radius when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=0, radius=1.5,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)
def testNonPeriodicBottomUp(self):
    """Check bottom-up encoding, decoding and top-down compute of
    non-periodic encoders, including very small and very large ranges.
    """

    def onBits(start, width, size):
        # Expected output: `width` consecutive active bits from `start`.
        bits = numpy.zeros(size, dtype=defaultDtype)
        bits[start:start + width] = 1
        return bits

    def decodeRanges(enc, bits):
        # Decode `bits`, require exactly one field, return its ranges.
        fields, _ = enc.decode(bits)
        self.assertEqual(len(fields), 1)
        ranges, _ = list(fields.values())[0]
        return ranges

    def checkPointRange(enc, value, encoded):
        # The decoded description must be a single degenerate range that
        # lies within one resolution step of `value`.
        ranges = decodeRanges(enc, encoded)
        self.assertEqual(len(ranges), 1)
        low, high = ranges[0]
        self.assertEqual(low, high)
        self.assertLess(abs(low - value), enc.resolution)

    def sweepTopDownValue(enc):
        # Walk the value range in quarter-resolution steps and check the
        # decoded description and top-down value at every step.
        value = enc.minval
        while value < enc.maxval:
            encoded = enc.encode(value)
            checkPointRange(enc, value, encoded)
            recovered = enc.topDownCompute(encoded)[0].value
            self.assertLessEqual(abs(recovered - value), enc.resolution / 2)
            value += enc.resolution / 4

    enc = ScalarEncoder(name="scalar", n=14, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    for value, firstBit in ((1, 0), (2, 1), (10, 9)):
        self.assertTrue(
            numpy.array_equal(enc.encode(value), onBits(firstBit, 5, 14)))

    # Building the encoder from resolution or from radius instead of n must
    # give the same internal state.
    referenceState = enc.__dict__
    enc = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1,
                        maxval=10, periodic=False, forced=True)
    self.assertEqual(enc.__dict__, referenceState)
    enc = ScalarEncoder(name="scalar", radius=5, w=5, minval=1, maxval=10,
                        periodic=False, forced=True)
    self.assertEqual(enc.__dict__, referenceState)

    # Input description generation, top-down decoding and bucket support.
    value = enc.minval
    while value < enc.maxval:
        encoded = enc.encode(value)
        checkPointRange(enc, value, encoded)

        fromOutput = enc.topDownCompute(encoded)[0]
        self.assertTrue(numpy.array_equal(fromOutput.encoding, encoded))
        self.assertLessEqual(abs(fromOutput.value - value), enc.resolution)

        buckets = enc.getBucketIndices(value)
        fromBucket = enc.getBucketInfo(buckets)[0]
        self.assertLessEqual(abs(fromBucket.value - value),
                             enc.resolution / 2)
        self.assertEqual(fromBucket.scalar, fromBucket.value)
        self.assertTrue(numpy.array_equal(fromBucket.encoding, encoded))

        value += enc.resolution / 4

    # Patterns with holes still decode to the single range [10, 10].
    holeyPatterns = (
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1],
    )
    for pattern in holeyPatterns:
        ranges = decodeRanges(enc, numpy.array(pattern))
        self.assertEqual(len(ranges), 1)
        self.assertTrue(numpy.array_equal(ranges[0], [10, 10]))

    # Partial patterns at either end decode to max and min.
    enc = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=10,
                        periodic=False, forced=True)
    upper = enc.topDownCompute(
        numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
    self.assertEqual(upper.value, 10)
    lower = enc.topDownCompute(
        numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
    self.assertEqual(lower.value, 1)

    # Only the first and last encodings may decode to min and max, and no
    # decoded value may fall outside [min, max].
    enc = ScalarEncoder(name="scalar", n=140, w=3, minval=1, maxval=141,
                        periodic=False, forced=True)
    for i in range(137):
        pattern = numpy.array(
            [1 if i <= j < i + 3 else 0 for j in range(140)])
        result = enc.topDownCompute(pattern)[0]
        self.assertLessEqual(result.value, 141)
        self.assertGreaterEqual(result.value, 1)
        self.assertTrue(result.value < 141 or i == 137)
        self.assertTrue(result.value > 1 or i == 0)

    # Small-number non-periodic encoder.
    sweepTopDownValue(
        ScalarEncoder(name="scalar", n=15, w=3, minval=.001, maxval=.002,
                      periodic=False, forced=True))

    # Large-number non-periodic encoder.
    sweepTopDownValue(
        ScalarEncoder(name="scalar", n=15, w=3, minval=1, maxval=1000000000,
                      periodic=False, forced=True))
class FourSquareAnomalyDetector():
    """Encodes check-ins into one SDR and scores them with an anomaly model.

    Constructing an instance builds the field encoders and immediately
    executes run().
    """

    def __init__(self):
        """Create one encoder per check-in field, then call run()."""
        self.lat = ScalarEncoder(name='latitude', w=3, n=100, minval=-90,
                                 maxval=90, periodic=False)
        self.long = ScalarEncoder(name='longitude', w=3, n=100, minval=-180,
                                  maxval=180, periodic=True)
        self.timeenc = DateEncoder(season=0, dayOfWeek=1, weekend=3,
                                   timeOfDay=5)
        self.likes = ScalarEncoder(name='likes', w=3, n=50, minval=0,
                                   maxval=100000, periodic=False)
        self.people = ScalarEncoder(name='numpeople', w=3, n=20, minval=0,
                                    maxval=100, periodic=False)
        self.categories = SDRCategoryEncoder(n=87, w=3, categoryList=None,
                                             name="cats", verbosity=0)
        self.run()

    def run(self):
        """Encode eight sample check-ins and print the last anomaly score."""
        list_of_unencoded_checkins = [
            Checkin(10, 100, datetime.datetime.utcnow(), 12, 5, "cafe")
            for _ in range(8)
        ]
        list_of_encoded_checkins = []
        for check in list_of_unencoded_checkins:
            print(check)
            list_of_encoded_checkins.append(self.encode(check))
        print(self.LastAnomalyScore(list_of_encoded_checkins))

    def createModel(self):
        """Return a new model built from model_params.MODEL_PARAMS."""
        return ModelFactory.create(model_params.MODEL_PARAMS)

    def encode(self, checkin):
        """Return the concatenated encoding of every field of ``checkin``.

        Field order: latitude, longitude, time, likes, people, categories.
        The category part is the element-wise OR of the encodings of each
        item yielded by iterating ``checkin.categories``.

        Raises:
            ValueError: if ``checkin.categories`` yields no items.
        """
        print(checkin)
        latenc = self.lat.encode(checkin.latitude)
        longenc = self.long.encode(checkin.longitude)
        timenc = self.timeenc.encode(checkin.time)
        likeenc = self.likes.encode(checkin.likes)
        peoplenc = self.people.encode(checkin.people)

        # Explicit first-iteration handling; a catch-all except would also
        # hide genuine encoder failures.
        catenc = None
        for cat in checkin.categories:
            encoded = self.categories.encode(cat)
            if catenc is None:
                catenc = encoded
            else:
                catenc = numpy.logical_or(catenc, encoded)
        if catenc is None:
            raise ValueError("checkin has no categories to encode")

        checkinsdr = numpy.concatenate(
            (latenc, longenc, timenc, likeenc, peoplenc, catenc))
        print(checkinsdr)
        print(type(checkinsdr))
        return checkinsdr

    def LastAnomalyScore(self, checkin_list):
        """Feed every encoded check-in to a fresh model, in order, and return
        the anomaly score of the last one (0 for an empty list)."""
        model = self.createModel()
        model.enableInference({'predictedField': 'checkin'})
        last_anomaly = 0
        for record in checkin_list:
            result = model.run({"checkin": record})
            last_anomaly = result.inferences['anomalyScore']
        return last_anomaly
class ScalarEncoderTest(unittest.TestCase):
    """Unit tests for ScalarEncoder class"""

    def setUp(self):
        """Create the shared periodic encoder (n=14, w=3, range [1, 8])."""
        # use of forced is not recommended, but used here for readability, see scalar.py
        self._l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                                periodic=True, forced=True)

    ############################################################################
    def testScalarEncoder(self):
        """Testing ScalarEncoder..."""
        # -------------------------------------------------------------------------
        # test missing values
        mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        print "\nEncoded missing data \'None\' as %s" % empty
        # The missing-data sentinel must encode to an all-zero output
        self.assertEqual(empty.sum(), 0)

    # --------------------------------------------------------------------
    def testNaNs(self):
        """test NaNs"""
        mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(float("nan"))
        print "\nEncoded missing data \'None\' as %s" % empty
        # NaN is expected to encode to an all-zero output as well
        self.assertEqual(empty.sum(), 0)

    # ------------------------------------------------------------------------
    def testBottomUpEncodingPeriodicEncoder(self):
        """Test bottom-up encoding for a Periodic encoder"""
        # Without a name the description is expected to be the value range
        l = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True, forced=True)
        self.assertEqual(l.getDescription(), [("[1:8]", 0)])
        l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.getDescription(), [("scalar", 0)])
        self.assertTrue((l.encode(3) == numpy.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                                                    dtype=defaultDtype)).all())
        self.assertTrue((l.encode(3.1) == l.encode(3)).all())
        self.assertTrue((l.encode(3.5) == numpy.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
                                                      dtype=defaultDtype)).all())
        self.assertTrue((l.encode(3.6) == l.encode(3.5)).all())
        self.assertTrue((l.encode(3.7) == l.encode(3.5)).all())
        self.assertTrue((l.encode(4) == numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
                                                    dtype=defaultDtype)).all())

        # The expected patterns for 1 and 7.5 wrap around the ends of the array
        self.assertTrue((l.encode(1) == numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                                    dtype=defaultDtype)).all())
        self.assertTrue((l.encode(1.5) == numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                                      dtype=defaultDtype)).all())
        self.assertTrue((l.encode(7) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
                                                    dtype=defaultDtype)).all())
        self.assertTrue((l.encode(7.5) == numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
                                                      dtype=defaultDtype)).all())

        self.assertEqual(l.resolution, 0.5)
        self.assertEqual(l.radius, 1.5)

    # Test that we get the same encoder when we construct it using resolution
    # instead of n
    def testCreateResolution(self):
        """Test that we get the same encoder when we construct it using resolution instead of n"""
        l = self._l
        # Reference attribute dict of the encoder built from n in setUp()
        d = l.__dict__
        l = ScalarEncoder(name='scalar', resolution=0.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

    # -------------------------------------------------------------------------
    # Test the input description generation, top-down compute, and bucket
    # support on a periodic encoder
    def testDecodeAndResolution(self):
        """Test periodic encoder decoding, top-down compute and bucket support."""
        l = self._l
        print l.resolution
        # Sweep the input range in steps of a quarter of the resolution
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)
            print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded)

            # decode() must yield one field holding one degenerate range
            # (min == max) that lies within one resolution step of v
            (fieldsDict, fieldNames) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, desc) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertTrue(abs(rangeMin - v) < l.resolution)

            topDown = l.topDownCompute(output)[0]
            print "topdown =>", topDown
            self.assertTrue((topDown.encoding == output).all())
            self.assertTrue(abs(topDown.value - v) <= l.resolution / 2)

            # Test bucket support
            bucketIndices = l.getBucketIndices(v)
            print "bucket index =>", bucketIndices[0]
            topDown = l.getBucketInfo(bucketIndices)[0]
            self.assertTrue(abs(topDown.value - v) <= l.resolution / 2)
            self.assertEqual(topDown.value, l.getBucketValues()[bucketIndices[0]])
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue((topDown.encoding == output).all())

            # Next value
            v += l.resolution / 4

        # -----------------------------------------------------------------------
        # Test the input description generation on a large number, periodic encoder
        l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        print "\nTesting periodic encoder decoding, resolution of %f..." % \
              l.resolution

        # Test with a "hole"
        decoded = l.decode(numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, desc) = fieldsDict.values()[0]
        self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [7.5, 7.5]))
        print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded)

        # Test with something wider than w, and with a hole, and wrapped
        decoded = l.decode(numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, desc) = fieldsDict.values()[0]
        self.assertTrue(len(ranges) == 2 and numpy.array_equal(ranges[0], [7.5, 8]) \
                        and numpy.array_equal(ranges[1], [1, 1]))
        print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded)

        # Test with something wider than w, no hole
        decoded = l.decode(numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, desc) = fieldsDict.values()[0]
        self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [1.5, 2.5]))
        print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded)

        # Test with 2 ranges
        decoded = l.decode(numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, desc) = fieldsDict.values()[0]
        self.assertTrue(len(ranges) == 2 and numpy.array_equal(ranges[0], [1.5, 1.5]) \
                        and numpy.array_equal(ranges[1], [5.5, 6.0]))
        print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded)

        # Test with 2 ranges, 1 of which is narrower than w
        decoded = l.decode(numpy.array([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, desc) = fieldsDict.values()[0]
        self.assertTrue(len(ranges) == 2 and numpy.array_equal(ranges[0], [1.5, 1.5]) \
                        and numpy.array_equal(ranges[1], [5.5, 6.0]))
        print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded)

    # ============================================================================
    def testCloseness(self):
        """Test closenessScores for a periodic encoder"""
        encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1, periodic=True,
                                name="day of week", forced=True)
        scores = encoder.closenessScores((2, 4, 7), (4, 2, 1), fractional=False)
        # Expected non-fractional scores for the three pairs are (2, 2, 1)
        for actual, score in itertools.izip((2, 2, 1), scores):
            self.assertEqual(actual, score)

    # ============================================================================
    def testNonPeriodicBottomUp(self):
        """Test Non-periodic encoder bottom-up"""
        l = ScalarEncoder(name='scalar', n=14, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        print "\nTesting non-periodic encoder encoding, resolution of %f..." % \
              l.resolution
        self.assertTrue((l.encode(1) == numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                                    dtype=defaultDtype)).all())
        self.assertTrue((l.encode(2) == numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                                                    dtype=defaultDtype)).all())
        self.assertTrue((l.encode(10) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                                                     dtype=defaultDtype)).all())

        # Test that we get the same encoder when we construct it using resolution
        # instead of n
        d = l.__dict__
        l = ScalarEncoder(name='scalar', resolution=1, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name='scalar', radius=5, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # -------------------------------------------------------------------------
        # Test the input description generation and topDown decoding of a non-periodic
        # encoder
        v = l.minval
        print "\nTesting non-periodic encoder decoding, resolution of %f..." % \
              l.resolution
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)
            print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded)

            (fieldsDict, fieldNames) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, desc) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertTrue(abs(rangeMin - v) < l.resolution)

            topDown = l.topDownCompute(output)[0]
            print "topdown =>", topDown
            self.assertTrue((topDown.encoding == output).all())
            # Tolerance here is a full resolution step, not half as in the
            # bucket check below
            self.assertTrue(abs(topDown.value - v) <= l.resolution)

            # Test bucket support
            bucketIndices = l.getBucketIndices(v)
            print "bucket index =>", bucketIndices[0]
            topDown = l.getBucketInfo(bucketIndices)[0]
            self.assertTrue(abs(topDown.value - v) <= l.resolution / 2)
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue((topDown.encoding == output).all())

            # Next value
            v += l.resolution / 4

        # Make sure we can fill in holes
        decoded = l.decode(numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, desc) = fieldsDict.values()[0]
        self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [10, 10]))
        print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded)

        decoded = l.decode(numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1]))
        (fieldsDict, fieldNames) = decoded
        self.assertEqual(len(fieldsDict), 1)
        (ranges, desc) = fieldsDict.values()[0]
        self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [10, 10]))
        print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded)

        #Test min and max
        l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=10,
                          periodic=False, forced=True)
        decoded = l.topDownCompute(numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
        self.assertEqual(decoded.value, 10)
        decoded = l.topDownCompute(numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
        self.assertEqual(decoded.value, 1)

        #Make sure only the last and first encoding encodes to max and min, and there is no value greater than max or min
        l = ScalarEncoder(name='scalar', n=140, w=3, minval=1, maxval=141,
                          periodic=False, forced=True)
        # NOTE(review): range(137) stops at i == 136, so the "or i==137"
        # escape below can never trigger and the last window (bits 137..139)
        # is not exercised -- range(138) may have been intended; confirm.
        for i in range(137):
            # Build an encoding with w=3 consecutive on-bits starting at i
            iterlist = [0 for _ in range(140)]
            for j in range(i, i+3):
                iterlist[j] =1
            npar = numpy.array(iterlist)
            decoded = l.topDownCompute(npar)[0]
            self.assertTrue(decoded.value <= 141)
            self.assertTrue(decoded.value >= 1)
            self.assertTrue(decoded.value < 141 or i==137)
            self.assertTrue(decoded.value > 1 or i == 0)

        # -------------------------------------------------------------------------
        # Test the input description generation and top-down compute on a small number
        # non-periodic encoder
        l = ScalarEncoder(name='scalar', n=15, w=3, minval=.001, maxval=.002,
                          periodic=False, forced=True)
        print "\nTesting non-periodic encoder decoding, resolution of %f..." % \
              l.resolution
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)
            print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded)

            (fieldsDict, fieldNames) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, desc) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertTrue(abs(rangeMin - v) < l.resolution)

            topDown = l.topDownCompute(output)[0].value
            print "topdown =>", topDown
            self.assertTrue(abs(topDown - v) <= l.resolution / 2)
            v += l.resolution / 4

        # -------------------------------------------------------------------------
        # Test the input description generation on a large number, non-periodic encoder
        l = ScalarEncoder(name='scalar', n=15, w=3, minval=1, maxval=1000000000,
                          periodic=False, forced=True)
        print "\nTesting non-periodic encoder decoding, resolution of %f..." % \
              l.resolution
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)
            print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded)

            (fieldsDict, fieldNames) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, desc) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertTrue(abs(rangeMin - v) < l.resolution)

            topDown = l.topDownCompute(output)[0].value
            print "topdown =>", topDown
            self.assertTrue(abs(topDown - v) <= l.resolution / 2)
            v += l.resolution / 4

        # -------------------------------------------------------------------------
        # Test setting fieldStats after initialization
        # NOTE(review): the block below is disabled by "if False:" and never runs.
        if False:
            #TODO: remove all this? (and fieldstats from ScalarEncoder (if applicable) )?
            # Modified on 11/20/12 12:53 PM - setFieldStats not applicable for ScalarEncoder
            l = ScalarEncoder(n=14, w=3, minval=100, maxval=800, periodic=True, forced=True)
            l.setFieldStats("this", {"this":{"min":1, "max":8}})
            l = ScalarEncoder(name='scalar', n=14, w=3, minval=100, maxval=800,
                              periodic=True, forced=True)
            l.setFieldStats("this", {"this":{"min":1, "max":8}})
            self.assertTrue((l.encode(3) == numpy.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                                                        dtype=defaultDtype)).all())
            self.assertTrue((l.encode(3.1) == l.encode(3)).all())
            self.assertTrue((l.encode(3.5) == numpy.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
                                                          dtype=defaultDtype)).all())
            self.assertTrue((l.encode(3.6) == l.encode(3.5)).all())
            self.assertTrue((l.encode(3.7) == l.encode(3.5)).all())
            self.assertTrue((l.encode(4) == numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
                                                        dtype=defaultDtype)).all())

            self.assertTrue((l.encode(1) == numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                                                        dtype=defaultDtype)).all())
            self.assertTrue((l.encode(1.5) == numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                                          dtype=defaultDtype)).all())
            self.assertTrue((l.encode(7) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
                                                        dtype=defaultDtype)).all())
            self.assertTrue((l.encode(7.5) == numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
                                                          dtype=defaultDtype)).all())

            l = ScalarEncoder(name='scalar', n=14, w=5, minval=100, maxval=1000,
                              periodic=False, forced=True)
            l.setFieldStats("this", {"this":{"min":1, "max":10}})

            print "\nTesting non-periodic encoding using setFieldStats, resolution of %f..." % \
                  l.resolution
            self.assertTrue((l.encode(1) == numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                                        dtype=defaultDtype)).all())
            self.assertTrue((l.encode(2) == numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                                                        dtype=defaultDtype)).all())
            self.assertTrue((l.encode(10) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                                                         dtype=defaultDtype)).all())

    # ============================================================================
    def testEncodeInvalidInputType(self):
        """Encoding a string must raise TypeError."""
        encoder = ScalarEncoder(name='enc', n=14, w=3, minval=1, maxval=8,
                                periodic=False, forced=True)
        with self.assertRaises(TypeError):
            encoder.encode("String")

    # ============================================================================
    def testGetBucketInfoIntResolution(self):
        """Ensures that passing resolution as an int doesn't truncate values."""
        encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8,
                                periodic=True, forced=True)
        self.assertEqual(4.5,
                         encoder.topDownCompute(encoder.encode(4.5))[0].scalar)

    def testReadWrite(self):
        """Test ScalarEncoder Cap'n Proto serialization implementation."""
        originalValue = self._l.encode(1)

        proto1 = ScalarEncoderProto.new_message()
        self._l.write(proto1)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = ScalarEncoderProto.read(f)

        encoder = ScalarEncoder.read(proto2)

        # The deserialized encoder must match the original attribute by attribute
        self.assertIsInstance(encoder, ScalarEncoder)
        self.assertEqual(encoder.w, self._l.w)
        self.assertEqual(encoder.minval, self._l.minval)
        self.assertEqual(encoder.maxval, self._l.maxval)
        self.assertEqual(encoder.periodic, self._l.periodic)
        self.assertEqual(encoder.n, self._l.n)
        self.assertEqual(encoder.radius, self._l.radius)
        self.assertEqual(encoder.resolution, self._l.resolution)
        self.assertEqual(encoder.name, self._l.name)
        self.assertEqual(encoder.verbosity, self._l.verbosity)
        self.assertEqual(encoder.clipInput, self._l.clipInput)
        self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue))
        self.assertEqual(self._l.decode(encoder.encode(1)),
                         encoder.decode(self._l.encode(1)))

        # Feed in a new value and ensure the encodings match
        result1 = self._l.encode(7)
        result2 = encoder.encode(7)
        self.assertTrue(numpy.array_equal(result1, result2))
class FileProcesser(object):
    """Feed a text file to a CHTM brain letter by letter, then predict.

    Training is driven from the printer's window event loop: ``process``
    re-schedules itself with ``window.after`` until ``CROP_FILE`` letters (or
    the whole file, if it is shorter) have been consumed, then hands over to
    ``do_prediction``.
    """

    DATA_DIR = "data"
    ALPHA = "ABCDEF"     # categories handed to the classifier
    CROP_FILE = 200      # maximum number of letters read from the file
    N_INPUTS = 36
    CPR = [12**2]        # cells per region
    auto_predict = 16    # number of letters to auto-predict; falsy = interactive

    def __init__(self, filename="simple_pattern2.txt", with_classifier=True, delay=50, animate=True):
        """
        @param filename        name of the input file inside DATA_DIR
        @param with_classifier when True, attach a CHTMClassifier to the brain
        @param delay           delay passed to window.after between steps
        @param animate         when True, render the printer after each step
        """
        self.b = CHTMBrain(cells_per_region=self.CPR, min_overlap=1, r1_inputs=self.N_INPUTS)
        self.b.initialize()
        self.printer = CHTMPrinter(self.b)
        self.printer.setup()
        self.classifier = None
        self.animate = animate
        self.current_batch_target = 0
        self.current_batch_counter = 0
        self.delay = delay
        if with_classifier:
            # "//" keeps the window an integer regardless of division semantics
            self.classifier = CHTMClassifier(self.b, categories=self.ALPHA,
                                             region_index=len(self.CPR) - 1,
                                             history_window=self.CROP_FILE // 2)
        # The original carried a hard-disabled alternative behind "if True:":
        #   ScalarEncoder(n=self.N_INPUTS, w=5, minval=1, maxval=self.N_INPUTS,
        #                 periodic=False, forced=True)
        self.encoder = SimpleFullWidthEncoder(n_inputs=self.N_INPUTS, n_cats=len(self.ALPHA))
        with open(self.DATA_DIR + "/" + filename, 'r') as myfile:
            self.data = myfile.read()
        self.cursor = 0

    def encode_letter(self, c):
        """Return the encoding of a single letter; 'A' maps to 1, 'B' to 2, ..."""
        i = ord(c) - 64  # ord('A') == 65, so A == 1
        return self.encoder.encode(i)

    def run(self):
        """Schedule the first processing step and enter the window main loop."""
        self.printer.window.after(self.delay, self.process)
        self.printer.window.mainloop()

    def process(self):
        """Run one training step, or prompt for the size of the next batch.

        Once the cursor reaches CROP_FILE or the end of the data, prediction
        starts instead and no further step is scheduled.
        """
        # Also stop at the end of the data so short files cannot raise
        # IndexError.
        finished = self.cursor >= min(self.CROP_FILE, len(self.data))
        if finished:
            self.do_prediction()
            return

        in_batch = self.current_batch_counter < self.current_batch_target
        if in_batch:
            # Process one step. Read the letter under the cursor *before*
            # advancing, so the first letter of the file is not skipped.
            char = self.data[self.cursor].upper()
            self.cursor += 1
            self.current_batch_counter += 1
            inputs = self.encode_letter(char)
            self.b.process(inputs, learning=True)
            if self.classifier:
                self.classifier.read(char)
            if self.animate:
                self.printer.render()
        else:
            # Get user input for next batch
            self.current_batch_counter = 0
            answer = raw_input("Enter # of steps to run, or 0 to run to end, 'q' to quit...")
            if answer.upper() == "Q":
                self.printer.window.destroy()
                return
            # Anything that is not a number means "one step"
            n_steps = int(answer) if answer.isdigit() else 1
            if n_steps == 0:
                self.current_batch_target = self.CROP_FILE - self.cursor
            else:
                self.current_batch_target = n_steps
        self.printer.window.after(self.delay, self.process)

    def _predict_after(self, letter):
        """Feed one user-typed letter (no learning) and print the prediction.

        Input is upper-cased to match the training path; anything that is not
        a single character is ignored because ord() would reject it.
        """
        letter = letter.upper()
        if len(letter) != 1:
            return
        self.b.process(self.encode_letter(letter), learning=False)
        prediction = self.classifier.predict()
        print("Prediction: %s" % prediction)

    def do_prediction(self):
        """Run the prediction phase, then destroy the window.

        With a classifier and a non-zero ``auto_predict``, that many letters
        are predicted and fed back automatically, followed by an interactive
        loop ('q' exits). With ``auto_predict`` falsy only the interactive
        loop runs ('!' exits). Without a classifier nothing is predicted.
        """
        if self.classifier:
            if self.auto_predict:
                predicted = []
                for _ in range(self.auto_predict):
                    prediction = self.classifier.predict()
                    predicted.append(prediction)
                    self.b.process(self.encode_letter(prediction), learning=False)
                print("Predicted: %s" % "".join(predicted))
                while True:
                    letter = raw_input("Enter next letter (q to exit) >> ")
                    if not letter:
                        continue
                    if letter.upper() == 'Q':
                        break
                    self._predict_after(letter)
                    if self.animate:
                        self.printer.render()
            else:
                while True:
                    user_char = raw_input("Enter a letter to see prediction at t+1... (! to exit) >> ")
                    if user_char == "!":
                        break
                    self._predict_after(user_char)
        self.printer.window.destroy()
# NOTE(review): this excerpt starts mid-script; predictedInput, targetInput,
# trueData, sequence, net, nTrain, stdSeq, meanSeq and dataSet are defined
# outside the visible code, and the enclosing indentation level is unknown.
# NOTE(review): the next statement dereferences encoderOutput before the
# None checks below -- presumably it sat inside an earlier
# "if encoderOutput is not None:" block; confirm against the full script.
bucketValues = encoderOutput.getBucketValues()

if encoderOutput is not None:
    # One row per time step, one column per encoder output bit
    predictedDistribution = np.zeros((len(sequence), encoderOutput.n))
    targetDistribution = np.zeros((len(sequence), encoderOutput.n))

for i in xrange(len(sequence)-predictionStep):
    sample = getSingleSample(i, sequence, useTimeOfDay, useDayOfWeek)
    netActivation = net.activate(sample)

    if encoderOutput is None:
        # Without an output encoder the activation itself is the prediction
        predictedInput[i] = netActivation
    else:
        # Predict the bucket value at the first index of maximal activation
        predictedInput[i] = bucketValues[np.where(netActivation == max(netActivation))[0][0]]
        # Normalise the activation so it sums to 1
        predictedDistribution[i, :] = netActivation/sum(netActivation)
        targetDistribution[i, :] = encoderOutput.encode(sequence['data'][i+predictionStep])

    trueData[i] = sequence['data'][i]
    targetInput[i] = sequence['data'][i+predictionStep]
    # print " target input: ", targetDistribution[i], " predicted Input: ", predictedInput[i]

if encoderOutput is None:
    # Rescale the prediction by stdSeq and shift by meanSeq
    # (presumably undoing an earlier normalisation -- TODO confirm)
    predictedInput = (predictedInput * stdSeq) + meanSeq

# Plot target (black) against prediction (red) from index nTrain onwards
plt.close('all')
plt.figure(1)
plt.plot(targetInput[nTrain:], color='black')
plt.plot(predictedInput[nTrain:], color='red')
plt.title('LSTM, useTimeOfDay='+str(useTimeOfDay)+dataSet)
plt.xlim([0, 500])
plt.xlabel('Time')
class SoundEncoder(Encoder):
    """
    This is an implementation of a sound encoder. A sound wave is converted
    into the maximum frequency detected according to FFT, and this frequency
    is encoded into an SDR using a ScalarEncoder.
    """

    def __init__(self, n, w, rate, chunk, minval=20, maxval=20000, name=None):
        """
        @param n int the length of the encoded SDR
        @param w int the number of 1s in the encoded SDR
        @param rate int the number of sound samples per second
        @param chunk int the number of samples in an input
        @param minval float the lowest possible frequency detected
        @param maxval float the highest possible frequency detected
        @param name string the name of the encoder
        """
        self.n = n
        self.w = w
        self.rate = rate
        self.chunk = chunk
        self.minval = minval
        self.maxval = maxval
        self.name = name
        self._scalarEncoder = ScalarEncoder(name="scalar_" + str(name), n=n, w=w,
                                            minval=minval, maxval=maxval)

    def _detectFrequency(self, inputArr):
        """Use FFT to find maximum frequency present in the input.

        @param inputArr list or numpy array of sound samples
        @return float the detected frequency, refined by quadratic
                interpolation around the spectral peak when the peak has a
                neighbour on both sides
        """
        fftData = abs(np.fft.rfft(inputArr)) ** 2
        maxFreqIdx = np.argmax(fftData)
        # Width of one FFT bin; float() prevents integer truncation when rate
        # and chunk are both ints.
        binWidth = float(self.rate) / self.chunk

        # Interpolation needs a neighbour on each side of the peak: index 0
        # would produce an empty slice and the last index a short one.
        if 0 < maxFreqIdx < len(fftData) - 1:
            # Quadratic interpolation
            y0, y1, y2 = np.log(fftData[maxFreqIdx - 1:maxFreqIdx + 2])
            slope = y2 - y0
            curvature = 2 * y1 - y2 - y0
            # log(0) neighbours or a flat peak make the formula undefined;
            # fall through to the uninterpolated bin centre in that case.
            if np.isfinite(slope) and np.isfinite(curvature) and curvature != 0:
                x1 = slope * .5 / curvature
                return (maxFreqIdx + x1) * binWidth

        # Peak is at an edge of the spectrum (or interpolation is undefined),
        # so report the bin centre itself.
        return maxFreqIdx * binWidth

    def encodeIntoArray(self, inputArr, output):
        """Encode the dominant frequency of inputArr into output.

        @param inputArr list or numpy array of samples, or
                        SENTINEL_VALUE_FOR_MISSING_DATA
        @param output   array of at least n elements, written in place
        @raise TypeError  when inputArr is neither the sentinel, a list nor a
                          numpy array
        @raise ValueError when the detected frequency is outside
                          [minval, maxval]
        """
        # Test the sentinel first and by identity: behind the type check the
        # branch was unreachable, and "array == sentinel" is ambiguous as a
        # condition.
        if inputArr is SENTINEL_VALUE_FOR_MISSING_DATA:
            output[0:self.n] = 0
            return

        if not isinstance(inputArr, (list, np.ndarray)):
            raise TypeError(
                "Expected a list or numpy array but got input of type %s" % type(inputArr))

        frequency = self._detectFrequency(inputArr)
        # Fail fast if frequency is outside allowed range.
        if (frequency < self.minval) or (frequency > self.maxval):
            raise ValueError(
                "Frequency value %f is outside allowed range (%f, %f)"
                % (frequency, self.minval, self.maxval))
        output[0:self.n] = self._scalarEncoder.encode(frequency)

    def getWidth(self):
        """Return the length n of the encoded SDR."""
        return self.n
class SoundEncoder(Encoder):
    """
    This is an implementation of a sound encoder. A sound wave is converted
    into the maximum frequency detected according to FFT, and this frequency
    is encoded into an SDR using a ScalarEncoder.
    """

    def __init__(self, n, w, rate, chunk, minval=20, maxval=20000, name=None):
        """
        @param n int the length of the encoded SDR
        @param w int the number of 1s in the encoded SDR
        @param rate int the number of sound samples per second
        @param chunk int the number of samples in an input
        @param minval float the lowest possible frequency detected
        @param maxval float the highest possible frequency detected
        @param name string the name of the encoder
        """
        self.n = n
        self.w = w
        self.rate = rate
        self.chunk = chunk
        self.minval = minval
        self.maxval = maxval
        self.name = name
        self._scalarEncoder = ScalarEncoder(name="scalar_"+str(name), n=n, w=w,
                                            minval=minval, maxval=maxval)

    def _detectFrequency(self, inputArr):
        """Use FFT to find maximum frequency present in the input.

        @param inputArr list or numpy array of sound samples
        @return float the detected frequency; quadratic interpolation around
                the spectral peak is applied when the peak has a neighbour on
                both sides
        """
        fftData = abs(np.fft.rfft(inputArr)) ** 2
        maxFreqIdx = np.argmax(fftData)
        # float() keeps the bin width exact when rate and chunk are ints
        hzPerBin = float(self.rate) / self.chunk

        # A peak in the first bin would give an empty slice and a peak in the
        # last bin a short one, so interpolate only for interior peaks.
        if 0 < maxFreqIdx < len(fftData) - 1:
            # Quadratic interpolation
            y0, y1, y2 = np.log(fftData[maxFreqIdx - 1:maxFreqIdx + 2])
            numerator = y2 - y0
            denominator = 2 * y1 - y2 - y0
            # Skip interpolation when a zero-power neighbour (log -> -inf) or
            # a flat peak makes the offset undefined.
            if (np.isfinite(numerator) and np.isfinite(denominator)
                    and denominator != 0):
                x1 = numerator * .5 / denominator
                return (maxFreqIdx + x1) * hzPerBin

        # No usable neighbours: the fallback used to reference x1, which is
        # undefined on this path, so return the plain bin frequency.
        return maxFreqIdx * hzPerBin

    def encodeIntoArray(self, inputArr, output):
        """Encode the dominant frequency of inputArr into output.

        @param inputArr list or numpy array of samples, or
                        SENTINEL_VALUE_FOR_MISSING_DATA
        @param output   array of at least n elements, written in place
        @raise TypeError  when inputArr is neither the sentinel, a list nor a
                          numpy array
        @raise ValueError when the detected frequency is outside
                          [minval, maxval]
        """
        # The sentinel must be handled before the type check (otherwise this
        # branch is dead) and by identity ("array == x" is elementwise).
        if inputArr is SENTINEL_VALUE_FOR_MISSING_DATA:
            output[0:self.n] = 0
            return

        if not isinstance(inputArr, (list, np.ndarray)):
            raise TypeError(
                "Expected a list or numpy array but got input of type %s" % type(inputArr))

        frequency = self._detectFrequency(inputArr)
        # Fail fast if frequency is outside allowed range.
        if (frequency < self.minval) or (frequency > self.maxval):
            raise ValueError(
                "Frequency value %f is outside allowed range (%f, %f)" % (
                    frequency, self.minval, self.maxval))
        output[0:self.n] = self._scalarEncoder.encode(frequency)

    def getWidth(self):
        """Return the length n of the encoded SDR."""
        return self.n
class FrequencyEncoder(Encoder):

    def __init__(self, numFrequencyBins, freqBinN, freqBinW, minval=0, maxval=14.0, log=True):
        """
        The `FrequencyEncoder` encodes a time series chunk (or any 1D array of
        numeric values) by taking the power spectrum of the signal and
        discretizing it. The discretization is done by slicing the frequency
        axis of the power spectrum in frequency bins. The parameter
        controlling the number of frequency bins is `numFrequencyBins`. The
        maximum amplitude of the power spectrum in each frequency bin is
        encoded by a `ScalarEncoder`. The parameter in `FrequencyEncoder`
        controlling the frequency bin size is `freqBinN`, which corresponds to
        the parameter `n` of `ScalarEncoder`. The parameter in
        `FrequencyEncoder` controlling the resolution (width) of a bin size is
        `freqBinW`, which corresponds to the parameter `w` of `ScalarEncoder`.

        :param numFrequencyBins: (int) The number of frequency bins used to
          discretize the power spectrum.

        :param freqBinN: (int) The size of each frequency bin in the power
          spectrum. This determines the 'n' parameter of the ScalarEncoder
          used to encode each frequency bin.

        :param freqBinW: (int) The resolution of each frequency bin in the
          power spectrum. This determines the 'w' parameter of the
          ScalarEncoder used to encode each frequency bin.

        :param minval: (float) optional. The minimum value of the power
          spectrum. This determines the 'minval' parameter of the
          ScalarEncoder used to encode each frequency bin. In practice, the
          power spectrum is always positive, so minval=0 works well.

        :param maxval: (float) optional. The maximum value of the power
          spectrum. This determines the maxval parameter of the ScalarEncoder
          used to encode each frequency bin. After analysis, we found that
          taking the log of the power spectrum allows us to use a default
          value of maxval=14.0.

        :param log: (bool) whether to take the log of the power spectrum.
          Note: It is not recommended to set this to False. Taking the log
          dampens the amplitude variations of the power spectrum and allows us
          (after analysis) to set maxval to the default value of 14.0. If you
          use log=False, you will have to tune the maxval value.
        """
        self.numFrequencyBins = numFrequencyBins
        self.freqBinN = freqBinN
        self.freqBinW = freqBinW
        self.minval = minval
        self.maxval = maxval
        self.log = log

        self.outputWidth = numFrequencyBins * freqBinN
        self.scalarEncoder = ScalarEncoder(n=freqBinN, w=freqBinW, minval=minval,
                                           maxval=maxval, forced=True)

    def getWidth(self):
        """
        Return the output width, in bits.

        :return outputWidth: (int) output width
        """
        return self.outputWidth

    def encodeIntoArray(self, inputData, output):
        """
        Encodes inputData and puts the encoded value into the numpy output
        array, which is a 1D array of length returned by getWidth().

        :param inputData: (np.array) Data to encode, or
          SENTINEL_VALUE_FOR_MISSING_DATA to zero the output.
        :param output: (np.array) 1D array. Encoder output.
        :raises TypeError: if inputData is neither the sentinel nor a numpy
          array.
        :raises ValueError: if the spectrum has fewer values than
          numFrequencyBins, so a bin would be empty.
        """
        # Handle the sentinel before the type check; behind it this branch
        # could never be reached.
        if inputData is SENTINEL_VALUE_FOR_MISSING_DATA:
            output[0:self.outputWidth] = 0
            return

        if not isinstance(inputData, np.ndarray):
            raise TypeError(
                'Expected inputData to be a numpy array but the input '
                'type is %s' % type(inputData))

        freqs = getFreqs(inputData, self.log)
        # Floor division: the bin size is used as a slice bound and must be
        # an integer.
        freqBinSize = len(freqs) // self.numFrequencyBins
        if freqBinSize == 0:
            raise ValueError(
                'Cannot split %d spectrum values into %d frequency bins'
                % (len(freqs), self.numFrequencyBins))

        # Encode the peak of each bin and lay the encodings side by side
        binEncodings = [
            self.scalarEncoder.encode(
                np.max(freqs[i * freqBinSize:(i + 1) * freqBinSize]))
            for i in range(self.numFrequencyBins)
        ]
        output[0:self.outputWidth] = np.concatenate(binEncodings)
inputDimensions = (256, ) columnDimensions = (512, ) encoder = ScalarEncoder(21, -1.0, 1.0, n=inputDimensions[0]) sp = SpatialPooler(inputDimensions=inputDimensions, columnDimensions=columnDimensions, globalInhibition=True, numActiveColumnsPerInhArea=21) tm = TemporalMemory(columnDimensions=columnDimensions) c = SDRClassifier(steps=[1], alpha=0.1, actValueAlpha=0.1, verbosity=0) x_true = x[1:] x_predict = np.zeros(len(x) - 1) for i, xi in tqdm(enumerate(x[:-1])): encoded = encoder.encode(xi) bucketIdx = np.where(encoded > 0)[0][0] spd = np.zeros(columnDimensions[0]) sp.compute(encoded, True, spd) active_indices = np.where(spd > 0)[0] tm.compute(active_indices) active_cell_indices = tm.getActiveCells() predictive_cell_indices = tm.getPredictiveCells() patternNZ = np.asarray(active_cell_indices) patternNZ = np.append(patternNZ, predictive_cell_indices) patternNZ = patternNZ.astype(np.int) patternNZ = list(set(patternNZ)) result = c.compute(recordNum=i, patternNZ=patternNZ,
class LVF(object):
    """Class implementing Localization with Vision Features"""

    def __init__(
        self,
        minX,
        maxX,
        minY,
        maxY,
        bottomUpInputSize,
        bottomUpOnBits,
    ):
        """Build the position encoders and the temporal memory.

        The x/y encoders cover [min, 10 * max] with n=75 bits, 5 of them
        active. The external (basal) input is the outer product of the two
        encodings, hence the squared size and on-bit count taken from the x
        encoder.
        """
        # Bookkeeping counters
        self.trainingIterations = 0
        self.testIterations = 0
        self.maxPredictionError = 0
        self.totalPredictionError = 0
        self.numMissedPredictions = 0

        self.bottomUpInputSize = bottomUpInputSize
        self.bottomUpOnBits = bottomUpOnBits

        self.xEncoder = ScalarEncoder(5, minX, 10 * maxX, n=75, forced=True)
        self.yEncoder = ScalarEncoder(5, minY, 10 * maxY, n=75, forced=True)
        self.externalSize = self.xEncoder.getWidth()**2
        self.externalOnBits = self.xEncoder.w**2

        totalOnBits = self.externalOnBits + self.bottomUpOnBits
        self.tm = TM(columnCount=self.bottomUpInputSize,
                     basalInputSize=self.externalSize,
                     cellsPerColumn=4,
                     initialPermanence=0.4,
                     connectedPermanence=0.5,
                     minThreshold=self.externalOnBits,
                     sampleSize=40,
                     permanenceIncrement=0.1,
                     permanenceDecrement=0.00,
                     activationThreshold=int(0.75 * totalOnBits),
                     basalPredictedSegmentDecrement=0.00,
                     seed=42)

    def compute(self, x, y, bottomUpSDR, learn):
        """Encode position (x, y) and, when learn is true, train on it.

        When learn is false nothing is trained; the flag is only echoed to
        stderr.
        """
        positionSDR = self.encodePosition(x, y)
        if not learn:
            print >> sys.stderr, "Learn: ", learn
            return
        # During learning the bottom-up SDR is paired with the position SDR
        self.trainTM(bottomUpSDR, positionSDR)
        self.trainingIterations += 1

    def encodePosition(self, x, y):
        """Return the indices of the on-bits of the x/y outer-product SDR."""
        grid = np.outer(self.xEncoder.encode(x), self.yEncoder.encode(y))
        return grid.flatten().nonzero()[0]

    def trainTM(self, bottomUp, externalInput):
        """Run one learning step with externalInput as the basal input."""
        self.tm.compute(bottomUp, basalInput=externalInput, learn=True)
def generateInputVectors(self, params):
    """Generate the input vectors selected by ``params['dataType']``.

    The result is stored on ``self._inputVectors``; nothing is returned.
    The ``'correlatedSDRPairs'`` type additionally sets
    ``self._additionalInfo``. A ``dataType`` that matches none of the
    branches below leaves both attributes untouched.

    Keys read from ``params`` for each data type:

    - ``'randomSDR'``: numInputVectors, inputSize, numActiveInputBits, seed
    - ``'randomSDRVaryingSparsity'``: numInputVectors, inputSize,
      minSparsity, maxSparsity, seed
    - ``'denseVectors'``: numInputVectors, inputSize, seed
    - ``'randomBarPairs'``: nX, nY, numInputVectors, seed, barHalfLength
    - ``'randomBarSets'``: as above plus numBarsPerInput
    - ``'randomCross'``: nX, nY, numInputVectors, seed, barHalfLength,
      numCrossPerInput
    - ``'correlatedSDRPairs'``: numInputVectors, inputSize,
      numInputVectorPerSensor, numActiveInputBits, corrStrength, seed
    - ``'nyc_taxi'``: w, minval, maxval, n (reads ``./data/nyc_taxi.csv``)
    - ``'mnist'``: numInputVectors (reads ``data/mnist/training/``)

    :param params: dict holding ``'dataType'`` and the keys listed above
    :raises KeyError: if a key required by the selected type is missing
    """
    dataType = params['dataType']

    if dataType == 'randomSDR':
        self._inputVectors = generateRandomSDR(
            params['numInputVectors'],
            params['inputSize'],
            params['numActiveInputBits'],
            params['seed'])

    elif dataType == 'randomSDRVaryingSparsity':
        self._inputVectors = generateRandomSDRVaryingSparsity(
            params['numInputVectors'],
            params['inputSize'],
            params['minSparsity'],
            params['maxSparsity'],
            params['seed'])

    elif dataType == 'denseVectors':
        self._inputVectors = generateDenseVectors(
            params['numInputVectors'],
            params['inputSize'],
            params['seed'])

    elif dataType == 'randomBarPairs':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            # Two consecutive seeds per vector: one for each bar.
            seed = (params['seed'] * numInputVectors + i) * 2
            bar1 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], seed, False,
                                'horizontal')
            bar2 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], seed + 1, False,
                                'vertical')
            data = bar1 + bar2
            data[data > 0] = 1  # overlapping pixels still count as one bit
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomBarSets':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            data = 0
            # One seed per bar, contiguous within each input vector.
            seed = ((params['seed'] * numInputVectors + i)
                    * params['numBarsPerInput'])
            for barI in range(params['numBarsPerInput']):
                bar = getRandomBar((params['nX'], params['nY']),
                                   params['barHalfLength'], seed + barI, True)
                data += bar
            data[data > 0] = 1
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomCross':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            seed = ((params['seed'] * numInputVectors + i)
                    * params['numCrossPerInput'])
            data = 0
            for j in range(params['numCrossPerInput']):
                data += getCross(params['nX'], params['nY'],
                                 params['barHalfLength'], seed + j)
            data[data > 0] = 1
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'correlatedSDRPairs':
        (inputVectors, inputVectors1, inputVectors2, corrPairs) = \
            generateCorrelatedSDRPairs(
                params['numInputVectors'],
                params['inputSize'],
                params['numInputVectorPerSensor'],
                params['numActiveInputBits'],
                params['corrStrength'],
                params['seed'])
        self._inputVectors = inputVectors
        self._additionalInfo = {
            "inputVectors1": inputVectors1,
            "inputVectors2": inputVectors2,
            "corrPairs": corrPairs
        }

    elif dataType == 'nyc_taxi':
        from nupic.encoders.scalar import ScalarEncoder
        df = pd.read_csv('./data/nyc_taxi.csv', header=0, skiprows=[1, 2])

        # The encoder configuration does not depend on the record, so it is
        # built once instead of once per row.
        enc = ScalarEncoder(w=params['w'],
                            minval=params['minval'],
                            maxval=params['maxval'],
                            n=params['n'])

        numRecords = 5000  # fixed number of rows taken from the CSV
        inputVectors = np.zeros((numRecords, params['n']))
        for i in range(numRecords):
            # Only the passenger count is encoded.
            inputVectors[i, :] = enc.encode(float(df["passenger_count"][i]))
        self._inputVectors = inputVectors

    elif dataType == 'mnist':
        imagePath = os.path.abspath('data/mnist/training/')

        # One category per non-hidden sub-directory, in sorted order.
        categoryList = [
            c for c in sorted(os.listdir(imagePath))
            if not c.startswith(".")
            and os.path.isdir(os.path.join(imagePath, c))
        ]

        fileList = {}
        for category in categoryList:
            categoryFilenames = []
            walkPath = os.path.join(imagePath, category)
            for dirpath, dirnames, filenames in os.walk(walkPath):
                # Assigning through the slice edits the list os.walk holds,
                # so hidden directories are never entered and the rest are
                # visited in sorted order.
                dirnames[:] = sorted(
                    d for d in dirnames if not d.startswith("."))
                # Ignore files that begin with "."
                categoryFilenames.extend(
                    os.path.join(dirpath, f)
                    for f in sorted(filenames) if not f.startswith("."))
            fileList[category] = categoryFilenames

        inputVectors = np.zeros((params['numInputVectors'], 1024))
        numImagePerCategory = int(
            params['numInputVectors'] / len(categoryList))
        counter = 0
        for category in categoryList:
            for filename in fileList[category][:numImagePerCategory]:
                # Scale to [0, 1] and round to a binary image.
                image = misc.imread(filename).astype('float32')
                image /= 255
                image = image.round()
                # Centre the image in a 32x32 frame (2-pixel border).
                paddedImage = np.zeros((32, 32))
                paddedImage[2:30, 2:30] = image
                inputVectors[counter, :] = np.reshape(paddedImage, (1, 1024))
                counter += 1
        self._inputVectors = inputVectors
NRMSE_TM = NRMSE(actual_data[nTrain:nTrain+nTest], predData_TM_n_step[nTrain:nTrain+nTest]) print "NRMSE on test data: ", NRMSE_TM # calculate neg-likelihood predictions = np.transpose(likelihoodsVecAll) truth = np.roll(actual_data, -5) from nupic.encoders.scalar import ScalarEncoder as NupicScalarEncoder encoder = NupicScalarEncoder(w=1, minval=0, maxval=40000, n=22, forced=True) bucketIndex2 = [] negLL = [] minProb = 0.0001 for i in xrange(len(truth)): bucketIndex2.append(np.where(encoder.encode(truth[i]))[0]) outOfBucketProb = 1 - sum(predictions[i,:]) prob = predictions[i, bucketIndex2[i]] if prob == 0: prob = outOfBucketProb if prob < minProb: prob = minProb negLL.append( -np.log(prob)) negLL = computeLikelihood(predictions, truth, encoder) negLL[:5000] = np.nan x = range(len(negLL)) if not os.path.exists("./results/nyc_taxi/"): os.makedirs("./results/nyc_taxi/") np.savez('./results/nyc_taxi/{}{}TMprediction_SPLearning_{}_boost_{}'.format(
class Agent(object): def __init__(self): self.encoder = CoordinateEncoder(n=1024, w=21) self.motorEncoder = ScalarEncoder(21, -1, 1, n=1024) self.tm = MonitoredExtendedTemporalMemory( columnDimensions=[2048], cellsPerColumn=1, initialPermanence=0.5, connectedPermanence=0.6, permanenceIncrement=0.1, permanenceDecrement=0.02, minThreshold=35, activationThreshold=35, maxNewSynapseCount=40) self.plotter = Plotter(self.tm, showOverlaps=False, showOverlapsValues=False) self.lastState = None self.lastAction = None def sync(self, outputData): if not ("location" in outputData and "steer" in outputData): print "Warning: Missing data:", outputData return reset = outputData.get("reset") or False if reset: print "Reset." self.tm.reset() location = outputData["location"] steer = outputData["steer"] x = int(location["x"] * SCALE) z = int(location["z"] * SCALE) coordinate = numpy.array([x, z]) encoding = self.encoder.encode((coordinate, RADIUS)) motorEncoding = self.motorEncoder.encode(steer) sensorPattern = set(encoding.nonzero()[0]) motorPattern = set(motorEncoding.nonzero()[0]) self.tm.compute(sensorPattern, activeExternalCells=motorPattern, formInternalConnections=True) print self.tm.mmPrettyPrintMetrics(self.tm.mmGetDefaultMetrics()) self.plotter.update(encoding, reset) if reset: self.plotter.render() self.lastState = encoding self.lastAction = steer
class ScalarEncoderTest(unittest.TestCase):
    """Unit tests for ScalarEncoder class"""

    def setUp(self):
        # use of forced is not recommended, but used here for readability, see
        # scalar.py
        self._l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                                periodic=True, forced=True)

    def _assertEncodes(self, encoder, value, expectedBits):
        """Assert that ``encoder.encode(value)`` equals ``expectedBits``."""
        self.assertTrue(numpy.array_equal(
            encoder.encode(value),
            numpy.array(expectedBits, dtype=defaultDtype)))

    def _assertDecodedRanges(self, encoder, bits, expectedRanges):
        """Assert that decoding ``bits`` gives one field with these ranges.

        Both the number of ranges and every individual range are checked.
        """
        (fieldsDict, _) = encoder.decode(numpy.array(bits))
        self.assertEqual(len(fieldsDict), 1)
        (ranges, _) = fieldsDict.values()[0]
        self.assertEqual(len(ranges), len(expectedRanges))
        for actual, expected in zip(ranges, expectedRanges):
            self.assertTrue(numpy.array_equal(actual, expected))

    def testScalarEncoder(self):
        """Testing ScalarEncoder..."""
        # ---------------------------------------------------------------------
        # test missing values
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(empty.sum(), 0)

    def testNaNs(self):
        """test NaNs"""
        mv = ScalarEncoder(name="mv", n=14, w=3, minval=1, maxval=8,
                           periodic=False, forced=True)
        empty = mv.encode(float("nan"))
        self.assertEqual(empty.sum(), 0)

    def testBottomUpEncodingPeriodicEncoder(self):
        """Test bottom-up encoding for a Periodic encoder"""
        l = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True,
                          forced=True)
        self.assertEqual(l.getDescription(), [("[1:8]", 0)])
        l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.getDescription(), [("scalar", 0)])

        self._assertEncodes(l, 3, [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
        self.assertTrue(numpy.array_equal(l.encode(3.1), l.encode(3)))
        self._assertEncodes(l, 3.5,
                            [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
        self.assertTrue(numpy.array_equal(l.encode(3.6), l.encode(3.5)))
        self.assertTrue(numpy.array_equal(l.encode(3.7), l.encode(3.5)))
        self._assertEncodes(l, 4, [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0])

        # Values near the edges wrap around because the encoder is periodic.
        self._assertEncodes(l, 1, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
        self._assertEncodes(l, 1.5,
                            [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncodes(l, 7, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1])
        self._assertEncodes(l, 7.5,
                            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])

        self.assertEqual(l.resolution, 0.5)
        self.assertEqual(l.radius, 1.5)

    def testCreateResolution(self):
        """Test that we get the same encoder when we construct it using
        resolution instead of n
        """
        l = self._l
        d = l.__dict__
        l = ScalarEncoder(name="scalar", resolution=0.5, w=3, minval=1,
                          maxval=8, periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name="scalar", radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)
        self.assertEqual(l.__dict__, d)

    def testDecodeAndResolution(self):
        """Test the input description generation, top-down compute, and bucket
        support on a periodic encoder
        """
        l = self._l
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, fieldNames) = decoded
            self.assertEqual(len(fieldsDict), 1)
            self.assertEqual(len(fieldNames), 1)
            self.assertEqual(fieldNames, fieldsDict.keys())
            (ranges, _) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)

            # Test bucket support
            bucketIndices = l.getBucketIndices(v)
            topDown = l.getBucketInfo(bucketIndices)[0]
            self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
            self.assertEqual(topDown.value,
                             l.getBucketValues()[bucketIndices[0]])
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue(numpy.array_equal(topDown.encoding, output))

            # Next value
            v += l.resolution / 4

        # ---------------------------------------------------------------------
        # Test decoding of hand-built bit patterns on a periodic encoder
        l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8,
                          periodic=True, forced=True)

        # Test with a "hole"
        self._assertDecodedRanges(
            l, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [[7.5, 7.5]])

        # Test with something wider than w, and with a hole, and wrapped
        self._assertDecodedRanges(
            l, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [[7.5, 8], [1, 1]])

        # Test with something wider than w, no hole
        self._assertDecodedRanges(
            l, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [[1.5, 2.5]])

        # Test with 2 ranges
        self._assertDecodedRanges(
            l, [1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
            [[1.5, 1.5], [5.5, 6.0]])

        # Test with 2 ranges, 1 of which is narrower than w. The range count
        # is compared for equality here; a bare assertTrue(len(ranges), 2)
        # would treat the 2 as the failure message and always pass.
        self._assertDecodedRanges(
            l, [0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0],
            [[1.5, 1.5], [5.5, 6.0]])

    def testCloseness(self):
        """Test closenessScores for a periodic encoder"""
        encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1,
                                periodic=True, name="day of week",
                                forced=True)
        scores = encoder.closenessScores((2, 4, 7), (4, 2, 1),
                                         fractional=False)
        for actual, score in itertools.izip((2, 2, 1), scores):
            self.assertEqual(actual, score)

    def testNonPeriodicBottomUp(self):
        """Test Non-periodic encoder bottom-up"""
        l = ScalarEncoder(name="scalar", n=14, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self._assertEncodes(l, 1, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncodes(l, 2, [0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
        self._assertEncodes(l, 10,
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        # Test that we get the same encoder when we construct it using
        # resolution instead of n
        d = l.__dict__
        l = ScalarEncoder(name="scalar", resolution=1, w=5, minval=1,
                          maxval=10, periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # Test that we get the same encoder when we construct it using radius
        # instead of n
        l = ScalarEncoder(name="scalar", radius=5, w=5, minval=1, maxval=10,
                          periodic=False, forced=True)
        self.assertEqual(l.__dict__, d)

        # ---------------------------------------------------------------------
        # Test the input description generation and topDown decoding of a
        # non-periodic encoder
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, _) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, _) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0]
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertLessEqual(abs(topDown.value - v), l.resolution)

            # Test bucket support
            bucketIndices = l.getBucketIndices(v)
            topDown = l.getBucketInfo(bucketIndices)[0]
            self.assertLessEqual(abs(topDown.value - v), l.resolution / 2)
            self.assertEqual(topDown.scalar, topDown.value)
            self.assertTrue(numpy.array_equal(topDown.encoding, output))

            # Next value
            v += l.resolution / 4

        # Make sure we can fill in holes
        self._assertDecodedRanges(
            l, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1],
            [[10, 10]])
        self._assertDecodedRanges(
            l, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1],
            [[10, 10]])

        # Test min and max
        l = ScalarEncoder(name="scalar", n=14, w=3, minval=1, maxval=10,
                          periodic=False, forced=True)
        decoded = l.topDownCompute(
            numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0]
        self.assertEqual(decoded.value, 10)
        decoded = l.topDownCompute(
            numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0]
        self.assertEqual(decoded.value, 1)

        # Make sure only the last and first encoding encodes to max and min,
        # and there is no value greater than max or min
        l = ScalarEncoder(name="scalar", n=140, w=3, minval=1, maxval=141,
                          periodic=False, forced=True)
        # NOTE(review): range(137) stops at i == 136, so the `i == 137`
        # alternative below can never be true and the last window position
        # (bits 137-139) is not exercised. Confirm whether range(138) was
        # intended before changing it.
        for i in range(137):
            iterlist = [0 for _ in range(140)]
            for j in range(i, i + 3):
                iterlist[j] = 1
            npar = numpy.array(iterlist)
            decoded = l.topDownCompute(npar)[0]
            self.assertLessEqual(decoded.value, 141)
            self.assertGreaterEqual(decoded.value, 1)
            self.assertTrue(decoded.value < 141 or i == 137)
            self.assertTrue(decoded.value > 1 or i == 0)

        # ---------------------------------------------------------------------
        # Test the input description generation and top-down compute on a
        # small number non-periodic encoder
        l = ScalarEncoder(name="scalar", n=15, w=3, minval=.001, maxval=.002,
                          periodic=False, forced=True)
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, _) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, _) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), l.resolution / 2)
            v += l.resolution / 4

        # ---------------------------------------------------------------------
        # Test the input description generation on a large number,
        # non-periodic encoder
        l = ScalarEncoder(name="scalar", n=15, w=3, minval=1,
                          maxval=1000000000, periodic=False, forced=True)
        v = l.minval
        while v < l.maxval:
            output = l.encode(v)
            decoded = l.decode(output)

            (fieldsDict, _) = decoded
            self.assertEqual(len(fieldsDict), 1)
            (ranges, _) = fieldsDict.values()[0]
            self.assertEqual(len(ranges), 1)
            (rangeMin, rangeMax) = ranges[0]
            self.assertEqual(rangeMin, rangeMax)
            self.assertLess(abs(rangeMin - v), l.resolution)

            topDown = l.topDownCompute(output)[0].value
            self.assertLessEqual(abs(topDown - v), l.resolution / 2)
            v += l.resolution / 4

    def testEncodeInvalidInputType(self):
        """Encoding a string raises TypeError."""
        encoder = ScalarEncoder(name="enc", n=14, w=3, minval=1, maxval=8,
                                periodic=False, forced=True)
        with self.assertRaises(TypeError):
            encoder.encode("String")

    def testGetBucketInfoIntResolution(self):
        """Ensures that passing resolution as an int doesn't truncate values."""
        encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8,
                                periodic=True, forced=True)
        self.assertEqual(
            4.5, encoder.topDownCompute(encoder.encode(4.5))[0].scalar)

    @unittest.skipUnless(
        capnp, "pycapnp is not installed, skipping serialization test.")
    def testReadWrite(self):
        """Test ScalarEncoder Cap'n Proto serialization implementation."""
        originalValue = self._l.encode(1)

        proto1 = ScalarEncoderProto.new_message()
        self._l.write(proto1)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = ScalarEncoderProto.read(f)

        encoder = ScalarEncoder.read(proto2)

        self.assertIsInstance(encoder, ScalarEncoder)
        self.assertEqual(encoder.w, self._l.w)
        self.assertEqual(encoder.minval, self._l.minval)
        self.assertEqual(encoder.maxval, self._l.maxval)
        self.assertEqual(encoder.periodic, self._l.periodic)
        self.assertEqual(encoder.n, self._l.n)
        self.assertEqual(encoder.radius, self._l.radius)
        self.assertEqual(encoder.resolution, self._l.resolution)
        self.assertEqual(encoder.name, self._l.name)
        self.assertEqual(encoder.verbosity, self._l.verbosity)
        self.assertEqual(encoder.clipInput, self._l.clipInput)
        self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue))
        self.assertEqual(self._l.decode(encoder.encode(1)),
                         encoder.decode(self._l.encode(1)))

        # Feed in a new value and ensure the encodings match
        result1 = self._l.encode(7)
        result2 = encoder.encode(7)
        self.assertTrue(numpy.array_equal(result1, result2))

    def testSettingNWithMaxvalMinvalNone(self):
        """Setting n when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=14, radius=0,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)

    def testSettingScalarAndResolution(self):
        """Setting both scalar and resolution not allowed."""
        with self.assertRaises(ValueError):
            ScalarEncoder(3, None, None, name="scalar", n=0, radius=None,
                          resolution=0.5, forced=True)

    def testSettingRadiusWithMaxvalMinvalNone(self):
        """Setting radius when maxval/minval = None creates instance."""
        encoder = ScalarEncoder(3, None, None, name="scalar", n=0, radius=1.5,
                                resolution=0, forced=True)
        self.assertIsInstance(encoder, ScalarEncoder)
class ScalarEncoderTest(unittest.TestCase): """Unit tests for ScalarEncoder class""" def setUp(self): # use of forced is not recommended, but used here for readability, see scalar.py self._l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8, periodic=True, forced=True) ############################################################################ def testScalarEncoder(self): """Testing ScalarEncoder...""" # ------------------------------------------------------------------------- # test missing values mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8, periodic=False, forced=True) empty = mv.encode(SENTINEL_VALUE_FOR_MISSING_DATA) print "\nEncoded missing data \'None\' as %s" % empty self.assertEqual(empty.sum(), 0) # -------------------------------------------------------------------- def testNaNs(self): """test NaNs""" mv = ScalarEncoder(name='mv', n=14, w=3, minval=1, maxval=8, periodic=False, forced=True) empty = mv.encode(float("nan")) print "\nEncoded missing data \'None\' as %s" % empty self.assertEqual(empty.sum(), 0) # ------------------------------------------------------------------------ def testBottomUpEncodingPeriodicEncoder(self): """Test bottom-up encoding for a Periodic encoder""" l = ScalarEncoder(n=14, w=3, minval=1, maxval=8, periodic=True, forced=True) self.assertEqual(l.getDescription(), [("[1:8]", 0)]) l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=8, periodic=True, forced=True) self.assertEqual(l.getDescription(), [("scalar", 0)]) self.assertTrue((l.encode(3) == numpy.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(3.1) == l.encode(3)).all()) self.assertTrue((l.encode(3.5) == numpy.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(3.6) == l.encode(3.5)).all()) self.assertTrue((l.encode(3.7) == l.encode(3.5)).all()) self.assertTrue((l.encode(4) == numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], 
dtype=defaultDtype)).all()) self.assertTrue((l.encode(1) == numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=defaultDtype)).all()) self.assertTrue((l.encode(1.5) == numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(7) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], dtype=defaultDtype)).all()) self.assertTrue((l.encode(7.5) == numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype=defaultDtype)).all()) self.assertEqual(l.resolution, 0.5) self.assertEqual(l.radius, 1.5) # Test that we get the same encoder when we construct it using resolution # instead of n def testCreateResolution(self): """Test that we get the same encoder when we construct it using resolution instead of n""" l = self._l d = l.__dict__ l = ScalarEncoder(name='scalar', resolution=0.5, w=3, minval=1, maxval=8, periodic=True, forced=True) self.assertEqual(l.__dict__, d) # Test that we get the same encoder when we construct it using radius # instead of n l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8, periodic=True, forced=True) self.assertEqual(l.__dict__, d) # ------------------------------------------------------------------------- # Test the input description generation, top-down compute, and bucket # support on a periodic encoder def testDecodeAndResolution(self): """Testing periodic encoder decoding, resolution of """ l = self._l print l.resolution v = l.minval while v < l.maxval: output = l.encode(v) decoded = l.decode(output) print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertEqual(len(ranges), 1) (rangeMin, rangeMax) = ranges[0] self.assertEqual(rangeMin, rangeMax) self.assertTrue(abs(rangeMin - v) < l.resolution) topDown = l.topDownCompute(output)[0] print "topdown =>", topDown self.assertTrue((topDown.encoding == output).all()) 
self.assertTrue(abs(topDown.value - v) <= l.resolution / 2) # Test bucket support bucketIndices = l.getBucketIndices(v) print "bucket index =>", bucketIndices[0] topDown = l.getBucketInfo(bucketIndices)[0] self.assertTrue(abs(topDown.value - v) <= l.resolution / 2) self.assertEqual(topDown.value, l.getBucketValues()[bucketIndices[0]]) self.assertEqual(topDown.scalar, topDown.value) self.assertTrue((topDown.encoding == output).all()) # Next value v += l.resolution / 4 # ----------------------------------------------------------------------- # Test the input description generation on a large number, periodic encoder l = ScalarEncoder(name='scalar', radius=1.5, w=3, minval=1, maxval=8, periodic=True, forced=True) print "\nTesting periodic encoder decoding, resolution of %f..." % \ l.resolution # Test with a "hole" decoded = l.decode(numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [7.5, 7.5])) print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded) # Test with something wider than w, and with a hole, and wrapped decoded = l.decode(numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertTrue(len(ranges) == 2 and numpy.array_equal(ranges[0], [7.5, 8]) \ and numpy.array_equal(ranges[1], [1, 1])) print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded) # Test with something wider than w, no hole decoded = l.decode(numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [1.5, 2.5])) print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded) # Test with 2 ranges decoded = 
l.decode(numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0])) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertTrue(len(ranges) == 2 and numpy.array_equal(ranges[0], [1.5, 1.5]) \ and numpy.array_equal(ranges[1], [5.5, 6.0])) print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded) # Test with 2 ranges, 1 of which is narrower than w decoded = l.decode(numpy.array([0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0])) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertTrue(len(ranges) == 2 and numpy.array_equal(ranges[0], [1.5, 1.5]) \ and numpy.array_equal(ranges[1], [5.5, 6.0])) print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded) # ============================================================================ def testCloseness(self): """Test closenessScores for a periodic encoder""" encoder = ScalarEncoder(w=7, minval=0, maxval=7, radius=1, periodic=True, name="day of week", forced=True) scores = encoder.closenessScores((2, 4, 7), (4, 2, 1), fractional=False) for actual, score in itertools.izip((2, 2, 1), scores): self.assertEqual(actual, score) # ============================================================================ def testNonPeriodicBottomUp(self): """Test Non-periodic encoder bottom-up""" l = ScalarEncoder(name='scalar', n=14, w=5, minval=1, maxval=10, periodic=False, forced=True) print "\nTesting non-periodic encoder encoding, resolution of %f..." 
% \ l.resolution self.assertTrue((l.encode(1) == numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(2) == numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(10) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype=defaultDtype)).all()) # Test that we get the same encoder when we construct it using resolution # instead of n d = l.__dict__ l = ScalarEncoder(name='scalar', resolution=1, w=5, minval=1, maxval=10, periodic=False, forced=True) self.assertEqual(l.__dict__, d) # Test that we get the same encoder when we construct it using radius # instead of n l = ScalarEncoder(name='scalar', radius=5, w=5, minval=1, maxval=10, periodic=False, forced=True) self.assertEqual(l.__dict__, d) # ------------------------------------------------------------------------- # Test the input description generation and topDown decoding of a non-periodic # encoder v = l.minval print "\nTesting non-periodic encoder decoding, resolution of %f..." 
% \ l.resolution while v < l.maxval: output = l.encode(v) decoded = l.decode(output) print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertEqual(len(ranges), 1) (rangeMin, rangeMax) = ranges[0] self.assertEqual(rangeMin, rangeMax) self.assertTrue(abs(rangeMin - v) < l.resolution) topDown = l.topDownCompute(output)[0] print "topdown =>", topDown self.assertTrue((topDown.encoding == output).all()) self.assertTrue(abs(topDown.value - v) <= l.resolution) # Test bucket support bucketIndices = l.getBucketIndices(v) print "bucket index =>", bucketIndices[0] topDown = l.getBucketInfo(bucketIndices)[0] self.assertTrue(abs(topDown.value - v) <= l.resolution / 2) self.assertEqual(topDown.scalar, topDown.value) self.assertTrue((topDown.encoding == output).all()) # Next value v += l.resolution / 4 # Make sure we can fill in holes decoded = l.decode(numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1])) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [10, 10])) print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded) decoded = l.decode(numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1])) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertTrue(len(ranges) == 1 and numpy.array_equal(ranges[0], [10, 10])) print "decodedToStr of", ranges, "=>", l.decodedToStr(decoded) #Test min and max l = ScalarEncoder(name='scalar', n=14, w=3, minval=1, maxval=10, periodic=False, forced=True) decoded = l.topDownCompute(numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]))[0] self.assertEqual(decoded.value, 10) decoded = l.topDownCompute(numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))[0] self.assertEqual(decoded.value, 1) #Make sure only the last 
and first encoding encodes to max and min, and there is no value greater than max or min l = ScalarEncoder(name='scalar', n=140, w=3, minval=1, maxval=141, periodic=False, forced=True) for i in range(137): iterlist = [0 for _ in range(140)] for j in range(i, i+3): iterlist[j] =1 npar = numpy.array(iterlist) decoded = l.topDownCompute(npar)[0] self.assertTrue(decoded.value <= 141) self.assertTrue(decoded.value >= 1) self.assertTrue(decoded.value < 141 or i==137) self.assertTrue(decoded.value > 1 or i == 0) # ------------------------------------------------------------------------- # Test the input description generation and top-down compute on a small number # non-periodic encoder l = ScalarEncoder(name='scalar', n=15, w=3, minval=.001, maxval=.002, periodic=False, forced=True) print "\nTesting non-periodic encoder decoding, resolution of %f..." % \ l.resolution v = l.minval while v < l.maxval: output = l.encode(v) decoded = l.decode(output) print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertEqual(len(ranges), 1) (rangeMin, rangeMax) = ranges[0] self.assertEqual(rangeMin, rangeMax) self.assertTrue(abs(rangeMin - v) < l.resolution) topDown = l.topDownCompute(output)[0].value print "topdown =>", topDown self.assertTrue(abs(topDown - v) <= l.resolution / 2) v += l.resolution / 4 # ------------------------------------------------------------------------- # Test the input description generation on a large number, non-periodic encoder l = ScalarEncoder(name='scalar', n=15, w=3, minval=1, maxval=1000000000, periodic=False, forced=True) print "\nTesting non-periodic encoder decoding, resolution of %f..." 
% \ l.resolution v = l.minval while v < l.maxval: output = l.encode(v) decoded = l.decode(output) print "decoding", output, "(%f)=>" % v, l.decodedToStr(decoded) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 1) (ranges, desc) = fieldsDict.values()[0] self.assertEqual(len(ranges), 1) (rangeMin, rangeMax) = ranges[0] self.assertEqual(rangeMin, rangeMax) self.assertTrue(abs(rangeMin - v) < l.resolution) topDown = l.topDownCompute(output)[0].value print "topdown =>", topDown self.assertTrue(abs(topDown - v) <= l.resolution / 2) v += l.resolution / 4 # ------------------------------------------------------------------------- # Test setting fieldStats after initialization if False: #TODO: remove all this? (and fieldstats from ScalarEncoder (if applicable) )? # Modified on 11/20/12 12:53 PM - setFieldStats not applicable for ScalarEncoder l = ScalarEncoder(n=14, w=3, minval=100, maxval=800, periodic=True, forced=True) l.setFieldStats("this", {"this":{"min":1, "max":8}}) l = ScalarEncoder(name='scalar', n=14, w=3, minval=100, maxval=800, periodic=True, forced=True) l.setFieldStats("this", {"this":{"min":1, "max":8}}) self.assertTrue((l.encode(3) == numpy.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(3.1) == l.encode(3)).all()) self.assertTrue((l.encode(3.5) == numpy.array([0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(3.6) == l.encode(3.5)).all()) self.assertTrue((l.encode(3.7) == l.encode(3.5)).all()) self.assertTrue((l.encode(4) == numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(1) == numpy.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=defaultDtype)).all()) self.assertTrue((l.encode(1.5) == numpy.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(7) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1], 
dtype=defaultDtype)).all()) self.assertTrue((l.encode(7.5) == numpy.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype=defaultDtype)).all()) l = ScalarEncoder(name='scalar', n=14, w=5, minval=100, maxval=1000, periodic=False, forced=True) l.setFieldStats("this", {"this":{"min":1, "max":10}}) print "\nTesting non-periodic encoding using setFieldStats, resolution of %f..." % \ l.resolution self.assertTrue((l.encode(1) == numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(2) == numpy.array([0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=defaultDtype)).all()) self.assertTrue((l.encode(10) == numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype=defaultDtype)).all()) # ============================================================================ def testEncodeInvalidInputType(self): encoder = ScalarEncoder(name='enc', n=14, w=3, minval=1, maxval=8, periodic=False, forced=True) with self.assertRaises(TypeError): encoder.encode("String") # ============================================================================ def testGetBucketInfoIntResolution(self): """Ensures that passing resolution as an int doesn't truncate values.""" encoder = ScalarEncoder(w=3, resolution=1, minval=1, maxval=8, periodic=True, forced=True) self.assertEqual(4.5, encoder.topDownCompute(encoder.encode(4.5))[0].scalar) def testReadWrite(self): """Test ScalarEncoder Cap'n Proto serialization implementation.""" originalValue = self._l.encode(1) proto1 = ScalarEncoderProto.new_message() self._l.write(proto1) # Write the proto to a temp file and read it back into a new proto with tempfile.TemporaryFile() as f: proto1.write(f) f.seek(0) proto2 = ScalarEncoderProto.read(f) encoder = ScalarEncoder.read(proto2) self.assertIsInstance(encoder, ScalarEncoder) self.assertEqual(encoder.w, self._l.w) self.assertEqual(encoder.minval, self._l.minval) self.assertEqual(encoder.maxval, self._l.maxval) self.assertEqual(encoder.periodic, 
self._l.periodic) self.assertEqual(encoder.n, self._l.n) self.assertEqual(encoder.radius, self._l.radius) self.assertEqual(encoder.resolution, self._l.resolution) self.assertEqual(encoder.name, self._l.name) self.assertEqual(encoder.verbosity, self._l.verbosity) self.assertEqual(encoder.clipInput, self._l.clipInput) self.assertTrue(numpy.array_equal(encoder.encode(1), originalValue)) self.assertEqual(self._l.decode(encoder.encode(1)), encoder.decode(self._l.encode(1))) # Feed in a new value and ensure the encodings match result1 = self._l.encode(7) result2 = encoder.encode(7) self.assertTrue(numpy.array_equal(result1, result2)) # ============================================================================ # Tests for #1966 def testSettingNWithMaxvalMinvalNone(self): """Setting n when maxval/minval = None creates instance.""" encoder = ScalarEncoder(3, None, None, name='scalar', n=14, radius=0, resolution=0, forced=True) self.assertIsInstance(encoder, ScalarEncoder) def testSettingScalarAndResolution(self): """Setting both scalar and resolution not allowed.""" with self.assertRaises(ValueError): encoder = ScalarEncoder(3, None, None, name='scalar', n=0, radius=None, resolution=0.5, forced=True) def testSettingRadiusWithMaxvalMinvalNone(self): """If radius when maxval/minval = None creates instance.""" encoder = ScalarEncoder(3, None, None, name='scalar', n=0, radius=1.5, resolution=0, forced=True) self.assertIsInstance(encoder, ScalarEncoder)
def generateInputVectors(self, params):
    """Build the input vectors selected by ``params['dataType']``.

    The generated array is stored on ``self._inputVectors``; nothing is
    returned. For ``'correlatedSDRPairs'`` the per-sensor vectors and the
    correlated pairs are additionally stored on ``self._additionalInfo``.
    An unrecognized ``dataType`` leaves the instance untouched.

    Keys read from ``params``, by ``dataType``:

    * ``'randomSDR'``: numInputVectors, inputSize, numActiveInputBits, seed
    * ``'denseVectors'``: numInputVectors, inputSize, seed
    * ``'randomBarPairs'``: nX, nY, numInputVectors, barHalfLength
    * ``'randomBarSets'``: as above plus numBarsPerInput
    * ``'randomCross'``: nX, nY, numInputVectors, barHalfLength
    * ``'correlatedSDRPairs'``: numInputVectors, inputSize,
      numInputVectorPerSensor, numActiveInputBits, corrStrength, seed
    * ``'nyc_taxi'``: n, w, minval, maxval (reads ./data/nyc_taxi.csv)
    """
    dataType = params['dataType']

    if dataType == 'randomSDR':
        self._inputVectors = generateRandomSDR(
            params['numInputVectors'],
            params['inputSize'],
            params['numActiveInputBits'],
            params['seed'])

    elif dataType == 'denseVectors':
        self._inputVectors = generateDenseVectors(
            params['numInputVectors'],
            params['inputSize'],
            params['seed'])

    elif dataType == 'randomBarPairs':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            bar1 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], 'horizontal')
            bar2 = getRandomBar((params['nX'], params['nY']),
                                params['barHalfLength'], 'vertical')
            data = bar1 + bar2
            # Overlapping bars sum above 1; clamp back to a binary image.
            data[data > 0] = 1
            # Shape is passed positionally: the ``newshape`` keyword is
            # deprecated in recent NumPy releases.
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomBarSets':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            # NOTE(review): with numBarsPerInput == 0 ``data`` stays the
            # int 0 and the masking below fails; callers presumably always
            # request at least one bar -- confirm.
            data = 0
            for _ in range(params['numBarsPerInput']):
                orientation = np.random.choice(['horizontal', 'vertical'])
                data += getRandomBar((params['nX'], params['nY']),
                                     params['barHalfLength'], orientation)
            data[data > 0] = 1
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'randomCross':
        inputSize = params['nX'] * params['nY']
        numInputVectors = params['numInputVectors']
        self._inputVectors = np.zeros((numInputVectors, inputSize),
                                      dtype=uintType)
        for i in range(numInputVectors):
            data = getCross(params['nX'], params['nY'],
                            params['barHalfLength'])
            self._inputVectors[i, :] = np.reshape(data, (1, inputSize))

    elif dataType == 'correlatedSDRPairs':
        (inputVectors, inputVectors1, inputVectors2, corrPairs) = \
            generateCorrelatedSDRPairs(
                params['numInputVectors'],
                params['inputSize'],
                params['numInputVectorPerSensor'],
                params['numActiveInputBits'],
                params['corrStrength'],
                params['seed'])
        self._inputVectors = inputVectors
        self._additionalInfo = {"inputVectors1": inputVectors1,
                                "inputVectors2": inputVectors2,
                                "corrPairs": corrPairs}

    elif dataType == 'nyc_taxi':
        # Imported lazily so the other data types do not require nupic.
        from nupic.encoders.scalar import ScalarEncoder

        # Only the first 5000 records of the file are encoded.
        numRecords = 5000
        df = pd.read_csv('./data/nyc_taxi.csv', header=0, skiprows=[1, 2])
        inputVectors = np.zeros((numRecords, params['n']))

        # The encoder configuration does not depend on the record, so it is
        # built once instead of once per row.
        enc = ScalarEncoder(w=params['w'],
                            minval=params['minval'],
                            maxval=params['maxval'],
                            n=params['n'])
        for i in range(numRecords):
            passengerCount = float(df["passenger_count"][i])
            inputVectors[i, :] = enc.encode(passengerCount)
        self._inputVectors = inputVectors
class Agent(object):
    """Feeds location and steering data into an extended temporal memory.

    Each call to :meth:`sync` encodes a 2-D coordinate and a steering value,
    runs one temporal-memory step with the steering encoding as external
    basal input, prints the default monitor metrics and updates the plotter.

    Relies on module-level names defined outside this class: ``SCALE``,
    ``RADIUS``, ``CoordinateEncoder``, ``ScalarEncoder``,
    ``MonitoredExtendedTemporalMemory`` and ``Plotter``.
    """

    def __init__(self):
        """Create the encoders, the temporal memory and the plotter."""
        self.encoder = CoordinateEncoder(n=1024, w=21)
        self.motorEncoder = ScalarEncoder(21, -1, 1, n=1024)
        self.tm = MonitoredExtendedTemporalMemory(
            columnDimensions=[2048],
            basalInputDimensions=(999999,),  # Dodge input checking.
            cellsPerColumn=1,
            initialPermanence=0.5,
            connectedPermanence=0.6,
            permanenceIncrement=0.1,
            permanenceDecrement=0.02,
            minThreshold=35,
            activationThreshold=35,
            maxNewSynapseCount=40)
        self.plotter = Plotter(self.tm,
                               showOverlaps=False,
                               showOverlapsValues=False)

        self.lastState = None
        self.lastAction = None
        # Motor pattern from the previous step; empty before the first one.
        self.prevMotorPattern = ()

    def sync(self, outputData):
        """Process one record and advance the temporal memory by one step.

        ``outputData`` is a mapping that must contain ``"location"`` (a
        mapping with ``"x"`` and ``"z"``) and ``"steer"``; a truthy
        ``"reset"`` entry resets the temporal memory first and triggers a
        plot render afterwards. Records missing either required key are
        reported and ignored. Returns ``None``.
        """
        if "location" not in outputData or "steer" not in outputData:
            # Single-argument print calls emit the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("Warning: Missing data: %s" % (outputData,))
            return

        reset = outputData.get("reset") or False
        if reset:
            print("Reset.")
            self.tm.reset()

        location = outputData["location"]
        steer = outputData["steer"]

        # Scale the position and truncate it to integer coordinates.
        x = int(location["x"] * SCALE)
        z = int(location["z"] * SCALE)
        coordinate = numpy.array([x, z])

        encoding = self.encoder.encode((coordinate, RADIUS))
        motorEncoding = self.motorEncoder.encode(steer)

        # The temporal memory is given sets of active bit indices.
        sensorPattern = set(encoding.nonzero()[0])
        motorPattern = set(motorEncoding.nonzero()[0])

        self.tm.compute(
            sensorPattern,
            activeCellsExternalBasal=motorPattern,
            reinforceCandidatesExternalBasal=self.prevMotorPattern,
            growthCandidatesExternalBasal=self.prevMotorPattern)

        print(self.tm.mmPrettyPrintMetrics(self.tm.mmGetDefaultMetrics()))

        self.plotter.update(encoding, reset)
        if reset:
            self.plotter.render()

        self.lastState = encoding
        self.lastAction = steer
        self.prevMotorPattern = motorPattern