def testResolution(self):
  """
  Test that numbers within the same resolution return the same encoding.
  Numbers outside the resolution should return different encodings.
  """
  encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0)

  # Since 23.0 is the first encoded number, it will be the offset.
  # Since resolution is 1, 22.9 and 23.4 should have the same bucket index and
  # encoding.
  e23 = encoder.encode(23.0)
  e23p1 = encoder.encode(23.1)
  e22p9 = encoder.encode(22.9)
  e24 = encoder.encode(24.0)
  self.assertEqual(e23.sum(), encoder.w)
  self.assertEqual((e23 == e23p1).sum(), encoder.getWidth(),
                   "Numbers within resolution don't have the same encoding")
  self.assertEqual((e23 == e22p9).sum(), encoder.getWidth(),
                   "Numbers within resolution don't have the same encoding")
  self.assertNotEqual((e23 == e24).sum(), encoder.getWidth(),
                      "Numbers outside resolution have the same encoding")

  e22p5 = encoder.encode(22.5)
  self.assertNotEqual((e23 == e22p5).sum(), encoder.getWidth(),
                      "Numbers outside resolution have the same encoding")
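# Hedged standalone sketch of the bucket behavior the docstring above
# describes (assumes nupic is installed; getBucketIndices returns a
# one-element list, and the first encoded value fixes the offset):
from nupic.encoders.random_distributed_scalar import (
    RandomDistributedScalarEncoder)

enc = RandomDistributedScalarEncoder(resolution=1.0)
assert enc.getBucketIndices(23.0) == enc.getBucketIndices(23.4)  # within resolution
assert enc.getBucketIndices(23.0) != enc.getBucketIndices(24.0)  # outside resolution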
def testSeed(self):
  """
  Test that initializing twice with the same seed returns identical
  encodings, and that different (or unspecified) seeds return different
  encodings.
  """
  encoder1 = RandomDistributedScalarEncoder(name="encoder1", resolution=1.0,
                                            seed=42)
  encoder2 = RandomDistributedScalarEncoder(name="encoder2", resolution=1.0,
                                            seed=42)
  encoder3 = RandomDistributedScalarEncoder(name="encoder3", resolution=1.0,
                                            seed=-1)
  encoder4 = RandomDistributedScalarEncoder(name="encoder4", resolution=1.0,
                                            seed=-1)

  e1 = encoder1.encode(23.0)
  e2 = encoder2.encode(23.0)
  e3 = encoder3.encode(23.0)
  e4 = encoder4.encode(23.0)

  self.assertEqual((e1 == e2).sum(), encoder1.getWidth(),
                   "Same seed gives rise to different encodings")
  self.assertNotEqual((e1 == e3).sum(), encoder1.getWidth(),
                      "Different seeds give rise to the same encodings")
  self.assertNotEqual((e3 == e4).sum(), encoder1.getWidth(),
                      "Seeds of -1 give rise to the same encodings")
def definir_encoders():
  """
  Returns SIZE_ENCODER_, scalar_2_encoder, scalar_1_encoder, scalar_3_encoder,
  bits_scalar_1, bits_scalar_2, bits_scalar_3.
  """
  ### TODO: the resolution of all three should have been 2.30 -- change later.
  # Two inputs separated by less than the 'resolution' will have the same
  # encoder output.
  scalar_1_encoder = RandomDistributedScalarEncoder(
      resolution=15.384615384615385, seed=42)
  scalar_2_encoder = RandomDistributedScalarEncoder(
      resolution=15.384615384615385, seed=53)
  scalar_3_encoder = RandomDistributedScalarEncoder(
      resolution=15.384615384615385, seed=21)
  # 7 = how many bits represent one input.
  # 0.25 = radius: if two inputs differ by more than the radius, their
  # encodings won't overlap.
  bits_scalar_1 = np.zeros(scalar_1_encoder.getWidth())
  bits_scalar_2 = np.zeros(scalar_2_encoder.getWidth())
  bits_scalar_3 = np.zeros(scalar_3_encoder.getWidth())
  SIZE_ENCODER_ = (np.size(bits_scalar_1) + np.size(bits_scalar_2) +
                   np.size(bits_scalar_3))
  return (SIZE_ENCODER_, scalar_2_encoder, scalar_1_encoder, scalar_3_encoder,
          bits_scalar_1, bits_scalar_2, bits_scalar_3)
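# Hedged usage sketch for definir_encoders() above. Note the return order
# puts scalar_2_encoder before scalar_1_encoder; the sample values are
# illustrative, and np is assumed to be numpy as in the snippet.
(SIZE_ENCODER_, scalar_2_encoder, scalar_1_encoder, scalar_3_encoder,
 bits_scalar_1, bits_scalar_2, bits_scalar_3) = definir_encoders()
scalar_1_encoder.encodeIntoArray(10.0, bits_scalar_1)
scalar_2_encoder.encodeIntoArray(55.0, bits_scalar_2)
scalar_3_encoder.encodeIntoArray(120.0, bits_scalar_3)
encoding = np.concatenate([bits_scalar_1, bits_scalar_2, bits_scalar_3])
assert encoding.size == SIZE_ENCODER_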
def testMapBucketIndexToNonZeroBits(self):
  """
  Test that mapBucketIndexToNonZeroBits works and that max buckets and
  clipping are handled properly.
  """
  enc = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150)
  # Set a low number of max buckets
  enc._initializeBucketMap(10, None)
  enc.encode(0.0)
  enc.encode(-7.0)
  enc.encode(7.0)

  self.assertEqual(len(enc.bucketMap), enc._maxBuckets,
                   "_maxBuckets exceeded")
  self.assertTrue(
      (enc.mapBucketIndexToNonZeroBits(-1) == enc.bucketMap[0]).all(),
      "mapBucketIndexToNonZeroBits did not handle negative index")
  self.assertTrue(
      (enc.mapBucketIndexToNonZeroBits(1000) == enc.bucketMap[9]).all(),
      "mapBucketIndexToNonZeroBits did not handle out-of-bounds index")

  e23 = enc.encode(23.0)
  e6 = enc.encode(6)
  self.assertEqual((e23 == e6).sum(), enc.getWidth(),
                   "Values not clipped correctly during encoding")

  e_8 = enc.encode(-8)
  e_7 = enc.encode(-7)
  self.assertEqual((e_8 == e_7).sum(), enc.getWidth(),
                   "Values not clipped correctly during encoding")

  self.assertEqual(enc.getBucketIndices(-8)[0], 0,
                   "getBucketIndices returned negative bucket index")
  self.assertEqual(enc.getBucketIndices(23)[0], enc._maxBuckets - 1,
                   "getBucketIndices returned bucket index that is too large")
def testMapBucketIndexToNonZeroBits(self):
  """
  Test that mapBucketIndexToNonZeroBits works and that max buckets and
  clipping are handled properly.
  """
  encoder = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150)
  # Set a low number of max buckets
  encoder._initializeBucketMap(10, None)
  encoder.encode(0.0)
  encoder.encode(-7.0)
  encoder.encode(7.0)

  self.assertEqual(len(encoder.bucketMap), encoder._maxBuckets,
                   "_maxBuckets exceeded")
  self.assertTrue(
      numpy.array_equal(encoder.mapBucketIndexToNonZeroBits(-1),
                        encoder.bucketMap[0]),
      "mapBucketIndexToNonZeroBits did not handle negative index")
  self.assertTrue(
      numpy.array_equal(encoder.mapBucketIndexToNonZeroBits(1000),
                        encoder.bucketMap[9]),
      "mapBucketIndexToNonZeroBits did not handle out-of-bounds index")

  e23 = encoder.encode(23.0)
  e6 = encoder.encode(6)
  self.assertEqual((e23 == e6).sum(), encoder.getWidth(),
                   "Values not clipped correctly during encoding")

  ep8 = encoder.encode(-8)
  ep7 = encoder.encode(-7)
  self.assertEqual((ep8 == ep7).sum(), encoder.getWidth(),
                   "Values not clipped correctly during encoding")

  self.assertEqual(encoder.getBucketIndices(-8)[0], 0,
                   "getBucketIndices returned negative bucket index")
  self.assertEqual(encoder.getBucketIndices(23)[0], encoder._maxBuckets - 1,
                   "getBucketIndices returned bucket index that is too large")
def testVerbosity(self):
  """
  Test that nothing is printed out when verbosity=0
  """
  _stdout = sys.stdout
  sys.stdout = _stringio = StringIO()
  encoder = RandomDistributedScalarEncoder(name="mv", resolution=1.0,
                                           verbosity=0)
  output = numpy.zeros(encoder.getWidth(), dtype=defaultDtype)
  encoder.encodeIntoArray(23.0, output)
  encoder.getBucketIndices(23.0)
  sys.stdout = _stdout
  self.assertEqual(len(_stringio.getvalue()), 0,
                   "zero verbosity doesn't lead to zero output")
def testGetMethods(self):
  """
  Test that the getWidth, getDescription, and getDecoderOutputFieldTypes
  methods work.
  """
  enc = RandomDistributedScalarEncoder(name='theName', resolution=1.0, n=500)
  self.assertEqual(enc.getWidth(), 500,
                   "getWidth doesn't return the correct result")
  self.assertEqual(enc.getDescription(), [('theName', 0)],
                   "getDescription doesn't return the correct result")
  self.assertEqual(enc.getDecoderOutputFieldTypes(), (FieldMetaType.float,),
                   "getDecoderOutputFieldTypes doesn't return the correct result")
class DendriteDetector(AnomalyDetector):

  def initialize(self):
    # Keep track of value range for spatial anomaly detection.
    self.minVal = None
    self.maxVal = None

    # Time of day encoder
    self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49), name='time_enc')

    # RDSE encoder for the time series value.
    minResolution = 0.001
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = self.inputMax + rangePadding
    numBuckets = 130
    resolution = max(minResolution, (maxVal - minVal) / numBuckets)
    self.value_enc = RandomDistributedScalarEncoder(resolution=resolution,
                                                    name='value_rdse')

    # Spatial Pooler.
    encodingWidth = (self.timeOfDayEncoder.getWidth() +
                     self.value_enc.getWidth())
    self.sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(2048,),
        potentialPct=0.8,
        potentialRadius=encodingWidth,
        globalInhibition=1,
        numActiveColumnsPerInhArea=40,
        synPermInactiveDec=0.0005,
        synPermActiveInc=0.003,
        synPermConnected=0.2,
        boostStrength=0.0,
        seed=1956,
        wrapAround=True,
    )

    self.tm = TemporalMemory(
        columnDimensions=(2048,),
        cellsPerColumn=32,
        activationThreshold=20,
        initialPermanence=.5,  # Increased to connectedPermanence.
        connectedPermanence=.5,
        minThreshold=13,
        maxNewSynapseCount=31,
        permanenceIncrement=0.04,
        permanenceDecrement=0.008,
        predictedSegmentDecrement=0.001,
        maxSegmentsPerCell=128,
        maxSynapsesPerSegment=128,  # Changed meaning. Also see connections.topology[2]
        seed=1993,
    )

    # Initialize the anomaly likelihood object
    numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        learningPeriod=numentaLearningPeriod,
        estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
        reestimationPeriod=100,
    )

    self.age = 0

  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]

  def handleRecord(self, inputData):
    """
    Argument inputData is {"value": instantaneous_value,
    "timestamp": pandas.Timestamp}.
    Returns a tuple (anomalyScore, rawScore).

    Internally to NuPIC, "anomalyScore" corresponds to "likelihood_score" and
    "rawScore" corresponds to "anomaly_score". Sorry about that.
    """
    # Check for spatial anomalies and update min/max values.
    value = inputData["value"]
    spatialAnomaly = False
    if self.minVal != self.maxVal:
      tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
      maxExpected = self.maxVal + tolerance
      minExpected = self.minVal - tolerance
      if value > maxExpected or value < minExpected:
        spatialAnomaly = True
    if self.maxVal is None or value > self.maxVal:
      self.maxVal = value
    if self.minVal is None or value < self.minVal:
      self.minVal = value

    # Run the HTM stack. First the encoders.
    timestamp = inputData["timestamp"]
    timeOfDayBits = np.zeros(self.timeOfDayEncoder.getWidth())
    self.timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
    valueBits = np.zeros(self.value_enc.getWidth())
    self.value_enc.encodeIntoArray(value, valueBits)
    encoding = np.concatenate([timeOfDayBits, valueBits])

    # Spatial Pooler.
    activeColumns = np.zeros(self.sp.getNumColumns())
    self.sp.compute(encoding, True, activeColumns)
    activeColumnIndices = np.nonzero(activeColumns)[0]

    # Temporal Memory and Anomaly.
    predictions = self.tm.getPredictiveCells()
    predictedColumns = list(self.tm.mapCellsToColumns(predictions).keys())
    self.tm.compute(activeColumnIndices, learn=True)
    activeCells = self.tm.getActiveCells()
    rawScore = anomaly.computeRawAnomalyScore(activeColumnIndices,
                                              predictedColumns)

    # Compute log(anomaly likelihood)
    anomalyScore = self.anomalyLikelihood.anomalyProbability(
        inputData["value"], rawScore, inputData["timestamp"])
    finalScore = logScore = self.anomalyLikelihood.computeLogLikelihood(
        anomalyScore)
    if spatialAnomaly:
      finalScore = 1.0

    if False:
      # Plot correlation of excitement versus compartmentalization.
      if self.age == 0:
        print("Correlation Plots ENABLED.")
      if False:
        start_age = 1000
        end_age = 1800
      else:
        start_age = 4000
        end_age = 7260
      if self.age == start_age:
        import correlation
        import random
        self.cor_samplers = []
        sampled_cells = []
        while len(self.cor_samplers) < 20:
          n = random.choice(xrange(self.tm.numberOfCells()))
          if n in sampled_cells:
            continue
          else:
            sampled_cells.append(n)
          neuron = self.tm.connections.dataForCell(n)
          if neuron._roots:
            c = correlation.CorrelationSampler(neuron._roots[0])
            c.random_sample_points(100)
            self.cor_samplers.append(c)
        print("Created %d Correlation Samplers" % len(self.cor_samplers))
      if self.age >= start_age:
        for smplr in self.cor_samplers:
          smplr.sample()
      if self.age == end_age:
        import matplotlib.pyplot as plt
        for idx, smplr in enumerate(self.cor_samplers):
          if smplr.num_samples == 0:
            print("No samples, plot not shown.")
            continue
          plt.figure("Sample %d" % idx)
          smplr.plot(period=64)  # Different value!
        plt.show()

    if False:
      # Plot excitement of a typical detection on a dendrite.
      if self.age == 7265:  # if self.age == 1800:
        import matplotlib.pyplot as plt
        import random
        from connections import SYN_CONNECTED_ACTIVE
        sampled_cells = set()
        for figure_num in xrange(40):
          plt.figure("(%d)" % figure_num)
          # Find an active cell to view.
          cell = None
          for attempt in range(100):
            event = random.choice(self.tm.activeEvents)
            cell = event.cell  # This is an integer.
            if cell is not None and cell not in sampled_cells:
              break
          else:
            break
          sampled_cells.add(cell)
          cell = self.tm.connections.dataForCell(cell)
          # Organize the data.
          EPSPs = []
          excitement = []
          distance_to_root = 0
          segment_offsets = {}
          branch = cell._roots[0]
          while True:
            segment_offsets[branch] = distance_to_root
            distance_to_root += len(branch._synapses)
            excitement.extend(branch.excitement)
            for syn in branch._synapses:
              if syn is None:
                EPSPs.append(0)
              else:
                EPSPs.append(syn.state == SYN_CONNECTED_ACTIVE)
            if branch.children:
              branch = random.choice(branch.children)
            else:
              break
          plt.plot(
              np.arange(distance_to_root), EPSPs, 'r',
              np.arange(distance_to_root), excitement, 'b',
          )
          plt.title("Dendrite Activation\n"
                    "Horizontal line is activation threshold, "
                    "Vertical lines are segment bifurcations")
          plt.xlabel("Distance along Dendrite")
          plt.ylabel("EPSPs are Red, Excitement is Blue")
          # Show lines where the excitement crosses thresholds.
          plt.axhline(20, color='k')  # Hard coded parameter value.
          for offset in segment_offsets.values():
            if offset != 0:
              plt.axvline(offset, color='k')
        print("\nShowing %d excitement plots." % len(sampled_cells))
        plt.show()

    self.age += 1

    return (finalScore, rawScore)
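# Worked example (illustrative numbers, not from the source) of the RDSE
# resolution formula used in DendriteDetector.initialize() above, for a
# hypothetical data range of [10.0, 90.0]:
inputMin, inputMax = 10.0, 90.0
rangePadding = abs(inputMax - inputMin) * 0.2                      # 16.0
minVal, maxVal = inputMin - rangePadding, inputMax + rangePadding  # -6.0, 106.0
resolution = max(0.001, (maxVal - minVal) / 130)                   # 112/130 ~= 0.862
# Values closer together than ~0.862 therefore land in the same encoder bucket.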
def runHotgym():
  timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1))
  weekendEncoder = DateEncoder(weekend=21)
  scalarEncoder = RandomDistributedScalarEncoder(0.88)

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(2048,),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=0.85,
    # This means that the input space has no topology.
    globalInhibition=True,
    localAreaDensity=-1.0,
    # Roughly 2%, given that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195).
    numActiveColumnsPerInhArea=40.0,
    # How quickly synapses grow and degrade.
    synPermInactiveDec=0.005,
    synPermActiveInc=0.04,
    synPermConnected=0.1,
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=3.0,
    # Random number generator seed.
    seed=1956,
    # Determines if inputs at the beginning and end of an input dimension
    # should be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP.
    columnDimensions=(2048,),
    # How many cells in each mini-column.
    cellsPerColumn=32,
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState.
    activationThreshold=16,
    initialPermanence=0.21,
    connectedPermanence=0.5,
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=12,
    # The max number of synapses added to a segment during learning.
    maxNewSynapseCount=20,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=128,
    maxSynapsesPerSegment=32,
    seed=1960
  )

  classifier = SDRClassifierFactory.create()

  with open(_INPUT_FILE_PATH) as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):
      # Convert the date string into a Python datetime object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert the data value string into a float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the
      # encoders to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(2048)

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)
      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      probability, value = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(value, probability * 100))
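# Hedged helper sketch (hypothetical name, not from the source): the loop
# above prints only the single best prediction, but classifierResult[1]
# holds the 1-step-ahead likelihood per bucket and
# classifierResult["actualValues"] the value each bucket represents, so the
# N most likely predictions can be listed the same way:
def topPredictions(classifierResult, n=3):
  """Return [(probability, value), ...] for the n most likely predictions."""
  return sorted(zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True)[:n]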
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(spParams["columnCount"],),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=spParams["potentialPct"],
    # This means that the input space has no topology.
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    # Roughly 2%, given that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195).
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    # How quickly synapses grow and degrade.
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=spParams["boostStrength"],
    # Random number generator seed.
    seed=spParams["seed"],
    # TODO: is this useful?
    # Determines if inputs at the beginning and end of an input dimension
    # should be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP.
    columnDimensions=(tmParams["columnCount"],),
    # How many cells in each mini-column.
    cellsPerColumn=tmParams["cellsPerColumn"],
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState.
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # TODO: This comes from the SP params, is this normal?
    connectedPermanence=spParams["synPermConnected"],
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=tmParams["minThreshold"],
    # The max number of synapses added to a segment during learning.
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()

  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):
      if count >= numRecords:
        break

      # Convert the date string into a Python datetime object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert the data value string into a float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the
      # encoders to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)
      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
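# Hedged sketch of the minimal structure this function expects from the YAML
# params file after yaml.safe_load (keys are taken from the reads above; the
# values are illustrative, borrowed from the hard-coded hotgym variant
# earlier in this section, not from the actual params file):
modelParams = {
  "sensorParams": {"encoders": {
    "timestamp_timeOfDay": {"timeOfDay": (21, 1)},
    "timestamp_weekend": {"weekend": 21},
    "consumption": {"resolution": 0.88},
  }},
  "spParams": {
    "columnCount": 2048, "potentialPct": 0.85, "globalInhibition": True,
    "localAreaDensity": -1.0, "numActiveColumnsPerInhArea": 40,
    "synPermInactiveDec": 0.005, "synPermActiveInc": 0.04,
    "synPermConnected": 0.1, "boostStrength": 3.0, "seed": 1956,
  },
  "tmParams": {
    "columnCount": 2048, "cellsPerColumn": 32, "activationThreshold": 16,
    "initialPerm": 0.21, "minThreshold": 12, "newSynapseCount": 20,
    "permanenceInc": 0.1, "permanenceDec": 0.1, "maxSegmentsPerCell": 128,
    "maxSynapsesPerSegment": 32, "seed": 1960,
  },
}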
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  CtEncoder = RandomDistributedScalarEncoder(enParams["Ct"]["resolution"])
  ZIP_10467Encoder = RandomDistributedScalarEncoder(
    enParams["ZIP_10467"]["resolution"])
  # ZIP_10462Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10462"]["resolution"])
  # ZIP_10475Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10475"]["resolution"])
  # ZIP_10466Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10466"]["resolution"])
  # ZIP_10469Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10469"]["resolution"])
  # DEPT_11Encoder = RandomDistributedScalarEncoder(enParams["DEPT_11"]["resolution"])
  # DEPT_24Encoder = RandomDistributedScalarEncoder(enParams["DEPT_24"]["resolution"])
  # DEPT_41Encoder = RandomDistributedScalarEncoder(enParams["DEPT_41"]["resolution"])
  # DEPT_34Encoder = RandomDistributedScalarEncoder(enParams["DEPT_34"]["resolution"])
  # DEPT_31Encoder = RandomDistributedScalarEncoder(enParams["DEPT_31"]["resolution"])
  # DEPT_60Encoder = RandomDistributedScalarEncoder(enParams["DEPT_60"]["resolution"])
  # AGE_0_9Encoder = RandomDistributedScalarEncoder(enParams["AGE_0_9"]["resolution"])
  # AGE_10_19Encoder = RandomDistributedScalarEncoder(enParams["AGE_10_19"]["resolution"])
  # AGE_20_29Encoder = RandomDistributedScalarEncoder(enParams["AGE_20_29"]["resolution"])
  # AGE_30_39Encoder = RandomDistributedScalarEncoder(enParams["AGE_30_39"]["resolution"])
  # AGE_40_49Encoder = RandomDistributedScalarEncoder(enParams["AGE_40_49"]["resolution"])
  # AGE_50_59Encoder = RandomDistributedScalarEncoder(enParams["AGE_50_59"]["resolution"])
  # AGE_60_69Encoder = RandomDistributedScalarEncoder(enParams["AGE_60_69"]["resolution"])
  # AGE_70_79Encoder = RandomDistributedScalarEncoder(enParams["AGE_70_79"]["resolution"])
  # AGE_80_89Encoder = RandomDistributedScalarEncoder(enParams["AGE_80_89"]["resolution"])
  # AGE_90_99Encoder = RandomDistributedScalarEncoder(enParams["AGE_90_99"]["resolution"])
  # DIST_1_7Encoder = RandomDistributedScalarEncoder(enParams["DIST_1_7"]["resolution"])
  # DIST_8_14Encoder = RandomDistributedScalarEncoder(enParams["DIST_8_14"]["resolution"])
  # DIST_15_21Encoder = RandomDistributedScalarEncoder(enParams["DIST_15_21"]["resolution"])
  # DIST_22_28Encoder = RandomDistributedScalarEncoder(enParams["DIST_22_28"]["resolution"])
  # DIST_29_35Encoder = RandomDistributedScalarEncoder(enParams["DIST_29_35"]["resolution"])
  # DIST_36_42Encoder = RandomDistributedScalarEncoder(enParams["DIST_36_42"]["resolution"])
  # DIST_43_49Encoder = RandomDistributedScalarEncoder(enParams["DIST_43_49"]["resolution"])
  # DIST_50_56Encoder = RandomDistributedScalarEncoder(enParams["DIST_50_56"]["resolution"])
  # DIST_57_63Encoder = RandomDistributedScalarEncoder(enParams["DIST_57_63"]["resolution"])
  # DIST_64_70Encoder = RandomDistributedScalarEncoder(enParams["DIST_64_70"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + CtEncoder.getWidth() * 2)

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True)

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"])

  classifier = SDRClassifierFactory.create()

  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()
    output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)

    for count, record in enumerate(reader):
      if count >= numRecords:
        break

      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%Y-%m-%d %H:%M:%S")
      # Convert data value string into float.
      Ct = float(record[1])
      ZIP_10467 = float(record[2])
      # ZIP_10462 = float(record[3])
      # ZIP_10475 = float(record[4])
      # ZIP_10466 = float(record[5])
      # ZIP_10469 = float(record[6])
      # DEPT_11 = float(record[7])
      # DEPT_24 = float(record[8])
      # DEPT_41 = float(record[9])
      # DEPT_34 = float(record[10])
      # DEPT_31 = float(record[11])
      # DEPT_60 = float(record[12])
      # AGE_0_9 = float(record[13])
      # AGE_10_19 = float(record[14])
      # AGE_20_29 = float(record[15])
      # AGE_30_39 = float(record[16])
      # AGE_40_49 = float(record[17])
      # AGE_50_59 = float(record[18])
      # AGE_60_69 = float(record[19])
      # AGE_70_79 = float(record[20])
      # AGE_80_89 = float(record[21])
      # AGE_90_99 = float(record[22])
      # DIST_1_7 = float(record[23])
      # DIST_8_14 = float(record[24])
      # DIST_15_21 = float(record[25])
      # DIST_22_28 = float(record[26])
      # DIST_29_35 = float(record[27])
      # DIST_36_42 = float(record[28])
      # DIST_43_49 = float(record[29])
      # DIST_50_56 = float(record[30])
      # DIST_57_63 = float(record[31])
      # DIST_64_70 = float(record[31])

      # To encode, we need to provide zero-filled numpy arrays for the
      # encoders to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      CtBits = numpy.zeros(CtEncoder.getWidth())
      ZIP_10467Bits = numpy.zeros(ZIP_10467Encoder.getWidth())
      # ZIP_10462Bits = numpy.zeros(ZIP_10462Encoder.getWidth())
      # ZIP_10475Bits = numpy.zeros(ZIP_10475Encoder.getWidth())
      # ZIP_10466Bits = numpy.zeros(ZIP_10466Encoder.getWidth())
      # ZIP_10469Bits = numpy.zeros(ZIP_10469Encoder.getWidth())
      # DEPT_11Bits = numpy.zeros(DEPT_11Encoder.getWidth())
      # DEPT_24Bits = numpy.zeros(DEPT_24Encoder.getWidth())
      # DEPT_41Bits = numpy.zeros(DEPT_41Encoder.getWidth())
      # DEPT_34Bits = numpy.zeros(DEPT_34Encoder.getWidth())
      # DEPT_31Bits = numpy.zeros(DEPT_31Encoder.getWidth())
      # DEPT_60Bits = numpy.zeros(DEPT_60Encoder.getWidth())
      # AGE_0_9Bits = numpy.zeros(AGE_0_9Encoder.getWidth())
      # AGE_10_19Bits = numpy.zeros(AGE_10_19Encoder.getWidth())
      # AGE_20_29Bits = numpy.zeros(AGE_20_29Encoder.getWidth())
      # AGE_30_39Bits = numpy.zeros(AGE_30_39Encoder.getWidth())
      # AGE_40_49Bits = numpy.zeros(AGE_40_49Encoder.getWidth())
      # AGE_50_59Bits = numpy.zeros(AGE_50_59Encoder.getWidth())
      # AGE_60_69Bits = numpy.zeros(AGE_60_69Encoder.getWidth())
      # AGE_70_79Bits = numpy.zeros(AGE_70_79Encoder.getWidth())
      # AGE_80_89Bits = numpy.zeros(AGE_80_89Encoder.getWidth())
      # AGE_90_99Bits = numpy.zeros(AGE_90_99Encoder.getWidth())
      # DIST_1_7Bits = numpy.zeros(DIST_1_7Encoder.getWidth())
      # DIST_8_14Bits = numpy.zeros(DIST_8_14Encoder.getWidth())
      # DIST_15_21Bits = numpy.zeros(DIST_15_21Encoder.getWidth())
      # DIST_22_28Bits = numpy.zeros(DIST_22_28Encoder.getWidth())
      # DIST_29_35Bits = numpy.zeros(DIST_29_35Encoder.getWidth())
      # DIST_36_42Bits = numpy.zeros(DIST_36_42Encoder.getWidth())
      # DIST_43_49Bits = numpy.zeros(DIST_43_49Encoder.getWidth())
      # DIST_50_56Bits = numpy.zeros(DIST_50_56Encoder.getWidth())
      # DIST_57_63Bits = numpy.zeros(DIST_57_63Encoder.getWidth())
      # DIST_64_70Bits = numpy.zeros(DIST_64_70Encoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      CtEncoder.encodeIntoArray(Ct, CtBits)
      ZIP_10467Encoder.encodeIntoArray(ZIP_10467, ZIP_10467Bits)
      # ZIP_10462Encoder.encodeIntoArray(ZIP_10462, ZIP_10462Bits)
      # ZIP_10475Encoder.encodeIntoArray(ZIP_10475, ZIP_10475Bits)
      # ZIP_10466Encoder.encodeIntoArray(ZIP_10466, ZIP_10466Bits)
      # ZIP_10469Encoder.encodeIntoArray(ZIP_10469, ZIP_10469Bits)
      # DEPT_11Encoder.encodeIntoArray(DEPT_11, DEPT_11Bits)
      # DEPT_24Encoder.encodeIntoArray(DEPT_24, DEPT_24Bits)
      # DEPT_41Encoder.encodeIntoArray(DEPT_41, DEPT_41Bits)
      # DEPT_34Encoder.encodeIntoArray(DEPT_34, DEPT_34Bits)
      # DEPT_31Encoder.encodeIntoArray(DEPT_31, DEPT_31Bits)
      # DEPT_60Encoder.encodeIntoArray(DEPT_60, DEPT_60Bits)
      # AGE_0_9Encoder.encodeIntoArray(AGE_0_9, AGE_0_9Bits)
      # AGE_10_19Encoder.encodeIntoArray(AGE_10_19, AGE_10_19Bits)
      # AGE_20_29Encoder.encodeIntoArray(AGE_20_29, AGE_20_29Bits)
      # AGE_30_39Encoder.encodeIntoArray(AGE_30_39, AGE_30_39Bits)
      # AGE_40_49Encoder.encodeIntoArray(AGE_40_49, AGE_40_49Bits)
      # AGE_50_59Encoder.encodeIntoArray(AGE_50_59, AGE_50_59Bits)
      # AGE_60_69Encoder.encodeIntoArray(AGE_60_69, AGE_60_69Bits)
      # AGE_70_79Encoder.encodeIntoArray(AGE_70_79, AGE_70_79Bits)
      # AGE_80_89Encoder.encodeIntoArray(AGE_80_89, AGE_80_89Bits)
      # AGE_90_99Encoder.encodeIntoArray(AGE_90_99, AGE_90_99Bits)
      # DIST_1_7Encoder.encodeIntoArray(DIST_1_7, DIST_1_7Bits)
      # DIST_8_14Encoder.encodeIntoArray(DIST_8_14, DIST_8_14Bits)
      # DIST_15_21Encoder.encodeIntoArray(DIST_15_21, DIST_15_21Bits)
      # DIST_22_28Encoder.encodeIntoArray(DIST_22_28, DIST_22_28Bits)
      # DIST_29_35Encoder.encodeIntoArray(DIST_29_35, DIST_29_35Bits)
      # DIST_36_42Encoder.encodeIntoArray(DIST_36_42, DIST_36_42Bits)
      # DIST_43_49Encoder.encodeIntoArray(DIST_43_49, DIST_43_49Bits)
      # DIST_50_56Encoder.encodeIntoArray(DIST_50_56, DIST_50_56Bits)
      # DIST_57_63Encoder.encodeIntoArray(DIST_57_63, DIST_57_63Bits)
      # DIST_64_70Encoder.encodeIntoArray(DIST_64_70, DIST_64_70Bits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, CtBits, ZIP_10467Bits])
      # encoding = numpy.concatenate(
      #   [timeOfDayBits, weekendBits, CtBits,
      #    ZIP_10467Bits, ZIP_10462Bits, ZIP_10475Bits, ZIP_10466Bits,
      #    ZIP_10469Bits, DEPT_11Bits, DEPT_24Bits, DEPT_41Bits, DEPT_34Bits,
      #    DEPT_31Bits, DEPT_60Bits, AGE_0_9Bits, AGE_10_19Bits, AGE_20_29Bits,
      #    AGE_30_39Bits, AGE_40_49Bits, AGE_50_59Bits, AGE_60_69Bits,
      #    AGE_70_79Bits, AGE_80_89Bits, AGE_90_99Bits, DIST_1_7Bits,
      #    DIST_8_14Bits, DIST_15_21Bits, DIST_22_28Bits, DIST_29_35Bits,
      #    DIST_36_42Bits, DIST_43_49Bits, DIST_50_56Bits, DIST_57_63Bits,
      #    DIST_64_70Bits])

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)
      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = CtEncoder.getBucketIndices(Ct)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": Ct
        },
        learn=True,
        infer=True)

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True)[0]
      # print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      # results.append([oneStep, oneStepConfidence * 100, None, None])
      results.append([record[0], Ct, oneStep, oneStepConfidence * 100])
      output.write(record[0], Ct, oneStep, oneStepConfidence * 100)

    output.close()
    return results
#   maxVal=100.0
# )
# print(params)

########### encoder classes ###################
# Generating the encoder classes.
# Two inputs separated by less than the 'resolution' will have the same
# encoder output.
scalar_encoder = RandomDistributedScalarEncoder(resolution=0.03)
time_encoder = RandomDistributedScalarEncoder(resolution=0.001)
# 7 = how many bits represent one input.
# 0.25 = radius: if two inputs differ by more than the radius, their
# encodings won't overlap.
bits_scalar = np.zeros(scalar_encoder.getWidth())
bits_time = np.zeros(time_encoder.getWidth())
SIZE_ENCODER_ = np.size(bits_time) + np.size(bits_scalar)

############ SPATIAL POOLER #####################
N_COLUMNS = 2048
sp = SpatialPooler(
    inputDimensions=(SIZE_ENCODER_,),
    # In this case we will use 2048 mini-columns distributed in a
    # "linear array" ...
    columnDimensions=(N_COLUMNS,),
    # I set the potential radius of each mini-column as the whole ...
    potentialRadius=SIZE_ENCODER_,
class NumentaTMLowLevelDetector(AnomalyDetector):
  """The 'numentaTM' detector, but not using the CLAModel or network API."""

  def __init__(self, *args, **kwargs):
    super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.sp = None
    self.spOutput = None
    self.tm = None
    self.anomalyLikelihood = None

    # Set this to False if you want to get results based on raw scores
    # without using AnomalyLikelihood. This will give worse results, but is
    # useful for checking the efficacy of AnomalyLikelihood. You will need
    # to re-optimize the thresholds when running with this setting.
    self.useLikelihood = True

  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]

  def initialize(self):
    # Initialize the RDSE with a resolution; calculated from the data min and
    # max, the resolution is specific to the data stream.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49,))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
      "activationThreshold": 20,
      "cellsPerColumn": 32,
      "columnDimensions": (2048,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    })

    if self.useLikelihood:
      learningPeriod = math.floor(self.probationaryPeriod / 2.0)
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100
      )

  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore)."""
    # Encode the input data record
    self.valueEncoder.encodeIntoArray(inputData["value"], self.encodedValue)
    self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                          self.encodedTimestamp)

    # Run the encoded data through the spatial pooler
    self.sp.compute(np.concatenate((self.encodedTimestamp,
                                    self.encodedValue,)),
                    True, self.spOutput)

    # At the current state, the set of the region's active columns and the
    # set of columns that have previously-predicted cells are used to
    # calculate the raw anomaly score.
    activeColumns = set(self.spOutput.nonzero()[0].tolist())
    prevPredictedColumns = set(self.tm.columnForCell(cell)
                               for cell in self.tm.getPredictiveCells())
    rawScore = (len(activeColumns - prevPredictedColumns) /
                float(len(activeColumns)))

    self.tm.compute(activeColumns)

    if self.useLikelihood:
      # Compute the log-likelihood score
      anomalyScore = self.anomalyLikelihood.anomalyProbability(
        inputData["value"], rawScore, inputData["timestamp"])
      logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
      return (logScore, rawScore)

    return (rawScore, rawScore)
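# Worked example (illustrative numbers) of the raw anomaly score computed in
# handleRecord() above: the fraction of currently-active columns that were
# not predicted on the previous timestep.
activeColumns = set(range(40))          # 40 active columns, hypothetical
prevPredictedColumns = set(range(30))   # 30 of them were predicted
rawScore = (len(activeColumns - prevPredictedColumns) /
            float(len(activeColumns)))  # 10 / 40 = 0.25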
class BaseNetwork(object):

  def __init__(self, inputMin=None, inputMax=None, runSanity=False):
    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None

  def initialize(self):
    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)

  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue, 'label': label})

  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)

  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution,
                 (maxVal - minVal) / numBuckets)

  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]

  def getSpOutputNZ(self):
    return self.spOutputNZ

  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()

  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()

  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells

  def getRawAnomalyScore(self):
    return self.anomalyScore
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
  """The 'numenta' detector, with the following changes:

  - Use pure Temporal Memory, not the classic TP that uses backtracking.
  - Don't spatial pool the timestamp. Pass it in as distal input.
  - 1 cell per column.
  - Use w=41 in the scalar encoding, rather than w=21, to make up for the
    lost timestamp input to the spatial pooler.
  """

  def __init__(self, *args, **kwargs):
    super(DistalTimestamps1CellPerColumnDetector, self).__init__(*args,
                                                                 **kwargs)
    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.activeExternalCells = []
    self.prevActiveExternalCells = []
    self.sp = None
    self.spOutput = None
    self.etm = None
    self.anomalyLikelihood = None

  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]

  def initialize(self):
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                       w=41,
                                                       seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49,))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = self.valueEncoder.getWidth()

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.etm = ExtendedTemporalMemory(**{
      "activationThreshold": 13,
      "cellsPerColumn": 1,
      "columnDimensions": (2048,),
      "basalInputDimensions": (self.timestampEncoder.getWidth(),),
      "initialPermanence": 0.21,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 32,
      "minThreshold": 10,
      "maxNewSynapseCount": 20,
      "permanenceDecrement": 0.1,
      "permanenceIncrement": 0.1,
      "seed": 1960,
      "checkInputs": False,
    })

    learningPeriod = math.floor(self.probationaryPeriod / 2.0)
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
      claLearningPeriod=learningPeriod,
      estimationSamples=self.probationaryPeriod - learningPeriod,
      reestimationPeriod=100
    )

  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore)."""
    self.valueEncoder.encodeIntoArray(inputData["value"], self.encodedValue)
    self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                          self.encodedTimestamp)

    self.prevActiveExternalCells = self.activeExternalCells
    self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

    self.sp.compute(self.encodedValue, True, self.spOutput)

    activeColumns = self.spOutput.nonzero()[0]
    activeColumnsSet = set(activeColumns.tolist())
    prevPredictedColumns = set(self.etm.columnForCell(cell)
                               for cell in self.etm.getPredictiveCells())

    rawScore = (len(activeColumnsSet - prevPredictedColumns) /
                float(len(activeColumns)))
    anomalyScore = self.anomalyLikelihood.anomalyProbability(
      inputData["value"], rawScore, inputData["timestamp"])
    logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

    self.etm.compute(
      activeColumns,
      activeCellsExternalBasal=self.activeExternalCells,
      reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
      growthCandidatesExternalBasal=self.prevActiveExternalCells)

    return (logScore, rawScore)
class BaseNetwork(object):

  def __init__(self, inputMin=None, inputMax=None, runSanity=False):
    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None

  def initialize(self):
    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 5.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)

  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue, 'label': label})

  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)

  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution,
                 (maxVal - minVal) / numBuckets)

  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]

  def getSpOutputNZ(self):
    return self.spOutputNZ

  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()

  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()

  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells

  def getRawAnomalyScore(self):
    return self.anomalyScore
def go():
  valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
  timestampEncoder = DateEncoder(timeOfDay=(21, 9.49,))

  inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

  sp = SpatialPooler(**{
    "globalInhibition": True,
    "columnDimensions": [2048],
    "inputDimensions": [inputWidth],
    "potentialRadius": inputWidth,
    "numActiveColumnsPerInhArea": 40,
    "seed": 1956,
    "potentialPct": 0.8,
    "boostStrength": 0.0,
    "synPermActiveInc": 0.003,
    "synPermConnected": 0.2,
    "synPermInactiveDec": 0.0005,
  })

  tm = TemporalMemory(**{
    "activationThreshold": 20,
    "cellsPerColumn": 32,
    "columnDimensions": (2048,),
    "initialPermanence": 0.24,
    "maxSegmentsPerCell": 128,
    "maxSynapsesPerSegment": 128,
    "minThreshold": 13,
    "maxNewSynapseCount": 31,
    "permanenceDecrement": 0.008,
    "permanenceIncrement": 0.04,
    "seed": 1961,
  })

  inputPath = os.path.join(os.path.dirname(__file__),
                           "data/rec-center-hourly.csv")
  inputFile = open(inputPath, "rb")
  csvReader = csv.reader(inputFile)
  csvReader.next()
  csvReader.next()
  csvReader.next()

  encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
  encodedTimestamp = np.zeros(timestampEncoder.getWidth(), dtype=np.uint32)
  spOutput = np.zeros(2048, dtype=np.float32)

  sanityInstance = sanity.SPTMInstance(sp, tm)

  for timestampStr, consumptionStr in csvReader:
    sanityInstance.waitForUserContinue()

    timestamp = datetime.datetime.strptime(timestampStr, "%m/%d/%y %H:%M")
    consumption = float(consumptionStr)

    timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
    valueEncoder.encodeIntoArray(consumption, encodedValue)

    sensoryInput = np.concatenate((encodedTimestamp, encodedValue,))
    sp.compute(sensoryInput, True, spOutput)
    activeColumns = np.flatnonzero(spOutput)

    predictedCells = tm.getPredictiveCells()
    tm.compute(activeColumns)

    activeInputBits = np.flatnonzero(sensoryInput)
    displayText = {"timestamp": timestampStr, "consumption": consumptionStr}
    sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                  predictedCells, displayText)
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True
  )

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()

  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three header rows of the NuPIC-formatted CSV.
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):
      if count >= numRecords:
        break

      # Convert the date string into a Python datetime object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert the data value string into a float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the
      # encoders to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits])

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)
      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
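The example above keeps only the single most likely prediction. classifierResult maps each step count (here, 1) to a likelihood distribution over buckets, and classifierResult["actualValues"] maps each bucket back to a representative scalar, so extracting the top few candidates is a small generalization of the sort already used. A minimal sketch follows; the name topPredictions is illustrative only.

def topPredictions(classifierResult, n=3):
  """Return the n most likely (value, confidence) pairs, best first."""
  ranked = sorted(zip(classifierResult[1], classifierResult["actualValues"]),
                  reverse=True)
  return [(value, confidence) for confidence, value in ranked[:n]]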
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])
  scalarEncoder2 = RandomDistributedScalarEncoder(
    enParams["consumption2"]["resolution"])

  encodingWidth = scalarEncoder.getWidth() + scalarEncoder2.getWidth()

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True)

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"])

  classifier = SDRClassifierFactory.create()

  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three header rows of the NuPIC-formatted CSV.
    headers = reader.next()
    reader.next()
    reader.next()

    output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)

    for count, record in enumerate(reader):
      if count >= numRecords:
        break

      # Convert the two data value strings into floats.
      prediction = float(record[1])
      prediction2 = float(record[2])

      # To encode, we need to provide zero-filled numpy arrays for the
      # encoders to populate.
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())
      consumptionBits2 = numpy.zeros(scalarEncoder2.getWidth())

      # Now we call the encoders to create bit representations for each value.
      scalarEncoder.encodeIntoArray(prediction, consumptionBits)
      scalarEncoder2.encodeIntoArray(prediction2, consumptionBits2)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate([consumptionBits, consumptionBits2])

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)
      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(prediction)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": prediction
        },
        learn=True,
        infer=True)

      # Keep the best prediction for 1 step out and write it to the output
      # file along with the timestamp and actual value.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True)[0]

      results.append(
        [record[0], prediction, oneStep, oneStepConfidence * 100])
      output.write(record[0], prediction, oneStep, oneStepConfidence * 100)

    output.close()
    return results
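output_anomaly_generic_v1.NuPICFileOutput is a local helper that is not shown here. A minimal stand-in, assuming it simply writes one CSV row per timestep, might look like the sketch below; the class name SimpleFileOutput and the output filename scheme are illustrative, not the real implementation.

import csv

class SimpleFileOutput(object):
  """Writes (timestamp, actual, predicted, confidence) rows to a CSV file."""

  def __init__(self, name):
    # "wb" for the Python 2 csv module, matching the rest of this code.
    self._file = open("%s_out.csv" % name, "wb")
    self._writer = csv.writer(self._file)
    self._writer.writerow(["timestamp", "actual", "predicted", "confidence"])

  def write(self, timestamp, actual, predicted, confidence):
    self._writer.writerow([timestamp, actual, predicted, confidence])

  def close(self):
    self._file.close()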