class DendriteDetector(AnomalyDetector):
    """Anomaly detector built from two encoders, a SpatialPooler and a
    TemporalMemory, scored with AnomalyLikelihood.

    Each record's value and timestamp are encoded, spatially pooled and fed to
    the temporal memory; the raw anomaly score is then turned into a
    log-likelihood score.  A simple min/max range check on the raw value can
    override the final score with 1.0 (a "spatial anomaly").
    """

    def initialize(self):
        """Create the encoders, SP, TM and anomaly likelihood object.

        Reads self.inputMin, self.inputMax and self.probationaryPeriod, none of
        which are assigned in this class (presumably provided by
        AnomalyDetector -- confirm).
        """
        # Keep track of value range for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None

        # Time of day encoder
        self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49), name='time_enc')
        # RDSE encoder for the time series value.
        # The resolution is derived from the input range padded by 20% on each
        # side and split into numBuckets, but never drops below minResolution.
        minResolution = 0.001
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = self.inputMax + rangePadding
        numBuckets = 130
        resolution = max(minResolution, (maxVal - minVal) / numBuckets)
        self.value_enc = RandomDistributedScalarEncoder(resolution=resolution,
                                                        name='value_rdse')

        # Spatial Pooler.
        # Its input is the concatenation of both encodings (see handleRecord).
        encodingWidth = self.timeOfDayEncoder.getWidth(
        ) + self.value_enc.getWidth()
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(2048, ),
            potentialPct=0.8,
            potentialRadius=encodingWidth,
            globalInhibition=1,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956,
            wrapAround=True,
        )

        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=20,
            initialPermanence=.5,  # Increased to connectedPermanence.
            connectedPermanence=.5,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            predictedSegmentDecrement=0.001,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=
            128,  # Changed meaning. Also see connections.topology[2]
            seed=1993,
        )

        # Initialize the anomaly likelihood object
        # Half of the probationary period is used for learning, the rest for
        # estimation samples.
        numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numentaLearningPeriod,
            estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
            reestimationPeriod=100,
        )

        # Number of records handled so far; incremented at the end of
        # handleRecord and used only by the debug plotting code there.
        self.age = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def handleRecord(self, inputData):
        """
        Argument inputData is {"value": instantaneous_value, "timestamp": pandas.Timestamp}
        Returns a tuple (anomalyScore, rawScore).

        Internally to NuPIC "anomalyScore" corresponds to "likelihood_score"
        and "rawScore" corresponds to "anomaly_score". Sorry about that.
        """
        # Check for spatial anomalies and update min/max values.
        value = inputData["value"]
        spatialAnomaly = False
        # minVal and maxVal are both None before the first record and equal
        # after it, so this check only runs once two distinct values were seen.
        if self.minVal != self.maxVal:
            tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
            maxExpected = self.maxVal + tolerance
            minExpected = self.minVal - tolerance
            if value > maxExpected or value < minExpected:
                spatialAnomaly = True
        if self.maxVal is None or value > self.maxVal:
            self.maxVal = value
        if self.minVal is None or value < self.minVal:
            self.minVal = value

        # Run the HTM stack.  First Encoders.
        timestamp = inputData["timestamp"]
        timeOfDayBits = np.zeros(self.timeOfDayEncoder.getWidth())
        self.timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
        valueBits = np.zeros(self.value_enc.getWidth())
        self.value_enc.encodeIntoArray(value, valueBits)
        encoding = np.concatenate([timeOfDayBits, valueBits])
        # Spatial Pooler.
        activeColumns = np.zeros(self.sp.getNumColumns())
        self.sp.compute(encoding, True, activeColumns)
        activeColumnIndices = np.nonzero(activeColumns)[0]

        # Temporal Memory and Anomaly.
        # The predictive cells are read before tm.compute() is called for this
        # record, and the raw score compares them with the new active columns.
        predictions = self.tm.getPredictiveCells()
        predictedColumns = list(self.tm.mapCellsToColumns(predictions).keys())
        self.tm.compute(activeColumnIndices, learn=True)
        activeCells = self.tm.getActiveCells()
        rawScore = anomaly.computeRawAnomalyScore(activeColumnIndices,
                                                  predictedColumns)

        # Compute log(anomaly likelihood)
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            inputData["value"], rawScore, inputData["timestamp"])
        finalScore = logScore = self.anomalyLikelihood.computeLogLikelihood(
            anomalyScore)

        # A value outside the tolerated range overrides the likelihood score.
        if spatialAnomaly:
            finalScore = 1.0

        # Debug-only plotting; disabled by the literal `if False`.
        if False:
            # Plot correlation of excitement versus compartmentalization.
            if self.age == 0:
                print("Correlation Plots ENABLED.")
            # Sampling window, expressed in records handled (self.age).
            if False:
                start_age = 1000
                end_age = 1800
            else:
                start_age = 4000
                end_age = 7260
            if self.age == start_age:
                import correlation
                import random
                self.cor_samplers = []
                sampled_cells = []
                # Pick distinct random cells until 20 samplers exist; cells
                # without any root are drawn but get no sampler.
                while len(self.cor_samplers) < 20:
                    n = random.choice(xrange(self.tm.numberOfCells()))
                    if n in sampled_cells:
                        continue
                    else:
                        sampled_cells.append(n)
                    neuron = self.tm.connections.dataForCell(n)
                    if neuron._roots:
                        c = correlation.CorrelationSampler(neuron._roots[0])
                        c.random_sample_points(100)
                        self.cor_samplers.append(c)
                print("Created %d Correlation Samplers" % len(self.cor_samplers))
            if self.age >= start_age:
                for smplr in self.cor_samplers:
                    smplr.sample()
            if self.age == end_age:
                import matplotlib.pyplot as plt
                for idx, smplr in enumerate(self.cor_samplers):
                    if smplr.num_samples == 0:
                        print("No samples, plot not shown.")
                        continue
                    plt.figure("Sample %d" % idx)
                    smplr.plot(period=64)  # Different value!
                plt.show()

        # Debug-only plotting; disabled by the literal `if False`.
        if False:
            # Plot excitement of a typical detection on a dendrite.
            if self.age == 7265:
                #if self.age == 1800:
                import matplotlib.pyplot as plt
                import random
                from connections import SYN_CONNECTED_ACTIVE
                sampled_cells = set()
                for figure_num in xrange(40):
                    plt.figure("(%d)" % figure_num)
                    # Find an active cell to view.
                    cell = None
                    for attempt in range(100):
                        event = random.choice(self.tm.activeEvents)
                        cell = event.cell  # This is an integer.
                        if cell is not None and cell not in sampled_cells:
                            break
                    else:
                        # No unseen cell found in 100 attempts: stop plotting.
                        break
                    sampled_cells.add(cell)
                    cell = self.tm.connections.dataForCell(cell)
                    # Organize the data.
                    # Walk from the first root along one randomly chosen child
                    # per branch, concatenating per-synapse data on the way.
                    EPSPs = []
                    excitement = []
                    distance_to_root = 0
                    segment_offsets = {}
                    branch = cell._roots[0]
                    while True:
                        segment_offsets[branch] = distance_to_root
                        distance_to_root += len(branch._synapses)
                        excitement.extend(branch.excitement)
                        for syn in branch._synapses:
                            if syn is None:
                                EPSPs.append(0)
                            else:
                                EPSPs.append(syn.state == SYN_CONNECTED_ACTIVE)
                        if branch.children:
                            branch = random.choice(branch.children)
                        else:
                            break
                    plt.plot(
                        np.arange(distance_to_root),
                        EPSPs,
                        'r',
                        np.arange(distance_to_root),
                        excitement,
                        'b',
                    )
                    plt.title(
                        "Dendrite Activation\n Horizontal line is activation threshold, Vertical lines are segment bifurcations"
                    )
                    plt.xlabel("Distance along Dendrite", )
                    plt.ylabel("EPSPs are Red, Excitement is Blue")
                    # Show lines where the excitement crosses thresholds.
                    plt.axhline(20, color='k')  # Hard coded parameter value.
                    for offset in segment_offsets.values():
                        if offset != 0:
                            plt.axvline(offset, color='k')
                print("\nShowing %d excitement plots." % len(sampled_cells))
                plt.show()

        self.age += 1
        return (finalScore, rawScore)
def runHotgym(numRecords):
    """Run the input CSV through encoders, SP, TM and an SDR classifier.

    Model parameters are read from the YAML file at ``_PARAMS_PATH`` and the
    data from the CSV file at ``_INPUT_FILE_PATH``; the first three CSV rows
    are skipped.  Column 0 of each row is parsed as a ``%m/%d/%y %H:%M``
    timestamp and column 1 as a float.

    :param numRecords: maximum number of data rows to process.
    :returns: list with one ``[oneStep, oneStepConfidence * 100, None, None]``
        entry per processed row.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])

    encodingWidth = (timeOfDayEncoder.getWidth()
                     + weekendEncoder.getWidth()
                     + scalarEncoder.getWidth())

    # NOTE(review): potentialRadius is not passed, so the SpatialPooler default
    # applies -- confirm that this is intended.
    sp = SpatialPooler(
        # How large the input encoding will be.  Passed as a 1-tuple: a bare
        # "(encodingWidth)" is just a parenthesised scalar, not a dimension
        # tuple.
        inputDimensions=(encodingWidth,),
        # How many mini-columns will be in the Spatial Pooler.
        columnDimensions=(spParams["columnCount"],),
        # What percent of the columns's receptive field is available for
        # potential synapses?
        potentialPct=spParams["potentialPct"],
        # This means that the input space has no topology.
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        # Roughly 2%, giving that there is only one inhibition area because we
        # have turned on globalInhibition (40 / 2048 = 0.0195)
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        # How quickly synapses grow and degrade.
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        # boostStrength controls the strength of boosting. Boosting encourages
        # efficient usage of SP columns.
        boostStrength=spParams["boostStrength"],
        # Random number generator seed.
        seed=spParams["seed"],
        # TODO: is this useful?
        # Determines if inputs at the beginning and end of an input dimension
        # should be considered neighbors when mapping columns to inputs.
        wrapAround=False
    )

    tm = TemporalMemory(
        # Must be the same dimensions as the SP
        columnDimensions=(tmParams["columnCount"],),
        # How many cells in each mini-column.
        cellsPerColumn=tmParams["cellsPerColumn"],
        # A segment is active if it has >= activationThreshold connected
        # synapses that are active due to infActiveState
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # TODO: This comes from the SP params, is this normal
        connectedPermanence=spParams["synPermConnected"],
        # Minimum number of active synapses for a segment to be considered
        # during search for the best-matching segments.
        minThreshold=tmParams["minThreshold"],
        # The max number of synapses added to a segment during learning
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"]
    )

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Skip the three leading non-data rows.  The builtin next() is used
        # instead of the Python-2-only reader.next() method.
        next(reader)
        next(reader)
        next(reader)

        for count, record in enumerate(reader):

            if count >= numRecords:
                break

            # Convert data string into Python date object.
            dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
            # Convert data value string into float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for the
            # encoders to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())

            # Now we call the encoders to create bit representations for each
            # value.
            timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
            weekendEncoder.encodeIntoArray(dateString, weekendBits)
            scalarEncoder.encodeIntoArray(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for
            # Spatial Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, consumptionBits]
            )

            # Create an array to represent active columns, all initially zero.
            # This will be populated by the compute method below. It must have
            # the same dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": consumption
                },
                learn=True,
                infer=True
            )

            # Print the best prediction for 1 step out.
            oneStepConfidence, oneStep = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True
            )[0]
            print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
            results.append([oneStep, oneStepConfidence * 100, None, None])

        return results
def runHotgym():
    """Run the input CSV through encoders, SP, TM and an SDR classifier.

    All model parameters are hard-coded.  Data is read from the CSV file at
    ``_INPUT_FILE_PATH``; the first three rows are skipped.  Column 0 of each
    row is parsed as a ``%m/%d/%y %H:%M`` timestamp and column 1 as a float.
    For every row the best 1-step prediction and its probability are printed.
    Nothing is returned.
    """
    # Number of mini-columns, shared by the SP, the TM and the SP output array.
    columnCount = 2048

    timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1))
    weekendEncoder = DateEncoder(weekend=21)
    scalarEncoder = RandomDistributedScalarEncoder(0.88)

    encodingWidth = timeOfDayEncoder.getWidth() \
        + weekendEncoder.getWidth() \
        + scalarEncoder.getWidth()

    # NOTE(review): potentialRadius is not passed, so the SpatialPooler default
    # applies -- confirm that this is intended.
    sp = SpatialPooler(
        # How large the input encoding will be.  Passed as a 1-tuple: a bare
        # "(encodingWidth)" is just a parenthesised scalar, not a dimension
        # tuple.
        inputDimensions=(encodingWidth,),
        # How many mini-columns will be in the Spatial Pooler.
        columnDimensions=(columnCount,),
        # What percent of the columns's receptive field is available for
        # potential synapses?
        potentialPct=0.85,
        # This means that the input space has no topology.
        globalInhibition=True,
        localAreaDensity=-1.0,
        # Roughly 2%, giving that there is only one inhibition area because we
        # have turned on globalInhibition (40 / 2048 = 0.0195)
        numActiveColumnsPerInhArea=40.0,
        # How quickly synapses grow and degrade.
        synPermInactiveDec=0.005,
        synPermActiveInc=0.04,
        synPermConnected=0.1,
        # boostStrength controls the strength of boosting. Boosting encourages
        # efficient usage of SP columns.
        boostStrength=3.0,
        # Random number generator seed.
        seed=1956,
        # Determines if inputs at the beginning and end of an input dimension
        # should be considered neighbors when mapping columns to inputs.
        wrapAround=False
    )

    tm = TemporalMemory(
        # Must be the same dimensions as the SP
        columnDimensions=(columnCount, ),
        # How many cells in each mini-column.
        cellsPerColumn=32,
        # A segment is active if it has >= activationThreshold connected
        # synapses that are active due to infActiveState
        activationThreshold=16,
        initialPermanence=0.21,
        connectedPermanence=0.5,
        # Minimum number of active synapses for a segment to be considered
        # during search for the best-matching segments.
        minThreshold=12,
        # The max number of synapses added to a segment during learning
        maxNewSynapseCount=20,
        permanenceIncrement=0.1,
        permanenceDecrement=0.1,
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=128,
        maxSynapsesPerSegment=32,
        seed=1960
    )

    classifier = SDRClassifierFactory.create()

    with open(_INPUT_FILE_PATH) as fin:
        reader = csv.reader(fin)
        # Skip the three leading non-data rows.  The builtin next() is used
        # instead of the Python-2-only reader.next() method.
        next(reader)
        next(reader)
        next(reader)

        for count, record in enumerate(reader):
            # Convert data string into Python date object.
            dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
            # Convert data value string into float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for the
            # encoders to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())

            # Now we call the encoders to create bit representations for each
            # value.
            timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
            weekendEncoder.encodeIntoArray(dateString, weekendBits)
            scalarEncoder.encodeIntoArray(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for
            # Spatial Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, consumptionBits]
            )

            # Create an array to represent active columns, all initially zero.
            # This will be populated by the compute method below. It must have
            # the same dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(columnCount)

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": consumption
                },
                learn=True,
                infer=True
            )

            # Print the best prediction for 1 step out.
            probability, value = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True
            )[0]
            print("1-step: {:16} ({:4.4}%)".format(value, probability * 100))
def runHotgym(numRecords):
    """Feed the input CSV through encoders, SP, TM and an SDR classifier.

    Parameters come from the YAML file at ``_PARAMS_PATH``; data comes from
    the CSV file at ``_INPUT_FILE_PATH``, whose first three rows are skipped.

    :param numRecords: maximum number of data rows to process.
    :returns: list with one ``[oneStep, oneStepConfidence * 100, None, None]``
        entry per processed row.
    """
    with open(_PARAMS_PATH, "r") as paramsFile:
        params = yaml.safe_load(paramsFile)["modelParams"]
    encoderCfg = params["sensorParams"]["encoders"]
    spCfg = params["spParams"]
    tmCfg = params["tmParams"]

    # One encoder per input field: time of day, weekend flag, consumption.
    todEncoder = DateEncoder(
        timeOfDay=encoderCfg["timestamp_timeOfDay"]["timeOfDay"])
    wkndEncoder = DateEncoder(
        weekend=encoderCfg["timestamp_weekend"]["weekend"])
    valueEncoder = RandomDistributedScalarEncoder(
        encoderCfg["consumption"]["resolution"])

    # The SP input is the concatenation of the three encodings.
    inputWidth = sum(
        enc.getWidth() for enc in (todEncoder, wkndEncoder, valueEncoder))

    spArgs = {
        "inputDimensions": (inputWidth,),
        "columnDimensions": (spCfg["columnCount"],),
        "potentialPct": spCfg["potentialPct"],
        "potentialRadius": inputWidth,
        "globalInhibition": spCfg["globalInhibition"],
        "localAreaDensity": spCfg["localAreaDensity"],
        "numActiveColumnsPerInhArea": spCfg["numActiveColumnsPerInhArea"],
        "synPermInactiveDec": spCfg["synPermInactiveDec"],
        "synPermActiveInc": spCfg["synPermActiveInc"],
        "synPermConnected": spCfg["synPermConnected"],
        "boostStrength": spCfg["boostStrength"],
        "seed": spCfg["seed"],
        "wrapAround": True,
    }
    spatialPooler = SpatialPooler(**spArgs)

    tmArgs = {
        "columnDimensions": (tmCfg["columnCount"],),
        "cellsPerColumn": tmCfg["cellsPerColumn"],
        "activationThreshold": tmCfg["activationThreshold"],
        "initialPermanence": tmCfg["initialPerm"],
        # The connected threshold is taken from the SP parameters.
        "connectedPermanence": spCfg["synPermConnected"],
        "minThreshold": tmCfg["minThreshold"],
        "maxNewSynapseCount": tmCfg["newSynapseCount"],
        "permanenceIncrement": tmCfg["permanenceInc"],
        "permanenceDecrement": tmCfg["permanenceDec"],
        "predictedSegmentDecrement": 0.0,
        "maxSegmentsPerCell": tmCfg["maxSegmentsPerCell"],
        "maxSynapsesPerSegment": tmCfg["maxSynapsesPerSegment"],
        "seed": tmCfg["seed"],
    }
    temporalMemory = TemporalMemory(**tmArgs)

    sdrClassifier = SDRClassifierFactory.create()
    predictions = []

    with open(_INPUT_FILE_PATH, "r") as csvFile:
        rows = csv.reader(csvFile)
        # Discard the three leading non-data rows.
        for _ in range(3):
            rows.next()

        for rowIdx, row in enumerate(rows):
            if rowIdx >= numRecords:
                break

            # Column 0 is the timestamp, column 1 the consumption value.
            when = datetime.datetime.strptime(row[0], "%m/%d/%y %H:%M")
            consumed = float(row[1])

            # Each encoder fills its own zero-initialised array; the arrays
            # are then joined in the fixed order time-of-day, weekend, value.
            sdrParts = []
            for enc, fieldValue in ((todEncoder, when),
                                    (wkndEncoder, when),
                                    (valueEncoder, consumed)):
                bits = numpy.zeros(enc.getWidth())
                enc.encodeIntoArray(fieldValue, bits)
                sdrParts.append(bits)
            sdr = numpy.concatenate(sdrParts)

            # Spatial pooling writes the active columns into `columns`.
            columns = numpy.zeros(spCfg["columnCount"])
            spatialPooler.compute(sdr, True, columns)
            activeIdx = numpy.nonzero(columns)[0]

            # Temporal memory over the active mini-columns.
            temporalMemory.compute(activeIdx, learn=True)
            cells = temporalMemory.getActiveCells()

            # Classify the active cells back to a scalar value.
            bucket = valueEncoder.getBucketIndices(consumed)[0]
            inference = sdrClassifier.compute(
                recordNum=rowIdx,
                patternNZ=cells,
                classification={"bucketIdx": bucket, "actValue": consumed},
                learn=True,
                infer=True
            )

            # Highest-probability 1-step prediction first.
            ranked = sorted(
                zip(inference[1], inference["actualValues"]), reverse=True)
            confidence, bestValue = ranked[0]
            print("1-step: {:16} ({:4.4}%)".format(bestValue, confidence * 100))
            predictions.append([bestValue, confidence * 100, None, None])

    return predictions
class NumentaTMLowLevelDetector(AnomalyDetector):
    """The 'numentaTM' detector, but not using the CLAModel or network API.

    The value and timestamp of each record are encoded, spatially pooled and
    fed to a TemporalMemory; the fraction of active columns that were not
    predicted is the raw anomaly score.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to AnomalyDetector and reset model state.

        The encoders, SP, TM and likelihood object are created later by
        initialize().
        """
        super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

        self.valueEncoder = None
        self.encodedValue = None
        self.timestampEncoder = None
        self.encodedTimestamp = None
        self.sp = None
        self.spOutput = None
        self.tm = None
        self.anomalyLikelihood = None

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood. This will give worse results, but
        # useful for checking the efficacy of AnomalyLikelihood. You will need
        # to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        """Create the encoders, SP, TM and (optionally) anomaly likelihood.

        Reads self.inputMin, self.inputMax and self.probationaryPeriod, none of
        which are assigned in this class (presumably provided by
        AnomalyDetector -- confirm).
        """
        # Initialize the RDSE with a resolution; calculated from the data min
        # and max, the resolution is specific to the data stream.
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        # A constant stream (min == max) gets an artificial range of 1.
        maxVal = (self.inputMax + rangePadding
                  if self.inputMin != self.inputMax
                  else self.inputMin + 1)
        numBuckets = 130.0
        resolution = max(0.001, (maxVal - minVal) / numBuckets)
        self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
        self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                     dtype=np.uint32)

        # Initialize the timestamp encoder
        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49, ))
        self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                         dtype=np.uint32)

        inputWidth = (self.timestampEncoder.getWidth() +
                      self.valueEncoder.getWidth())

        self.sp = SpatialPooler(**{
            "globalInhibition": True,
            "columnDimensions": [2048],
            "inputDimensions": [inputWidth],
            "potentialRadius": inputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        })
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.tm = TemporalMemory(**{
            "activationThreshold": 20,
            "cellsPerColumn": 32,
            "columnDimensions": (2048,),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1960,
        })

        if self.useLikelihood:
            # math.floor returns a float on Python 2; convert so that both
            # record counts handed to AnomalyLikelihood are integers.
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
                claLearningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=100
            )

    def handleRecord(self, inputData):
        """Returns a tuple (anomalyScore, rawScore).

        inputData is indexed with "value" and "timestamp".  When
        self.useLikelihood is False the raw score is returned in both
        positions.
        """
        # Encode the input data record
        self.valueEncoder.encodeIntoArray(
            inputData["value"], self.encodedValue)
        self.timestampEncoder.encodeIntoArray(
            inputData["timestamp"], self.encodedTimestamp)

        # Run the encoded data through the spatial pooler
        self.sp.compute(np.concatenate((self.encodedTimestamp,
                                        self.encodedValue,)),
                        True, self.spOutput)

        # At the current state, the set of the region's active columns and the
        # set of columns that have previously-predicted cells are used to
        # calculate the raw anomaly score.
        activeColumns = set(self.spOutput.nonzero()[0].tolist())
        prevPredictedColumns = set(self.tm.columnForCell(cell)
                                   for cell in self.tm.getPredictiveCells())
        if activeColumns:
            rawScore = (len(activeColumns - prevPredictedColumns) /
                        float(len(activeColumns)))
        else:
            # No active columns: score 0.0 instead of dividing by zero.
            rawScore = 0.0

        self.tm.compute(activeColumns)

        if self.useLikelihood:
            # Compute the log-likelihood score
            anomalyScore = self.anomalyLikelihood.anomalyProbability(
                inputData["value"], rawScore, inputData["timestamp"])
            logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
            return (logScore, rawScore)

        return (rawScore, rawScore)
# Fragment of a per-record loop body: `record`, `prediction`, the encoders,
# `sp` and `COL_WIDTH` are all defined outside the visible code.

# Column 0 is parsed as a timestamp, column 1 as the consumption value.
dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
consumption = float(record[1])

# Store the current `prediction` together with this record's consumption.
# NOTE(review): `prediction` is presumably the forecast made on the previous
# record -- confirm against the enclosing loop.  `(1 == 1)` is always true.
if (prediction is not None) and (1 == 1):
    #print("row: %s %s %s" % (count, consumption, prediction))
    predictions.append(prediction)
    consumptions.append(consumption)

# new empty encodings
timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth(), numpy.uint8)
weekendBits = numpy.zeros(weekendEncoder.getWidth(), numpy.uint8)
consumptionBits = numpy.zeros(consumeEncoder.getWidth(), numpy.uint8)

# fill encodings
timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
weekendEncoder.encodeIntoArray(dateString, weekendBits)
consumeEncoder.encodeIntoArray(consumption, consumptionBits)

# The spatial pooler input is the three encodings joined in this order.
encoding = numpy.concatenate(
    [timeOfDayBits, weekendBits, consumptionBits])

#ONE = {'id': count, 'input': consumption, 'bucket': bucketIdx,
#       'output': consumptionBits.tolist()}
#print ONE
#REZ.append(ONE)

# spatial pooling
# sp.compute fills `activeColumns`; its non-zero positions are the indices of
# the active columns.
activeColumns = numpy.zeros(COL_WIDTH, numpy.int8)
sp.compute(encoding, True, activeColumns)
activeColumnIndices = numpy.nonzero(activeColumns)[0]
def go():
    """Step an SP + TM model through the rec-center CSV under Sanity.

    Reads ``data/rec-center-hourly.csv`` next to this module, skips its first
    three rows, and for every remaining ``timestamp, consumption`` row waits
    for the user to continue, runs the spatial pooler and temporal memory, and
    appends the timestep to the ``sanity.SPTMInstance``.  Nothing is returned.
    """
    valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
    timestampEncoder = DateEncoder(timeOfDay=(21, 9.49, ))

    inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

    sp = SpatialPooler(**{
        "globalInhibition": True,
        "columnDimensions": [2048],
        "inputDimensions": [inputWidth],
        "potentialRadius": inputWidth,
        "numActiveColumnsPerInhArea": 40,
        "seed": 1956,
        "potentialPct": 0.8,
        "boostStrength": 0.0,
        "synPermActiveInc": 0.003,
        "synPermConnected": 0.2,
        "synPermInactiveDec": 0.0005,
    })

    tm = TemporalMemory(**{
        "activationThreshold": 20,
        "cellsPerColumn": 32,
        "columnDimensions": (2048, ),
        "initialPermanence": 0.24,
        "maxSegmentsPerCell": 128,
        "maxSynapsesPerSegment": 128,
        "minThreshold": 13,
        "maxNewSynapseCount": 31,
        "permanenceDecrement": 0.008,
        "permanenceIncrement": 0.04,
        "seed": 1961,
    })

    inputPath = os.path.join(os.path.dirname(__file__),
                             "data/rec-center-hourly.csv")

    # The with-block guarantees the file is closed even if a row fails to
    # parse or the Sanity session raises.
    with open(inputPath, "rb") as inputFile:
        csvReader = csv.reader(inputFile)
        # Skip the three leading non-data rows.
        next(csvReader)
        next(csvReader)
        next(csvReader)

        # Buffers reused for every record; the encoders and the SP write
        # their output into them.
        encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
        encodedTimestamp = np.zeros(timestampEncoder.getWidth(),
                                    dtype=np.uint32)
        spOutput = np.zeros(2048, dtype=np.float32)

        sanityInstance = sanity.SPTMInstance(sp, tm)

        for timestampStr, consumptionStr in csvReader:
            sanityInstance.waitForUserContinue()

            timestamp = datetime.datetime.strptime(timestampStr,
                                                   "%m/%d/%y %H:%M")
            consumption = float(consumptionStr)

            timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
            valueEncoder.encodeIntoArray(consumption, encodedValue)

            sensoryInput = np.concatenate((encodedTimestamp, encodedValue, ))
            sp.compute(sensoryInput, True, spOutput)

            activeColumns = np.flatnonzero(spOutput)
            # Read the predictive cells before tm.compute() is called for
            # this record.
            predictedCells = tm.getPredictiveCells()
            tm.compute(activeColumns)

            activeInputBits = np.flatnonzero(sensoryInput)
            displayText = {
                "timestamp": timestampStr,
                "consumption": consumptionStr
            }

            sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                          predictedCells, displayText)
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
    """The 'numenta' detector, with the following changes:

    - Use pure Temporal Memory, not the classic TP that uses backtracking.
    - Don't spatial pool the timestamp. Pass it in as distal input.
    - 1 cell per column.
    - Use w=41 in the scalar encoding, rather than w=21, to make up for the
      lost timestamp input to the spatial pooler.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to AnomalyDetector and reset model state.

        The encoders, SP, ETM and likelihood object are created later by
        initialize().
        """
        super(DistalTimestamps1CellPerColumnDetector,
              self).__init__(*args, **kwargs)

        self.valueEncoder = None
        self.encodedValue = None
        self.timestampEncoder = None
        self.encodedTimestamp = None
        # Active timestamp bits of the current and of the previous record.
        self.activeExternalCells = []
        self.prevActiveExternalCells = []
        self.sp = None
        self.spOutput = None
        self.etm = None
        self.anomalyLikelihood = None

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        """Create the encoders, SP, ETM and anomaly likelihood object.

        Reads self.inputMin, self.inputMax and self.probationaryPeriod, none of
        which are assigned in this class (presumably provided by
        AnomalyDetector -- confirm).
        """
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        # A constant stream (min == max) gets an artificial range of 1.
        maxVal = (self.inputMax + rangePadding
                  if self.inputMin != self.inputMax
                  else self.inputMin + 1)
        numBuckets = 130.0
        resolution = max(0.001, (maxVal - minVal) / numBuckets)
        self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                           w=41,
                                                           seed=42)
        self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                     dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49,))
        self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                         dtype=np.uint32)

        # Only the value encoding is spatially pooled; the timestamp encoding
        # is passed to the ETM as basal input instead.
        inputWidth = self.valueEncoder.getWidth()

        self.sp = SpatialPooler(**{
            "globalInhibition": True,
            "columnDimensions": [2048],
            "inputDimensions": [inputWidth],
            "potentialRadius": inputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        })
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(**{
            "activationThreshold": 13,
            "cellsPerColumn": 1,
            "columnDimensions": (2048,),
            "basalInputDimensions": (self.timestampEncoder.getWidth(),),
            "initialPermanence": 0.21,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 32,
            "minThreshold": 10,
            "maxNewSynapseCount": 20,
            "permanenceDecrement": 0.1,
            "permanenceIncrement": 0.1,
            "seed": 1960,
            "checkInputs": False,
        })

        # math.floor returns a float on Python 2; convert so that both record
        # counts handed to AnomalyLikelihood are integers.
        learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=self.probationaryPeriod - learningPeriod,
            reestimationPeriod=100
        )

    def handleRecord(self, inputData):
        """Returns a tuple (anomalyScore, rawScore).

        inputData is indexed with "value" and "timestamp".
        """
        self.valueEncoder.encodeIntoArray(inputData["value"],
                                          self.encodedValue)

        self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                              self.encodedTimestamp)
        # Keep the previous record's timestamp bits; they are handed to the
        # ETM below as reinforce/growth candidates.
        self.prevActiveExternalCells = self.activeExternalCells
        self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

        self.sp.compute(self.encodedValue, True, self.spOutput)

        activeColumns = self.spOutput.nonzero()[0]
        activeColumnsSet = set(activeColumns.tolist())
        prevPredictedColumns = set(self.etm.columnForCell(cell)
                                   for cell in self.etm.getPredictiveCells())

        if activeColumnsSet:
            rawScore = (len(activeColumnsSet - prevPredictedColumns) /
                        float(len(activeColumns)))
        else:
            # No active columns: score 0.0 instead of dividing by zero.
            rawScore = 0.0
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            inputData["value"], rawScore, inputData["timestamp"])
        logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

        # The ETM is updated only after the score has been computed from its
        # previous predictions.
        self.etm.compute(
            activeColumns,
            activeCellsExternalBasal=self.activeExternalCells,
            reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
            growthCandidatesExternalBasal=self.prevActiveExternalCells)

        return (logScore, rawScore)
def runHotgym(numRecords):
    """Run a two-value CSV through encoders, SP, TM and an SDR classifier.

    Parameters come from the YAML file at ``_PARAMS_PATH``; data comes from
    the CSV file at ``_INPUT_FILE_PATH``, whose first three rows are skipped.
    Column 0 of each row is parsed as a ``%m/%d/%y %H:%M`` timestamp, columns
    1 and 2 as floats.  Every result row is also written through a
    ``NuPICFileOutput`` created for ``_FILE_NAME``.

    :param numRecords: maximum number of data rows to process.
    :returns: list with one ``[record[0], prediction, oneStep,
        oneStepConfidence * 100]`` entry per processed row, where
        ``prediction`` is the float read from column 1.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])
    scalarEncoder2 = RandomDistributedScalarEncoder(
        enParams["consumption2"]["resolution"])

    encodingWidth = (timeOfDayEncoder.getWidth()
                     + weekendEncoder.getWidth()
                     + scalarEncoder.getWidth()
                     + scalarEncoder2.getWidth())

    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True
    )

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The connected threshold is taken from the SP parameters.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"]
    )

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Skip the three leading non-data rows.  The builtin next() is used
        # instead of the Python-2-only reader.next() method.
        next(reader)
        next(reader)
        next(reader)

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)
        # try/finally so the output is closed even when a row fails to parse
        # or the model raises part-way through the file.
        try:
            for count, record in enumerate(reader):

                if count >= numRecords:
                    break

                # Convert data string into Python date object.
                dateString = datetime.datetime.strptime(record[0],
                                                        "%m/%d/%y %H:%M")
                # Convert data value strings into floats.  Despite their
                # names these are the input values read from the CSV.
                prediction = float(record[1])
                prediction2 = float(record[2])

                # To encode, we need to provide zero-filled numpy arrays for
                # the encoders to populate.
                timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
                weekendBits = numpy.zeros(weekendEncoder.getWidth())
                consumptionBits = numpy.zeros(scalarEncoder.getWidth())
                consumptionBits2 = numpy.zeros(scalarEncoder2.getWidth())

                # Now we call the encoders to create bit representations for
                # each value.
                timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
                weekendEncoder.encodeIntoArray(dateString, weekendBits)
                scalarEncoder.encodeIntoArray(prediction, consumptionBits)
                scalarEncoder2.encodeIntoArray(prediction2, consumptionBits2)

                # Concatenate all these encodings into one large encoding for
                # Spatial Pooling.
                encoding = numpy.concatenate(
                    [timeOfDayBits, weekendBits, consumptionBits,
                     consumptionBits2]
                )

                # Create an array to represent active columns, all initially
                # zero. This will be populated by the compute method below. It
                # must have the same dimensions as the Spatial Pooler.
                activeColumns = numpy.zeros(spParams["columnCount"])

                # Execute Spatial Pooling algorithm over input space.
                sp.compute(encoding, True, activeColumns)
                activeColumnIndices = numpy.nonzero(activeColumns)[0]

                # Execute Temporal Memory algorithm over active mini-columns.
                tm.compute(activeColumnIndices, learn=True)

                activeCells = tm.getActiveCells()

                # Get the bucket info for this input value for classification.
                # Only the first value (column 1) is classified.
                bucketIdx = scalarEncoder.getBucketIndices(prediction)[0]

                # Run classifier to translate active cells back to scalar
                # value.
                classifierResult = classifier.compute(
                    recordNum=count,
                    patternNZ=activeCells,
                    classification={
                        "bucketIdx": bucketIdx,
                        "actValue": prediction
                    },
                    learn=True,
                    infer=True
                )

                # Pick the best prediction for 1 step out.
                oneStepConfidence, oneStep = sorted(
                    zip(classifierResult[1], classifierResult["actualValues"]),
                    reverse=True
                )[0]
                # print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
                # results.append([oneStep, oneStepConfidence * 100, None, None])
                results.append(
                    [record[0], prediction, oneStep, oneStepConfidence * 100])
                output.write(
                    record[0], prediction, oneStep, oneStepConfidence * 100)
        finally:
            output.close()

        return results
def runHotgym(numRecords):
    """Run the encoders -> SP -> TM -> SDR classifier pipeline over the CSV.

    Model parameters are loaded from ``_PARAMS_PATH`` and records are read
    from ``_INPUT_FILE_PATH``. For every record the timestamp (column 0) and
    the configured scalar columns are encoded, concatenated and pushed
    through the Spatial Pooler and Temporal Memory. The classifier is trained
    on the first scalar field (``Ct``) and its best 1-step prediction is
    written to the ``_FILE_NAME`` output and collected in the returned list.

    :param numRecords: maximum number of data records to process.
    :returns: list of ``[timestampString, actualValue, oneStepPrediction,
        confidencePercent]`` rows, one per processed record.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

    # (encoder-params key, CSV column index) for every scalar input. The
    # first entry is the field the classifier learns to predict.
    # NOTE(review): earlier revisions carried commented-out code for further
    # columns (ZIP_*, DEPT_*, AGE_*, DIST_*). To enable one, add its
    # (name, column) pair here; a matching "resolution" entry must exist in
    # the encoder params. Confirm column indices against the CSV header.
    scalarFields = (("Ct", 1), ("ZIP_10467", 2))

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    scalarEncoders = [
        RandomDistributedScalarEncoder(enParams[name]["resolution"])
        for name, _ in scalarFields
    ]
    predictedEncoder = scalarEncoders[0]
    predictedColumn = scalarFields[0][1]

    # Sum the width of every encoder actually used, so the SP input size can
    # never drift from the size of the concatenated encoding.
    encodingWidth = (timeOfDayEncoder.getWidth()
                     + weekendEncoder.getWidth()
                     + sum(enc.getWidth() for enc in scalarEncoders))

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The connected threshold is taken from the SP parameters.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []

    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Skip the three leading rows that precede the data records.
        # next(reader) works on both Python 2 and 3, unlike reader.next().
        for _ in range(3):
            next(reader)

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)
        try:
            for count, record in enumerate(reader):
                if count >= numRecords:
                    break

                # Convert the timestamp string into a Python datetime.
                timestamp = datetime.datetime.strptime(
                    record[0], "%Y-%m-%d %H:%M:%S")
                # Value the classifier is trained on.
                actual = float(record[predictedColumn])

                # Encoders populate caller-provided zero-filled arrays.
                timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
                weekendBits = numpy.zeros(weekendEncoder.getWidth())
                timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
                weekendEncoder.encodeIntoArray(timestamp, weekendBits)

                encodedParts = [timeOfDayBits, weekendBits]
                for (_, column), encoder in zip(scalarFields, scalarEncoders):
                    bits = numpy.zeros(encoder.getWidth())
                    encoder.encodeIntoArray(float(record[column]), bits)
                    encodedParts.append(bits)

                # One large encoding for Spatial Pooling.
                encoding = numpy.concatenate(encodedParts)

                # Output buffer for the SP; one slot per mini-column.
                activeColumns = numpy.zeros(spParams["columnCount"])
                sp.compute(encoding, True, activeColumns)
                activeColumnIndices = numpy.nonzero(activeColumns)[0]

                # Temporal Memory over the active mini-columns.
                tm.compute(activeColumnIndices, learn=True)
                activeCells = tm.getActiveCells()

                # Bucket of the predicted field, needed by the classifier.
                bucketIdx = predictedEncoder.getBucketIndices(actual)[0]

                # Translate active cells back into a scalar prediction.
                classifierResult = classifier.compute(
                    recordNum=count,
                    patternNZ=activeCells,
                    classification={
                        "bucketIdx": bucketIdx,
                        "actValue": actual
                    },
                    learn=True,
                    infer=True)

                # Highest-confidence prediction for 1 step ahead.
                oneStepConfidence, oneStep = sorted(
                    zip(classifierResult[1],
                        classifierResult["actualValues"]),
                    reverse=True)[0]

                results.append(
                    [record[0], actual, oneStep, oneStepConfidence * 100])
                output.write(
                    record[0], actual, oneStep, oneStepConfidence * 100)
        finally:
            # Close the output writer even when a record fails mid-run.
            output.close()

    return results
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
    """The 'numenta' detector, with the following changes:

    - Use pure Temporal Memory, not the classic TP that uses backtracking.
    - Don't spatial pool the timestamp. Pass it in as distal input.
    - 1 cell per column.
    - Use w=41 in the scalar encoding, rather than w=21, to make up for the
      lost timestamp input to the spatial pooler.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base detector and declare members.

        The encoders, SP, ETM and likelihood estimator are only created in
        ``initialize``; until then they are ``None``.
        """
        super(DistalTimestamps1CellPerColumnDetector,
              self).__init__(*args, **kwargs)

        self.valueEncoder = None
        self.encodedValue = None
        self.timestampEncoder = None
        self.encodedTimestamp = None
        # Active timestamp-encoder bits for the current and previous record.
        self.activeExternalCells = []
        self.prevActiveExternalCells = []
        self.sp = None
        self.spOutput = None
        self.etm = None
        self.anomalyLikelihood = None

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        """Create the encoders, SP, ETM and anomaly-likelihood estimator.

        Reads ``self.inputMin``, ``self.inputMax`` and
        ``self.probationaryPeriod`` (presumably provided by the base
        detector -- confirm against ``AnomalyDetector``).
        """
        # Pad the value range by 20% on each side; for a degenerate range
        # (min == max) fall back to an upper bound of min + 1.
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        if self.inputMin != self.inputMax:
            maxVal = self.inputMax + rangePadding
        else:
            maxVal = self.inputMin + 1
        numBuckets = 130.0
        resolution = max(0.001, (maxVal - minVal) / numBuckets)

        self.valueEncoder = RandomDistributedScalarEncoder(
            resolution, w=41, seed=42)
        self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                     dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                         dtype=np.uint32)

        # Only the value encoding is spatially pooled; the timestamp is fed
        # to the ETM as external basal input instead.
        inputWidth = self.valueEncoder.getWidth()

        self.sp = SpatialPooler(
            globalInhibition=True,
            columnDimensions=[2048],
            inputDimensions=[inputWidth],
            potentialRadius=inputWidth,
            numActiveColumnsPerInhArea=40,
            seed=1956,
            potentialPct=0.8,
            boostStrength=0.0,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            synPermInactiveDec=0.0005,
        )
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(
            activationThreshold=13,
            cellsPerColumn=1,
            columnDimensions=(2048, ),
            basalInputDimensions=(self.timestampEncoder.getWidth(), ),
            initialPermanence=0.21,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=32,
            minThreshold=10,
            maxNewSynapseCount=20,
            permanenceDecrement=0.1,
            permanenceIncrement=0.1,
            seed=1960,
            checkInputs=False,
        )

        # math.floor returns a float on Python 2; the likelihood estimator
        # takes record counts, so hand it integers.
        learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        # NOTE(review): `claLearningPeriod` is kept as in the original call;
        # newer NuPIC releases name this argument `learningPeriod` -- confirm
        # which spelling the installed version expects.
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=self.probationaryPeriod - learningPeriod,
            reestimationPeriod=100)

    def handleRecord(self, inputData):
        """Process one record and score it.

        :param inputData: mapping with a ``"value"`` entry (the scalar to
            encode) and a ``"timestamp"`` entry (passed to the date encoder).
        :returns: tuple ``(logScore, rawScore)``. ``rawScore`` is the
            fraction of active columns that were not predicted; ``logScore``
            is the log-likelihood form of the anomaly probability.
        """
        self.valueEncoder.encodeIntoArray(inputData["value"],
                                          self.encodedValue)
        self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                              self.encodedTimestamp)

        # Remember the previous timestamp bits before replacing them.
        self.prevActiveExternalCells = self.activeExternalCells
        self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

        self.sp.compute(self.encodedValue, True, self.spOutput)
        activeColumns = self.spOutput.nonzero()[0]
        activeColumnsSet = set(activeColumns.tolist())

        # Columns predicted by the ETM before it sees the current input.
        prevPredictedColumns = set(
            self.etm.columnForCell(cell)
            for cell in self.etm.getPredictiveCells())

        numActive = len(activeColumns)
        if numActive > 0:
            rawScore = (len(activeColumnsSet - prevPredictedColumns) /
                        float(numActive))
        else:
            # No active columns: nothing was unpredicted, and dividing by
            # zero must be avoided.
            rawScore = 0.0

        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            inputData["value"], rawScore, inputData["timestamp"])
        logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

        self.etm.compute(
            activeColumns,
            activeCellsExternalBasal=self.activeExternalCells,
            reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
            growthCandidatesExternalBasal=self.prevActiveExternalCells)

        return (logScore, rawScore)
def runHotgym(numRecords):
    """Run the encoders -> SP -> TM -> SDR classifier pipeline over the CSV.

    Model parameters are loaded from ``_PARAMS_PATH`` and records are read
    from ``_INPUT_FILE_PATH``. Column 0 of each record is the timestamp and
    column 1 the consumption value. The best 1-step prediction for every
    record is printed and collected.

    :param numRecords: maximum number of data records to process.
    :returns: list of ``[oneStepPrediction, confidencePercent, None, None]``
        rows, one per processed record.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])

    encodingWidth = (timeOfDayEncoder.getWidth()
                     + weekendEncoder.getWidth()
                     + scalarEncoder.getWidth())

    sp = SpatialPooler(
        # How large the input encoding will be. Passed as a real 1-tuple;
        # "(x)" without a trailing comma is just the integer x.
        inputDimensions=(encodingWidth, ),
        # How many mini-columns will be in the Spatial Pooler.
        columnDimensions=(spParams["columnCount"], ),
        # What percent of the column's receptive field is available for
        # potential synapses?
        potentialPct=spParams["potentialPct"],
        # This means that the input space has no topology.
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        # Roughly 2%, giving that there is only one inhibition area because
        # we have turned on globalInhibition (40 / 2048 = 0.0195).
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        # How quickly synapses grow and degrade.
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        # boostStrength controls the strength of boosting. Boosting
        # encourages efficient usage of SP columns.
        boostStrength=spParams["boostStrength"],
        # Random number generator seed.
        seed=spParams["seed"],
        # TODO: is this useful?
        # Determines if inputs at the beginning and end of an input
        # dimension should be considered neighbors when mapping columns to
        # inputs.
        wrapAround=False)

    tm = TemporalMemory(
        # Must be the same dimensions as the SP.
        columnDimensions=(tmParams["columnCount"], ),
        # How many cells in each mini-column.
        cellsPerColumn=tmParams["cellsPerColumn"],
        # A segment is active if it has >= activationThreshold connected
        # synapses that are active due to infActiveState.
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # TODO: This comes from the SP params, is this normal?
        connectedPermanence=spParams["synPermConnected"],
        # Minimum number of active synapses for a segment to be considered
        # during search for the best-matching segments.
        minThreshold=tmParams["minThreshold"],
        # The max number of synapses added to a segment during learning.
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []

    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Skip the three leading rows that precede the data records.
        # next(reader) works on both Python 2 and 3, unlike reader.next().
        for _ in range(3):
            next(reader)

        for count, record in enumerate(reader):
            if count >= numRecords:
                break

            # Convert the timestamp string into a Python datetime.
            timestamp = datetime.datetime.strptime(record[0],
                                                   "%m/%d/%y %H:%M")
            # Convert the data value string into a float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for
            # the encoders to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())

            # Now we call the encoders to create bit representations for
            # each value.
            timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
            weekendEncoder.encodeIntoArray(timestamp, weekendBits)
            scalarEncoder.encodeIntoArray(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for
            # Spatial Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, consumptionBits])

            # Array of active columns, all initially zero; populated by
            # sp.compute. It must have the same dimensions as the SP.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)
            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": consumption
                },
                learn=True,
                infer=True)

            # Print the best prediction for 1 step out.
            oneStepConfidence, oneStep = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True)[0]
            print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
            results.append([oneStep, oneStepConfidence * 100, None, None])

    return results