def _createRDSE(self, min_val=0, max_val=0):
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = self.parameters["enc"]["value"]["size"]
    scalarEncoderParams.activeBits = self.parameters["enc"]["value"]["activeBits"]
    scalarEncoderParams.resolution = max(0.001, (max_val - min_val) / 130)
    scalarEncoderParams.seed = self.parameters["enc"]["value"]["seed"]
    return RDSE(scalarEncoderParams)
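# A minimal usage sketch of the RDSE built above (assumptions: htm.core is
# installed; the 0..100 input range and the concrete parameter values are
# illustrative stand-ins for whatever self.parameters holds).
from htm.bindings.encoders import RDSE, RDSE_Parameters

params = RDSE_Parameters()
params.size = 700                                # total bits in the output SDR
params.activeBits = 14                           # on bits per encoding
params.resolution = max(0.001, (100 - 0) / 130)  # same rule as _createRDSE above
params.seed = 42
encoder = RDSE(params)

sdr = encoder.encode(12.5)   # returns an htm SDR with params.size bits
print(sdr.getSparsity())     # roughly activeBits / size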
def __init__(self, world, parameters=default_parameters):
    self.world = world
    self.area_size = parameters['num_cells']
    self.num_areas = len(self.world.coordinates)

    # Make an RDSE for every location.
    self.enc = np.zeros(self.world.dims, dtype=object)
    enc_parameters = RDSE_Parameters()
    enc_parameters.size = self.area_size
    enc_parameters.sparsity = parameters['local_sparsity']
    enc_parameters.category = True
    for coords in self.world.coordinates:
        self.enc[coords] = RDSE(enc_parameters)

    # Make empty buffers for the working data.
    self.local = np.zeros(self.world.dims, dtype=object)
    self.gnw = np.zeros(self.world.dims, dtype=object)
    for coords in self.world.coordinates:
        self.local[coords] = SDR((self.area_size, ))
        self.gnw[coords] = SDR((self.area_size, ))

    # Make an instance of the model at every location.
    self.apical_denrites = np.zeros(self.world.dims, dtype=object)
    self.gnw_size = self.num_areas * self.area_size
    for coords in self.world.coordinates:
        self.apical_denrites[coords] = TemporalMemory(
            [self.area_size],  # column_dimensions
            cellsPerColumn=1,
            externalPredictiveInputs=self.gnw_size,
            seed=0,
            **parameters['apical_denrites'])
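# A hedged sketch of stepping one of the per-location models above, with the
# global workspace as external predictive input. Sizes and the randomized SDRs
# are illustrative; the activateDendrites/activateCells split mirrors the
# usage shown in other snippets in this section.
from htm.bindings.sdr import SDR
from htm.bindings.algorithms import TemporalMemory

area_size, num_areas = 200, 4
gnw_size = num_areas * area_size

model = TemporalMemory([area_size],   # column dimensions
                       cellsPerColumn=1,
                       externalPredictiveInputs=gnw_size,
                       seed=0)

local_active_columns = SDR((area_size,))
local_active_columns.randomize(0.05)  # stand-in for local activity
gnw_state = SDR((gnw_size,))
gnw_state.randomize(0.05)             # stand-in for global-workspace activity

# activateDendrites computes active segments from the external input;
# activateCells then activates cells in the active columns (winners, bursting).
model.activateDendrites(learn=True,
                        externalPredictiveInputsActive=gnw_state,
                        externalPredictiveInputsWinners=gnw_state)
model.activateCells(local_active_columns, learn=True)
print(model.anomaly)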
def create_encoder(self):
    print("creating encoder...")
    print(self.setting("enc"))
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = self.setting("enc").size
    scalarEncoderParams.sparsity = self.setting("enc").sparsity
    scalarEncoderParams.resolution = self.setting("enc").resolution
    scalarEncoder = RDSE(scalarEncoderParams)
    print()
    return scalarEncoder
def scaler_data_randomscaler_method_1():
    pooler_data = []
    data = pickle.load(open('data/x_hat_v3.pkl', mode='rb'))
    col1 = data[:, 0:1].flatten()
    col2 = data[:, 1:2].flatten()
    col3 = data[:, 2:3].flatten()
    col4 = data[:, 3:4].flatten()

    parameter1 = RDSE_Parameters()
    parameter1.size = 2000
    parameter1.sparsity = 0.02
    parameter1.resolution = 0.66
    rsc1 = RDSE(parameter1)

    # Create SDRs for the 3D t-SNE input plus one for the magnitude of the
    # 128-D original vector. Loop through all vectors one at a time to create
    # SDRs for the SP.
    for _x1, _x2, _x3, _x4 in zip(col1, col2, col3, col4):
        x_x1 = rsc1.encode(_x1)
        x_x2 = rsc1.encode(_x2)
        x_x3 = rsc1.encode(_x3)
        x_x4 = rsc1.encode(_x4)
        pooler_data.append(SDR(8000).concatenate([x_x1, x_x2, x_x3, x_x4]))
    return pooler_data
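# A small check of the concatenation step above (assumption: htm.core's
# SDR.concatenate fills the target SDR and returns it, as used throughout this
# section, so the target's width must equal the sum of the parts: 4 x 2000 = 8000).
from htm.bindings.sdr import SDR
from htm.bindings.encoders import RDSE, RDSE_Parameters

p = RDSE_Parameters()
p.size = 2000
p.sparsity = 0.02
p.resolution = 0.66
enc = RDSE(p)

parts = [enc.encode(v) for v in (0.1, 0.2, 0.3, 0.4)]
combined = SDR(8000).concatenate(parts)
assert combined.size == sum(part.size for part in parts)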
def SystemSetup(parameters, verbose=True):
    global agent, sensorEncoder, env, sensorLayer_sp, sensorLayer_SDR_columns
    global gridCellEncoder, locationlayer_SDR_cells
    global sensorLayer_tm

    if verbose:
        import pprint
        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # create environment and the agent
    env = htm2d.environment.TwoDimensionalEnvironment(20, 20)
    agent = htm2d.agent.Agent()

    # load object from yml file
    with open(os.path.join(_OBJECTS_DIR, OBJECT_FILENAME), "r") as stream:
        try:
            env.load_object(stream)
        except yaml.YAMLError as exc:
            print(exc)

    # SENSOR LAYER --------------------------------------------------------------
    # setup sensor encoder
    sensorEncoderParams = RDSE_Parameters()
    sensorEncoderParams.category = True
    sensorEncoderParams.size = parameters["enc"]["size"]
    sensorEncoderParams.sparsity = parameters["enc"]["sparsity"]
    sensorEncoderParams.seed = parameters["enc"]["seed"]
    sensorEncoder = RDSE(sensorEncoderParams)

    # Create SpatialPooler
    spParams = parameters["sensorLayer_sp"]
    sensorLayer_sp = SpatialPooler(
        inputDimensions=(sensorEncoder.size, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=sensorEncoder.size,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True,
    )
    sp_info = Metrics(sensorLayer_sp.getColumnDimensions(), 999999999)

    # Create an SDR to represent active columns. This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    sensorLayer_SDR_columns = SDR(spParams["columnCount"])

    # LOCATION LAYER ------------------------------------------------------------
    # Grid cell modules
    locParams = parameters["locationLayer"]
    gridCellEncoder = GridCellEncoder(
        size=locParams["cellCount"],
        sparsity=locParams["sparsity"],
        periods=locParams["periods"],
        seed=locParams["seed"],
    )
    locationlayer_SDR_cells = SDR(gridCellEncoder.dimensions)

    tmParams = parameters["sensorLayer_tm"]
    sensorLayer_tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        externalPredictiveInputs=locParams["cellCount"],
    )
    tm_info = Metrics([sensorLayer_tm.numberOfCells()], 999999999)
def building_htm(len_data):
    global enc_info
    global sp_info
    global tm_info
    global anomaly_history
    global predictor
    global predictor_resolution
    global tm
    global sp
    global scalarEncoder
    global encodingWidth
    global dateEncoder

    # Initial message
    print("Building HTM for predicting trends...")

    # Default parameters in HTM
    default_parameters = {
        # There are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay)
        'enc': {
            "value": {'resolution': 0.88, 'size': 700, 'sparsity': 0.02},
            "time": {'timeOfDay': (30, 1)}  # , 'weekend': 21}
        },
        'predictor': {'sdrc_alpha': 0.1},
        'sp': {
            'boostStrength': 3.0,
            'columnCount': 1638,
            'localAreaDensity': 0.04395604395604396,
            'potentialPct': 0.85,
            'synPermActiveInc': 0.04,
            'synPermConnected': 0.13999999999999999,
            'synPermInactiveDec': 0.006
        },
        'tm': {
            'activationThreshold': 17,
            'cellsPerColumn': 13,
            'initialPerm': 0.21,
            'maxSegmentsPerCell': 128,
            'maxSynapsesPerSegment': 64,
            'minThreshold': 10,
            'newSynapseCount': 32,
            'permanenceDec': 0.1,
            'permanenceInc': 0.1
        },
        'anomaly': {
            'likelihood': {'probationaryPct': 0.1, 'reestimationPeriod': 100}
        }
    }

    # Make the encoder
    print("- Make the encoder")
    dateEncoder = DateEncoder(timeOfDay=default_parameters["enc"]["time"]["timeOfDay"])
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = default_parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = default_parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = default_parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the SP
    print("- Make the SP")
    spParams = default_parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    # Temporal Memory Parameters
    print("- Make the TM")
    tmParams = default_parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Setup Likelihood
    print("- Make Anomaly Score/Likelihood")
    anParams = default_parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len_data))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Make predictor
    print("- Make the predictor")
    predictor = Predictor(steps=[1, 5], alpha=default_parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # End message
    print("- Finish the building of HTM!")
class UnivHTMDetector(object):
    """
    This detector uses an HTM based anomaly detection technique.
    """

    def __init__(self, name, probationaryPeriod, smoothingKernelSize, htmParams=None, verbose=False):
        self.useSpatialAnomaly = True
        self.verbose = verbose

        self.name = name  # for logging
        self.probationaryPeriod = probationaryPeriod
        self.parameters = parameters_best

        self.minVal = None
        self.maxVal = None
        self.spatial_tolerance = None

        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        self.anomalyLikelihood = None

        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None

        # for initialization
        self.init_data = []
        self.is_initialized = False
        self.iteration_ = 0

        # for smoothing with gaussian
        self.historic_raw_anomaly_scores = deque(maxlen=smoothingKernelSize)
        self.kernel = None
        self.learningPeriod = None

    def initialize(self, input_min=0, input_max=0):
        # setup spatial anomaly
        if self.useSpatialAnomaly:
            self.spatial_tolerance = self.parameters["spatial_tolerance"]

        ## setup Enc, SP, TM
        # Make the Encoders. These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(timeOfDay=self.parameters["enc"]["time"]["timeOfDay"])

        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = self.parameters["enc"]["value"]["size"]
        scalarEncoderParams.activeBits = self.parameters["enc"]["value"]["activeBits"]
        scalarEncoderParams.resolution = max(0.001, (input_max - input_min) / 130)
        scalarEncoderParams.seed = self.parameters["enc"]["value"]["seed"]
        self.encValue = RDSE(scalarEncoderParams)

        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = self.parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnDimensions"],),
            potentialRadius=encodingWidth,
            potentialPct=spParams["potentialPct"],
            globalInhibition=spParams["globalInhibition"],
            localAreaDensity=spParams["localAreaDensity"],
            numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=spParams["wrapAround"],
            minPctOverlapDutyCycle=spParams["minPctOverlapDutyCycle"],
            dutyCyclePeriod=spParams["dutyCyclePeriod"],
            seed=spParams["seed"],
        )
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = self.parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnDimensions"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPermanence"],
            connectedPermanence=tmParams["connectedPermanence"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["maxNewSynapseCount"],
            permanenceIncrement=tmParams["permanenceIncrement"],
            permanenceDecrement=tmParams["permanenceDecrement"],
            predictedSegmentDecrement=tmParams["predictedSegmentDecrement"],
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
            seed=tmParams["seed"],
        )
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        anParams = self.parameters["anomaly"]["likelihood"]
        self.learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=self.learningPeriod,
            estimationSamples=self.probationaryPeriod - self.learningPeriod,
            reestimationPeriod=anParams["reestimationPeriod"])

        self.kernel = self._gauss_kernel(self.historic_raw_anomaly_scores.maxlen,
                                         self.historic_raw_anomaly_scores.maxlen)

    def modelRun(self, ts, val):
        """
        Run a single pass through the HTM model.
        @param ts - Timestamp
        @param val - float input value
        @return rawAnomalyScore computed for the `val` in this step
        """
        self.iteration_ += 1

        # 0. During the probation period, gather the data and return 0.01.
        if self.iteration_ <= self.probationaryPeriod:
            self.init_data.append((ts, val))
            return 0.01

        if self.is_initialized is False:
            if self.verbose:
                print("[{}] Initializing".format(self.name))
            temp_iteration = self.iteration_
            vals = [i[1] for i in self.init_data]
            self.initialize(input_min=min(vals), input_max=max(vals))
            self.is_initialized = True
            # Replay the buffered records. Distinct loop variables are used so
            # the current `ts`/`val` arguments are not clobbered by the replay.
            for past_ts, past_val in self.init_data:
                self.modelRun(past_ts, past_val)
            self.iteration_ = temp_iteration
            if self.verbose:
                print("[{}] Initialization done".format(self.name))

        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        # 1. Encoding
        # Call the encoders to create bit representations for each value. These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns. This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        self.tm.compute(activeColumns, learn=True)
        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4. Anomaly
        # handle spatial and contextual (raw, likelihood) anomalies
        # -Spatial
        spatialAnomaly = 0.0
        if self.useSpatialAnomaly:
            # Update min/max values and check if there is a spatial anomaly
            if self.minVal != self.maxVal:
                tolerance = (self.maxVal - self.minVal) * self.spatial_tolerance
                maxExpected = self.maxVal + tolerance
                minExpected = self.minVal - tolerance
                if val > maxExpected or val < minExpected:
                    spatialAnomaly = 1.0
            if self.maxVal is None or val > self.maxVal:
                self.maxVal = val
            if self.minVal is None or val < self.minVal:
                self.minVal = val

        # -Temporal
        raw = self.tm.anomaly
        like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
        logScore = self.anomalyLikelihood.computeLogLikelihood(like)
        temporalAnomaly = logScore

        anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

        # 5. Apply smoothing
        self.historic_raw_anomaly_scores.append(anomalyScore)
        historic_scores = np.asarray(self.historic_raw_anomaly_scores)
        convolved = np.convolve(historic_scores, self.kernel, 'valid')
        anomalyScore = convolved[-1]

        return anomalyScore

    @staticmethod
    def estimateNormal(sampleData, performLowerBoundCheck=True):
        """
        :param sampleData:
        :type sampleData: Numpy array.
        :param performLowerBoundCheck:
        :type performLowerBoundCheck: bool
        :returns: A tuple (mean, variance, st_dev) of a normal distribution
          estimated from the ``sampleData``.
        """
        mean = np.mean(sampleData)
        variance = np.var(sampleData)
        st_dev = 0

        if performLowerBoundCheck:
            # Handle edge case of almost no deviations and super low anomaly scores. We
            # find that such low anomaly means can happen, but then the slightest blip
            # of anomaly score can cause the likelihood to jump up to red.
            if mean < 0.03:
                mean = 0.03

            # Catch all for super low variance to handle numerical precision issues
            if variance < 0.0003:
                variance = 0.0003

        # Compute standard deviation
        if variance > 0:
            st_dev = math.sqrt(variance)

        return mean, variance, st_dev

    @staticmethod
    def _calcSkipRecords(numIngested, windowSize, learningPeriod):
        """Return the value of skipRecords for passing to estimateAnomalyLikelihoods.

        If `windowSize` is very large (bigger than the amount of data) then this
        could just return `learningPeriod`. But when some values have fallen out
        of the historical sliding window of anomaly records, then we have to take
        those into account as well so we return the `learningPeriod` minus the
        number shifted out.

        :param numIngested - (int) number of data points that have been added to
          the sliding window of historical data points.
        :param windowSize - (int) size of sliding window of historical data points.
        :param learningPeriod - (int) the number of iterations required for the
          algorithm to learn the basic patterns in the dataset and for the anomaly
          score to 'settle down'.
        """
        numShiftedOut = max(0, numIngested - windowSize)
        return min(numIngested, max(0, learningPeriod - numShiftedOut))

    @staticmethod
    def _gauss_kernel(std, size):
        def _norm_pdf(x, mean, sd):
            var = float(sd) ** 2
            denom = (2 * math.pi * var) ** .5
            num = math.exp(-(float(x) - float(mean)) ** 2 / (2 * var))
            return num / denom

        kernel = [2 * _norm_pdf(idx, 0, std) for idx in range(-size + 1, 1)]
        kernel = np.array(kernel)
        kernel = np.flip(kernel)
        kernel = kernel / sum(kernel)
        return kernel
class HtmcoreDetector(AnomalyDetector):
    """
    This detector uses an HTM based anomaly detection technique.
    """

    def __init__(self, *args, **kwargs):
        super(HtmcoreDetector, self).__init__(*args, **kwargs)

        ## API for controlling settings of htm.core HTM detector:

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood. This will give worse results, but is
        # useful for checking the efficacy of AnomalyLikelihood. You will need
        # to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True
        self.useSpatialAnomaly = True
        self.verbose = True

        # Set this to true if you want to use the optimization.
        # If true, it reads the parameters from ./params.json
        # If false, it reads the parameters from ./best_params.json
        self.use_optimization = False

        ## internal members
        # (listed here for easier understanding)
        # initialized in `initialize()`
        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        self.anLike = None
        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None
        # internal helper variables:
        self.inputs_ = []
        self.iteration_ = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]  # TODO optional: add "prediction"

    def handleRecord(self, inputData):
        """Returns a tuple (anomalyScore, rawScore).

        @param inputData is a dict {"timestamp" : Timestamp(), "value" : float}
        @return tuple (anomalyScore, <any other fields specified in `getAdditionalHeaders()`>, ...)
        """
        # Send it to the Numenta detector and get back the results
        return self.modelRun(inputData["timestamp"], inputData["value"])

    def initialize(self):
        # toggle parameters here
        if self.use_optimization:
            parameters = get_params('params.json')
        else:
            parameters = parameters_numenta_comparable

        # setup spatial anomaly
        if self.useSpatialAnomaly:
            # Keep track of value range for spatial anomaly detection
            self.minVal = None
            self.maxVal = None

        ## setup Enc, SP, TM, Likelihood
        # Make the Encoders. These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                                        weekend=parameters["enc"]["time"]["weekend"])

        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = parameters["enc"]["value"]["size"]
        scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
        scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
        self.encValue = RDSE(scalarEncoderParams)

        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnCount"],),
            potentialPct=spParams["potentialPct"],
            potentialRadius=encodingWidth,
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True
        )
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnCount"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPerm"],
            connectedPermanence=spParams["synPermConnected"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["newSynapseCount"],
            permanenceIncrement=tmParams["permanenceInc"],
            permanenceDecrement=tmParams["permanenceDec"],
            predictedSegmentDecrement=0.0,
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
        )
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # setup likelihood, these settings are used in NAB
        if self.useLikelihood:
            anParams = parameters["anomaly"]["likelihood"]
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = AnomalyLikelihood(
                learningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=anParams["reestimationPeriod"])
        # Predictor
        # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
        # predictor_resolution = 1

        # initialize pandaBaker
        if PANDA_VIS_BAKE_DATA:
            self.BuildPandaSystem(self.sp, self.tm,
                                  parameters["enc"]["value"]["size"], self.encTimestamp.size)

    def modelRun(self, ts, val):
        """
        Run a single pass through the HTM model.
        @params ts - Timestamp
        @params val - float input value
        @return rawAnomalyScore computed for the `val` in this step
        """
        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        self.inputs_.append(val)
        self.iteration_ += 1

        # 1. Encoding
        # Call the encoders to create bit representations for each value. These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns. This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        # To get predictive cells we need to call activateDendrites & activateCells separately.
        if PANDA_VIS_BAKE_DATA:
            # activateDendrites calculates active segments
            self.tm.activateDendrites(learn=True)
            # predictive cells are calculated directly from active segments
            predictiveCells = self.tm.getPredictiveCells()
            # activates cells in columns by TM algorithm (winners, bursting...)
            self.tm.activateCells(activeColumns, learn=True)
        else:
            self.tm.compute(activeColumns, learn=True)
        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4.1 (optional) Predictor
        # TODO optional
        # TODO optional: also return an error metric on predictions (RMSE, R2, ...)

        # 4.2 Anomaly
        # handle spatial and contextual (raw, likelihood) anomalies
        # -Spatial
        spatialAnomaly = 0.0  # TODO optional: make this computed in SP (and later improve)
        if self.useSpatialAnomaly:
            # Update min/max values and check if there is a spatial anomaly
            if self.minVal != self.maxVal:
                tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
                maxExpected = self.maxVal + tolerance
                minExpected = self.minVal - tolerance
                if val > maxExpected or val < minExpected:
                    spatialAnomaly = 1.0
            if self.maxVal is None or val > self.maxVal:
                self.maxVal = val
            if self.minVal is None or val < self.minVal:
                self.minVal = val

        # -temporal (raw)
        raw = self.tm.anomaly
        temporalAnomaly = raw

        if self.useLikelihood:
            # Compute log(anomaly likelihood)
            like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
            logScore = self.anomalyLikelihood.computeLogLikelihood(like)
            temporalAnomaly = logScore
        # TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

        anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

        # 5. print stats
        if self.verbose and self.iteration_ % 1000 == 0:
            # print(self.enc_info)
            # print(self.sp_info)
            # print(self.tm_info)
            pass

        # 6. panda vis
        if PANDA_VIS_BAKE_DATA:
            # ------------------HTMpandaVis----------------------
            # see more about this structure at
            # https://github.com/htm-community/HTMpandaVis/blob/master/pandaBaker/README.md
            # fill up values
            pandaBaker.inputs["Value"].stringValue = "value: {:.2f}".format(val)
            pandaBaker.inputs["Value"].bits = valueBits.sparse

            pandaBaker.inputs["TimeOfDay"].stringValue = str(ts)
            pandaBaker.inputs["TimeOfDay"].bits = dateBits.sparse

            pandaBaker.layers["Layer1"].activeColumns = activeColumns.sparse
            pandaBaker.layers["Layer1"].winnerCells = self.tm.getWinnerCells().sparse
            pandaBaker.layers["Layer1"].predictiveCells = predictiveCells.sparse
            pandaBaker.layers["Layer1"].activeCells = self.tm.getActiveCells().sparse

            # customizable datastreams to be shown on the DASH PLOTS
            pandaBaker.dataStreams["rawAnomaly"].value = temporalAnomaly
            pandaBaker.dataStreams["value"].value = val
            pandaBaker.dataStreams["numberOfWinnerCells"].value = len(self.tm.getWinnerCells().sparse)
            pandaBaker.dataStreams["numberOfPredictiveCells"].value = len(predictiveCells.sparse)
            pandaBaker.dataStreams["valueInput_sparsity"].value = valueBits.getSparsity()
            pandaBaker.dataStreams["dateInput_sparsity"].value = dateBits.getSparsity()

            pandaBaker.dataStreams["Layer1_SP_overlap_metric"].value = self.sp_info.overlap.overlap
            # note: the TM streams below read from tm_info; the original fed the
            # TM overlap stream from sp_info, an apparent copy-paste slip
            pandaBaker.dataStreams["Layer1_TM_overlap_metric"].value = self.tm_info.overlap.overlap
            pandaBaker.dataStreams["Layer1_SP_activation_frequency"].value = self.sp_info.activationFrequency.mean()
            pandaBaker.dataStreams["Layer1_TM_activation_frequency"].value = self.tm_info.activationFrequency.mean()
            pandaBaker.dataStreams["Layer1_SP_entropy"].value = self.sp_info.activationFrequency.entropy()
            pandaBaker.dataStreams["Layer1_TM_entropy"].value = self.tm_info.activationFrequency.entropy()

            pandaBaker.StoreIteration(self.iteration_ - 1)
            print("ITERATION: " + str(self.iteration_ - 1))
            # ------------------HTMpandaVis----------------------

        return (anomalyScore, raw)

    # with this method, the structure for visualization is defined
    def BuildPandaSystem(self, sp, tm, consumptionBits_size, dateBits_size):
        # we have two inputs connected to proximal synapses of Layer1
        pandaBaker.inputs["Value"] = cInput(consumptionBits_size)
        pandaBaker.inputs["TimeOfDay"] = cInput(dateBits_size)

        pandaBaker.layers["Layer1"] = cLayer(sp, tm)  # Layer1 has Spatial Pooler & Temporal Memory
        pandaBaker.layers["Layer1"].proximalInputs = ["Value", "TimeOfDay"]
        pandaBaker.layers["Layer1"].distalInputs = ["Layer1"]

        # data for dash plots
        streams = ["rawAnomaly", "value", "numberOfWinnerCells", "numberOfPredictiveCells",
                   "valueInput_sparsity", "dateInput_sparsity",
                   "Layer1_SP_overlap_metric", "Layer1_TM_overlap_metric",
                   "Layer1_SP_activation_frequency", "Layer1_TM_activation_frequency",
                   "Layer1_SP_entropy", "Layer1_TM_entropy"]
        pandaBaker.dataStreams = dict((name, cDataStream()) for name in streams)  # create dicts for more comfortable code
        # could be also written like: pandaBaker.dataStreams["myStreamName"] = cDataStream()

        pandaBaker.PrepareDatabase()
def main(parameters=default_parameters, argv=None, verbose=True):
    if verbose:
        import pprint
        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # Read the input file.
    records = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)
        next(reader)
        for record in reader:
            records.append(record)

    # Make the Encoders. These will convert input data into binary representations.
    dateEncoder = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                              weekend=parameters["enc"]["time"]["weekend"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    spParams = parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True
    )
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    tmParams = parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
    )
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # setup likelihood, these settings are used in NAB
    anParams = parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len(records)))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    predictor = Predictor(steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # Iterate through every datum in the dataset, record the inputs & outputs.
    inputs = []
    anomaly = []
    anomalyProb = []
    predictions = {1: [], 5: []}
    for count, record in enumerate(records):
        # Convert date string into Python datetime object.
        dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
        # Convert data value string into float.
        consumption = float(record[1])
        inputs.append(consumption)

        # Call the encoders to create bit representations for each value. These are SDR objects.
        dateBits = dateEncoder.encode(dateString)
        consumptionBits = scalarEncoder.encode(consumption)

        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(encodingWidth).concatenate([consumptionBits, dateBits])
        enc_info.addData(encoding)

        # Create an SDR to represent active columns. This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(sp.getColumnDimensions())

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        sp_info.addData(activeColumns)

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumns, learn=True)
        tm_info.addData(tm.getActiveCells().flatten())

        # Predict what will happen, and then train the predictor based on what just happened.
        pdf = predictor.infer(count, tm.getActiveCells())
        for n in (1, 5):
            if pdf[n]:
                predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
            else:
                predictions[n].append(float('nan'))
        predictor.learn(count, tm.getActiveCells(), int(consumption / predictor_resolution))

        anomalyLikelihood = anomaly_history.anomalyProbability(consumption, tm.anomaly)
        anomaly.append(tm.anomaly)
        anomalyProb.append(anomalyLikelihood)

    # Print information & statistics about the state of the HTM.
    print("Encoded Input", enc_info)
    print("")
    print("Spatial Pooler Mini-Columns", sp_info)
    print(str(sp))
    print("")
    print("Temporal Memory Cells", tm_info)
    print(str(tm))
    print("")

    # Shift the predictions so that they are aligned with the input they predict.
    for n_steps, pred_list in predictions.items():
        for x in range(n_steps):
            pred_list.insert(0, float('nan'))
            pred_list.pop()

    # Calculate the predictive accuracy, Root-Mean-Squared.
    accuracy = {1: 0, 5: 0}
    accuracy_samples = {1: 0, 5: 0}
    for idx, inp in enumerate(inputs):
        for n in predictions:  # For each [N]umber of time steps ahead which was predicted.
            val = predictions[n][idx]
            if not math.isnan(val):
                accuracy[n] += (inp - val) ** 2
                accuracy_samples[n] += 1
    for n in sorted(predictions):
        accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
        print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

    # Show info about the anomaly (mean & std)
    print("Anomaly Mean", np.mean(anomaly))
    print("Anomaly Std ", np.std(anomaly))

    # Plot the Predictions and Anomalies.
    if verbose:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            print("WARNING: failed to import matplotlib, plots cannot be shown.")
            return -accuracy[5]

        plt.subplot(2, 1, 1)
        plt.title("Predictions")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), predictions[1], 'blue',
                 np.arange(len(inputs)), predictions[5], 'green')
        plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step',
                           '5 Step Prediction, Shifted 5 steps'))

        plt.subplot(2, 1, 2)
        plt.title("Anomaly Score")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        inputs = np.array(inputs) / max(inputs)
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), anomaly, 'blue')
        plt.legend(labels=('Input', 'Anomaly Score'))
        plt.show()

    return -accuracy[5]
from htm.encoders.date import DateEncoder
from htm.bindings.encoders import RDSE, RDSE_Parameters  # missing in the original snippet
from htm.bindings.algorithms import SpatialPooler
from htm.bindings.algorithms import TemporalMemory
from htm.algorithms.anomaly_likelihood import AnomalyLikelihood
from htm.bindings.algorithms import Predictor

import matplotlib
import matplotlib.pyplot as plt

dateEncoder = DateEncoder(timeOfDay=(30, 1), weekend=21)

scalarEncoderParams = RDSE_Parameters()
scalarEncoderParams.size = 700
scalarEncoderParams.sparsity = 0.02
scalarEncoderParams.resolution = 0.88
scalarEncoder = RDSE(scalarEncoderParams)
encodingWidth = (dateEncoder.size + scalarEncoder.size)

sp = SpatialPooler(inputDimensions=(encodingWidth, ),
                   columnDimensions=(1638, ),
                   potentialPct=0.85,
                   potentialRadius=encodingWidth,
                   globalInhibition=True,
                   localAreaDensity=0.04395604395604396,
                   synPermInactiveDec=0.006,
                   synPermActiveInc=0.04,
                   synPermConnected=0.13999999999999999,
                   boostStrength=3.0,
                   wrapAround=True)

# The original snippet breaks off mid-call here; the call is completed below
# with the same TM defaults used elsewhere in this section (hotgym-style
# parameters), as an assumption.
tm = TemporalMemory(columnDimensions=(1638, ),
                    cellsPerColumn=13,
                    activationThreshold=17,
                    initialPermanence=0.21,
                    connectedPermanence=0.13999999999999999,
                    minThreshold=10,
                    maxNewSynapseCount=32,
                    permanenceIncrement=0.1,
                    permanenceDecrement=0.1,
                    predictedSegmentDecrement=0.0,
                    maxSegmentsPerCell=128,
                    maxSynapsesPerSegment=64)
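# A side note on the RDSE resolution used above (0.88): inputs closer together
# than the resolution map to heavily overlapping SDRs, while distant inputs
# share almost no bits. A minimal check (assumption: htm.core installed):
from htm.bindings.encoders import RDSE, RDSE_Parameters

p = RDSE_Parameters()
p.size = 700
p.sparsity = 0.02
p.resolution = 0.88
enc = RDSE(p)

a, b, c = enc.encode(10.0), enc.encode(10.5), enc.encode(50.0)
print(a.getOverlap(b))  # high overlap: values within ~one resolution step
print(a.getOverlap(c))  # near zero: values far apart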
class Experiment:
    def __init__(self):
        self.anomalyHistData = []
        self.iterationNo = 0

        self.enc_info = None
        self.sp_info = None
        self.tm_info = None

    def SystemSetup(self, verbose=True):
        if verbose:
            import pprint
            print("Parameters:")
            pprint.pprint(parameters, indent=4)
            print("")

        # create environment and the agent
        self.env = htm2d.environment.TwoDimensionalEnvironment(20, 20)
        self.agent = htm2d.agent.Agent()

        # load object from yml file
        with open(os.path.join(_OBJECTS_DIR, OBJECT_FILENAME), "r") as stream:
            try:
                self.env.load_object(stream)
            except yaml.YAMLError as exc:
                print(exc)

        # Parameter sub-dicts. The original snippet used spParams/tmParams/locParams
        # without defining them; the keys below are assumptions recovered from
        # their use later in this snippet and in the related SystemSetup above.
        spParams = parameters["sensoryLayer_sp"]
        tmParams = parameters["sensoryLayer_tm"]
        locParams = parameters["locationLayer"]

        # SENSOR LAYER --------------------------------------------------------------
        # setup sensor encoder
        sensorEncoderParams = RDSE_Parameters()
        sensorEncoderParams.category = True
        sensorEncoderParams.size = parameters["enc"]["size"]
        sensorEncoderParams.sparsity = parameters["enc"]["sparsity"]
        sensorEncoderParams.seed = parameters["enc"]["seed"]
        self.sensorEncoder = RDSE(sensorEncoderParams)

        # Create SpatialPooler
        self.sensorLayer_sp = SpatialPooler(
            inputDimensions=(self.sensorEncoder.size, ),
            columnDimensions=(spParams["columnCount"], ),
            potentialPct=spParams["potentialPct"],
            potentialRadius=self.sensorEncoder.size,
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True,
        )
        self.sp_info = Metrics(self.sensorLayer_sp.getColumnDimensions(), 999999999)

        # Create an SDR to represent active columns. This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        self.sensorLayer_SDR_columns = SDR(spParams["columnCount"])

        # LOCATION LAYER ------------------------------------------------------------
        # Grid cell modules
        self.gridCellEncoder = GridCellEncoder(
            size=locParams["cellCount"],
            sparsity=locParams["sparsity"],
            periods=locParams["periods"],
            seed=locParams["seed"],
        )
        self.locationlayer_SDR_cells = SDR(self.gridCellEncoder.dimensions)

        initParams = {
            "columnCount": spParams["columnCount"],
            "cellsPerColumn": tmParams["cellsPerColumn"],
            "basalInputSize": locParams["cellCount"],
            "activationThreshold": tmParams["activationThreshold"],
            "reducedBasalThreshold": 13,
            "initialPermanence": tmParams["initialPerm"],
            "connectedPermanence": spParams["synPermConnected"],
            "minThreshold": tmParams["minThreshold"],
            "sampleSize": 20,
            "permanenceIncrement": tmParams["permanenceInc"],
            "permanenceDecrement": tmParams["permanenceDec"],
            "basalPredictedSegmentDecrement": 0.0,
            "apicalPredictedSegmentDecrement": 0.0,
            "maxSynapsesPerSegment": tmParams["maxSynapsesPerSegment"]
        }
        self.sensoryLayer_tm = ApicalTiebreakPairMemory(**initParams)

        # self.sensoryLayer_tm = TemporalMemory(
        #     columnDimensions=(spParams["columnCount"],),
        #     cellsPerColumn=tmParams["cellsPerColumn"],
        #     activationThreshold=tmParams["activationThreshold"],
        #     initialPermanence=tmParams["initialPerm"],
        #     connectedPermanence=spParams["synPermConnected"],
        #     minThreshold=tmParams["minThreshold"],
        #     maxNewSynapseCount=tmParams["newSynapseCount"],
        #     permanenceIncrement=tmParams["permanenceInc"],
        #     permanenceDecrement=tmParams["permanenceDec"],
        #     predictedSegmentDecrement=0.0,
        #     maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        #     maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        #     externalPredictiveInputs=locParams["cellCount"],
        # )
        self.tm_info = Metrics([self.sensoryLayer_tm.numberOfCells()], 999999999)

    def CellsToColumns(self, cells, cellsPerColumn, columnsCount):
        array = []
        for cell in cells.sparse:
            col = int(cell / cellsPerColumn)
            if col not in array:  # each column max once
                array += [col]
        columns = SDR(columnsCount)
        columns.sparse = array
        return columns

    def SystemCalculate(self, feature, learning):
        global fig_environment, fig_graphs

        # ENCODE DATA TO SDR--------------------------------------------------
        # Convert sensed feature to int
        self.sensedFeature = 1 if feature == "X" else 0
        self.sensorSDR = self.sensorEncoder.encode(self.sensedFeature)

        # ACTIVATE COLUMNS IN SENSORY LAYER ----------------------------------
        # Execute Spatial Pooling algorithm on Sensory Layer with sensorSDR as proximal input
        self.sensorLayer_sp.compute(self.sensorSDR, learning, self.sensorLayer_SDR_columns)

        # SIMULATE LOCATION LAYER --------------------------------------------
        # Execute Location Layer - it is just the GC encoder
        self.gridCellEncoder.encode(self.agent.get_position(), self.locationlayer_SDR_cells)

        # Execute Temporal Memory algorithm over the Sensory Layer, with a mix of
        # Location Layer activity and Sensory Layer activity as distal input
        externalDistalInput = self.locationlayer_SDR_cells
        tm_input = {
            "activeColumns": self.sensorLayer_SDR_columns.sparse,
            "basalInput": externalDistalInput.sparse,
            "basalGrowthCandidates": None,
            "learn": learning
        }
        self.sensoryLayer_tm.compute(**tm_input)

        # self.sensoryLayer_tm.activateCells(self.sensorLayer_SDR_columns, learning)
        # activateDendrites calculates active segments
        # self.sensoryLayer_tm.activateDendrites(learn=learning,
        #     externalPredictiveInputsActive=externalDistalInput,
        #     externalPredictiveInputsWinners=externalDistalInput)

        # predictive cells are calculated directly from active segments
        self.predictiveCellsSDR = SDR(parameters["sensoryLayer_sp"]["columnCount"]
                                      * parameters["sensoryLayer_tm"]["cellsPerColumn"])
        self.predictiveCellsSDR.sparse = self.sensoryLayer_tm.predictedCells

        if self.iterationNo != 0:
            # and calculate anomaly - compare how many of the active columns had some predictive cells
            self.rawAnomaly = Anomaly.calculateRawAnomaly(
                self.sensorLayer_SDR_columns,
                self.CellsToColumns(self.predictiveCellsSDR,
                                    parameters["sensoryLayer_tm"]["cellsPerColumn"],
                                    parameters["sensoryLayer_sp"]["columnCount"]))
        else:
            self.rawAnomaly = 0

        # PANDA VIS
        if PANDA_VIS_BAKE_DATA:
            # ------------------HTMpandaVis----------------------
            # fill up values
            pandaBaker.inputs["FeatureSensor"].stringValue = "Feature: {:.2f}".format(self.sensedFeature)
            pandaBaker.inputs["FeatureSensor"].bits = self.sensorSDR.sparse

            pandaBaker.inputs["LocationLayer"].stringValue = str(self.agent.get_position())
            pandaBaker.inputs["LocationLayer"].bits = self.locationlayer_SDR_cells.sparse

            pandaBaker.layers["SensoryLayer"].activeColumns = self.sensorLayer_SDR_columns.sparse
            pandaBaker.layers["SensoryLayer"].winnerCells = self.sensoryLayer_tm.getWinnerCells()
            pandaBaker.layers["SensoryLayer"].predictiveCells = self.predictiveCellsSDR.sparse
            pandaBaker.layers["SensoryLayer"].activeCells = self.sensoryLayer_tm.getActiveCells()

            # customizable datastreams to be shown on the DASH PLOTS
            pandaBaker.dataStreams["rawAnomaly"].value = self.rawAnomaly
            pandaBaker.dataStreams["numberOfWinnerCells"].value = len(self.sensoryLayer_tm.getWinnerCells())
            pandaBaker.dataStreams["numberOfPredictiveCells"].value = len(self.predictiveCellsSDR.sparse)
            pandaBaker.dataStreams["sensor_sparsity"].value = self.sensorSDR.getSparsity() * 100
            pandaBaker.dataStreams["location_sparsity"].value = self.locationlayer_SDR_cells.getSparsity() * 100

            pandaBaker.dataStreams["SensoryLayer_SP_overlap_metric"].value = self.sp_info.overlap.overlap
            # note: the TM stream below reads from tm_info; the original fed it
            # from sp_info, an apparent copy-paste slip
            pandaBaker.dataStreams["SensoryLayer_TM_overlap_metric"].value = self.tm_info.overlap.overlap
            pandaBaker.dataStreams["SensoryLayer_SP_activation_frequency"].value = self.sp_info.activationFrequency.mean()
            pandaBaker.dataStreams["SensoryLayer_TM_activation_frequency"].value = self.tm_info.activationFrequency.mean()
            pandaBaker.dataStreams["SensoryLayer_SP_entropy"].value = self.sp_info.activationFrequency.entropy()
            pandaBaker.dataStreams["SensoryLayer_TM_entropy"].value = self.tm_info.activationFrequency.entropy()

            pandaBaker.StoreIteration(self.iterationNo)
            # ------------------HTMpandaVis----------------------

        print("Position:" + str(self.agent.get_position()))
        print("Feature:" + str(self.sensedFeature))
        print("Anomaly score:" + str(self.rawAnomaly))
        self.anomalyHistData += [self.rawAnomaly]

        if PLOT_ENV:
            # Plotting and visualising environment-------------------------------------------
            if fig_environment == None or isNotebook():
                # create figure only if it doesn't exist yet or we are in interactive console
                fig_environment, _ = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
            else:
                fig_environment.axes[0].clear()
            plotEnvironment(fig_environment.axes[0], "Environment", self.env, self.agent.get_position())
            fig_environment.canvas.draw()

            plt.show(block=False)
            plt.pause(0.001)  # delay is needed for proper redraw

        self.iterationNo += 1

        if PLOT_GRAPHS:
            # ---------------------------
            if fig_graphs == None or isNotebook():
                # create figure only if it doesn't exist yet or we are in interactive console
                fig_graphs, _ = plt.subplots(nrows=1, ncols=1, figsize=(5, 2))
            else:
                fig_graphs.axes[0].clear()
            fig_graphs.axes[0].set_title("Anomaly score")
            fig_graphs.axes[0].plot(self.anomalyHistData)
            fig_graphs.canvas.draw()

        # if agent.get_position() != [3, 4]:  # HACK ALERT! Ignore at this pos (after reset)
        #     anomalyHistData += [sensoryLayer_tm.anomaly]

    def BuildPandaSystem(self):
        pandaBaker.inputs["FeatureSensor"] = cInput(self.sensorEncoder.size)

        pandaBaker.layers["SensoryLayer"] = cLayer(self.sensorLayer_sp, self.sensoryLayer_tm)
        pandaBaker.layers["SensoryLayer"].proximalInputs = ["FeatureSensor"]
        pandaBaker.layers["SensoryLayer"].distalInputs = ["LocationLayer"]

        pandaBaker.inputs["LocationLayer"] = cInput(self.gridCellEncoder.size)  # for now, Location layer is just a position encoder

        # data for dash plots
        streams = ["rawAnomaly", "numberOfWinnerCells", "numberOfPredictiveCells",
                   "sensor_sparsity", "location_sparsity",
                   "SensoryLayer_SP_overlap_metric", "SensoryLayer_TM_overlap_metric",
                   "SensoryLayer_SP_activation_frequency", "SensoryLayer_TM_activation_frequency",
                   "SensoryLayer_SP_entropy", "SensoryLayer_TM_entropy"]
        pandaBaker.dataStreams = dict((name, cDataStream()) for name in streams)  # create dicts for more comfortable code
        # could be also written like: pandaBaker.dataStreams["myStreamName"] = cDataStream()

        pandaBaker.PrepareDatabase()

    def RunExperiment1(self):
        global fig_expect

        # put agent in the environment
        self.agent.set_env(self.env, 1, 1, 1, 1)  # is on [1,1] and will go to [1,1]
        agentDir = Direction.RIGHT

        self.iterationNo = 0
        for i in range(3):
            for x in range(2, 18):
                for y in range(2, 18):
                    print("Iteration:" + str(self.iterationNo))
                    self.agent.move(x, y)
                    self.SystemCalculate(self.agent.get_feature(Direction.UP), learning=True)

        expectedObject = [x[:] for x in [[0] * 20] * 20]
        A = [x[:] for x in [[0] * 20] * 20]
        B = [x[:] for x in [[0] * 20] * 20]

        predSDR1 = SDR(self.predictiveCellsSDR)
        predSDR2 = SDR(self.predictiveCellsSDR)

        # calculate what kind of object the system will expect
        for x in range(2, 18):
            for y in range(2, 18):
                # for sensor UP !
                self.agent.move(x, y)

                self.SystemCalculate("X", learning=False)
                scoreWithFeature = self.rawAnomaly

                self.SystemCalculate(" ", learning=False)
                scoreWithoutFeature = self.rawAnomaly

                # y - 1 because we are using sensor UP
                A[x][y - 1] = scoreWithFeature
                B[x][y - 1] = scoreWithoutFeature
                expectedObject[x][y - 1] = 1 if scoreWithFeature < scoreWithoutFeature else 0

        print(A)
        print(B)
        print(expectedObject)

        # Plotting and visualising environment-------------------------------------------
        if fig_expect == None or isNotebook():
            # create figure only if it doesn't exist yet or we are in interactive console
            fig_expect, _ = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
        else:
            fig_expect.axes[0].clear()
        plotBinaryMap(fig_expect.axes[0], "Expectation", expectedObject)
        fig_expect.canvas.draw()

        plt.show(block=True)
        # plt.pause(20)  # delay is needed for proper redraw

    def RunExperiment2(self):
        global fig_expect

        # put agent in the environment
        self.agent.set_env(self.env, 1, 1, 1, 1)  # is on [1,1] and will go to [1,1]

        self.iterationNo = 0

        random.seed(42)  # the original assigned `random.seed = 42`, which clobbers the function
        for i in range(1000):
            print("Iteration:" + str(self.iterationNo))
            self.SystemCalculate(self.agent.get_feature(Direction.UP), learning=True)
            # this tells the agent where it will move next time & it performs the previously requested movement
            self.agent.nextMove(random.randint(3, 10), random.randint(3, 10))
class HTMCoreDetector(object):
    def __init__(self, inputMin, inputMax, probationaryPeriod, *args, **kwargs):
        self.inputMin = inputMin
        self.inputMax = inputMax
        self.probationaryPeriod = probationaryPeriod

        ## API for controlling settings of htm.core HTM detector:

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood. This will give worse results, but is
        # useful for checking the efficacy of AnomalyLikelihood. You will need
        # to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True
        self.verbose = False

        ## internal members
        # (listed here for easier understanding)
        # initialized in `initialize()`
        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        self.anLike = None
        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None
        # internal helper variables:
        self.inputs_ = []
        self.iteration_ = 0

    def handleRecord(self, ts, val):
        """Returns a tuple (anomalyScore, rawScore).

        @param ts Timestamp
        @param val float
        @return tuple (anomalyScore, <any other fields specified in `getAdditionalHeaders()`>, ...)
        """
        # Send it to the Numenta detector and get back the results
        return self.modelRun(ts, val)

    def initialize(self):
        # toggle parameters here
        # parameters = default_parameters
        parameters = parameters_numenta_comparable

        ## setup Enc, SP, TM, Likelihood
        # Make the Encoders. These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(
            timeOfDay=parameters["enc"]["time"]["timeOfDay"],
            weekend=parameters["enc"]["time"]["weekend"],
            season=parameters["enc"]["time"]["season"],
            dayOfWeek=parameters["enc"]["time"]["dayOfWeek"])

        scalarEncoderParams = EncParameters()
        scalarEncoderParams.size = parameters["enc"]["value"]["size"]
        scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
        scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
        self.encValue = Encoder(scalarEncoderParams)

        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(spParams["columnCount"], ),
            potentialPct=spParams["potentialPct"],
            potentialRadius=spParams["potentialRadius"],
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True)
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnCount"], ),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPerm"],
            connectedPermanence=spParams["synPermConnected"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["newSynapseCount"],
            permanenceIncrement=tmParams["permanenceInc"],
            permanenceDecrement=tmParams["permanenceDec"],
            predictedSegmentDecrement=0.0,
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # setup likelihood, these settings are used in NAB
        if self.useLikelihood:
            anParams = parameters["anomaly"]["likelihood"]
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = AnomalyLikelihood(
                learningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=anParams["reestimationPeriod"])
        # Predictor
        # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
        # predictor_resolution = 1

    def modelRun(self, ts, val):
        """
        Run a single pass through the HTM model.
        @params ts - Timestamp
        @params val - float input value
        @return rawAnomalyScore computed for the `val` in this step
        """
        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        self.inputs_.append(val)
        self.iteration_ += 1

        # 1. Encoding
        # Call the encoders to create bit representations for each value. These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns. This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        self.tm.compute(activeColumns, learn=True)
        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4.1 (optional) Predictor
        # TODO optional
        # TODO optional: also return an error metric on predictions (RMSE, R2, ...)

        # 4.2 Anomaly
        # handle contextual (raw, likelihood) anomalies
        # -temporal (raw)
        raw = self.tm.anomaly
        temporalAnomaly = raw

        if self.useLikelihood:
            # Compute log(anomaly likelihood)
            like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
            logScore = self.anomalyLikelihood.computeLogLikelihood(like)
            temporalAnomaly = logScore
        # TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

        anomalyScore = temporalAnomaly  # this is the "main" anomaly, compared in NAB

        # 5. print stats
        if self.verbose and self.iteration_ % 1000 == 0:
            print(self.enc_info)
            print(self.sp_info)
            print(self.tm_info)

        return anomalyScore, raw
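# A hedged driver sketch for the detector above (assumptions: the snippet's
# own dependencies such as parameters_numenta_comparable, EncParameters and
# Encoder are in scope; the toy data and probationaryPeriod are illustrative).
import datetime

rows = [(datetime.datetime(2010, 7, 2, h), 10.0 + h) for h in range(24)]  # toy records
detector = HTMCoreDetector(inputMin=0.0, inputMax=100.0, probationaryPeriod=12)
detector.initialize()
for ts, val in rows:
    anomalyScore, rawScore = detector.handleRecord(ts, val)
    print(ts, round(anomalyScore, 3), round(rawScore, 3))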