def testRandomOverlap(self):
    """
    Verify that distant values have little to no semantic similarity.

    Encodes 1000 inputs, each one a full ``radius`` away from the previous
    input, then checks the overlap, sparsity and activation-frequency
    statistics gathered for the output SDR.
    """
    params = RDSE_Parameters()
    params.size = 2000
    params.sparsity = .08
    params.radius = 12
    params.seed = 42
    encoder = RDSE(params)

    sample_count = 1000
    output = SDR(encoder.parameters.size)
    # The statistics object is built on `output` and never fed explicitly
    # below, so it presumably records every encoding written into that SDR.
    stats = Metrics(output, sample_count + 1)
    for step in range(sample_count):
        encoder.encode(step * encoder.parameters.radius, output)
    print(stats)

    target = encoder.parameters.sparsity

    # Inputs one radius apart should share very few bits.
    assert stats.overlap.max() < .15
    assert stats.overlap.mean() < .10

    # Every encoding should be close to the requested sparsity.
    assert stats.sparsity.min() > target - .01
    assert stats.sparsity.max() < target + .01
    assert stats.sparsity.mean() > target - .005
    assert stats.sparsity.mean() < target + .005

    # Bits should be used about equally often.
    assert stats.activationFrequency.min() > target - .05
    assert stats.activationFrequency.max() < target + .05
    assert stats.activationFrequency.mean() > target - .005
    assert stats.activationFrequency.mean() < target + .005
    assert stats.activationFrequency.entropy() > .99
def testStatistics(self):
    """
    Check sparsity, activation frequency and overlap of a clipping
    ScalarEncoder while sweeping slightly past both ends of its range.
    """
    config = ScalarEncoderParameters()
    config.size = 100
    config.activeBits = 10
    config.minimum = 0
    config.maximum = 20
    config.clipInput = True
    enc = ScalarEncoder(config)
    # From here on only the encoder's own copy of the parameters is used.
    del config

    out = SDR(enc.parameters.size)
    mtr = Metrics(out, 9999)

    # The activation frequency of bits near the endpoints of the range is a
    # little weird: the bits at the very end are not used as often as the
    # ones in the middle of the range, unless clipInput is enabled.  With
    # clipInput enabled, the bits one radius from the end get used twice as
    # often as they should, because they respond to inputs outside of the
    # valid range as well as inputs inside of it.
    half_radius = enc.parameters.radius / 2
    sweep_start = enc.parameters.minimum - half_radius
    sweep_stop = enc.parameters.maximum + half_radius
    for value in np.linspace(sweep_start, sweep_stop, 100 + 10):
        enc.encode(value, out)
        # print( value, out.sparse )

    print(str(mtr))

    expected_sparsity = .10
    assert mtr.sparsity.min() > .95 * expected_sparsity
    assert mtr.sparsity.max() < 1.05 * expected_sparsity
    assert mtr.activationFrequency.min() > .50 * expected_sparsity
    assert mtr.activationFrequency.max() < 1.75 * expected_sparsity
    assert mtr.overlap.min() > .85
def testAverageOverlap(self):
    """
    Verify that nearby values have the correct amount of semantic similarity.

    Encodes the integers 0..9999 in order and checks that the overlap
    statistics sit around ``1 - 1/radius``; sparsity and activation
    frequency are measured as well.
    """
    params = RDSE_Parameters()
    params.size = 2000
    params.sparsity = .08
    params.radius = 12
    params.seed = 42
    encoder = RDSE(params)

    output = SDR(encoder.parameters.size)
    sample_count = 10000
    # Built on `output` and never fed explicitly below, so it presumably
    # records every encoding written into that SDR.
    stats = Metrics(output, sample_count + 1)
    for value in range(sample_count):
        encoder.encode(value, output)
    print(stats)

    # Overlap level the assertions below are centred on.
    expected_overlap = 1 - 1. / encoder.parameters.radius
    assert stats.overlap.min() > expected_overlap - .04
    assert stats.overlap.max() < expected_overlap + .04
    assert stats.overlap.mean() > expected_overlap - .001
    assert stats.overlap.mean() < expected_overlap + .001

    target = encoder.parameters.sparsity
    assert stats.sparsity.min() > target - .01
    assert stats.sparsity.max() < target + .01
    assert stats.sparsity.mean() > target - .005
    assert stats.sparsity.mean() < target + .005

    assert stats.activationFrequency.min() > target - .05
    assert stats.activationFrequency.max() < target + .05
    assert stats.activationFrequency.mean() > target - .005
    assert stats.activationFrequency.mean() < target + .005
    assert stats.activationFrequency.entropy() > .99
def main(parameters=default_parameters, argv=None, verbose=True):
    """
    Train a SpatialPooler plus Classifier on MNIST and score it on test data.

    :param parameters: mapping with the SpatialPooler settings read below.
    :param argv: not used by this function.
    :param verbose: not used by this function.
    :returns: fraction of test images whose label was predicted correctly.
    """
    # Load data.  HTM: ~95.6%
    # (Alternative dataset: 'Fashion-MNIST' with the same arguments,
    #  HTM baseline: ~83%)
    train_labels, train_images, test_labels, test_images = load_ds(
        'mnist_784', 10000, shape=[28, 28])

    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)

    # Setup the AI.
    enc = SDR(train_images[0].shape)
    sp_settings = dict(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        # this is important, 0="random" seed which changes on each invocation
        seed=0,
        spVerbosity=99,
        wrapAround=False,
    )
    sp = SpatialPooler(**sp_settings)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    # Training Loop
    for index in range(len(train_images)):
        image, label = training_data[index]
        encode(image, enc)
        sp.compute(enc, True, columns)
        # TODO SDRClassifier could accept string as a label, currently must be int
        sdrc.learn(columns, label)

    print(str(sp))
    print(str(columns_stats))

    # Testing Loop
    correct = 0
    for image, label in test_data:
        encode(image, enc)
        sp.compute(enc, False, columns)
        predicted = np.argmax(sdrc.infer(columns))
        if label == predicted:
            correct += 1
    score = correct / len(test_data)

    print('Score:', 100 * score, '%')
    return score
def testDayOfWeek(self):
    """
    Exercise the day-of-week field of the DateEncoder at three widths:
    an exact bit pattern, same-day consistency, and long-term statistics.
    """
    def build_encoder(width):
        # Only the day-of-week field is enabled; nothing else is encoded.
        cfg = DateEncoderParameters()
        cfg.dayOfWeek_width = width
        cfg.verbose = False
        return DateEncoder(cfg)

    # 1 bit for days in a week (x7 days -> 7 bits).
    enc = build_encoder(1)
    # In the middle of fall, Thursday, not a weekend, afternoon - 4th Nov,
    # 2010, 14:55
    thursday_afternoon = datetime.datetime(2010, 11, 4, 14, 55)
    bits = enc.encode(thursday_afternoon)
    # Week is MTWTFSS,
    # Monday = 0 (for python datetime.datetime.timetuple())
    expected = [0, 0, 0, 1, 0, 0, 0]  # Thu
    self.assertEqual(bits.size, 7)
    self.assertEqual(expected, bits.dense.tolist())

    # Check a day is encoded consistently during most of its hours.
    enc = build_encoder(40)
    morning_bits = enc.encode(datetime.datetime(2010, 11, 4, 8, 00))        # 8 AM to
    evening_bits = enc.encode(datetime.datetime(2010, 11, 4, 8 + 12, 00))   # 8 PM
    assert morning_bits.getOverlap(evening_bits) > 40 * .25

    # Check the long term statistics of the encoder.
    enc = build_encoder(300)
    sdr = SDR(enc.dimensions)
    test_period = 1000
    metrics = Metrics(sdr, test_period)
    start = datetime.datetime.now()
    one_hour = datetime.timedelta(hours=1)
    # One encoding per hour, starting from the current wall-clock time.
    for elapsed_hours in range(test_period):
        enc.encode(start + elapsed_hours * one_hour, sdr)
    # print( metrics )
    assert metrics.sparsity.min() >= .05
    assert metrics.sparsity.max() <= .20
    assert metrics.activationFrequency.min() >= .05
    assert metrics.activationFrequency.max() <= .20
def testStatistics(self):
    """
    Walk 1000 steps along the negative x axis and check the sparsity,
    activation frequency and overlap statistics of the GridCellEncoder.
    """
    encoder = GridCellEncoder(
        size=200,
        sparsity=.25,
        periods=[6, 8.5, 12, 17, 24],
        seed=42,
    )
    output = SDR(encoder.dimensions)
    stats = Metrics(output, 999999)
    for step in range(1000):
        encoder.encode([-step, 0], output)
    print(stats)

    target = .25
    assert stats.sparsity.min() > target - .02
    assert stats.sparsity.max() < target + .02
    assert stats.activationFrequency.min() > target - .05
    assert stats.activationFrequency.max() < target + .05

    # These are approximate...
    assert stats.overlap.min() > .5
    assert stats.overlap.max() < .9
    assert stats.overlap.mean() > .7
    assert stats.overlap.mean() < .8
def testPeriodic(self):
    """
    Sweep a periodic ScalarEncoder over its range in steps of 0.1, ten times
    over (plus one extra sample), and check the resulting statistics.
    """
    config = ScalarEncoderParameters()
    config.size = 100
    config.activeBits = 10
    config.minimum = 0
    config.maximum = 20
    config.periodic = True
    enc = ScalarEncoder(config)

    out = SDR(enc.parameters.size)
    mtr = Metrics(out, 9999)

    steps_per_sweep = 201
    for tick in range(steps_per_sweep * 10 + 1):
        # 0.0, 0.1, ..., 20.0, then wrap around to 0.0 again.
        value = (tick % steps_per_sweep) / 10.
        enc.encode(value, out)
        # print( value, out.sparse )

    print(str(mtr))

    expected_sparsity = .10
    assert mtr.sparsity.min() > .95 * expected_sparsity
    assert mtr.sparsity.max() < 1.05 * expected_sparsity
    assert mtr.activationFrequency.min() > .9 * expected_sparsity
    assert mtr.activationFrequency.max() < 1.1 * expected_sparsity
    assert mtr.overlap.min() > .85
def initialize(self):
    """
    Build the encoders, SpatialPooler, TemporalMemory and, when enabled, the
    anomaly-likelihood model, storing each of them on ``self``.
    """
    # toggle parameters here
    parameters = (get_params('params.json') if self.use_optimization
                  else parameters_numenta_comparable)

    # setup spatial anomaly
    if self.useSpatialAnomaly:
        # Keep track of value range for spatial anomaly detection
        self.minVal = None
        self.maxVal = None

    ## setup Enc, SP, TM, Likelihood
    # Make the Encoders.  These will convert input data into binary
    # representations.
    timeParams = parameters["enc"]["time"]
    self.encTimestamp = DateEncoder(timeOfDay=timeParams["timeOfDay"],
                                    weekend=timeParams["weekend"])

    valueParams = RDSE_Parameters()
    valueParams.size = parameters["enc"]["value"]["size"]
    valueParams.sparsity = parameters["enc"]["value"]["sparsity"]
    valueParams.resolution = parameters["enc"]["value"]["resolution"]
    self.encValue = RDSE(valueParams)

    # The SpatialPooler input is as wide as both encodings together.
    encodingWidth = self.encTimestamp.size + self.encValue.size
    self.enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
    # SpatialPooler
    spParams = parameters["sp"]
    columnDims = (spParams["columnCount"],)
    spSettings = dict(
        inputDimensions=(encodingWidth,),
        columnDimensions=columnDims,
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True,
    )
    self.sp = SpatialPooler(**spSettings)
    self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

    # TemporalMemory
    tmParams = parameters["tm"]
    tmSettings = dict(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The connected threshold is taken from the SP settings.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    )
    self.tm = TemporalMemory(**tmSettings)
    self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

    # setup likelihood, these settings are used in NAB
    if self.useLikelihood:
        anParams = parameters["anomaly"]["likelihood"]
        # First half of the probationary period is the learning period,
        # the remainder is used for estimation samples.
        learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        estimationSamples = self.probationaryPeriod - learningPeriod
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=learningPeriod,
            estimationSamples=estimationSamples,
            reestimationPeriod=anParams["reestimationPeriod"])

    # Predictor (currently disabled)
    # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
    # predictor_resolution = 1

    # initialize pandaBaker
    if PANDA_VIS_BAKE_DATA:
        self.BuildPandaSystem(self.sp, self.tm,
                              parameters["enc"]["value"]["size"],
                              self.encTimestamp.size)
def SystemSetup(parameters, verbose=True):
    """
    Create the environment, agent, encoders, SpatialPooler and TemporalMemory
    and publish them through module-level globals.

    :param parameters: nested mapping with the "enc", "sensorLayer_sp",
        "locationLayer" and "sensorLayer_tm" sections read below.
    :param verbose: when true, pretty-print ``parameters`` first.
    """
    global agent, sensorEncoder, env, sensorLayer_sp, sensorLayer_SDR_columns
    global gridCellEncoder, locationlayer_SDR_cells
    global sensorLayer_tm

    if verbose:
        from pprint import pprint
        print("Parameters:")
        pprint(parameters, indent=4)
        print("")

    # create environment and the agent
    env = htm2d.environment.TwoDimensionalEnvironment(20, 20)
    agent = htm2d.agent.Agent()

    # load object from yml file
    object_path = os.path.join(_OBJECTS_DIR, OBJECT_FILENAME)
    with open(object_path, "r") as stream:
        try:
            env.load_object(stream)
        except yaml.YAMLError as exc:
            # A YAML error is only reported; setup continues afterwards.
            print(exc)

    # SENSOR LAYER --------------------------------------------------------------
    # setup sensor encoder
    encSection = parameters["enc"]
    encoderConfig = RDSE_Parameters()
    encoderConfig.category = True
    encoderConfig.size = encSection["size"]
    encoderConfig.sparsity = encSection["sparsity"]
    encoderConfig.seed = encSection["seed"]
    sensorEncoder = RDSE(encoderConfig)

    # Create SpatialPooler
    spParams = parameters["sensorLayer_sp"]
    spSettings = dict(
        inputDimensions=(sensorEncoder.size, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=sensorEncoder.size,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True,
    )
    sensorLayer_sp = SpatialPooler(**spSettings)
    # Local only: this Metrics object is not published as a global.
    sp_info = Metrics(sensorLayer_sp.getColumnDimensions(), 999999999)

    # Create an SDR to represent active columns.  This will be populated by
    # the compute method.  It must have the same dimensions as the Spatial
    # Pooler.
    sensorLayer_SDR_columns = SDR(spParams["columnCount"])

    # LOCATION LAYER ------------------------------------------------------------
    # Grid cell modules
    locParams = parameters["locationLayer"]
    gridCellEncoder = GridCellEncoder(
        size=locParams["cellCount"],
        sparsity=locParams["sparsity"],
        periods=locParams["periods"],
        seed=locParams["seed"],
    )
    locationlayer_SDR_cells = SDR(gridCellEncoder.dimensions)

    tmParams = parameters["sensorLayer_tm"]
    tmSettings = dict(
        columnDimensions=(spParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        # Sized to the grid-cell layer's cell count.
        externalPredictiveInputs=locParams["cellCount"],
    )
    sensorLayer_tm = TemporalMemory(**tmSettings)
    # Local only: this Metrics object is not published as a global.
    tm_info = Metrics([sensorLayer_tm.numberOfCells()], 999999999)
def testStatistics(self):
    """
    Mass-encode random documents with tokenSimilarity off and on, then check
    the activation-frequency, overlap and sparsity statistics of each case.
    """
    # 100 random simple English words run mass encoding stats against
    testCorpus = [
        "find", "any", "new", "work", "part", "take", "get", "place", "made",
        "live", "where", "after", "back", "little", "only", "round", "man",
        "year", "came", "show", "every", "good", "me", "give", "our", "under",
        "name", "very", "through", "just", "form", "sentence", "great",
        "think", "say", "help", "low", "line", "differ", "turn", "cause",
        "much", "mean", "before", "move", "right", "boy", "old", "too",
        "same", "tell", "does", "set", "three", "want", "air", "well", "also",
        "play", "small", "end", "put", "home", "read", "hand", "port",
        "large", "spell", "add", "even", "land", "here", "must", "big",
        "high", "such", "follow", "act", "why", "ask", "men", "change",
        "went", "light", "kind", "off", "need", "house", "picture", "try",
        "us", "again", "animal", "point", "mother", "world", "near", "build",
        "self", "earth"]
    num_samples = 1000  # number of documents to run
    num_tokens = 10     # tokens per document

    # Case 1 = tokenSimilarity OFF
    params1 = SimHashDocumentEncoderParameters()
    params1.size = 400
    params1.sparsity = 0.33
    params1.tokenSimilarity = False
    encoder1 = SimHashDocumentEncoder(params1)

    # Case 2 = tokenSimilarity ON
    # NOTE: params2 is the same object as params1, not a copy; only the
    # flag is flipped before the second encoder is built.
    params2 = params1
    params2.tokenSimilarity = True
    encoder2 = SimHashDocumentEncoder(params2)

    sdrs1 = []
    sdrs2 = []
    for _ in range(num_samples):
        # Each document receives num_tokens - 1 randomly drawn words.
        document = [random.choice(testCorpus) for _ in range(num_tokens - 1)]
        sdrs1.append(encoder1.encode(document))
        sdrs2.append(encoder2.encode(document))

    report1 = Metrics([encoder1.size], len(sdrs1) + 1)
    report2 = Metrics([encoder2.size], len(sdrs2) + 1)
    for encoded in sdrs1:
        report1.addData(encoded)
    for encoded in sdrs2:
        report2.addData(encoded)

    # Assertions for Case 1 = tokenSimilarity OFF
    target1 = params1.sparsity
    assert report1.activationFrequency.entropy() > 0.87
    assert report1.activationFrequency.min() > 0.01
    assert report1.activationFrequency.max() < 0.99
    assert report1.activationFrequency.mean() > target1 - 0.005
    assert report1.activationFrequency.mean() < target1 + 0.005
    assert report1.overlap.min() > 0.21
    assert report1.overlap.max() > 0.53
    assert report1.overlap.mean() > 0.38
    assert report1.sparsity.min() > target1 - 0.01
    assert report1.sparsity.max() < target1 + 0.01
    assert report1.sparsity.mean() > target1 - 0.005
    assert report1.sparsity.mean() < target1 + 0.005

    # Assertions for Case 2 = tokenSimilarity ON
    target2 = params2.sparsity
    assert report2.activationFrequency.entropy() > 0.59
    assert report2.activationFrequency.min() >= 0
    assert report2.activationFrequency.max() <= 1
    assert report2.activationFrequency.mean() > target2 - 0.005
    assert report2.activationFrequency.mean() < target2 + 0.005
    assert report2.overlap.min() > 0.38
    assert report2.overlap.max() > 0.78
    assert report2.overlap.mean() > 0.61
    assert report2.sparsity.min() > target2 - 0.01
    assert report2.sparsity.max() < target2 + 0.01
    assert report2.sparsity.mean() > target2 - 0.005
    assert report2.sparsity.mean() < target2 + 0.005
def building_htm(len_data):
    """
    Build the encoders, SpatialPooler, TemporalMemory, anomaly-likelihood
    model and predictor, publishing all of them as module-level globals.

    :param len_data: number of records in the dataset; the probationary
        period is computed as a fraction of it.
    """
    global enc_info
    global sp_info
    global tm_info
    global anomaly_history
    global predictor
    global predictor_resolution
    global tm
    global sp
    global scalarEncoder
    global encodingWidth
    global dateEncoder

    # Initial message
    print("Building HTM for predicting trends...")

    # Default parameters in HTM
    settings = {
        # There are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay)
        'enc': {
            "value": {
                'resolution': 0.88,
                'size': 700,
                'sparsity': 0.02
            },
            "time": {
                'timeOfDay': (30, 1)
            }  # , 'weekend': 21}
        },
        'predictor': {
            'sdrc_alpha': 0.1
        },
        'sp': {
            'boostStrength': 3.0,
            'columnCount': 1638,
            'localAreaDensity': 0.04395604395604396,
            'potentialPct': 0.85,
            'synPermActiveInc': 0.04,
            'synPermConnected': 0.13999999999999999,
            'synPermInactiveDec': 0.006
        },
        'tm': {
            'activationThreshold': 17,
            'cellsPerColumn': 13,
            'initialPerm': 0.21,
            'maxSegmentsPerCell': 128,
            'maxSynapsesPerSegment': 64,
            'minThreshold': 10,
            'newSynapseCount': 32,
            'permanenceDec': 0.1,
            'permanenceInc': 0.1
        },
        'anomaly': {
            'likelihood': {
                'probationaryPct': 0.1,
                'reestimationPeriod': 100
            }
        }
    }

    # Make the encoder
    print("- Make the encoder")
    dateEncoder = DateEncoder(timeOfDay=settings["enc"]["time"]["timeOfDay"])
    valueConfig = RDSE_Parameters()
    valueConfig.size = settings["enc"]["value"]["size"]
    valueConfig.sparsity = settings["enc"]["value"]["sparsity"]
    valueConfig.resolution = settings["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(valueConfig)
    # The SpatialPooler input is as wide as both encodings together.
    encodingWidth = dateEncoder.size + scalarEncoder.size
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the SP
    print("- Make the SP")
    spParams = settings["sp"]
    spSettings = dict(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True,
    )
    sp = SpatialPooler(**spSettings)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    # Temporal Memory Parameters
    print("- Make the TM")
    tmParams = settings["tm"]
    tmSettings = dict(
        columnDimensions=(spParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The connected threshold is taken from the SP settings.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    )
    tm = TemporalMemory(**tmSettings)
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Setup Likelihood
    print("- Make Anomaly Score/Likelihood")
    anParams = settings["anomaly"]["likelihood"]
    probationaryPeriod = int(
        math.floor(float(anParams["probationaryPct"]) * len_data))
    # First half of the probationary period is for learning, the rest for
    # estimation samples.
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    estimationSamples = probationaryPeriod - learningPeriod
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=estimationSamples,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Make predictor
    print("- Make the predictor")
    predictor = Predictor(steps=[1, 5],
                          alpha=settings["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # End message
    print("- Finish the building of HTM!")
nargs='+', default=[6 * (2**.5)**i for i in range(5)], help='') args = parser.parse_args() print('Module Periods', args.periods) gc = GridCellEncoder( size=100, sparsity=args.sparsity, periods=args.periods, ) gc_sdr = SDR(gc.dimensions) gc_statistics = Metrics(gc_sdr, args.arena_size**2) assert (args.arena_size >= 10) rf = np.empty((gc.size, args.arena_size, args.arena_size)) for x in range(args.arena_size): for y in range(args.arena_size): gc.encode([x, y], gc_sdr) rf[:, x, y] = gc_sdr.dense.ravel() print(gc_statistics) rows = 4 cols = 5 n_subplots = rows * cols assert (gc.size > n_subplots) samples = np.linspace(0, gc.size - 1, n_subplots, dtype=np.int)
def initialize(self, input_min=0, input_max=0):
    """
    Build the encoders, SpatialPooler, TemporalMemory, anomaly-likelihood
    model and smoothing kernel from ``self.parameters``.

    :param input_min: smallest input value seen so far.
    :param input_max: largest input value seen so far; together with
        ``input_min`` it sets the resolution of the value encoder.
    """
    config = self.parameters

    # setup spatial anomaly
    if self.useSpatialAnomaly:
        self.spatial_tolerance = config["spatial_tolerance"]

    ## setup Enc, SP, TM
    # Make the Encoders.  These will convert input data into binary
    # representations.
    self.encTimestamp = DateEncoder(timeOfDay=config["enc"]["time"]["timeOfDay"])

    valueConfig = RDSE_Parameters()
    valueConfig.size = config["enc"]["value"]["size"]
    valueConfig.activeBits = config["enc"]["value"]["activeBits"]
    # 1/130 of the observed input range, but never below 0.001.
    valueConfig.resolution = max(0.001, (input_max - input_min) / 130)
    valueConfig.seed = config["enc"]["value"]["seed"]
    self.encValue = RDSE(valueConfig)

    encodingWidth = self.encTimestamp.size + self.encValue.size
    self.enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
    # SpatialPooler
    spParams = config["sp"]
    spColumnDims = (spParams["columnDimensions"],)
    # These settings are handed through under the same name they have in
    # the parameter mapping.
    spPassThrough = {
        key: spParams[key]
        for key in (
            "potentialPct",
            "globalInhibition",
            "localAreaDensity",
            "numActiveColumnsPerInhArea",
            "stimulusThreshold",
            "synPermInactiveDec",
            "synPermActiveInc",
            "synPermConnected",
            "boostStrength",
            "wrapAround",
            "minPctOverlapDutyCycle",
            "dutyCyclePeriod",
            "seed",
        )
    }
    self.sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=spColumnDims,
        potentialRadius=encodingWidth,
        **spPassThrough
    )
    self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

    # TemporalMemory
    tmParams = config["tm"]
    tmColumnDims = (spParams["columnDimensions"],)
    tmPassThrough = {
        key: tmParams[key]
        for key in (
            "cellsPerColumn",
            "activationThreshold",
            "initialPermanence",
            "connectedPermanence",
            "minThreshold",
            "maxNewSynapseCount",
            "permanenceIncrement",
            "permanenceDecrement",
            "predictedSegmentDecrement",
            "maxSegmentsPerCell",
            "maxSynapsesPerSegment",
            "seed",
        )
    }
    self.tm = TemporalMemory(columnDimensions=tmColumnDims, **tmPassThrough)
    self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

    # Anomaly likelihood: first half of the probationary period is for
    # learning, the remainder for estimation samples.
    anParams = config["anomaly"]["likelihood"]
    self.learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
    estimationSamples = self.probationaryPeriod - self.learningPeriod
    self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=self.learningPeriod,
        estimationSamples=estimationSamples,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Smoothing kernel: length and standard deviation both equal the
    # capacity of the score history.
    historyLength = self.historic_raw_anomaly_scores.maxlen
    self.kernel = self._gauss_kernel(historyLength, historyLength)
class UnivHTMDetector(object):
    """
    This detector uses an HTM based anomaly detection technique.

    Records seen during the probationary period are only buffered.  On the
    first record after that period the model is built (the value encoder's
    resolution is derived from the buffered value range), the buffered
    records are replayed through it, and from then on every record is run
    through encoder -> SpatialPooler -> TemporalMemory -> anomaly likelihood,
    with the resulting score smoothed by a one-sided gaussian kernel.
    """

    def __init__(self, name, probationaryPeriod, smoothingKernelSize, htmParams=None, verbose=False):
        """
        :param name: label used in log messages.
        :param probationaryPeriod: number of initial records that are only
            buffered (``modelRun`` returns 0.01 for them).
        :param smoothingKernelSize: capacity of the score history, which is
            also used as the length and standard deviation of the kernel.
        :param htmParams: NOTE(review): accepted but never read; the detector
            always uses ``parameters_best``.
        :param verbose: print progress messages around initialization.
        """
        self.useSpatialAnomaly = True
        self.verbose = verbose
        self.name = name  # for logging
        self.probationaryPeriod = probationaryPeriod
        self.parameters = parameters_best

        # Value range seen so far, for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None
        self.spatial_tolerance = None

        # Model components, created in `initialize()`.
        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        self.anomalyLikelihood = None

        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None

        # for initialization
        self.init_data = []
        self.is_initialized = False
        self.iteration_ = 0

        # for smoothing with gaussian
        self.historic_raw_anomaly_scores = deque(maxlen=smoothingKernelSize)
        self.kernel = None
        self.learningPeriod = None

    def initialize(self, input_min=0, input_max=0):
        """
        Build encoders, SpatialPooler, TemporalMemory, anomaly likelihood and
        the smoothing kernel from ``self.parameters``.

        :param input_min: smallest value observed so far.
        :param input_max: largest value observed so far; together with
            ``input_min`` it sets the resolution of the value encoder.
        """
        # setup spatial anomaly
        if self.useSpatialAnomaly:
            self.spatial_tolerance = self.parameters["spatial_tolerance"]

        ## setup Enc, SP, TM
        # Make the Encoders. These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(timeOfDay=self.parameters["enc"]["time"]["timeOfDay"])

        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = self.parameters["enc"]["value"]["size"]
        scalarEncoderParams.activeBits = self.parameters["enc"]["value"]["activeBits"]
        # 1/130 of the observed range, but never below 0.001.
        scalarEncoderParams.resolution = max(0.001, (input_max - input_min) / 130)
        scalarEncoderParams.seed = self.parameters["enc"]["value"]["seed"]

        self.encValue = RDSE(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = self.parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnDimensions"],),
            potentialRadius=encodingWidth,
            potentialPct=spParams["potentialPct"],
            globalInhibition=spParams["globalInhibition"],
            localAreaDensity=spParams["localAreaDensity"],
            numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=spParams["wrapAround"],
            minPctOverlapDutyCycle=spParams["minPctOverlapDutyCycle"],
            dutyCyclePeriod=spParams["dutyCyclePeriod"],
            seed=spParams["seed"],
        )
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = self.parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnDimensions"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPermanence"],
            connectedPermanence=tmParams["connectedPermanence"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["maxNewSynapseCount"],
            permanenceIncrement=tmParams["permanenceIncrement"],
            permanenceDecrement=tmParams["permanenceDecrement"],
            predictedSegmentDecrement=tmParams["predictedSegmentDecrement"],
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
            seed=tmParams["seed"]
        )
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # Anomaly likelihood: first half of the probationary period is for
        # learning, the remainder for estimation samples.
        anParams = self.parameters["anomaly"]["likelihood"]
        self.learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=self.learningPeriod,
            estimationSamples=self.probationaryPeriod - self.learningPeriod,
            reestimationPeriod=anParams["reestimationPeriod"])

        # Kernel length and standard deviation both equal the capacity of
        # the score history.
        self.kernel = self._gauss_kernel(self.historic_raw_anomaly_scores.maxlen,
                                         self.historic_raw_anomaly_scores.maxlen)

    def modelRun(self, ts, val):
        """
        Run a single pass through the HTM model.

        :param ts: timestamp of the record.
        :param val: input value (converted with ``float`` for encoding).
        :returns: 0.01 while the probationary period lasts; afterwards the
            gaussian-smoothed maximum of the spatial anomaly flag and the
            log-likelihood anomaly score.
        """
        self.iteration_ += 1

        # 0. During the probation period, gather the data and return 0.01.
        if self.iteration_ <= self.probationaryPeriod:
            self.init_data.append((ts, val))
            return 0.01

        if not self.is_initialized:
            if self.verbose:
                print("[{}] Initializing".format(self.name))
            temp_iteration = self.iteration_
            vals = [i[1] for i in self.init_data]
            self.initialize(input_min=min(vals), input_max=max(vals))
            self.is_initialized = True
            # Replay the buffered records.  The loop names must differ from
            # this method's `ts`/`val` parameters: reusing them would rebind
            # the parameters, so the current record would be dropped and the
            # last buffered record processed twice.
            for past_ts, past_val in self.init_data:
                self.modelRun(past_ts, past_val)
            # The replay advanced the counter; restore it.
            self.iteration_ = temp_iteration
            if self.verbose:
                print("[{}] Initialization done".format(self.name))

        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        # 1. Encoding
        # Call the encoders to create bit representations for each value. These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns, This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        self.tm.compute(activeColumns, learn=True)
        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4. Anomaly
        # handle spatial, contextual (raw, likelihood) anomalies
        # -Spatial
        spatialAnomaly = 0.0
        if self.useSpatialAnomaly:
            # Update min/max values and check if there is a spatial anomaly.
            # The check is skipped while min and max are equal (including
            # the initial state where both are None).
            if self.minVal != self.maxVal:
                tolerance = (self.maxVal - self.minVal) * self.spatial_tolerance
                maxExpected = self.maxVal + tolerance
                minExpected = self.minVal - tolerance
                if val > maxExpected or val < minExpected:
                    spatialAnomaly = 1.0
            if self.maxVal is None or val > self.maxVal:
                self.maxVal = val
            if self.minVal is None or val < self.minVal:
                self.minVal = val

        # -Temporal
        raw = self.tm.anomaly
        like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
        logScore = self.anomalyLikelihood.computeLogLikelihood(like)
        temporalAnomaly = logScore

        anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

        # 5. Apply smoothing
        self.historic_raw_anomaly_scores.append(anomalyScore)
        historic_scores = np.asarray(self.historic_raw_anomaly_scores)
        convolved = np.convolve(historic_scores, self.kernel, 'valid')
        anomalyScore = convolved[-1]

        return anomalyScore

    @staticmethod
    def estimateNormal(sampleData, performLowerBoundCheck=True):
        """
        Estimate the parameters of a normal distribution from ``sampleData``.

        :param sampleData: Numpy array of samples.
        :param performLowerBoundCheck: when true, the mean is raised to at
            least 0.03 and the variance to at least 0.0003.
        :returns: tuple ``(mean, variance, st_dev)``.
        """
        mean = np.mean(sampleData)
        variance = np.var(sampleData)
        st_dev = 0

        if performLowerBoundCheck:
            # Handle edge case of almost no deviations and super low anomaly scores. We
            # find that such low anomaly means can happen, but then the slightest blip
            # of anomaly score can cause the likelihood to jump up to red.
            if mean < 0.03:
                mean = 0.03

            # Catch all for super low variance to handle numerical precision issues
            if variance < 0.0003:
                variance = 0.0003

        # Compute standard deviation
        if variance > 0:
            st_dev = math.sqrt(variance)

        return mean, variance, st_dev

    @staticmethod
    def _calcSkipRecords(numIngested, windowSize, learningPeriod):
        """Return the value of skipRecords for passing to estimateAnomalyLikelihoods

        If `windowSize` is very large (bigger than the amount of data) then this
        could just return `learningPeriod`. But when some values have fallen out of
        the historical sliding window of anomaly records, then we have to take those
        into account as well so we return the `learningPeriod` minus the number
        shifted out.

        :param numIngested - (int) number of data points that have been added to the
          sliding window of historical data points.
        :param windowSize - (int) size of sliding window of historical data points.
        :param learningPeriod - (int) the number of iterations required for the
          algorithm to learn the basic patterns in the dataset and for the anomaly
          score to 'settle down'.
        """
        numShiftedOut = max(0, numIngested - windowSize)
        return min(numIngested, max(0, learningPeriod - numShiftedOut))

    @staticmethod
    def _gauss_kernel(std, size):
        """
        Build a normalized one-sided gaussian kernel.

        :param std: standard deviation of the gaussian.
        :param size: number of kernel entries.
        :returns: numpy array of length ``size`` that sums to 1, with the
            largest weight at index 0.
        """
        def _norm_pdf(x, mean, sd):
            # Density of the normal distribution N(mean, sd**2) at x.
            var = float(sd) ** 2
            denom = (2 * math.pi * var) ** .5
            num = math.exp(-(float(x) - float(mean)) ** 2 / (2 * var))
            return num / denom

        # Sample the left half of the bell (offsets -size+1 .. 0), then flip
        # so that the peak (offset 0) ends up first.
        kernel = [2 * _norm_pdf(idx, 0, std) for idx in range(-size + 1, 1)]
        kernel = np.array(kernel)
        kernel = np.flip(kernel)
        kernel = kernel / sum(kernel)
        return kernel
def main(parameters=default_parameters, argv=None, verbose=True):
    """Train a SpatialPooler + Classifier on MNIST and return test accuracy.

    :param parameters: dict holding the SpatialPooler settings read below.
    :param argv: argument list handed to ``argparse``; only ``--data_dir``
      is recognized.
    :param verbose: accepted for interface compatibility; this function does
      not consult it.
    :returns: fraction of test images whose label was inferred correctly.
    """
    cli = argparse.ArgumentParser()
    default_data_dir = os.path.join(
        os.path.dirname(__file__), '..', '..', '..',
        'build', 'ThirdParty', 'mnist_data', 'mnist-src')
    cli.add_argument('--data_dir', type=str, default=default_data_dir)
    options = cli.parse_args(args=argv)

    # Load data.
    train_labels, train_images, test_labels, test_images = load_mnist(
        options.data_dir)
    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)

    # Setup the AI.
    enc = SDR((train_images[0].shape))
    pooler_settings = dict(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        seed=0,
        spVerbosity=99,
        wrapAround=False,
    )
    sp = SpatialPooler(**pooler_settings)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    def present(image, learn):
        """Binarize ``image`` into ``enc`` and run it through the pooler."""
        # Convert greyscale image to binary: pixels at or above the image
        # mean become active bits.
        enc.dense = image >= np.mean(image)
        sp.compute(enc, learn, columns)

    # Training Loop: one randomly drawn sample per training image.
    for _ in range(len(train_images)):
        image, label = random.choice(training_data)
        present(image, True)
        sdrc.learn(columns, label)

    print(str(sp))
    print(str(columns_stats))

    # Testing Loop: count images whose most likely class equals the label.
    hits = 0
    for image, label in test_data:
        present(image, False)
        if label == np.argmax(sdrc.infer(columns)):
            hits += 1
    score = hits / len(test_data)

    print('Score:', 100 * score, '%')
    return score
class HTMCoreDetector(object):
    """Streaming anomaly detector: encoders -> SpatialPooler -> TemporalMemory.

    Construct it, call `initialize()` once to build the pipeline, then feed
    records one at a time through `handleRecord()` / `modelRun()`.
    """

    def __init__(self, inputMin, inputMax, probationaryPeriod, *args, **kwargs):
        """Store the configuration; the model itself is built in `initialize()`.

        @param inputMin            stored as `self.inputMin`
        @param inputMax            stored as `self.inputMax`
        @param probationaryPeriod  number of records used to size the
                                   AnomalyLikelihood learning/estimation windows
        Extra positional/keyword arguments are accepted and ignored.
        """
        self.inputMin = inputMin
        self.inputMax = inputMax
        self.probationaryPeriod = probationaryPeriod

        ## API for controlling settings of htm.core HTM detector:

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood. This will give worse results, but
        # useful for checking the efficacy of AnomalyLikelihood. You will need
        # to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True
        self.verbose = False

        ## internal members
        # (listed here for easier understanding)
        # initialized in `initialize()`
        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        # This is the attribute `initialize()` / `modelRun()` actually use.
        self.anomalyLikelihood = None
        # Legacy placeholder, never assigned elsewhere in this class; kept so
        # external code reading it does not break.
        self.anLike = None
        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None
        # internal helper variables:
        self.inputs_ = []  # every value seen so far (grows without bound)
        self.iteration_ = 0

    def handleRecord(self, ts, val):
        """Process one record and return `(anomalyScore, rawScore)`.

        @param ts Timestamp
        @param val float
        @return tuple (anomalyScore, rawScore), exactly what `modelRun` returns
        """
        # Send it to Numenta detector and get back the results
        return self.modelRun(ts, val)

    def initialize(self):
        """Build the encoders, SpatialPooler, TemporalMemory and likelihood."""
        # toggle parameters here
        # parameters = default_parameters
        parameters = parameters_numenta_comparable

        ## setup Enc, SP, TM, Likelihood
        # Make the Encoders. These will convert input data into binary
        # representations.
        self.encTimestamp = DateEncoder(
            timeOfDay=parameters["enc"]["time"]["timeOfDay"],
            weekend=parameters["enc"]["time"]["weekend"],
            season=parameters["enc"]["time"]["season"],
            dayOfWeek=parameters["enc"]["time"]["dayOfWeek"])

        scalarEncoderParams = EncParameters()
        scalarEncoderParams.size = parameters["enc"]["value"]["size"]
        scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
        scalarEncoderParams.resolution = parameters["enc"]["value"][
            "resolution"]

        self.encValue = Encoder(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(spParams["columnCount"], ),
            potentialPct=spParams["potentialPct"],
            potentialRadius=spParams["potentialRadius"],
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True)
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnCount"], ),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPerm"],
            # The TM reuses the SP's connected-permanence threshold.
            connectedPermanence=spParams["synPermConnected"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["newSynapseCount"],
            permanenceIncrement=tmParams["permanenceInc"],
            permanenceDecrement=tmParams["permanenceDec"],
            predictedSegmentDecrement=0.0,
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # setup likelihood, these settings are used in NAB
        if self.useLikelihood:
            anParams = parameters["anomaly"]["likelihood"]
            # First half of the probationary period is for learning, the
            # remainder for estimating the distribution.
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = AnomalyLikelihood(
                learningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=anParams["reestimationPeriod"])

        # Predictor
        # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
        # predictor_resolution = 1

    def modelRun(self, ts, val):
        """Run a single pass through the HTM model.

        @params ts - Timestamp
        @params val - float input value

        @return tuple `(anomalyScore, raw)`: `raw` is `self.tm.anomaly` for
                this step; `anomalyScore` is the log-likelihood score when
                `useLikelihood` is set and `raw` otherwise.
        """
        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        self.inputs_.append(val)
        self.iteration_ += 1

        # 1. Encoding
        # Call the encoders to create bit representations for each value.
        # These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for
        # Spatial Pooling.
        encoding = SDR(self.encTimestamp.size +
                       self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns. This will be populated
        # by the compute method below. It must have the same dimensions as
        # the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        self.tm.compute(activeColumns, learn=True)
        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4.1 (optional) Predictor #TODO optional
        # TODO optional: also return an error metric on predictions (RMSE, R2,...)

        # 4.2 Anomaly
        # handle contextual (raw, likelihood) anomalies
        # -temporal (raw)
        raw = self.tm.anomaly
        temporalAnomaly = raw

        if self.useLikelihood:
            # Compute log(anomaly likelihood)
            like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
            logScore = self.anomalyLikelihood.computeLogLikelihood(like)
            temporalAnomaly = logScore  # TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

        anomalyScore = temporalAnomaly  # this is the "main" anomaly, compared in NAB

        # 5. print stats
        if self.verbose and self.iteration_ % 1000 == 0:
            print(self.enc_info)
            print(self.sp_info)
            print(self.tm_info)

        return anomalyScore, raw
def main(parameters=default_parameters, argv=None, verbose=True):
    """Run the encoder -> SpatialPooler -> TemporalMemory pipeline over the input CSV.

    Every record is encoded, pooled and fed to the TemporalMemory; a
    Predictor is trained to predict the value 1 and 5 steps ahead, and the
    raw anomaly score is recorded for every step.

    :param parameters: nested dict with the "enc", "sp", "tm", "anomaly" and
      "predictor" settings read below.
    :param argv: unused here; kept for interface compatibility.
    :param verbose: when true, print the parameters up front and show the
      matplotlib plots at the end (if matplotlib can be imported).
    :returns: the negated RMS error of the 5-step-ahead predictions; NaN if
      no 5-step prediction could be scored.
    """
    if verbose:
        import pprint
        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # Read the input file. The first three rows are header rows.
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        next(reader)
        next(reader)
        next(reader)
        records = list(reader)

    # Make the Encoders. These will convert input data into binary
    # representations.
    dateEncoder = DateEncoder(
        timeOfDay=parameters["enc"]["time"]["timeOfDay"],
        weekend=parameters["enc"]["time"]["weekend"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    spParams = parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    tmParams = parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The TM reuses the SP's connected-permanence threshold.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # setup likelihood, these settings are used in NAB
    anParams = parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(
        math.floor(float(anParams["probationaryPct"]) * len(records)))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    predictor = Predictor(steps=[1, 5],
                          alpha=parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # Iterate through every datum in the dataset, record the inputs & outputs.
    inputs = []
    anomaly = []
    anomalyProb = []  # collected per step; not used further in this function
    predictions = {1: [], 5: []}
    for count, record in enumerate(records):
        # Convert date string into Python date object.
        timestamp = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
        # Convert data value string into float.
        consumption = float(record[1])
        inputs.append(consumption)

        # Call the encoders to create bit representations for each value.
        # These are SDR objects.
        dateBits = dateEncoder.encode(timestamp)
        consumptionBits = scalarEncoder.encode(consumption)

        # Concatenate all these encodings into one large encoding for
        # Spatial Pooling.
        encoding = SDR(encodingWidth).concatenate([consumptionBits, dateBits])
        enc_info.addData(encoding)

        # Create an SDR to represent active columns. This will be populated
        # by the compute method below. It must have the same dimensions as
        # the Spatial Pooler.
        activeColumns = SDR(sp.getColumnDimensions())

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        sp_info.addData(activeColumns)

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumns, learn=True)
        tm_info.addData(tm.getActiveCells().flatten())

        # Predict what will happen, and then train the predictor based on
        # what just happened.
        pdf = predictor.infer(count, tm.getActiveCells())
        for n in (1, 5):
            if pdf[n]:
                predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
            else:
                predictions[n].append(float('nan'))
        predictor.learn(count, tm.getActiveCells(),
                        int(consumption / predictor_resolution))

        anomalyLikelihood = anomaly_history.anomalyProbability(
            consumption, tm.anomaly)
        anomaly.append(tm.anomaly)
        anomalyProb.append(anomalyLikelihood)

    # Print information & statistics about the state of the HTM.
    print("Encoded Input", enc_info)
    print("")
    print("Spatial Pooler Mini-Columns", sp_info)
    print(str(sp))
    print("")
    print("Temporal Memory Cells", tm_info)
    print(str(tm))
    print("")

    # Shift the predictions so that they are aligned with the input they
    # predict: pad the front with NaN and drop the same number from the end.
    for n_steps, pred_list in predictions.items():
        length = len(pred_list)
        pred_list[:] = ([float('nan')] * n_steps + pred_list)[:length]

    # Calculate the predictive accuracy, Root-Mean-Squared
    accuracy = {1: 0, 5: 0}
    accuracy_samples = {1: 0, 5: 0}
    for idx, inp in enumerate(inputs):
        for n in predictions:  # For each [N]umber of time steps ahead which was predicted.
            val = predictions[n][idx]
            if not math.isnan(val):
                accuracy[n] += (inp - val) ** 2
                accuracy_samples[n] += 1
    for n in sorted(predictions):
        if accuracy_samples[n]:
            accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
        else:
            # No scorable prediction for this horizon (e.g. too few records):
            # report NaN instead of dividing by zero.
            accuracy[n] = float('nan')
        print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

    # Show info about the anomaly (mean & std)
    print("Anomaly Mean", np.mean(anomaly))
    print("Anomaly Std ", np.std(anomaly))

    # Plot the Predictions and Anomalies.
    if verbose:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            # Only a failed import is treated as "no plotting available";
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print("WARNING: failed to import matplotlib, plots cannot be shown.")
            return -accuracy[5]

        plt.subplot(2, 1, 1)
        plt.title("Predictions")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), predictions[1], 'blue',
                 np.arange(len(inputs)), predictions[5], 'green',)
        plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step',
                           '5 Step Prediction, Shifted 5 steps'))

        plt.subplot(2, 1, 2)
        plt.title("Anomaly Score")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        # Scale the input by its maximum so it shares an axis with the
        # anomaly score.
        inputs = np.array(inputs) / max(inputs)
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), anomaly, 'blue',)
        plt.legend(labels=('Input', 'Anomaly Score'))
        plt.show()

    return -accuracy[5]
class HtmcoreDetector(AnomalyDetector):
    """
    This detector uses an HTM based anomaly detection technique.

    Pipeline per record: encoders -> SpatialPooler -> TemporalMemory ->
    anomaly score (optionally via AnomalyLikelihood, optionally combined
    with a simple min/max "spatial" anomaly check).
    """

    def __init__(self, *args, **kwargs):
        super(HtmcoreDetector, self).__init__(*args, **kwargs)

        ## API for controlling settings of htm.core HTM detector:

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood. This will give worse results, but
        # useful for checking the efficacy of AnomalyLikelihood. You will need
        # to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True
        self.useSpatialAnomaly = True
        self.verbose = True

        # Set this to true if you want to use the optimization.
        # If true, parameters are loaded with get_params('params.json').
        # If false, the module-level `parameters_numenta_comparable` are used.
        self.use_optimization = False

        ## internal members
        # (listed here for easier understanding)
        # initialized in `initialize()`
        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        # This is the attribute `initialize()` / `modelRun()` actually use.
        self.anomalyLikelihood = None
        # Legacy placeholder, never assigned elsewhere in this class; kept so
        # external code reading it does not break.
        self.anLike = None
        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None
        # internal helper variables:
        self.inputs_ = []  # every value seen so far (grows without bound)
        self.iteration_ = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]  # TODO optional: add "prediction"

    def handleRecord(self, inputData):
        """Returns a tuple (anomalyScore, rawScore).

        @param inputData is a dict {"timestamp" : Timestamp(), "value" : float}

        @return tuple (anomalyScore, <any other fields specified in `getAdditionalHeaders()`>, ...)
        """
        # Send it to Numenta detector and get back the results
        return self.modelRun(inputData["timestamp"], inputData["value"])

    def initialize(self):
        """Build the encoders, SpatialPooler, TemporalMemory and likelihood."""
        # toggle parameters here
        if self.use_optimization:
            parameters = get_params('params.json')
        else:
            parameters = parameters_numenta_comparable

        # setup spatial anomaly
        if self.useSpatialAnomaly:
            # Keep track of value range for spatial anomaly detection
            self.minVal = None
            self.maxVal = None

        ## setup Enc, SP, TM, Likelihood
        # Make the Encoders. These will convert input data into binary
        # representations.
        self.encTimestamp = DateEncoder(
            timeOfDay=parameters["enc"]["time"]["timeOfDay"],
            weekend=parameters["enc"]["time"]["weekend"])

        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = parameters["enc"]["value"]["size"]
        scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
        scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]

        self.encValue = RDSE(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnCount"],),
            potentialPct=spParams["potentialPct"],
            potentialRadius=encodingWidth,
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True)
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnCount"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPerm"],
            # The TM reuses the SP's connected-permanence threshold.
            connectedPermanence=spParams["synPermConnected"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["newSynapseCount"],
            permanenceIncrement=tmParams["permanenceInc"],
            permanenceDecrement=tmParams["permanenceDec"],
            predictedSegmentDecrement=0.0,
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # setup likelihood, these settings are used in NAB
        if self.useLikelihood:
            anParams = parameters["anomaly"]["likelihood"]
            # First half of the probationary period is for learning, the
            # remainder for estimating the distribution.
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = AnomalyLikelihood(
                learningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=anParams["reestimationPeriod"])

        # Predictor
        # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
        # predictor_resolution = 1

        # initialize pandaBaker
        if PANDA_VIS_BAKE_DATA:
            self.BuildPandaSystem(self.sp, self.tm,
                                  parameters["enc"]["value"]["size"],
                                  self.encTimestamp.size)

    def modelRun(self, ts, val):
        """Run a single pass through the HTM model.

        @params ts - Timestamp
        @params val - float input value

        @return tuple `(anomalyScore, raw)`: `raw` is `self.tm.anomaly` for
                this step; `anomalyScore` is the maximum of the spatial
                anomaly (0.0 or 1.0) and the temporal anomaly (log-likelihood
                score when `useLikelihood` is set, `raw` otherwise).
        """
        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        self.inputs_.append(val)
        self.iteration_ += 1

        # 1. Encoding
        # Call the encoders to create bit representations for each value.
        # These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for
        # Spatial Pooling.
        encoding = SDR(self.encTimestamp.size +
                       self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns. This will be populated
        # by the compute method below. It must have the same dimensions as
        # the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.

        # to get predictive cells we need to call activateDendrites &
        # activateCells separately
        if PANDA_VIS_BAKE_DATA:
            # activateDendrites calculates active segments
            self.tm.activateDendrites(learn=True)
            # predictive cells are calculated directly from active segments
            predictiveCells = self.tm.getPredictiveCells()
            # activates cells in columns by TM algorithm (winners, bursting...)
            self.tm.activateCells(activeColumns, learn=True)
        else:
            self.tm.compute(activeColumns, learn=True)

        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4.1 (optional) Predictor #TODO optional
        # TODO optional: also return an error metric on predictions (RMSE, R2,...)

        # 4.2 Anomaly
        # handle spatial, contextual (raw, likelihood) anomalies
        # -Spatial
        spatialAnomaly = 0.0  # TODO optional: make this computed in SP (and later improve)
        if self.useSpatialAnomaly:
            # Update min/max values and check if there is a spatial anomaly.
            # The check is skipped while min == max (including both None
            # before the first record).
            if self.minVal != self.maxVal:
                tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
                maxExpected = self.maxVal + tolerance
                minExpected = self.minVal - tolerance
                if val > maxExpected or val < minExpected:
                    spatialAnomaly = 1.0
            if self.maxVal is None or val > self.maxVal:
                self.maxVal = val
            if self.minVal is None or val < self.minVal:
                self.minVal = val

        # -temporal (raw)
        raw = self.tm.anomaly
        temporalAnomaly = raw

        if self.useLikelihood:
            # Compute log(anomaly likelihood)
            like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
            logScore = self.anomalyLikelihood.computeLogLikelihood(like)
            temporalAnomaly = logScore  # TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

        anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

        # 5. print stats
        if self.verbose and self.iteration_ % 1000 == 0:
            # print(self.enc_info)
            # print(self.sp_info)
            # print(self.tm_info)
            pass

        # 6. panda vis
        if PANDA_VIS_BAKE_DATA:
            # ------------------HTMpandaVis----------------------
            # see more about this structure at https://github.com/htm-community/HTMpandaVis/blob/master/pandaBaker/README.md
            # fill up values
            pandaBaker.inputs["Value"].stringValue = "value: {:.2f}".format(val)
            pandaBaker.inputs["Value"].bits = valueBits.sparse

            pandaBaker.inputs["TimeOfDay"].stringValue = str(ts)
            pandaBaker.inputs["TimeOfDay"].bits = dateBits.sparse

            pandaBaker.layers["Layer1"].activeColumns = activeColumns.sparse
            pandaBaker.layers["Layer1"].winnerCells = self.tm.getWinnerCells().sparse
            pandaBaker.layers["Layer1"].predictiveCells = predictiveCells.sparse
            pandaBaker.layers["Layer1"].activeCells = self.tm.getActiveCells().sparse

            # customizable datastreams to be show on the DASH PLOTS
            pandaBaker.dataStreams["rawAnomaly"].value = temporalAnomaly
            pandaBaker.dataStreams["value"].value = val
            pandaBaker.dataStreams["numberOfWinnerCells"].value = len(self.tm.getWinnerCells().sparse)
            pandaBaker.dataStreams["numberOfPredictiveCells"].value = len(predictiveCells.sparse)
            pandaBaker.dataStreams["valueInput_sparsity"].value = valueBits.getSparsity()
            pandaBaker.dataStreams["dateInput_sparsity"].value = dateBits.getSparsity()

            # SP streams read the SP metrics, TM streams the TM metrics
            # (the TM overlap stream used to be fed from `sp_info`).
            pandaBaker.dataStreams["Layer1_SP_overlap_metric"].value = self.sp_info.overlap.overlap
            pandaBaker.dataStreams["Layer1_TM_overlap_metric"].value = self.tm_info.overlap.overlap
            pandaBaker.dataStreams["Layer1_SP_activation_frequency"].value = self.sp_info.activationFrequency.mean()
            pandaBaker.dataStreams["Layer1_TM_activation_frequency"].value = self.tm_info.activationFrequency.mean()
            # Entropy streams report entropy (they used to repeat the mean).
            pandaBaker.dataStreams["Layer1_SP_entropy"].value = self.sp_info.activationFrequency.entropy()
            pandaBaker.dataStreams["Layer1_TM_entropy"].value = self.tm_info.activationFrequency.entropy()

            pandaBaker.StoreIteration(self.iteration_ - 1)
            print("ITERATION: " + str(self.iteration_ - 1))
            # ------------------HTMpandaVis----------------------

        return (anomalyScore, raw)

    # with this method, the structure for visualization is defined
    def BuildPandaSystem(self, sp, tm, consumptionBits_size, dateBits_size):
        """Declare the inputs, layer and data streams recorded by pandaBaker.

        @param sp                    SpatialPooler handed to `cLayer`
        @param tm                    TemporalMemory handed to `cLayer`
        @param consumptionBits_size  size passed to the "Value" `cInput`
        @param dateBits_size         size passed to the "TimeOfDay" `cInput`
        """
        # we have two inputs connected to proximal synapses of Layer1
        pandaBaker.inputs["Value"] = cInput(consumptionBits_size)
        pandaBaker.inputs["TimeOfDay"] = cInput(dateBits_size)

        pandaBaker.layers["Layer1"] = cLayer(sp, tm)  # Layer1 has Spatial Pooler & Temporal Memory
        pandaBaker.layers["Layer1"].proximalInputs = [
            "Value",
            "TimeOfDay",
        ]
        pandaBaker.layers["Layer1"].distalInputs = ["Layer1"]

        # data for dash plots
        streams = ["rawAnomaly", "value", "numberOfWinnerCells",
                   "numberOfPredictiveCells", "valueInput_sparsity",
                   "dateInput_sparsity", "Layer1_SP_overlap_metric",
                   "Layer1_TM_overlap_metric",
                   "Layer1_SP_activation_frequency",
                   "Layer1_TM_activation_frequency", "Layer1_SP_entropy",
                   "Layer1_TM_entropy"]

        # One cDataStream per name; could also be written like:
        # pandaBaker.dataStreams["myStreamName"] = cDataStream()
        pandaBaker.dataStreams = {name: cDataStream() for name in streams}

        pandaBaker.PrepareDatabase()
else: if not similar: similar = {"doc": document, "bits": current} else: if (distance(current, reference["bits"]) < distance( similar["bits"], reference["bits"])): similar = {"doc": document, "bits": current} if not unsimilar: unsimilar = {"doc": document, "bits": current} else: if (distance(current, reference["bits"]) > distance( unsimilar["bits"], reference["bits"])): unsimilar = {"doc": document, "bits": current} report = Metrics([encoder.size], len(sdrs) + 1) for sdr in sdrs: report.addData(sdr) print("Statistics:") print("\tEncoded %d Document inputs." % len(sdrs)) print("\tOutput: " + str(report)) print("Similarity:") print("\tReference:\n\t\t" + str(reference["doc"])) print("\tMOST Similar (Distance = " + str(distance(similar["bits"], reference["bits"])) + "):") print("\t\t" + str(similar["doc"])) print("\tLEAST Similar (Distance = " + str(distance(unsimilar["bits"], reference["bits"])) + "):") print("\t\t" + str(unsimilar["doc"]))
exit() # # Run the encoder and measure some statistics about its output. # if args.category: n_samples = int(args.maximum - args.minimum + 1) else: n_samples = (args.maximum - args.minimum) / enc.parameters.resolution oversample = 2 # Use more samples than needed to avoid aliasing & artifacts. n_samples = int(round(oversample * n_samples)) sdrs = [] for i in np.linspace(args.minimum, args.maximum, n_samples): sdrs.append(enc.encode(i)) M = Metrics([enc.size], len(sdrs) + 1) for s in sdrs: M.addData(s) print("Statistics:") print("Encoded %d inputs." % len(sdrs)) print("Output " + str(M)) # # Plot the Receptive Field of each bit in the encoder. # import matplotlib.pyplot as plt if 'matplotlib.pyplot' in modules: rf = np.zeros([enc.size, len(sdrs)], dtype=np.uint8) for i in range(len(sdrs)): rf[:, i] = sdrs[i].dense plt.imshow(rf, interpolation='nearest')
def initialize(self):
    """Build the encoders, SpatialPooler, TemporalMemory and (optionally)
    the AnomalyLikelihood, storing each on ``self``.

    Settings come from the module-level ``parameters_numenta_comparable``.
    """
    # toggle parameters here
    # parameters = default_parameters
    parameters = parameters_numenta_comparable

    ## setup Enc, SP, TM, Likelihood
    # Make the Encoders. These will convert input data into binary
    # representations.
    self.encTimestamp = DateEncoder(
        timeOfDay=parameters["enc"]["time"]["timeOfDay"],
        weekend=parameters["enc"]["time"]["weekend"],
        season=parameters["enc"]["time"]["season"],
        dayOfWeek=parameters["enc"]["time"]["dayOfWeek"],
    )

    value_params = EncParameters()
    value_params.size = parameters["enc"]["value"]["size"]
    value_params.sparsity = parameters["enc"]["value"]["sparsity"]
    value_params.resolution = parameters["enc"]["value"]["resolution"]
    self.encValue = Encoder(value_params)

    # The SP input is the timestamp and value encodings side by side.
    total_width = self.encTimestamp.size + self.encValue.size
    self.enc_info = Metrics([total_width], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    # SpatialPooler
    sp_cfg = parameters["sp"]
    sp_kwargs = dict(
        inputDimensions=(total_width, ),
        columnDimensions=(sp_cfg["columnCount"], ),
        potentialPct=sp_cfg["potentialPct"],
        potentialRadius=sp_cfg["potentialRadius"],
        globalInhibition=True,
        localAreaDensity=sp_cfg["localAreaDensity"],
        stimulusThreshold=sp_cfg["stimulusThreshold"],
        synPermInactiveDec=sp_cfg["synPermInactiveDec"],
        synPermActiveInc=sp_cfg["synPermActiveInc"],
        synPermConnected=sp_cfg["synPermConnected"],
        boostStrength=sp_cfg["boostStrength"],
        wrapAround=True,
    )
    self.sp = SpatialPooler(**sp_kwargs)
    self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

    # TemporalMemory
    tm_cfg = parameters["tm"]
    tm_kwargs = dict(
        columnDimensions=(sp_cfg["columnCount"], ),
        cellsPerColumn=tm_cfg["cellsPerColumn"],
        activationThreshold=tm_cfg["activationThreshold"],
        initialPermanence=tm_cfg["initialPerm"],
        # The TM reuses the SP's connected-permanence threshold.
        connectedPermanence=sp_cfg["synPermConnected"],
        minThreshold=tm_cfg["minThreshold"],
        maxNewSynapseCount=tm_cfg["newSynapseCount"],
        permanenceIncrement=tm_cfg["permanenceInc"],
        permanenceDecrement=tm_cfg["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tm_cfg["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tm_cfg["maxSynapsesPerSegment"],
    )
    self.tm = TemporalMemory(**tm_kwargs)
    self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

    # setup likelihood, these settings are used in NAB
    if not self.useLikelihood:
        return

    likelihood_cfg = parameters["anomaly"]["likelihood"]
    # First half of the probationary period is for learning, the remainder
    # for estimating the distribution.
    learning = int(math.floor(self.probationaryPeriod / 2.0))
    self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=learning,
        estimationSamples=self.probationaryPeriod - learning,
        reestimationPeriod=likelihood_cfg["reestimationPeriod"],
    )