def initialize(self):
    """Create the encoders, spatial pooler, extended temporal memory and
    anomaly-likelihood tracker for this detector.

    Reads ``self.inputMin``, ``self.inputMax`` and
    ``self.probationaryPeriod``; everything it builds is stored on ``self``.
    """
    # Widen the observed range by 20% on each side. When min == max the
    # upper bound is simply min + 1 so the range is never empty.
    padding = abs(self.inputMax - self.inputMin) * 0.2
    lowerBound = self.inputMin - padding
    if self.inputMin != self.inputMax:
        upperBound = self.inputMax + padding
    else:
        upperBound = self.inputMin + 1

    # Spread the padded range over 130 buckets, with a floor of 0.001.
    bucketCount = 130.0
    resolution = max(0.001, (upperBound - lowerBound) / bucketCount)
    self.valueEncoder = RandomDistributedScalarEncoder(
        resolution, w=41, seed=42)
    self.encodedValue = np.zeros(
        self.valueEncoder.getWidth(), dtype=np.uint32)

    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
    self.encodedTimestamp = np.zeros(
        self.timestampEncoder.getWidth(), dtype=np.uint32)

    # Only the scalar value is spatially pooled; the timestamp width is
    # handed to the temporal memory as its basal input dimension instead.
    inputWidth = self.valueEncoder.getWidth()
    self.sp = SpatialPooler(
        globalInhibition=True,
        columnDimensions=[2048],
        inputDimensions=[inputWidth],
        potentialRadius=inputWidth,
        numActiveColumnsPerInhArea=40,
        seed=1956,
        potentialPct=0.8,
        boostStrength=0.0,
        synPermActiveInc=0.003,
        synPermConnected=0.2,
        synPermInactiveDec=0.0005,
    )
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.etm = ExtendedTemporalMemory(
        activationThreshold=13,
        cellsPerColumn=1,
        columnDimensions=(2048,),
        basalInputDimensions=(self.timestampEncoder.getWidth(),),
        initialPermanence=0.21,
        maxSegmentsPerCell=128,
        maxSynapsesPerSegment=32,
        minThreshold=10,
        maxNewSynapseCount=20,
        permanenceDecrement=0.1,
        permanenceIncrement=0.1,
        seed=1960,
        checkInputs=False,
    )

    # First half of the probationary period is the learning period; the
    # remainder is used as the estimation sample count.
    learningPeriod = math.floor(self.probationaryPeriod / 2.0)
    estimationSamples = self.probationaryPeriod - learningPeriod
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=estimationSamples,
        reestimationPeriod=100)
def setUp(self):
    """Build the shared encoder, a reference datetime, its encoding, and
    the bit pattern that encoding is expected to equal."""
    # Widths: 3 bits for season, 1 for day of week, 1 for weekend and 5 for
    # time of day.
    self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)

    # Thursday 4 Nov 2010, 14:55 - mid fall, not a weekend, afternoon.
    self._d = datetime.datetime(2010, 11, 4, 14, 55)
    self._bits = self._e.encode(self._d)

    expectedFields = [
        # Season is laid out as aaabbbcccddd (1 bit/month); expected
        # 000000000111, centered on month 11 (Nov).
        # TODO should be <<3?
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
        # Week is MTWTFSS with Monday = 0 (as in
        # datetime.datetime.timetuple()), so Thursday is index 3.
        [0, 0, 0, 1, 0, 0, 0],
        # Not a weekend, so the "False" bit is set.
        [1, 0],
        # Time of day: radius of 4 hours and w of 5 gives 240/5 = 48 min
        # per bit. 14:55 is minute 895 and 895/48 = bit 18.6. 30 bits in
        # total (30 * 48 minutes = 24 hours).
        [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
            0, 0, 0, 0, 0, 0, 0, 0, 0
        ],
    ]

    flattened = []
    for fieldBits in expectedFields:
        flattened.extend(fieldBits)
    self._expected = numpy.array(flattened, dtype=defaultDtype)
def smart_encode(data_fl):
    """Pick a list of encoders for every datetime or float column.

    For a ``datetime64[ns]`` column the gap between its first two rows
    decides which ``DateEncoder`` fields are used:

    * gap of at least 28 days: season only;
    * gap of at least one day: season, day of week and weekend;
    * anything finer (or an unknown gap): season, day of week, weekend and
      time of day.

    For a ``float`` column a single ``RandomDistributedScalarEncoder`` is
    created whose resolution is the column's value range divided by
    ``400 - 21``.

    Columns of any other dtype are skipped.

    :param data_fl: pandas DataFrame to inspect.
    :return: list with one inner list of encoders per encoded column, in
        column order.
    """
    # ``pd.Timedelta(1, unit='M')`` is rejected by pandas >= 2.0, and its
    # legacy value (~30.44 days) was longer than many calendar months, so
    # monthly data could be treated as daily. 28 days is the shortest
    # calendar month.
    month_gap = pd.Timedelta(days=28)
    day_gap = pd.Timedelta(days=1)

    encoder_list = []
    for column_name in data_fl.columns:
        column = data_fl[column_name]

        if column.dtype == 'M8[ns]':
            # Positional access: the frame's index labels need not be
            # 0..n-1 (e.g. a filtered frame or a datetime index).
            if len(column) >= 2:
                time_delta = column.iloc[1] - column.iloc[0]
            else:
                # Too few rows to measure a gap: use the finest encoding.
                time_delta = pd.Timedelta(0)

            if time_delta >= month_gap:
                encoder_list.append([DateEncoder(season=(5, 1))])
            elif time_delta >= day_gap:
                encoder_list.append([
                    DateEncoder(season=(21)),
                    DateEncoder(dayOfWeek=(21, 1)),
                    DateEncoder(weekend=5),
                ])
            else:
                encoder_list.append([
                    DateEncoder(season=(5, 1)),
                    DateEncoder(dayOfWeek=(5, 1)),
                    DateEncoder(weekend=5),
                    DateEncoder(timeOfDay=(5, 1)),
                ])

        if column.dtype == "float":
            col_range = column.max() - column.min()
            res = col_range / (400 - 21)
            encoder_list.append([RandomDistributedScalarEncoder(res)])

    return encoder_list
def setUp(self):
    """Prepare the encoder under test, a sample datetime, the bits it
    encodes to, and the hand-written expected bit pattern."""
    # season=3 bits, dayOfWeek=1 bit, weekend=1 bit, timeOfDay=5 bits.
    self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)

    # 4th Nov 2010, 14:55: middle of fall, a Thursday, not a weekend,
    # afternoon.
    self._d = datetime.datetime(2010, 11, 4, 14, 55)
    self._bits = self._e.encode(self._d)

    # Season field, aaabbbcccddd (1 bit/month): 000000000111, centered on
    # month 11 (Nov). TODO should be <<3?
    seasonBits = [0,0,0,0,0,0,0,0,0,1,1,1]

    # Day-of-week field, MTWTFSS with Monday = 0 (python
    # datetime.datetime.timetuple() convention).
    weekdayBits = [0,0,0,1,0,0,0]

    # Weekend field: not a weekend, so "False".
    weekendBits = [1, 0]

    # Time-of-day field: radius of 4 hours and w of 5, so each bit covers
    # 240/5 = 48 min. 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6.
    # 30 bits total (30 * 48 minutes = 24 hours).
    timeBits = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0]

    allBits = list(seasonBits)
    for part in (weekdayBits, weekendBits, timeBits):
        allBits += part
    self._expected = numpy.array(allBits, dtype=defaultDtype)
def initialize(self):
    """Create the encoders, spatial pooler, temporal memory and (when
    ``self.useLikelihood`` is set) the anomaly-likelihood tracker.

    Reads ``self.inputMin``, ``self.inputMax``, ``self.useLikelihood`` and
    ``self.probationaryPeriod``.
    """
    # The RDSE resolution is derived from the data min and max, so it is
    # specific to this data stream. The range is padded by 20% on each
    # side; a degenerate range (min == max) uses min + 1 as upper bound.
    padding = abs(self.inputMax - self.inputMin) * 0.2
    lowerBound = self.inputMin - padding
    if self.inputMin != self.inputMax:
        upperBound = self.inputMax + padding
    else:
        upperBound = self.inputMin + 1

    bucketCount = 130.0
    resolution = max(0.001, (upperBound - lowerBound) / bucketCount)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(
        self.valueEncoder.getWidth(), dtype=np.uint32)

    # Timestamp encoder.
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
    self.encodedTimestamp = np.zeros(
        self.timestampEncoder.getWidth(), dtype=np.uint32)

    # The spatial pooler input is as wide as both encodings together.
    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(
        globalInhibition=True,
        columnDimensions=[2048],
        inputDimensions=[inputWidth],
        potentialRadius=inputWidth,
        numActiveColumnsPerInhArea=40,
        seed=1956,
        potentialPct=0.8,
        boostStrength=0.0,
        synPermActiveInc=0.003,
        synPermConnected=0.2,
        synPermInactiveDec=0.0005,
    )
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(
        activationThreshold=20,
        cellsPerColumn=32,
        columnDimensions=(2048,),
        initialPermanence=0.24,
        maxSegmentsPerCell=128,
        maxSynapsesPerSegment=128,
        minThreshold=13,
        maxNewSynapseCount=31,
        permanenceDecrement=0.008,
        permanenceIncrement=0.04,
        seed=1960,
    )

    if not self.useLikelihood:
        return

    # Half of the probationary period is spent learning; the rest is used
    # as the estimation sample count.
    learningPeriod = math.floor(self.probationaryPeriod / 2.0)
    estimationSamples = self.probationaryPeriod - learningPeriod
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=estimationSamples,
        reestimationPeriod=100)
def testHoliday(self):
    '''Check the holiday field on several dates from 24 to 27 December,
    where the encoding transitions smoothly.'''
    encoder = DateEncoder(holiday=5)

    onHoliday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype='uint8')
    offHoliday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype='uint8')
    # Pattern expected partway through the transition.
    inTransition = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype='uint8')

    cases = [
        (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
        (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
        (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
    ]
    for when, expectedBits in cases:
        self.assertTrue((encoder.encode(when) == expectedBits).all())
def testHoliday(self):
    '''Check the holiday field on several dates from 24 to 27 December,
    where the encoding transitions smoothly.'''
    # forced=True is used here for readability only; its use is not
    # recommended (see scalar.py).
    encoder = DateEncoder(holiday=5, forced=True)

    onHoliday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype='uint8')
    offHoliday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype='uint8')
    # Pattern expected partway through the transition.
    inTransition = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype='uint8')

    cases = [
        (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
        (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
        (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
    ]
    for when, expectedBits in cases:
        self.assertTrue((encoder.encode(when) == expectedBits).all())
def testHoliday(self):
    """Check the holiday field on several dates from 24 to 27 December,
    where the encoding transitions smoothly."""
    # forced=True is used here for readability only; its use is not
    # recommended (see scalar.py).
    encoder = DateEncoder(holiday=5, forced=True)

    onHoliday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype="uint8")
    offHoliday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype="uint8")
    # Pattern expected partway through the transition.
    inTransition = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype="uint8")

    cases = [
        (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
        (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
        (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
    ]
    for when, expectedBits in cases:
        actualBits = encoder.encode(when)
        self.assertTrue(numpy.array_equal(actualBits, expectedBits))
def testWeekend(self):
    """Compare a custom-days fri/sat/sun encoder with the weekend encoder
    over 301 consecutive days, and check a Monday-only encoder against
    ``datetime.weekday()``."""
    # forced=True is used here for readability only; its use is not
    # recommended (see scalar.py).
    customWeekend = DateEncoder(
        customDays=(21, ["sat", "sun", "fri"]), forced=True)
    mondayOnly = DateEncoder(customDays=(21, "Monday"), forced=True)
    builtinWeekend = DateEncoder(weekend=(21, 1), forced=True)

    firstDay = datetime.datetime(1988, 5, 29, 20, 0)
    self.assertTrue(numpy.array_equal(customWeekend.encode(firstDay),
                                      builtinWeekend.encode(firstDay)))

    oneDay = datetime.timedelta(days=1)
    for offset in range(1, 301):
        day = firstDay + offset * oneDay
        self.assertTrue(numpy.array_equal(customWeekend.encode(day),
                                          builtinWeekend.encode(day)))

        # The Monday-only encoder must decode to 1.0 exactly on Mondays.
        decoded = mondayOnly.decode(mondayOnly.encode(day))
        mondayValue = decoded[0]["Monday"][0][0][0]
        if mondayValue == 1.0:
            self.assertEqual(day.weekday(), 0)
        else:
            self.assertNotEqual(day.weekday(), 0)
def __init__(self):
    """Create one encoder per input field, then call ``self.run()``."""

    def scalar(name, n, minval, maxval, periodic=False):
        # Every scalar field here uses the same number of active bits, w=3.
        return ScalarEncoder(name=name, w=3, n=n, minval=minval,
                             maxval=maxval, periodic=periodic)

    # Geographic position; only longitude is encoded as periodic.
    self.lat = scalar('latitude', 100, -90, 90)
    self.long = scalar('longitude', 100, -180, 180, periodic=True)

    # Timestamp: the season field is given width 0.
    self.timeenc = DateEncoder(season=0, dayOfWeek=1, weekend=3,
                               timeOfDay=5)

    self.likes = scalar('likes', 50, 0, 100000)
    self.people = scalar('numpeople', 20, 0, 100)

    # Category encoder created without a predefined category list.
    self.categories = SDRCategoryEncoder(n=87, w=3, categoryList=None,
                                         name="cats", verbosity=0)

    self.run()
def testHoliday(self):
    '''Check the holiday field on several dates from 24 to 27 December,
    where the encoding transitions smoothly.'''
    encoder = DateEncoder(holiday=5)

    onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype='uint8')
    offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype='uint8')
    # Pattern expected partway through the transition.
    inTransition = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
                               dtype='uint8')

    cases = (
        (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
        (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
        (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
    )
    for when, expectedBits in cases:
        matches = encoder.encode(when) == expectedBits
        self.assertTrue(matches.all())
def testReadWrite(self):
    """Serialize the encoder to a proto, read it back through a temporary
    file, and check the restored encoder matches the original."""
    sampleTime = datetime.datetime(1997, 8, 29, 2, 14)
    sampleBits = self._e.encode(sampleTime)

    outgoing = DateEncoderProto.new_message()
    self._e.write(outgoing)

    # Write the proto to a temp file and read it back into a new proto.
    with tempfile.TemporaryFile() as stream:
        outgoing.write(stream)
        stream.seek(0)
        incoming = DateEncoderProto.read(stream)
        restored = DateEncoder.read(incoming)

    self.assertIsInstance(restored, DateEncoder)

    # Plain attributes must compare equal.
    for attr in ("width", "weekendOffset", "timeOfDayOffset",
                 "seasonOffset", "dayOfWeekOffset"):
        self.assertEqual(getattr(restored, attr), getattr(self._e, attr))

    # Sub-encoders must be of the same class as in the original.
    for attr in ("customDaysEncoder", "dayOfWeekEncoder", "seasonEncoder",
                 "timeOfDayEncoder", "weekendEncoder"):
        self.assertIsInstance(getattr(restored, attr),
                              getattr(self._e, attr).__class__)

    # Encoding and decoding must agree between the two encoders.
    self.assertTrue(numpy.array_equal(self._bits, restored.encode(self._d)))
    self.assertTrue(numpy.array_equal(restored.encode(sampleTime),
                                      sampleBits))
    self.assertEqual(self._e.decode(restored.encode(self._d)),
                     restored.decode(self._e.encode(self._d)))
def testReadWrite(self):
    """Round-trip the encoder through its proto and a temporary file, then
    compare the restored encoder with the original."""
    referenceTime = datetime.datetime(1997, 8, 29, 2, 14)
    referenceBits = self._e.encode(referenceTime)

    written = DateEncoderProto.new_message()
    self._e.write(written)

    # Write the proto to a temp file and read it back into a new proto.
    with tempfile.TemporaryFile() as handle:
        written.write(handle)
        handle.seek(0)
        reread = DateEncoderProto.read(handle)
        clone = DateEncoder.read(reread)

    original = self._e
    self.assertIsInstance(clone, DateEncoder)

    scalarAttrs = ["width", "weekendOffset", "timeOfDayOffset",
                   "seasonOffset", "dayOfWeekOffset"]
    for name in scalarAttrs:
        self.assertEqual(getattr(clone, name), getattr(original, name))

    subEncoderAttrs = ["customDaysEncoder", "dayOfWeekEncoder",
                       "seasonEncoder", "timeOfDayEncoder",
                       "weekendEncoder"]
    for name in subEncoderAttrs:
        expectedClass = getattr(original, name).__class__
        self.assertIsInstance(getattr(clone, name), expectedClass)

    self.assertTrue(numpy.array_equal(self._bits, clone.encode(self._d)))
    self.assertTrue(
        numpy.array_equal(clone.encode(referenceTime), referenceBits))
    self.assertEqual(original.decode(clone.encode(self._d)),
                     clone.decode(original.encode(self._d)))
def testHoliday(self):
    '''Check the holiday field on several dates from 24 to 27 December,
    where the encoding transitions smoothly.'''
    # forced=True is used here for readability only; its use is not
    # recommended (see scalar.py).
    encoder = DateEncoder(holiday=5, forced=True)

    onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype='uint8')
    offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype='uint8')
    # Pattern expected partway through the transition.
    inTransition = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
                               dtype='uint8')

    cases = (
        (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
        (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
        (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
    )
    for when, expectedBits in cases:
        matches = encoder.encode(when) == expectedBits
        self.assertTrue(matches.all())
def testHolidayMultiple(self):
    """Check the holiday field when several holidays are configured, given
    both as (month, day) and as (year, month, day) tuples."""
    # forced=True is used here for readability only; its use is not
    # recommended (see scalar.py).
    configuredHolidays = [(12, 25), (2018, 4, 1), (2017, 4, 16)]
    encoder = DateEncoder(holiday=5, forced=True,
                          holidays=configuredHolidays)

    onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
    offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")

    cases = (
        (datetime.datetime(2011, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2007, 12, 2, 4, 55), offHoliday),
        (datetime.datetime(2018, 4, 1, 16, 10), onHoliday),
        (datetime.datetime(2017, 4, 16, 16, 10), onHoliday),
    )
    for when, expectedBits in cases:
        actualBits = encoder.encode(when)
        self.assertTrue(numpy.array_equal(actualBits, expectedBits))
def testHoliday(self):
    """Check the holiday field on several dates from 24 to 27 December,
    where the encoding transitions smoothly."""
    # forced=True is used here for readability only; its use is not
    # recommended (see scalar.py).
    encoder = DateEncoder(holiday=5, forced=True)

    onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
    offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")
    # Pattern expected partway through the transition.
    inTransition = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
                               dtype="uint8")

    cases = (
        (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
        (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
        (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
    )
    for when, expectedBits in cases:
        actualBits = encoder.encode(when)
        self.assertTrue(numpy.array_equal(actualBits, expectedBits))
def HTM_AD(
        Data='Test',
        vars={'value': ['num']},
        prec_param=5,
        pooler_out=2024,  # Number of columns of the pooler output
        cell_col=5,  # HTM cells per column
        W=72,  # Window parameter
        W_prim=5,  # Local window for anomaly detection likelihood
        eps=1e-6,  # Avoids divisions by zero
        athreshold=0.95):
    """
    Perform HTM-based anomaly detection on the time series provided.

    :param Data: pandas DataFrame holding the series, or the string
        'Test' (the default) to load the bundled sample CSV instead.
    :param vars: Maps a column name to a list of encoder types.
        Possible values: num, tod, weekend. The default dict is never
        mutated here.
    :param prec_param: Defines how much precision the number encoder has.
        The encoder resolution is the column's standard deviation divided
        by this parameter. A high precision might mean a high error when
        predicting the value of noisy variables.
    :param pooler_out: Number of columns of the pooler output
    :param cell_col: HTM cells per column
    :param W: Window parameter
    :param W_prim: Local window for anomaly detection likelihood
    :param eps: Lower bound on the window standard deviation, to avoid
        divisions by zero
    :param athreshold: Anomaly likelihood above which a row is flagged as
        anomalous
    :return: The Data + 3 columns
        Anomaly: share of active columns that were not predicted by the
            HTM network on the previous step
        Anomaly_Likelihood: likelihood of the data being anomalous
        Anomaly_flag: classifies the data as anomalous vs non anomalous
        If an encoder type is not recognized, a dict with an "error" key
        is returned instead.
    """
    # Comparing a DataFrame with == yields a DataFrame, whose truth value
    # is ambiguous and raises ValueError; only compare when Data is a
    # string.
    if isinstance(Data, str) and Data == 'Test':
        # No data available: load the temperature benchmark dataset.
        Data = pd.read_csv('anomaly_API/Data/sample.csv',
                           parse_dates=True,
                           index_col='timestamp')
        Data = Data.resample('H').bfill().interpolate()

    # NOTE: the locals Data, prec_param, TODE and WENDE are referenced by
    # name from the exec'd strings below; do not rename them.
    TODE = DateEncoder(timeOfDay=(21, 1))
    WENDE = DateEncoder(weekend=21)
    var_encoders = set()

    # Build one Encoder record per (column, encoder type) pair.
    # NOTE(review): exec builds code from the keys of `vars`; never pass
    # untrusted column names. With (locals(), globals()) as namespaces the
    # assigned names (RDSE_<column>, s) land in the module globals.
    for x in vars:
        for y in vars[x]:
            if y == 'num':
                exec(
                    "RDSE_" + x +
                    " = RandomDistributedScalarEncoder(resolution=Data['" +
                    x + "'].std()/prec_param)", locals(), globals())
                var_encoders.add(Encoder(x, ["RDSE_" + x]))
            elif y == 'weekend':
                var_encoders.add(Encoder(x, ["WENDE"]))
            elif y == 'tod':
                var_encoders.add(Encoder(x, ["TODE"]))
            else:
                return {"error": "Variable encoder type is not recognized "}

    # Total encoder width = sum of the widths of every encoder in use.
    encoder_width = 0
    for x in var_encoders:
        for y in x.encoders:
            # Writes `s` into the module globals, read on the next line.
            exec("s = " + y + ".getWidth()", locals(), globals())
            encoder_width += s

    SP = SpatialPooler(
        inputDimensions=encoder_width,
        columnDimensions=pooler_out,
        potentialPct=0.8,
        globalInhibition=True,
        numActiveColumnsPerInhArea=pooler_out // 50,  # 2% of the total area
        boostStrength=1.0,
        wrapAround=False)
    TM = TemporalMemory(columnDimensions=(pooler_out, ),
                        cellsPerColumn=cell_col)

    Data['Anomaly'] = 0.0
    Data['Anomaly_Likelihood'] = 0.0

    # Pass 1: train the spatial pooler on every row.
    print("Spatial pooler learning")
    start = time.time()
    active_columns = np.zeros(pooler_out)
    for x in range(len(Data)):
        encoder = multiencode(var_encoders, Data, x)
        SP.compute(encoder, True, active_columns)
    end = time.time()
    print(end - start)

    # Pass 2: run the spatial pooler without learning, train the temporal
    # memory and score each row against the previous prediction.
    print("Temporal pooler learning")
    start = time.time()
    A_score = np.zeros(len(Data))
    for x in range(len(Data)):
        encoder = multiencode(var_encoders, Data, x)
        SP.compute(encoder, False, active_columns)
        col_index = active_columns.nonzero()[0]
        TM.compute(col_index, learn=True)
        if x > 0:
            # Raw score: 1 - (predicted active columns / active columns).
            inter = set(col_index).intersection(Prev_pred_col)
            inter_l = len(inter)
            active_l = len(col_index)
            A_score[x] = 1 - (inter_l / active_l)
            # Column -2 is 'Anomaly' (added second to last above).
            Data.iat[x, -2] = A_score[x]
        # Columns of the cells predicted for the next step.
        Prev_pred_col = list(
            set(cell // cell_col for cell in TM.getPredictiveCells()))
    end = time.time()
    print(end - start)

    # Anomaly likelihood: compare the mean raw score of the short window
    # with that of the long window, scaled by the long window's spread.
    AL_score = np.zeros(len(Data))
    for x in range(len(Data)):
        if x > 0:
            W_vec = A_score[max(0, x - W):x]
            W_prim_vec = A_score[max(0, x - W_prim):x]
            AL_score[x] = 1 - 2 * norm.sf(
                abs(np.mean(W_vec) - np.mean(W_prim_vec)) /
                max(np.std(W_vec), eps))
            # Column -1 is 'Anomaly_Likelihood' at this point.
            Data.iat[x, -1] = AL_score[x]

    Data['Anomaly_flag'] = athreshold < Data['Anomaly_Likelihood']
    return Data
from nupic.encoders.date import DateEncoder
from nupic.encoders.random_distributed_scalar import (
    RandomDistributedScalarEncoder)

# Two separate date encoders, one per field: time of day (width 21,
# radius 1) and weekend (width 21).
timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1))
weekendEncoder = DateEncoder(weekend=21)

# Scalar encoder with a resolution of 0.88.
scalarEncoder = RandomDistributedScalarEncoder(resolution=0.88)
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
    """The 'numenta' detector, with the following changes:

    - Use pure Temporal Memory, not the classic TP that uses backtracking.
    - Don't spatial pool the timestamp. Pass it in as distal input.
    - 1 cell per column.
    - Use w=41 in the scalar encoding, rather than w=21, to make up for the
      lost timestamp input to the spatial pooler.
    """

    def __init__(self, *args, **kwargs):
        super(DistalTimestamps1CellPerColumnDetector,
              self).__init__(*args, **kwargs)

        # Everything below is created for real in initialize().
        self.valueEncoder = None
        self.encodedValue = None
        self.timestampEncoder = None
        self.encodedTimestamp = None

        # Active timestamp bits for the current and the previous record.
        self.activeExternalCells = []
        self.prevActiveExternalCells = []

        self.sp = None
        self.spOutput = None
        self.etm = None
        self.anomalyLikelihood = None

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        """Create the encoders, spatial pooler, extended temporal memory
        and anomaly-likelihood tracker."""
        # Widen the observed range by 20% on each side; a degenerate range
        # (min == max) uses min + 1 as its upper bound.
        padding = abs(self.inputMax - self.inputMin) * 0.2
        lowerBound = self.inputMin - padding
        if self.inputMin != self.inputMax:
            upperBound = self.inputMax + padding
        else:
            upperBound = self.inputMin + 1

        bucketCount = 130.0
        resolution = max(0.001, (upperBound - lowerBound) / bucketCount)
        self.valueEncoder = RandomDistributedScalarEncoder(
            resolution, w=41, seed=42)
        self.encodedValue = np.zeros(
            self.valueEncoder.getWidth(), dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        self.encodedTimestamp = np.zeros(
            self.timestampEncoder.getWidth(), dtype=np.uint32)

        # Only the value encoding goes through the spatial pooler.
        inputWidth = self.valueEncoder.getWidth()
        self.sp = SpatialPooler(
            globalInhibition=True,
            columnDimensions=[2048],
            inputDimensions=[inputWidth],
            potentialRadius=inputWidth,
            numActiveColumnsPerInhArea=40,
            seed=1956,
            potentialPct=0.8,
            boostStrength=0.0,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            synPermInactiveDec=0.0005,
        )
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(
            activationThreshold=13,
            cellsPerColumn=1,
            columnDimensions=(2048,),
            basalInputDimensions=(self.timestampEncoder.getWidth(),),
            initialPermanence=0.21,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=32,
            minThreshold=10,
            maxNewSynapseCount=20,
            permanenceDecrement=0.1,
            permanenceIncrement=0.1,
            seed=1960,
            checkInputs=False,
        )

        learningPeriod = math.floor(self.probationaryPeriod / 2.0)
        estimationSamples = self.probationaryPeriod - learningPeriod
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=estimationSamples,
            reestimationPeriod=100)

    def handleRecord(self, inputData):
        """Returns a tuple (logScore, rawScore), where logScore is the log
        likelihood computed from the anomaly probability of rawScore."""
        value = inputData["value"]
        timestamp = inputData["timestamp"]

        self.valueEncoder.encodeIntoArray(value, self.encodedValue)
        self.timestampEncoder.encodeIntoArray(timestamp,
                                              self.encodedTimestamp)

        # Shift the timestamp bits: current becomes previous.
        self.prevActiveExternalCells = self.activeExternalCells
        self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

        self.sp.compute(self.encodedValue, True, self.spOutput)
        activeColumns = self.spOutput.nonzero()[0]

        # Raw score: share of active columns that were not predicted. The
        # predictions are read before etm.compute() runs for this record.
        predictedColumns = {
            self.etm.columnForCell(cell)
            for cell in self.etm.getPredictiveCells()
        }
        unpredicted = set(activeColumns.tolist()) - predictedColumns
        rawScore = len(unpredicted) / float(len(activeColumns))

        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            value, rawScore, timestamp)
        logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

        self.etm.compute(
            activeColumns,
            activeCellsExternalBasal=self.activeExternalCells,
            reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
            growthCandidatesExternalBasal=self.prevActiveExternalCells)

        return (logScore, rawScore)
def runHotgym():
    """Run the hot gym example: encode each CSV record, feed it through the
    spatial pooler, temporal memory and SDR classifier, and print the best
    1-step prediction for every record.

    Reads the file at ``_INPUT_FILE_PATH``; the first three rows are
    skipped as header rows.
    """
    timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1))
    weekendEncoder = DateEncoder(weekend=21)
    scalarEncoder = RandomDistributedScalarEncoder(0.88)

    encodingWidth = (timeOfDayEncoder.getWidth()
                     + weekendEncoder.getWidth()
                     + scalarEncoder.getWidth())

    sp = SpatialPooler(
        # How large the input encoding will be.
        inputDimensions=(encodingWidth),
        # How many mini-columns will be in the Spatial Pooler.
        columnDimensions=(2048),
        # What percent of the columns's receptive field is available for
        # potential synapses?
        potentialPct=0.85,
        # This means that the input space has no topology.
        globalInhibition=True,
        localAreaDensity=-1.0,
        # Roughly 2%, giving that there is only one inhibition area because
        # we have turned on globalInhibition (40 / 2048 = 0.0195)
        numActiveColumnsPerInhArea=40.0,
        # How quickly synapses grow and degrade.
        synPermInactiveDec=0.005,
        synPermActiveInc=0.04,
        synPermConnected=0.1,
        # boostStrength controls the strength of boosting. Boosting
        # encourages efficient usage of SP columns.
        boostStrength=3.0,
        # Random number generator seed.
        seed=1956,
        # Determines if inputs at the beginning and end of an input
        # dimension should be considered neighbors when mapping columns to
        # inputs.
        wrapAround=False
    )

    tm = TemporalMemory(
        # Must be the same dimensions as the SP
        columnDimensions=(2048, ),
        # How many cells in each mini-column.
        cellsPerColumn=32,
        # A segment is active if it has >= activationThreshold connected
        # synapses that are active due to infActiveState
        activationThreshold=16,
        initialPermanence=0.21,
        connectedPermanence=0.5,
        # Minimum number of active synapses for a segment to be considered
        # during search for the best-matching segments.
        minThreshold=12,
        # The max number of synapses added to a segment during learning
        maxNewSynapseCount=20,
        permanenceIncrement=0.1,
        permanenceDecrement=0.1,
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=128,
        maxSynapsesPerSegment=32,
        seed=1960
    )

    classifier = SDRClassifierFactory.create()

    with open(_INPUT_FILE_PATH) as fin:
        reader = csv.reader(fin)
        # Skip the three header rows. The builtin next() works on both
        # Python 2.6+ and Python 3, unlike the reader.next() method.
        for _ in range(3):
            next(reader)

        for count, record in enumerate(reader):
            # Convert data string into Python date object.
            timestamp = datetime.datetime.strptime(record[0],
                                                   "%m/%d/%y %H:%M")
            # Convert data value string into float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for
            # the encoders to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())

            # Now we call the encoders to create bit representations for
            # each value.
            timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
            weekendEncoder.encodeIntoArray(timestamp, weekendBits)
            scalarEncoder.encodeIntoArray(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for
            # Spatial Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, consumptionBits]
            )

            # Create an array to represent active columns, all initially
            # zero. This will be populated by the compute method below. It
            # must have the same dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(2048)

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

            # Run classifier to translate active cells back to scalar
            # value.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": consumption
                },
                learn=True,
                infer=True
            )

            # Print the best prediction for 1 step out: pair every
            # probability with its actual value and take the largest.
            probability, value = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True
            )[0]
            print("1-step: {:16} ({:4.4}%)".format(value, probability * 100))
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
    """The 'numenta' detector, with the following changes:

    - Use pure Temporal Memory, not the classic TP that uses backtracking.
    - Don't spatial pool the timestamp. Pass it in as distal input.
    - 1 cell per column.
    - Use w=41 in the scalar encoding, rather than w=21, to make up for the
      lost timestamp input to the spatial pooler.
    """

    def __init__(self, *args, **kwargs):
        super(DistalTimestamps1CellPerColumnDetector,
              self).__init__(*args, **kwargs)

        # Placeholders; the real objects are built in initialize().
        self.valueEncoder = None
        self.encodedValue = None

        self.timestampEncoder = None
        self.encodedTimestamp = None

        # Active timestamp bits of the current and the previous record.
        self.activeExternalCells = []
        self.prevActiveExternalCells = []

        self.sp = None
        self.spOutput = None

        self.etm = None

        self.anomalyLikelihood = None

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        """Build the encoders, spatial pooler, extended temporal memory and
        anomaly-likelihood tracker from the detector's input range and
        probationary period."""
        spread = abs(self.inputMax - self.inputMin)
        rangePadding = spread * 0.2
        minVal = self.inputMin - rangePadding
        # A degenerate range (min == max) gets an upper bound of min + 1.
        if self.inputMin == self.inputMax:
            maxVal = self.inputMin + 1
        else:
            maxVal = self.inputMax + rangePadding

        numBuckets = 130.0
        resolution = max(0.001, (maxVal - minVal) / numBuckets)
        self.valueEncoder = RandomDistributedScalarEncoder(
            resolution, w=41, seed=42)
        self.encodedValue = np.zeros(
            self.valueEncoder.getWidth(), dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        self.encodedTimestamp = np.zeros(
            self.timestampEncoder.getWidth(), dtype=np.uint32)

        # The spatial pooler sees the value encoding only.
        inputWidth = self.valueEncoder.getWidth()
        spParams = dict(
            globalInhibition=True,
            columnDimensions=[2048],
            inputDimensions=[inputWidth],
            potentialRadius=inputWidth,
            numActiveColumnsPerInhArea=40,
            seed=1956,
            potentialPct=0.8,
            boostStrength=0.0,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            synPermInactiveDec=0.0005,
        )
        self.sp = SpatialPooler(**spParams)
        self.spOutput = np.zeros(2048, dtype=np.float32)

        # The timestamp encoding is the basal (external) input.
        etmParams = dict(
            activationThreshold=13,
            cellsPerColumn=1,
            columnDimensions=(2048,),
            basalInputDimensions=(self.timestampEncoder.getWidth(),),
            initialPermanence=0.21,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=32,
            minThreshold=10,
            maxNewSynapseCount=20,
            permanenceDecrement=0.1,
            permanenceIncrement=0.1,
            seed=1960,
            checkInputs=False,
        )
        self.etm = ExtendedTemporalMemory(**etmParams)

        learningPeriod = math.floor(self.probationaryPeriod / 2.0)
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=self.probationaryPeriod - learningPeriod,
            reestimationPeriod=100
        )

    def handleRecord(self, inputData):
        """Returns a tuple (logScore, rawScore), where logScore is the log
        likelihood computed from the anomaly probability of rawScore."""
        self.valueEncoder.encodeIntoArray(inputData["value"],
                                          self.encodedValue)
        self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                              self.encodedTimestamp)

        # Remember the previous timestamp bits before replacing them.
        self.prevActiveExternalCells = self.activeExternalCells
        self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

        self.sp.compute(self.encodedValue, True, self.spOutput)
        activeColumns = self.spOutput.nonzero()[0]
        activeColumnsSet = set(activeColumns.tolist())

        # Columns predicted before this record is fed to the ETM.
        prevPredictedColumns = set()
        for cell in self.etm.getPredictiveCells():
            prevPredictedColumns.add(self.etm.columnForCell(cell))

        # Share of the active columns that had not been predicted.
        missedColumns = activeColumnsSet - prevPredictedColumns
        rawScore = len(missedColumns) / float(len(activeColumns))

        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            inputData["value"], rawScore, inputData["timestamp"])
        logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

        self.etm.compute(
            activeColumns,
            activeCellsExternalBasal=self.activeExternalCells,
            reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
            growthCandidatesExternalBasal=self.prevActiveExternalCells)

        return (logScore, rawScore)
def go():
    """Stream the rec-center CSV through a spatial pooler and temporal
    memory, handing every timestep to a Sanity instance for display.

    The CSV is expected next to this module under
    ``data/rec-center-hourly.csv``; its first three rows are skipped. The
    loop waits for the user (``waitForUserContinue``) before each record.
    """
    valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
    timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))

    # The spatial pooler input is the timestamp bits followed by the value
    # bits.
    inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

    sp = SpatialPooler(
        **{
            "globalInhibition": True,
            "columnDimensions": [2048],
            "inputDimensions": [inputWidth],
            "potentialRadius": inputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        })

    tm = TemporalMemory(
        **{
            "activationThreshold": 20,
            "cellsPerColumn": 32,
            "columnDimensions": (2048, ),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1961,
        })

    inputPath = os.path.join(os.path.dirname(__file__),
                             "data/rec-center-hourly.csv")

    # The context manager guarantees the file is closed even if the loop
    # raises or is interrupted.
    with open(inputPath, "rb") as inputFile:
        csvReader = csv.reader(inputFile)
        # Skip the three header rows.
        for _ in range(3):
            next(csvReader)

        # Buffers reused for every record.
        encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
        encodedTimestamp = np.zeros(timestampEncoder.getWidth(),
                                    dtype=np.uint32)
        spOutput = np.zeros(2048, dtype=np.float32)

        sanityInstance = sanity.SPTMInstance(sp, tm)

        for timestampStr, consumptionStr in csvReader:
            sanityInstance.waitForUserContinue()

            timestamp = datetime.datetime.strptime(timestampStr,
                                                   "%m/%d/%y %H:%M")
            consumption = float(consumptionStr)

            timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
            valueEncoder.encodeIntoArray(consumption, encodedValue)

            sensoryInput = np.concatenate((
                encodedTimestamp,
                encodedValue,
            ))

            sp.compute(sensoryInput, True, spOutput)
            activeColumns = np.flatnonzero(spOutput)

            # Read the predictions before tm.compute() replaces them.
            predictedCells = tm.getPredictiveCells()
            tm.compute(activeColumns)

            activeInputBits = np.flatnonzero(sensoryInput)
            displayText = {
                "timestamp": timestampStr,
                "consumption": consumptionStr
            }

            sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                          predictedCells, displayText)
from nupic.encoders import ScalarEncoder from nupic.encoders.date import DateEncoder from nupic.encoders.category import CategoryEncoder #from nupic.research.spatial_pooler import SpatialPooler from nupic.algorithms.spatial_pooler import SpatialPooler from nupic.algorithms.anomaly import Anomaly from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood import datetime from scipy.stats import norm import numpy as np import math de = DateEncoder(season=5) file1 = open("time_file.txt", "r") cpt = 0 for line in file1: if cpt == 0: line = line.replace("/", "-") line = line.replace("19-", "2019-") #print line, lines = '2019-06-03 00:00:16' now = datetime.datetime.strptime(lines, "%Y-%m-%d %H:%M:%S") print "now = ", de.encode(now) cpt += 1 categories = ('info', 'error', 'warning') encoder = CategoryEncoder(w=3, categoryList=categories, forced=True) info = encoder.encode("info") error = encoder.encode("error") warning = encoder.encode("warning") #print "info = ", info
var_chosen = 'value' Data = ma_preprocess(Data[var_chosen], 4).rename(columns={0: var_chosen}) Data['Anomaly'] = 0.0 Data['Anomaly_Likelihood'] = 0.0 prec_param = 5 pooler_out = 2024 cell_col = 5 # Value Encoder Resoltion Res = Data.std()[0] / prec_param RDSE = RandomDistributedScalarEncoder(resolution=Res) # We ecndoe now the datas TODE = DateEncoder(timeOfDay=(21, 1)) WENDE = DateEncoder(weekend=21) # Spatial Pooler Parameters var_encoders = {Encoder('value', ['RDSE'])} # Encoder('_index', ['TODE'])} encoder_width = 0 for x in var_encoders: for y in x.encoders: exec("s = " + y + ".getWidth()") encoder_width += s SP = SpatialPooler( inputDimensions=encoder_width,
class DateEncoderTest(unittest.TestCase): '''Unit tests for DateEncoder class''' def setUp(self): ##TODO: comment and code don't match - weekend?!! # 3 bits for season, 1 bit for day of week, 2 for weekend, 5 for time of day self._e = DateEncoder(season=3, dayOfWeek=1, weekend=3, timeOfDay=5) # in the middle of fall, Thursday, not a weekend, afternoon - 4th Nov, 2010, 14:55 self._d = datetime.datetime(2010, 11, 4, 14, 55) self._bits = self._e.encode(self._d) # season is aaabbbcccddd (1 bit/month) # TODO should be <<3? # should be 000000000111 (centered on month 11 - Nov) seasonExpected = [0,0,0,0,0,0,0,0,0,1,1,1] # week is MTWTFSS # contrary to localtime documentation, Monday = 0 (for python # datetime.datetime.timetuple() dayOfWeekExpected = [0,0,0,1,0,0,0] # not a weekend, so it should be "False" weekendExpected = [1,1,1,0,0,0] # time of day has radius of 4 hours and w of 5 so each bit = 240/5 min = 48min # 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6 # should be 30 bits total (30 * 48 minutes = 24 hours) timeOfDayExpected = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0] self._expected = numpy.array(seasonExpected + dayOfWeekExpected + weekendExpected \ + timeOfDayExpected, dtype=defaultDtype) def testDateEncoder(self): '''creating date encoder instance''' self.assertEqual(self._e.getDescription(), [("season", 0), ("day of week", 12), ("weekend", 19), ("time of day", 25)]) self.assertTrue((self._expected == self._bits).all()) print self._e.pprintHeader() self._e.pprint(self._bits) print def testMissingValues(self): '''missing values''' mvOutput = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA) self.assertEqual(sum(mvOutput), 0) def testDecoding(self): '''decoding date''' decoded = self._e.decode(self._bits) (fieldsDict, fieldNames) = decoded self.assertEqual(len(fieldsDict), 4) (ranges, desc) = fieldsDict['season'] self.assertEqual(len(ranges), 1) self.assertSequenceEqual(ranges[0], [305, 305]) (ranges, desc) = fieldsDict['time of day'] 
self.assertEqual(len(ranges), 1) self.assertSequenceEqual(ranges[0], [14.4, 14.4]) (ranges, desc) = fieldsDict['day of week'] self.assertEqual(len(ranges), 1) self.assertSequenceEqual(ranges[0], [3, 3]) (ranges, desc) = fieldsDict['weekend'] self.assertEqual(len(ranges), 1) self.assertSequenceEqual(ranges[0], [0, 0]) print decoded print "decodedToStr=>", self._e.decodedToStr(decoded) def testTopDownCompute(self): '''Check topDownCompute''' topDown = self._e.topDownCompute(self._bits) topDownValues = numpy.array([elem.value for elem in topDown]) errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8]) self.assertAlmostEqual(errs.max(), 0, 4) def testBucketIndexSupport(self): '''Check bucket index support''' bucketIndices = self._e.getBucketIndices(self._d) print "bucket indices:", bucketIndices topDown = self._e.getBucketInfo(bucketIndices) topDownValues = numpy.array([elem.value for elem in topDown]) errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8]) self.assertAlmostEqual(errs.max(), 0, 4) encodings = [] for x in topDown: encodings.extend(x.encoding) self.assertTrue((encodings == self._expected).all()) def testHoliday(self): '''look at holiday more carefully because of the smooth transition''' e = DateEncoder(holiday=5) holiday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype='uint8') notholiday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype='uint8') holiday2 = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype='uint8') d = datetime.datetime(2010, 12, 25, 4, 55) self.assertTrue((e.encode(d) == holiday).all()) d = datetime.datetime(2008, 12, 27, 4, 55) self.assertTrue((e.encode(d) == notholiday).all()) d = datetime.datetime(1999, 12, 26, 8, 00) self.assertTrue((e.encode(d) == holiday2).all()) d = datetime.datetime(2011, 12, 24, 16, 00) self.assertTrue((e.encode(d) == holiday2).all()) def testWeekend(self): '''Test weekend encoder''' e = DateEncoder(customDays = (21,["sat","sun","fri"])) mon = DateEncoder(customDays = (21,"Monday")) e2 = DateEncoder(weekend=(21,1)) d = 
datetime.datetime(1988,5,29,20,00) self.assertTrue((e.encode(d) == e2.encode(d)).all()) for _ in range(300): d = d+datetime.timedelta(days=1) self.assertTrue((e.encode(d) == e2.encode(d)).all()) print mon.decode(mon.encode(d)) #Make sure if mon.decode(mon.encode(d))[0]["Monday"][0][0][0]==1.0: self.assertEqual(d.weekday(), 0) else: self.assertFalse(d.weekday()==0)
def testWeekend(self): '''Test weekend encoder''' e = DateEncoder(customDays = (21,["sat","sun","fri"])) mon = DateEncoder(customDays = (21,"Monday")) e2 = DateEncoder(weekend=(21,1)) d = datetime.datetime(1988,5,29,20,00) self.assertTrue((e.encode(d) == e2.encode(d)).all()) for _ in range(300): d = d+datetime.timedelta(days=1) self.assertTrue((e.encode(d) == e2.encode(d)).all()) print mon.decode(mon.encode(d)) #Make sure if mon.decode(mon.encode(d))[0]["Monday"][0][0][0]==1.0: self.assertEqual(d.weekday(), 0) else: self.assertFalse(d.weekday()==0)
class DateEncoderTest(unittest.TestCase):
    """Exercises DateEncoder encoding, decoding and serialization."""

    def setUp(self):
        """Encode one reference date and build the bit pattern expected for it."""
        # Widths passed to the encoder: 3 for season, 1 for day of week,
        # 1 for weekend, 5 for time of day.
        self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)

        # Thursday 4th Nov 2010, 14:55 -- middle of fall, not a weekend,
        # afternoon.
        self._d = datetime.datetime(2010, 11, 4, 14, 55)
        self._bits = self._e.encode(self._d)

        # Season: 12 bits (1 bit/month), the last three set
        # (centered on month 11 - Nov).  TODO should be <<3?
        seasonBits = [0] * 9 + [1] * 3

        # Day of week is MTWTFSS with Monday = 0, so Thursday is index 3.
        dayOfWeekBits = [0, 0, 0, 1, 0, 0, 0]

        # Not a weekend, so the "False" bit is the active one.
        weekendBits = [1, 0]

        # Time of day: radius of 4 hours and w of 5, so each bit covers
        # 240/5 = 48 minutes. 14:55 is minute 895 and 895/48 = bit 18.6.
        # 30 bits in total (30 * 48 minutes = 24 hours).
        timeOfDayBits = [0] * 16 + [1] * 5 + [0] * 9

        allBits = seasonBits + dayOfWeekBits + weekendBits + timeOfDayBits
        self._expected = numpy.array(allBits, dtype=defaultDtype)

    def testDateEncoder(self):
        """The field offsets and the encoded bits match the expectation."""
        expectedDescription = [("season", 0), ("day of week", 12),
                               ("weekend", 19), ("time of day", 21)]
        self.assertSequenceEqual(self._e.getDescription(),
                                 expectedDescription)
        self.assertTrue(numpy.array_equal(self._expected, self._bits))

    def testMissingValues(self):
        """The missing-data sentinel encodes to an all-zero output."""
        encodedMissing = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(sum(encodedMissing), 0)

    def testDecoding(self):
        """Decoding the reference bits yields one range per field."""
        fieldsDict, _ = self._e.decode(self._bits)
        self.assertEqual(len(fieldsDict), 4)

        # Checked in the same order as before: season, time of day,
        # day of week, weekend.
        expectedRanges = (
            ('season', [305, 305]),
            ('time of day', [14.4, 14.4]),
            ('day of week', [3, 3]),
            ('weekend', [0, 0]),
        )
        for fieldName, expectedRange in expectedRanges:
            ranges, _ = fieldsDict[fieldName]
            self.assertEqual(len(ranges), 1)
            self.assertSequenceEqual(ranges[0], expectedRange)

    def testTopDownCompute(self):
        """topDownCompute returns the expected value for each field."""
        results = self._e.topDownCompute(self._bits)
        actualValues = numpy.array([item.value for item in results])
        expectedValues = numpy.array([320.25, 3.5, .167, 14.8])
        # NOTE(review): only the largest signed difference is compared
        # with 0, so negative deviations are not detected by this check.
        differences = actualValues - expectedValues
        self.assertAlmostEqual(differences.max(), 0, 4)

    def testBucketIndexSupport(self):
        """Bucket indices map back to the expected values and encoding."""
        bucketIndices = self._e.getBucketIndices(self._d)
        bucketInfo = self._e.getBucketInfo(bucketIndices)

        actualValues = numpy.array([item.value for item in bucketInfo])
        expectedValues = numpy.array([320.25, 3.5, .167, 14.8])
        differences = actualValues - expectedValues
        self.assertAlmostEqual(differences.max(), 0, 4)

        # Concatenating the per-field encodings must reproduce the full
        # expected bit pattern.
        encodings = [bit for item in bucketInfo for bit in item.encoding]
        self.assertTrue(numpy.array_equal(encodings, self._expected))

    def testHoliday(self):
        """Holiday encoding, including the dates around the holiday."""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        e = DateEncoder(holiday=5, forced=True)
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")
        holiday2 = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype="uint8")

        cases = (
            (datetime.datetime(2010, 12, 25, 4, 55), holiday),
            (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
            (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
            (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
        )
        for when, expectedBits in cases:
            self.assertTrue(numpy.array_equal(e.encode(when), expectedBits))

    def testHolidayMultiple(self):
        """Holiday encoding with an explicit list of holidays."""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        e = DateEncoder(holiday=5, forced=True,
                        holidays=[(12, 25), (2018, 4, 1), (2017, 4, 16)])
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")

        cases = (
            (datetime.datetime(2011, 12, 25, 4, 55), holiday),
            (datetime.datetime(2007, 12, 2, 4, 55), notholiday),
            (datetime.datetime(2018, 4, 1, 16, 10), holiday),
            (datetime.datetime(2017, 4, 16, 16, 10), holiday),
        )
        for when, expectedBits in cases:
            self.assertTrue(numpy.array_equal(e.encode(when), expectedBits))

    def testWeekend(self):
        """customDays and weekend encoders agree over 300 consecutive days."""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        customWeekend = DateEncoder(customDays=(21, ["sat", "sun", "fri"]),
                                    forced=True)
        mondayOnly = DateEncoder(customDays=(21, "Monday"), forced=True)
        weekend = DateEncoder(weekend=(21, 1), forced=True)

        day = datetime.datetime(1988, 5, 29, 20, 0)
        self.assertTrue(
            numpy.array_equal(customWeekend.encode(day), weekend.encode(day)))

        oneDay = datetime.timedelta(days=1)
        for _ in range(300):
            day = day + oneDay
            self.assertTrue(
                numpy.array_equal(customWeekend.encode(day),
                                  weekend.encode(day)))

            # The "Monday" custom-day encoder must decode to 1.0 exactly
            # on Mondays (weekday() == 0).
            decodedMonday = mondayOnly.decode(mondayOnly.encode(day))
            if decodedMonday[0]["Monday"][0][0][0] == 1.0:
                self.assertEqual(day.weekday(), 0)
            else:
                self.assertNotEqual(day.weekday(), 0)

    @unittest.skipUnless(
        capnp, "pycapnp is not installed, skipping serialization test.")
    def testReadWrite(self):
        """An encoder restored from its proto behaves like the original."""
        originalTS = datetime.datetime(1997, 8, 29, 2, 14)
        originalValue = self._e.encode(originalTS)

        proto1 = DateEncoderProto.new_message()
        self._e.write(proto1)

        # Round-trip the proto through a temp file and rebuild the encoder
        # from the copy that was read back.
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = DateEncoderProto.read(f)
            encoder = DateEncoder.read(proto2)

        self.assertIsInstance(encoder, DateEncoder)

        # Scalar attributes must survive the round trip unchanged.
        for attrName in ("width", "weekendOffset", "timeOfDayOffset",
                         "seasonOffset", "dayOfWeekOffset"):
            self.assertEqual(getattr(encoder, attrName),
                             getattr(self._e, attrName))

        # Each sub-encoder must come back with the same class.
        for attrName in ("customDaysEncoder", "dayOfWeekEncoder",
                         "seasonEncoder", "timeOfDayEncoder",
                         "weekendEncoder"):
            self.assertIsInstance(getattr(encoder, attrName),
                                  getattr(self._e, attrName).__class__)

        self.assertTrue(numpy.array_equal(self._bits,
                                          encoder.encode(self._d)))
        self.assertTrue(numpy.array_equal(encoder.encode(originalTS),
                                          originalValue))
        self.assertEqual(self._e.decode(encoder.encode(self._d)),
                         encoder.decode(self._e.encode(self._d)))
def runHotgym(numRecords):
    """Run rows of the input CSV through encoders, SP, TM and a classifier.

    Model parameters are read from the YAML file at ``_PARAMS_PATH``. For
    every processed row the timestamp and the ``Ct`` / ``ZIP_10467`` values
    are encoded, pooled, fed to the temporal memory, and the SDR classifier
    produces a one-step prediction for ``Ct``. Each result is appended to
    the returned list and written through
    ``output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)``.

    :param numRecords: maximum number of data rows to process.
    :returns: list of ``[timestamp string, Ct, one-step prediction,
        confidence in percent]``, one entry per processed row.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    CtEncoder = RandomDistributedScalarEncoder(enParams["Ct"]["resolution"])
    ZIP_10467Encoder = RandomDistributedScalarEncoder(
        enParams["ZIP_10467"]["resolution"])
    # Further fields (ZIP_*, DEPT_*, AGE_*, DIST_*) used to be sketched here
    # as commented-out code. To add one: create its encoder, add its width
    # to ``encodingWidth`` and append its bits to the concatenation below.

    # Sum the widths of the encoders that are actually concatenated instead
    # of assuming every scalar encoder is as wide as the Ct encoder.
    encodingWidth = (timeOfDayEncoder.getWidth() +
                     weekendEncoder.getWidth() +
                     CtEncoder.getWidth() +
                     ZIP_10467Encoder.getWidth())

    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The TM connected threshold is taken from the SP parameters.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []

    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Discard the first three rows (presumably header rows -- confirm
        # against the data file).
        next(reader)
        next(reader)
        next(reader)

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)
        try:
            for count, record in enumerate(reader):
                if count >= numRecords:
                    break

                # Column 0 is the timestamp, columns 1 and 2 the values.
                timestamp = datetime.datetime.strptime(record[0],
                                                       "%Y-%m-%d %H:%M:%S")
                Ct = float(record[1])
                ZIP_10467 = float(record[2])

                # The encoders populate zero-filled arrays in place.
                timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
                weekendBits = numpy.zeros(weekendEncoder.getWidth())
                CtBits = numpy.zeros(CtEncoder.getWidth())
                ZIP_10467Bits = numpy.zeros(ZIP_10467Encoder.getWidth())

                timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
                weekendEncoder.encodeIntoArray(timestamp, weekendBits)
                CtEncoder.encodeIntoArray(Ct, CtBits)
                ZIP_10467Encoder.encodeIntoArray(ZIP_10467, ZIP_10467Bits)

                # One large encoding for the spatial pooler; the order must
                # stay in step with ``encodingWidth`` above.
                encoding = numpy.concatenate(
                    [timeOfDayBits, weekendBits, CtBits, ZIP_10467Bits])

                # Filled in by sp.compute(); one slot per SP column.
                activeColumns = numpy.zeros(spParams["columnCount"])
                sp.compute(encoding, True, activeColumns)
                activeColumnIndices = numpy.nonzero(activeColumns)[0]

                tm.compute(activeColumnIndices, learn=True)
                activeCells = tm.getActiveCells()

                # Bucket of the current Ct value, used as the
                # classification target.
                bucketIdx = CtEncoder.getBucketIndices(Ct)[0]

                classifierResult = classifier.compute(
                    recordNum=count,
                    patternNZ=activeCells,
                    classification={
                        "bucketIdx": bucketIdx,
                        "actValue": Ct
                    },
                    learn=True,
                    infer=True)

                # Highest-confidence entry of the 1-step result.
                oneStepConfidence, oneStep = sorted(
                    zip(classifierResult[1],
                        classifierResult["actualValues"]),
                    reverse=True)[0]

                results.append(
                    [record[0], Ct, oneStep, oneStepConfidence * 100])
                output.write(record[0], Ct, oneStep, oneStepConfidence * 100)
        finally:
            # Close the writer even when a row fails to parse or a compute
            # step raises.
            output.close()

    return results
def initialize(self):
    """Create the encoders, spatial pooler, temporal memory and likelihood.

    Reads ``self.inputMin``, ``self.inputMax`` and
    ``self.probationaryPeriod``; resets ``self.minVal``, ``self.maxVal``
    and ``self.age``.
    """
    # Observed value range, used for spatial anomaly detection.
    self.minVal = None
    self.maxVal = None

    # Time-of-day encoder for the timestamp.
    self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49),
                                        name='time_enc')

    # RDSE for the scalar value: pad the input range by 20% on each side,
    # split it into 130 buckets, and never go below a resolution of 0.001.
    padding = abs(self.inputMax - self.inputMin) * 0.2
    paddedMin = self.inputMin - padding
    paddedMax = self.inputMax + padding
    bucketWidth = (paddedMax - paddedMin) / 130
    self.value_enc = RandomDistributedScalarEncoder(
        resolution=max(0.001, bucketWidth), name='value_rdse')

    # The spatial pooler input is the timestamp bits plus the value bits.
    encodingWidth = (self.timeOfDayEncoder.getWidth() +
                     self.value_enc.getWidth())

    spArgs = dict(
        inputDimensions=(encodingWidth,),
        columnDimensions=(2048,),
        potentialPct=0.8,
        potentialRadius=encodingWidth,
        globalInhibition=1,
        numActiveColumnsPerInhArea=40,
        synPermInactiveDec=0.0005,
        synPermActiveInc=0.003,
        synPermConnected=0.2,
        boostStrength=0.0,
        seed=1956,
        wrapAround=True,
    )
    self.sp = SpatialPooler(**spArgs)

    tmArgs = dict(
        columnDimensions=(2048,),
        cellsPerColumn=32,
        activationThreshold=20,
        initialPermanence=.5,  # Increased to connectedPermanence.
        connectedPermanence=.5,
        minThreshold=13,
        maxNewSynapseCount=31,
        permanenceIncrement=0.04,
        permanenceDecrement=0.008,
        predictedSegmentDecrement=0.001,
        maxSegmentsPerCell=128,
        # Changed meaning. Also see connections.topology[2]
        maxSynapsesPerSegment=128,
        seed=1993,
    )
    self.tm = TemporalMemory(**tmArgs)

    # Anomaly likelihood: the first half of the probationary period is the
    # learning period, the remainder supplies the estimation samples.
    numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
    estimationSamples = self.probationaryPeriod - numentaLearningPeriod
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        learningPeriod=numentaLearningPeriod,
        estimationSamples=estimationSamples,
        reestimationPeriod=100,
    )

    self.age = 0
def testWeekend(self):
    """customDays and weekend encoders agree over 300 consecutive days."""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    customWeekend = DateEncoder(customDays=(21, ["sat", "sun", "fri"]),
                                forced=True)
    mondayOnly = DateEncoder(customDays=(21, "Monday"), forced=True)
    weekend = DateEncoder(weekend=(21, 1), forced=True)

    day = datetime.datetime(1988, 5, 29, 20, 0)
    self.assertTrue(
        numpy.array_equal(customWeekend.encode(day), weekend.encode(day)))

    oneDay = datetime.timedelta(days=1)
    for _ in range(300):
        day = day + oneDay
        self.assertTrue(
            numpy.array_equal(customWeekend.encode(day), weekend.encode(day)))

        # The "Monday" custom-day encoder must decode to 1.0 exactly on
        # Mondays (weekday() == 0).
        decodedMonday = mondayOnly.decode(mondayOnly.encode(day))
        if decodedMonday[0]["Monday"][0][0][0] == 1.0:
            self.assertEqual(day.weekday(), 0)
        else:
            self.assertNotEqual(day.weekday(), 0)
def initialize(self):
    """Create the encoders, spatial pooler, temporal memory and likelihood.

    Reads ``self.inputMin``, ``self.inputMax``, ``self.probationaryPeriod``
    and ``self.useLikelihood``. The anomaly likelihood object is only
    created when ``self.useLikelihood`` is truthy.
    """
    # Initialize the RDSE with a resolution calculated from the data min and
    # max; the resolution is specific to the data stream. The range is
    # padded by 20% on each side. A constant stream (min == max) gets a
    # range of one unit above the minimum instead.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder.
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49,))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    # The spatial pooler input is the timestamp bits plus the value bits.
    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
        "globalInhibition": True,
        "columnDimensions": [2048],
        "inputDimensions": [inputWidth],
        "potentialRadius": inputWidth,
        "numActiveColumnsPerInhArea": 40,
        "seed": 1956,
        "potentialPct": 0.8,
        "maxBoost": 1.0,
        "synPermActiveInc": 0.003,
        "synPermConnected": 0.2,
        "synPermInactiveDec": 0.0005,
    })
    # One slot per SP column.
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
        "activationThreshold": 20,
        "cellsPerColumn": 32,
        "columnDimensions": (2048,),
        "initialPermanence": 0.24,
        "maxSegmentsPerCell": 128,
        "maxSynapsesPerSegment": 128,
        "minThreshold": 13,
        "maxNewSynapseCount": 31,
        "permanenceDecrement": 0.008,
        "permanenceIncrement": 0.04,
        "seed": 1960,
    })

    if self.useLikelihood:
        # math.floor() returns a float on Python 2; cast so that both
        # record counts handed to AnomalyLikelihood are integers.
        learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=self.probationaryPeriod - learningPeriod,
            reestimationPeriod=100)
class DateEncoderTest(unittest.TestCase):
    """Unit tests for DateEncoder class"""

    def setUp(self):
        """Build the shared encoder, a reference date and its expected bits."""
        # 3 bits for season, 1 bit for day of week, 1 for weekend, 5 for time
        # of day.
        self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)
        # In the middle of fall, Thursday, not a weekend, afternoon:
        # 4th Nov, 2010, 14:55.
        self._d = datetime.datetime(2010, 11, 4, 14, 55)
        self._bits = self._e.encode(self._d)

        # Season is aaabbbcccddd (1 bit/month).
        # TODO should be <<3?
        # Should be 000000000111 (centered on month 11 - Nov).
        seasonBits = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

        # Week is MTWTFSS. Contrary to localtime documentation, Monday = 0
        # (for python datetime.datetime.timetuple()).
        dayOfWeekBits = [0, 0, 0, 1, 0, 0, 0]

        # Not a weekend, so it should be "False".
        weekendBits = [1, 0]

        # Time of day has radius of 4 hours and w of 5 so each bit = 240/5
        # min = 48min. 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6.
        # Should be 30 bits total (30 * 48 minutes = 24 hours).
        timeOfDayBits = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
            0, 0, 0, 0, 0, 0, 0, 0
        ]

        # Concatenate the per-field expectations in encoder field order.
        expectedBits = []
        for part in (seasonBits, dayOfWeekBits, weekendBits, timeOfDayBits):
            expectedBits.extend(part)
        self._expected = numpy.array(expectedBits, dtype=defaultDtype)

    def testDateEncoder(self):
        """creating date encoder instance"""
        expectedDescription = [("season", 0),
                               ("day of week", 12),
                               ("weekend", 19),
                               ("time of day", 21)]
        self.assertSequenceEqual(self._e.getDescription(),
                                 expectedDescription)
        self.assertTrue(numpy.array_equal(self._expected, self._bits))

    def testMissingValues(self):
        """missing values"""
        encoded = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(sum(encoded), 0)

    def testDecoding(self):
        """decoding date"""
        fieldsDict, _ = self._e.decode(self._bits)
        self.assertEqual(len(fieldsDict), 4)

        # Each field must decode to exactly one range with these bounds.
        expectedRanges = (
            ("season", [305, 305]),
            ("time of day", [14.4, 14.4]),
            ("day of week", [3, 3]),
            ("weekend", [0, 0]),
        )
        for fieldName, expectedRange in expectedRanges:
            ranges, _ = fieldsDict[fieldName]
            self.assertEqual(len(ranges), 1)
            self.assertSequenceEqual(ranges[0], expectedRange)

    def testTopDownCompute(self):
        """Check topDownCompute"""
        topDown = self._e.topDownCompute(self._bits)
        actual = numpy.array([item.value for item in topDown])
        expected = numpy.array([320.25, 3.5, .167, 14.8])
        # NOTE(review): only the largest *signed* error is compared with 0,
        # so negative deviations are not caught by this check.
        largestError = (actual - expected).max()
        self.assertAlmostEqual(largestError, 0, 4)

    def testBucketIndexSupport(self):
        """Check bucket index support"""
        bucketIndices = self._e.getBucketIndices(self._d)
        bucketInfos = self._e.getBucketInfo(bucketIndices)
        actual = numpy.array([info.value for info in bucketInfos])
        expected = numpy.array([320.25, 3.5, .167, 14.8])
        # NOTE(review): signed maximum, see testTopDownCompute.
        largestError = (actual - expected).max()
        self.assertAlmostEqual(largestError, 0, 4)

        # The per-field encodings, joined, must equal the full encoding.
        encodings = [bit for info in bucketInfos for bit in info.encoding]
        self.assertTrue(numpy.array_equal(encodings, self._expected))

    def testHoliday(self):
        """look at holiday more carefully because of the smooth transition"""
        # forced=True is not recommended in general; it is used here for
        # readability (see scalar.py).
        encoder = DateEncoder(holiday=5, forced=True)
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                                 dtype="uint8")
        holiday2 = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype="uint8")

        cases = (
            (datetime.datetime(2010, 12, 25, 4, 55), holiday),
            (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
            (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
            (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
        )
        for when, expectedBits in cases:
            self.assertTrue(
                numpy.array_equal(encoder.encode(when), expectedBits))

    def testHolidayMultiple(self):
        """look at holiday more carefully because of the smooth transition"""
        # forced=True is not recommended in general; it is used here for
        # readability (see scalar.py).
        encoder = DateEncoder(holiday=5, forced=True,
                              holidays=[(12, 25), (2018, 4, 1),
                                        (2017, 4, 16)])
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                                 dtype="uint8")

        cases = (
            (datetime.datetime(2011, 12, 25, 4, 55), holiday),
            (datetime.datetime(2007, 12, 2, 4, 55), notholiday),
            (datetime.datetime(2018, 4, 1, 16, 10), holiday),
            (datetime.datetime(2017, 4, 16, 16, 10), holiday),
        )
        for when, expectedBits in cases:
            self.assertTrue(
                numpy.array_equal(encoder.encode(when), expectedBits))

    def testWeekend(self):
        """Test weekend encoder"""
        # forced=True is not recommended in general; it is used here for
        # readability (see scalar.py).
        customDaysEncoder = DateEncoder(
            customDays=(21, ["sat", "sun", "fri"]), forced=True)
        mondayEncoder = DateEncoder(customDays=(21, "Monday"), forced=True)
        weekendEncoder = DateEncoder(weekend=(21, 1), forced=True)

        oneDay = datetime.timedelta(days=1)
        day = datetime.datetime(1988, 5, 29, 20, 0)
        self.assertTrue(numpy.array_equal(customDaysEncoder.encode(day),
                                          weekendEncoder.encode(day)))
        for _ in range(300):
            day = day + oneDay
            self.assertTrue(numpy.array_equal(customDaysEncoder.encode(day),
                                              weekendEncoder.encode(day)))

            # Make sure the "Monday" encoder fires on Mondays and only then.
            decodedFields = mondayEncoder.decode(
                mondayEncoder.encode(day))[0]
            mondayValue = decodedFields["Monday"][0][0][0]
            if mondayValue == 1.0:
                self.assertEqual(day.weekday(), 0)
            else:
                self.assertNotEqual(day.weekday(), 0)

    @unittest.skipUnless(
        capnp, "pycapnp is not installed, skipping serialization test.")
    def testReadWrite(self):
        """Round-trip the encoder through its proto and compare with self._e."""
        originalTS = datetime.datetime(1997, 8, 29, 2, 14)
        originalValue = self._e.encode(originalTS)

        proto1 = DateEncoderProto.new_message()
        self._e.write(proto1)

        # Write the proto to a temp file and read it back into a new proto.
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = DateEncoderProto.read(f)

        encoder = DateEncoder.read(proto2)
        self.assertIsInstance(encoder, DateEncoder)

        # Scalar attributes must survive the round trip unchanged.
        for attrName in ("width", "weekendOffset", "timeOfDayOffset",
                         "seasonOffset", "dayOfWeekOffset"):
            self.assertEqual(getattr(encoder, attrName),
                             getattr(self._e, attrName))

        # Sub-encoders must come back as the same classes.
        for attrName in ("customDaysEncoder", "dayOfWeekEncoder",
                         "seasonEncoder", "timeOfDayEncoder",
                         "weekendEncoder"):
            self.assertIsInstance(getattr(encoder, attrName),
                                  getattr(self._e, attrName).__class__)

        self.assertTrue(
            numpy.array_equal(self._bits, encoder.encode(self._d)))
        self.assertTrue(
            numpy.array_equal(encoder.encode(originalTS), originalValue))
        self.assertEqual(self._e.decode(encoder.encode(self._d)),
                         encoder.decode(self._e.encode(self._d)))
class DateEncoderTest(unittest.TestCase):
    """Unit tests for DateEncoder class"""

    def setUp(self):
        """Build the shared encoder, a reference date and its expected bits."""
        # 3 bits for season, 1 bit for day of week, 3 for weekend, 5 for time
        # of day.
        self._e = DateEncoder(season=3, dayOfWeek=1, weekend=3, timeOfDay=5)
        # In the middle of fall, Thursday, not a weekend, afternoon:
        # 4th Nov, 2010, 14:55.
        self._d = datetime.datetime(2010, 11, 4, 14, 55)
        self._bits = self._e.encode(self._d)

        # Season is aaabbbcccddd (1 bit/month).
        # TODO should be <<3?
        # Should be 000000000111 (centered on month 11 - Nov).
        seasonExpected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

        # Week is MTWTFSS. Contrary to localtime documentation, Monday = 0
        # (for python datetime.datetime.timetuple()).
        dayOfWeekExpected = [0, 0, 0, 1, 0, 0, 0]

        # Not a weekend, so it should be "False".
        weekendExpected = [1, 1, 1, 0, 0, 0]

        # Time of day has radius of 4 hours and w of 5 so each bit = 240/5
        # min = 48min. 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6.
        # Should be 30 bits total (30 * 48 minutes = 24 hours).
        timeOfDayExpected = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
            0, 0, 0, 0, 0, 0, 0, 0
        ]
        self._expected = numpy.array(
            seasonExpected + dayOfWeekExpected + weekendExpected +
            timeOfDayExpected,
            dtype=defaultDtype)

    def testDateEncoder(self):
        """creating date encoder instance"""
        self.assertEqual(self._e.getDescription(),
                         [("season", 0), ("day of week", 12),
                          ("weekend", 19), ("time of day", 25)])
        # array_equal also copes with a shape mismatch, where
        # ``(a == b).all()`` does not.
        self.assertTrue(numpy.array_equal(self._expected, self._bits))
        # Single-argument print() behaves the same on Python 2 and 3.
        print(self._e.pprintHeader())
        self._e.pprint(self._bits)
        print("")

    def testMissingValues(self):
        """missing values"""
        mvOutput = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(sum(mvOutput), 0)

    def testDecoding(self):
        """decoding date"""
        decoded = self._e.decode(self._bits)
        (fieldsDict, _) = decoded
        self.assertEqual(len(fieldsDict), 4)

        (ranges, _) = fieldsDict['season']
        self.assertEqual(len(ranges), 1)
        self.assertSequenceEqual(ranges[0], [305, 305])

        (ranges, _) = fieldsDict['time of day']
        self.assertEqual(len(ranges), 1)
        self.assertSequenceEqual(ranges[0], [14.4, 14.4])

        (ranges, _) = fieldsDict['day of week']
        self.assertEqual(len(ranges), 1)
        self.assertSequenceEqual(ranges[0], [3, 3])

        (ranges, _) = fieldsDict['weekend']
        self.assertEqual(len(ranges), 1)
        self.assertSequenceEqual(ranges[0], [0, 0])

        print(decoded)
        print("decodedToStr=> %s" % (self._e.decodedToStr(decoded),))

    def testTopDownCompute(self):
        """Check topDownCompute"""
        topDown = self._e.topDownCompute(self._bits)
        topDownValues = numpy.array([elem.value for elem in topDown])
        errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8])
        # NOTE(review): only the largest *signed* error is compared with 0,
        # so negative deviations are not caught by this check.
        self.assertAlmostEqual(errs.max(), 0, 4)

    def testBucketIndexSupport(self):
        """Check bucket index support"""
        bucketIndices = self._e.getBucketIndices(self._d)
        print("bucket indices: %s" % (bucketIndices,))
        topDown = self._e.getBucketInfo(bucketIndices)
        topDownValues = numpy.array([elem.value for elem in topDown])
        errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8])
        # NOTE(review): signed maximum, see testTopDownCompute.
        self.assertAlmostEqual(errs.max(), 0, 4)

        # The per-field encodings, joined, must equal the full encoding.
        encodings = []
        for x in topDown:
            encodings.extend(x.encoding)
        self.assertTrue(numpy.array_equal(encodings, self._expected))

    def testHoliday(self):
        """look at holiday more carefully because of the smooth transition"""
        e = DateEncoder(holiday=5)
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype='uint8')
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                                 dtype='uint8')
        holiday2 = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype='uint8')

        d = datetime.datetime(2010, 12, 25, 4, 55)
        self.assertTrue(numpy.array_equal(e.encode(d), holiday))

        d = datetime.datetime(2008, 12, 27, 4, 55)
        self.assertTrue(numpy.array_equal(e.encode(d), notholiday))

        d = datetime.datetime(1999, 12, 26, 8, 0)
        self.assertTrue(numpy.array_equal(e.encode(d), holiday2))

        d = datetime.datetime(2011, 12, 24, 16, 0)
        self.assertTrue(numpy.array_equal(e.encode(d), holiday2))

    def testWeekend(self):
        """Test weekend encoder"""
        e = DateEncoder(customDays=(21, ["sat", "sun", "fri"]))
        mon = DateEncoder(customDays=(21, "Monday"))
        e2 = DateEncoder(weekend=(21, 1))

        d = datetime.datetime(1988, 5, 29, 20, 0)
        self.assertTrue(numpy.array_equal(e.encode(d), e2.encode(d)))
        for _ in range(300):
            d = d + datetime.timedelta(days=1)
            self.assertTrue(numpy.array_equal(e.encode(d), e2.encode(d)))

            decodedMonday = mon.decode(mon.encode(d))
            print(decodedMonday)
            # Make sure the "Monday" encoder fires on Mondays and only then.
            if decodedMonday[0]["Monday"][0][0][0] == 1.0:
                self.assertEqual(d.weekday(), 0)
            else:
                self.assertNotEqual(d.weekday(), 0)
class FourSquareAnomalyDetector(object):
    """Encode check-ins as SDRs and report the anomaly score of the last one.

    Note that constructing an instance immediately calls :meth:`run`.
    """

    def __init__(self):
        """Create one encoder per check-in field, then run the demo."""
        self.lat = ScalarEncoder(name='latitude', w=3, n=100, minval=-90,
                                 maxval=90, periodic=False)
        self.long = ScalarEncoder(name='longitude', w=3, n=100, minval=-180,
                                  maxval=180, periodic=True)
        self.timeenc = DateEncoder(season=0, dayOfWeek=1, weekend=3,
                                   timeOfDay=5)
        self.likes = ScalarEncoder(name='likes', w=3, n=50, minval=0,
                                   maxval=100000, periodic=False)
        self.people = ScalarEncoder(name='numpeople', w=3, n=20, minval=0,
                                    maxval=100, periodic=False)
        self.categories = SDRCategoryEncoder(n=87, w=3, categoryList=None,
                                             name="cats", verbosity=0)
        self.run()

    def run(self):
        """Encode eight identical sample check-ins and print the last score."""
        list_of_unencoded_checkins = [
            Checkin(10, 100, datetime.datetime.utcnow(), 12, 5, "cafe")
            for _ in range(8)
        ]
        list_of_encoded_checkins = []
        for check in list_of_unencoded_checkins:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(check)
            list_of_encoded_checkins.append(self.encode(check))
        print(self.LastAnomalyScore(list_of_encoded_checkins))

    def createModel(self):
        """Return a new model built from ``model_params.MODEL_PARAMS``."""
        return ModelFactory.create(model_params.MODEL_PARAMS)

    def encode(self, checkin):
        """Return the concatenated encoding of all fields of ``checkin``.

        The category encodings of every item in ``checkin.categories`` are
        OR-ed together into one array before concatenation.

        :param checkin: object exposing ``latitude``, ``longitude``, ``time``,
            ``likes``, ``people`` and an iterable ``categories``.
        :returns: 1-D numpy array (lat, long, time, likes, people, categories).
        :raises ValueError: if ``checkin.categories`` yields no items.
        """
        print(checkin)
        latenc = self.lat.encode(checkin.latitude)
        longenc = self.long.encode(checkin.longitude)
        timenc = self.timeenc.encode(checkin.time)
        likeenc = self.likes.encode(checkin.likes)
        peoplenc = self.people.encode(checkin.people)

        # NOTE(review): if ``categories`` is a plain string (run() passes
        # "cafe") this iterates over its characters - confirm against Checkin.
        catenc = None
        for cat in checkin.categories:
            encoded = self.categories.encode(cat)
            if catenc is None:
                catenc = encoded
            else:
                catenc = numpy.logical_or(catenc, encoded)
        if catenc is None:
            raise ValueError("checkin has no categories to encode")

        checkinsdr = numpy.concatenate(
            (latenc, longenc, timenc, likeenc, peoplenc, catenc))
        print(checkinsdr)
        print(type(checkinsdr))
        return checkinsdr

    def LastAnomalyScore(self, checkin_list):
        """Feed every encoded check-in to a fresh model; return the last score.

        :param checkin_list: iterable of encoded check-ins.
        :returns: ``anomalyScore`` inference of the final record, or 0 when
            ``checkin_list`` is empty.
        """
        model = self.createModel()
        model.enableInference({'predictedField': 'checkin'})
        last_anomaly = 0
        for record in checkin_list:
            result = model.run({"checkin": record})
            last_anomaly = result.inferences['anomalyScore']
        return last_anomaly
def runHotgym(numRecords):
    """Run the two-value hotgym pipeline over at most ``numRecords`` rows.

    Model parameters are loaded from ``_PARAMS_PATH`` and data rows are read
    from ``_INPUT_FILE_PATH``. For every row the timestamp (column 0) and two
    values (columns 1 and 2) are encoded, concatenated and passed through the
    SpatialPooler and TemporalMemory; an SDR classifier is trained on the
    first value. The best 1-step prediction of each row is collected and also
    written to a ``NuPICFileOutput`` named ``_FILE_NAME``.

    :param numRecords: maximum number of data rows to process.
    :returns: list of ``[timestampString, value, prediction, confidencePct]``.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])
    scalarEncoder2 = RandomDistributedScalarEncoder(
        enParams["consumption2"]["resolution"])

    encodingWidth = (timeOfDayEncoder.getWidth() +
                     weekendEncoder.getWidth() +
                     scalarEncoder.getWidth() +
                     scalarEncoder2.getWidth())

    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True
    )

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"]
    )

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Skip the three leading rows (presumably header lines); none of them
        # is used below. next() works on both Python 2 and 3.
        for _ in range(3):
            next(reader)

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)
        try:
            for count, record in enumerate(reader):
                if count >= numRecords:
                    break

                # Convert the date string into a Python datetime object.
                timestamp = datetime.datetime.strptime(record[0],
                                                       "%m/%d/%y %H:%M")
                # Convert the data value strings into floats.
                prediction = float(record[1])
                prediction2 = float(record[2])

                # To encode, we need to provide zero-filled numpy arrays for
                # the encoders to populate.
                timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
                weekendBits = numpy.zeros(weekendEncoder.getWidth())
                consumptionBits = numpy.zeros(scalarEncoder.getWidth())
                consumptionBits2 = numpy.zeros(scalarEncoder2.getWidth())

                # Now we call the encoders to create bit representations for
                # each value.
                timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
                weekendEncoder.encodeIntoArray(timestamp, weekendBits)
                scalarEncoder.encodeIntoArray(prediction, consumptionBits)
                scalarEncoder2.encodeIntoArray(prediction2, consumptionBits2)

                # Concatenate all these encodings into one large encoding for
                # Spatial Pooling.
                encoding = numpy.concatenate(
                    [timeOfDayBits, weekendBits, consumptionBits,
                     consumptionBits2]
                )

                # Array of active columns, all initially zero; populated by
                # sp.compute below. It must have the same dimensions as the
                # Spatial Pooler.
                activeColumns = numpy.zeros(spParams["columnCount"])

                # Execute Spatial Pooling algorithm over input space.
                sp.compute(encoding, True, activeColumns)
                activeColumnIndices = numpy.nonzero(activeColumns)[0]

                # Execute Temporal Memory algorithm over active mini-columns.
                tm.compute(activeColumnIndices, learn=True)
                activeCells = tm.getActiveCells()

                # Get the bucket info for this input value for classification.
                bucketIdx = scalarEncoder.getBucketIndices(prediction)[0]

                # Run classifier to translate active cells back to scalar
                # value.
                classifierResult = classifier.compute(
                    recordNum=count,
                    patternNZ=activeCells,
                    classification={
                        "bucketIdx": bucketIdx,
                        "actValue": prediction
                    },
                    learn=True,
                    infer=True
                )

                # Best prediction for 1 step out: the (confidence, value) pair
                # with the highest confidence.
                oneStepConfidence, oneStep = max(
                    zip(classifierResult[1],
                        classifierResult["actualValues"])
                )
                results.append(
                    [record[0], prediction, oneStep, oneStepConfidence * 100])
                output.write(
                    record[0], prediction, oneStep, oneStepConfidence * 100)
        finally:
            # Close the output file even when a row fails to parse or encode.
            output.close()

    return results
class NumentaTMLowLevelDetector(AnomalyDetector):
    """The 'numentaTM' detector, but not using the CLAModel or network API."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and reset component slots."""
        super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

        # All of these are created in initialize().
        self.valueEncoder = None
        self.encodedValue = None
        self.timestampEncoder = None
        self.encodedTimestamp = None
        self.sp = None
        self.spOutput = None
        self.tm = None
        self.anomalyLikelihood = None

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood. This will give worse results, but
        # useful for checking the efficacy of AnomalyLikelihood. You will need
        # to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        """Create the encoders, spatial pooler, temporal memory, likelihood.

        Reads ``self.inputMin``, ``self.inputMax`` and
        ``self.probationaryPeriod``; the likelihood object is only created
        when ``self.useLikelihood`` is true.
        """
        # The RDSE resolution is derived from the padded data range, so it is
        # specific to the data stream.
        padding = abs(self.inputMax - self.inputMin) * 0.2
        lowerBound = self.inputMin - padding
        if self.inputMin != self.inputMax:
            upperBound = self.inputMax + padding
        else:
            # Degenerate (constant) stream: use a range of width 1.
            upperBound = self.inputMin + 1
        bucketCount = 130.0
        resolution = max(0.001, (upperBound - lowerBound) / bucketCount)

        self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                           seed=42)
        self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                     dtype=np.uint32)

        # Timestamp encoder (time of day only).
        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                         dtype=np.uint32)

        # The spatial pooler sees both encodings side by side.
        inputWidth = (self.timestampEncoder.getWidth() +
                      self.valueEncoder.getWidth())

        self.sp = SpatialPooler(
            globalInhibition=True,
            columnDimensions=[2048],
            inputDimensions=[inputWidth],
            potentialRadius=inputWidth,
            numActiveColumnsPerInhArea=40,
            seed=1956,
            potentialPct=0.8,
            boostStrength=0.0,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            synPermInactiveDec=0.0005,
        )
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.tm = TemporalMemory(
            activationThreshold=20,
            cellsPerColumn=32,
            columnDimensions=(2048,),
            initialPermanence=0.24,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=128,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceDecrement=0.008,
            permanenceIncrement=0.04,
            seed=1960,
        )

        if self.useLikelihood:
            # First half of the probationary period is the learning period;
            # the remainder supplies the estimation samples.
            learningPeriod = math.floor(self.probationaryPeriod / 2.0)
            estimationSamples = self.probationaryPeriod - learningPeriod
            self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
                claLearningPeriod=learningPeriod,
                estimationSamples=estimationSamples,
                reestimationPeriod=100
            )

    def handleRecord(self, inputData):
        """Returns a tuple (anomalyScore, rawScore).

        ``inputData`` is indexed with the keys "value" and "timestamp". When
        ``useLikelihood`` is false the raw score is returned in both slots.
        """
        value = inputData["value"]
        timestamp = inputData["timestamp"]

        # Encode the input data record.
        self.valueEncoder.encodeIntoArray(value, self.encodedValue)
        self.timestampEncoder.encodeIntoArray(timestamp,
                                              self.encodedTimestamp)

        # Run the encoded data through the spatial pooler.
        spInput = np.concatenate((self.encodedTimestamp, self.encodedValue))
        self.sp.compute(spInput, True, self.spOutput)

        # The raw anomaly score is the fraction of currently active columns
        # that did not contain a previously-predicted cell.
        activeColumns = set(self.spOutput.nonzero()[0].tolist())
        prevPredictedColumns = {self.tm.columnForCell(cell)
                                for cell in self.tm.getPredictiveCells()}
        unpredicted = activeColumns.difference(prevPredictedColumns)
        rawScore = len(unpredicted) / float(len(activeColumns))

        self.tm.compute(activeColumns)

        if not self.useLikelihood:
            return (rawScore, rawScore)

        # Compute the log-likelihood score.
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            value, rawScore, timestamp)
        logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
        return (logScore, rawScore)
from nupic.algorithms.sdr_classifier_factory import SDRClassifierFactory from nupic.algorithms.spatial_pooler import SpatialPooler from nupic.algorithms.temporal_memory import TemporalMemory from nupic.encoders.adaptive_scalar import AdaptiveScalarEncoder from nupic.encoders.date import DateEncoder from nupic.encoders.random_distributed_scalar \ import RandomDistributedScalarEncoder from nupic.encoders.scalar import ScalarEncoder from simhash_distributed_scalar import SimHashDistributedScalarEncoder from stats import mae, mape, nll, rmse # setup COL_WIDTH = 2048 timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1)) weekendEncoder = DateEncoder(weekend=21) #consumeEncoder = RandomDistributedScalarEncoder( # n=400, # w=21, # resolution=0.4) # best, 0.88 original #consumeEncoder = ScalarEncoder( # n=400, # w=21, # minval=0, # maxval=100) #consumeEncoder = AdaptiveScalarEncoder( # n=400, # w=21) consumeEncoder = SimHashDistributedScalarEncoder(n=400, w=21, resolution=0.25) encodingWidth = (timeOfDayEncoder.getWidth() + weekendEncoder.getWidth() +
def runHotgym(numRecords):
    """Run the hotgym prediction pipeline over at most ``numRecords`` rows.

    Model parameters are loaded from ``_PARAMS_PATH`` and data rows are read
    from ``_INPUT_FILE_PATH``. For every row the timestamp (column 0) and the
    consumption value (column 1) are encoded, concatenated and passed through
    the SpatialPooler and TemporalMemory; an SDR classifier then yields the
    best 1-step prediction, which is printed and collected.

    :param numRecords: maximum number of data rows to process.
    :returns: list of ``[prediction, confidencePct, None, None]``.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])

    encodingWidth = (timeOfDayEncoder.getWidth() +
                     weekendEncoder.getWidth() +
                     scalarEncoder.getWidth())

    sp = SpatialPooler(
        # How large the input encoding will be. The trailing comma matters:
        # without it the parentheses hold a bare int, not a 1-tuple.
        inputDimensions=(encodingWidth,),
        # How many mini-columns will be in the Spatial Pooler.
        columnDimensions=(spParams["columnCount"],),
        # What percent of the column's receptive field is available for
        # potential synapses?
        potentialPct=spParams["potentialPct"],
        # NOTE(review): potentialRadius is left at the library default here;
        # confirm that is intended given that global inhibition is requested.
        # This means that the input space has no topology.
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        # Roughly 2%, giving that there is only one inhibition area because we
        # have turned on globalInhibition (40 / 2048 = 0.0195).
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        # How quickly synapses grow and degrade.
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        # boostStrength controls the strength of boosting. Boosting encourages
        # efficient usage of SP columns.
        boostStrength=spParams["boostStrength"],
        # Random number generator seed.
        seed=spParams["seed"],
        # TODO: is this useful?
        # Determines if inputs at the beginning and end of an input dimension
        # should be considered neighbors when mapping columns to inputs.
        wrapAround=False
    )

    tm = TemporalMemory(
        # Must be the same dimensions as the SP.
        columnDimensions=(tmParams["columnCount"],),
        # How many cells in each mini-column.
        cellsPerColumn=tmParams["cellsPerColumn"],
        # A segment is active if it has >= activationThreshold connected
        # synapses that are active due to infActiveState.
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # TODO: This comes from the SP params, is this normal?
        connectedPermanence=spParams["synPermConnected"],
        # Minimum number of active synapses for a segment to be considered
        # during search for the best-matching segments.
        minThreshold=tmParams["minThreshold"],
        # The max number of synapses added to a segment during learning.
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"]
    )

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Skip the three leading rows (presumably header lines); none of them
        # is used below. next() works on both Python 2 and 3.
        for _ in range(3):
            next(reader)

        for count, record in enumerate(reader):
            if count >= numRecords:
                break

            # Convert the date string into a Python datetime object.
            timestamp = datetime.datetime.strptime(record[0],
                                                   "%m/%d/%y %H:%M")
            # Convert the data value string into a float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for the
            # encoders to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())

            # Now we call the encoders to create bit representations for each
            # value.
            timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
            weekendEncoder.encodeIntoArray(timestamp, weekendBits)
            scalarEncoder.encodeIntoArray(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for
            # Spatial Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, consumptionBits]
            )

            # Array of active columns, all initially zero; populated by
            # sp.compute below. It must have the same dimensions as the
            # Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)
            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": consumption
                },
                learn=True,
                infer=True
            )

            # Best prediction for 1 step out: the (confidence, value) pair
            # with the highest confidence.
            oneStepConfidence, oneStep = max(
                zip(classifierResult[1], classifierResult["actualValues"])
            )
            print("1-step: {:16} ({:4.4}%)".format(oneStep,
                                                   oneStepConfidence * 100))
            results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
def runHotgym(numRecords):
    """Run the hotgym prediction pipeline over at most ``numRecords`` rows.

    Model parameters are loaded from ``_PARAMS_PATH`` and data rows are read
    from ``_INPUT_FILE_PATH``. For every row the timestamp (column 0) and the
    consumption value (column 1) are encoded, concatenated and passed through
    the SpatialPooler and TemporalMemory; an SDR classifier then yields the
    best 1-step prediction, which is printed and collected.

    :param numRecords: maximum number of data rows to process.
    :returns: list of ``[prediction, confidencePct, None, None]``.
    """
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])

    encoders = (timeOfDayEncoder, weekendEncoder, scalarEncoder)
    encodingWidth = sum(encoder.getWidth() for encoder in encoders)

    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True
    )

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The connected-permanence threshold is taken from the SP parameters.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"]
    )

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        # Skip the three leading rows (presumably header lines); none of them
        # is used below. next() works on both Python 2 and 3.
        for _ in range(3):
            next(reader)

        for count, record in enumerate(reader):
            if count >= numRecords:
                break

            # Convert the date string into a Python datetime object.
            timestamp = datetime.datetime.strptime(record[0],
                                                   "%m/%d/%y %H:%M")
            # Convert the data value string into a float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for the
            # encoders to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())

            # Now we call the encoders to create bit representations for each
            # value.
            timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
            weekendEncoder.encodeIntoArray(timestamp, weekendBits)
            scalarEncoder.encodeIntoArray(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for
            # Spatial Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, consumptionBits]
            )

            # Array of active columns, all initially zero; populated by
            # sp.compute below. It must have the same dimensions as the
            # Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)
            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(
                recordNum=count,
                patternNZ=activeCells,
                classification={
                    "bucketIdx": bucketIdx,
                    "actValue": consumption
                },
                learn=True,
                infer=True
            )

            # Best prediction for 1 step out: the (confidence, value) pair
            # with the highest confidence.
            oneStepConfidence, oneStep = max(
                zip(classifierResult[1], classifierResult["actualValues"])
            )
            print("1-step: {:16} ({:4.4}%)".format(oneStep,
                                                   oneStepConfidence * 100))
            results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
class DendriteDetector(AnomalyDetector):
    """Anomaly detector chaining encoders, a SpatialPooler, a TemporalMemory
    and an AnomalyLikelihood post-processor.

    Each record's timestamp and value are encoded, pooled into active
    columns, and compared with the columns the TemporalMemory predicted on
    the previous step. A separate min/max range check can force the final
    score to 1.0 for values far outside the range seen so far.

    Two optional debugging plots are built in but disabled by default; set
    the ``PLOT_*`` class flags to True to enable them.
    """

    # Activation threshold given to the TemporalMemory. The excitement plot
    # draws its horizontal reference line at the same value.
    ACTIVATION_THRESHOLD = 20

    # Debug: plot correlation of excitement versus compartmentalization.
    PLOT_CORRELATION = False
    # (start_age, end_age) of the sampling window; (1000, 1800) was the
    # alternative window used by the original author.
    CORRELATION_PLOT_AGES = (4000, 7260)
    # Maximum number of correlation samplers to create.
    CORRELATION_SAMPLER_COUNT = 20

    # Debug: plot excitement of a typical detection on a dendrite.
    PLOT_EXCITEMENT = False
    # Record age at which the excitement plots are drawn (1800 was the
    # alternative value used by the original author).
    EXCITEMENT_PLOT_AGE = 7265

    def initialize(self):
        """Build the encoders, SpatialPooler, TemporalMemory and the anomaly
        likelihood object, and reset the per-run state.

        Reads ``self.inputMin``, ``self.inputMax`` and
        ``self.probationaryPeriod``, which must already be set.
        """
        # Keep track of value range for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None

        # Time of day encoder.
        self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49),
                                            name='time_enc')

        # RDSE encoder for the time series value. The input range is padded
        # by 20% on each side and divided into numBuckets buckets, with a
        # floor on the resolution so a zero-width range still works.
        minResolution = 0.001
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = self.inputMax + rangePadding
        numBuckets = 130
        resolution = max(minResolution, (maxVal - minVal) / numBuckets)
        self.value_enc = RandomDistributedScalarEncoder(
            resolution=resolution, name='value_rdse')

        # Spatial Pooler. Its input is the concatenation of both encodings.
        encodingWidth = (self.timeOfDayEncoder.getWidth()
                         + self.value_enc.getWidth())
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(2048, ),
            potentialPct=0.8,
            potentialRadius=encodingWidth,
            globalInhibition=1,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956,
            wrapAround=True,
        )

        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=self.ACTIVATION_THRESHOLD,
            initialPermanence=.5,  # Increased to connectedPermanence.
            connectedPermanence=.5,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            predictedSegmentDecrement=0.001,
            maxSegmentsPerCell=128,
            # Changed meaning. Also see connections.topology[2]
            maxSynapsesPerSegment=128,
            seed=1993,
        )

        # Initialize the anomaly likelihood object.
        numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numentaLearningPeriod,
            estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
            reestimationPeriod=100,
        )

        # Number of records handled so far; drives the debug plots.
        self.age = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def handleRecord(self, inputData):
        """Score one record.

        Argument inputData is {"value": instantaneous_value,
        "timestamp": pandas.Timestamp}.

        Returns a tuple (anomalyScore, rawScore). Internally to NuPIC
        "anomalyScore" corresponds to "likelihood_score" and "rawScore"
        corresponds to "anomaly_score". The returned anomalyScore is the log
        likelihood, or 1.0 when the value was flagged as a spatial anomaly.
        """
        value = inputData["value"]
        timestamp = inputData["timestamp"]

        # Check for spatial anomalies and update min/max values.
        spatialAnomaly = self._checkSpatialAnomaly(value)

        # Run the HTM stack. First Encoders.
        timeOfDayBits = np.zeros(self.timeOfDayEncoder.getWidth())
        self.timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
        valueBits = np.zeros(self.value_enc.getWidth())
        self.value_enc.encodeIntoArray(value, valueBits)
        encoding = np.concatenate([timeOfDayBits, valueBits])

        # Spatial Pooler.
        activeColumns = np.zeros(self.sp.getNumColumns())
        self.sp.compute(encoding, True, activeColumns)
        activeColumnIndices = np.nonzero(activeColumns)[0]

        # Temporal Memory and Anomaly. The predictions must be read BEFORE
        # compute() so they are the ones made on the previous step.
        predictions = self.tm.getPredictiveCells()
        predictedColumns = list(
            self.tm.mapCellsToColumns(predictions).keys())
        self.tm.compute(activeColumnIndices, learn=True)
        rawScore = anomaly.computeRawAnomalyScore(activeColumnIndices,
                                                  predictedColumns)

        # Compute log(anomaly likelihood).
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            value, rawScore, timestamp)
        finalScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
        if spatialAnomaly:
            finalScore = 1.0

        if self.PLOT_CORRELATION:
            self._plotCorrelation()
        if self.PLOT_EXCITEMENT:
            self._plotExcitement()

        self.age += 1
        return (finalScore, rawScore)

    def _checkSpatialAnomaly(self, value):
        """Return True if value lies outside the tolerated range seen so far,
        then fold value into the tracked min/max."""
        spatialAnomaly = False
        # Skipped until two distinct values have been seen (both bounds are
        # None before the first record and equal after it).
        if self.minVal != self.maxVal:
            tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
            maxExpected = self.maxVal + tolerance
            minExpected = self.minVal - tolerance
            if value > maxExpected or value < minExpected:
                spatialAnomaly = True
        if self.maxVal is None or value > self.maxVal:
            self.maxVal = value
        if self.minVal is None or value < self.minVal:
            self.minVal = value
        return spatialAnomaly

    def _plotCorrelation(self):
        """Debug: sample dendrite correlations over a window of record ages
        and plot them when the window ends."""
        if self.age == 0:
            print("Correlation Plots ENABLED.")
        start_age, end_age = self.CORRELATION_PLOT_AGES

        if self.age == start_age:
            import correlation
            import random
            self.cor_samplers = []
            # Visit every cell at most once, in random order, so the search
            # terminates even when few cells have dendrite roots.
            num_cells = self.tm.numberOfCells()
            for n in random.sample(range(num_cells), num_cells):
                if len(self.cor_samplers) >= self.CORRELATION_SAMPLER_COUNT:
                    break
                neuron = self.tm.connections.dataForCell(n)
                if neuron._roots:
                    c = correlation.CorrelationSampler(neuron._roots[0])
                    c.random_sample_points(100)
                    self.cor_samplers.append(c)
            print("Created %d Correlation Samplers" % len(self.cor_samplers))

        if self.age >= start_age:
            for smplr in self.cor_samplers:
                smplr.sample()

        if self.age == end_age:
            import matplotlib.pyplot as plt
            for idx, smplr in enumerate(self.cor_samplers):
                if smplr.num_samples == 0:
                    print("No samples, plot not shown.")
                    continue
                plt.figure("Sample %d" % idx)
                smplr.plot(period=64)  # Different value!
            plt.show()

    def _plotExcitement(self):
        """Debug: at one fixed record age, plot EPSPs and excitement along a
        random root-to-leaf dendrite path for up to 40 active cells."""
        if self.age != self.EXCITEMENT_PLOT_AGE:
            return
        import matplotlib.pyplot as plt
        import random
        from connections import SYN_CONNECTED_ACTIVE

        sampled_cells = set()
        for figure_num in range(40):
            # Find an active cell to view that has not been plotted yet.
            cell = None
            for attempt in range(100):
                event = random.choice(self.tm.activeEvents)
                cell = event.cell  # This is an integer.
                if cell is not None and cell not in sampled_cells:
                    break
            else:
                # No fresh cell after 100 attempts: stop making figures.
                break
            sampled_cells.add(cell)
            cell = self.tm.connections.dataForCell(cell)
            plt.figure("(%d)" % figure_num)

            # Organize the data: walk from the first root down a randomly
            # chosen chain of child branches, concatenating per-synapse data.
            EPSPs = []
            excitement = []
            distance_to_root = 0
            segment_offsets = {}
            branch = cell._roots[0]
            while True:
                segment_offsets[branch] = distance_to_root
                distance_to_root += len(branch._synapses)
                excitement.extend(branch.excitement)
                for syn in branch._synapses:
                    if syn is None:
                        EPSPs.append(0)
                    else:
                        EPSPs.append(syn.state == SYN_CONNECTED_ACTIVE)
                if branch.children:
                    branch = random.choice(branch.children)
                else:
                    break

            plt.plot(
                np.arange(distance_to_root), EPSPs, 'r',
                np.arange(distance_to_root), excitement, 'b',
            )
            plt.title(
                "Dendrite Activation\n Horizontal line is activation "
                "threshold, Vertical lines are segment bifurcations"
            )
            plt.xlabel("Distance along Dendrite")
            plt.ylabel("EPSPs are Red, Excitement is Blue")
            # Show lines where the excitement crosses thresholds.
            plt.axhline(self.ACTIVATION_THRESHOLD, color='k')
            for offset in segment_offsets.values():
                if offset != 0:
                    plt.axvline(offset, color='k')

        print("\nShowing %d excitement plots." % len(sampled_cells))
        plt.show()
def testWeekend(self):
    """Test weekend encoder.

    Checks that a custom-days encoder for ("sat", "sun", "fri") produces the
    same bits as the built-in weekend encoder for 301 consecutive days at
    20:00, and that a custom-days "Monday" encoder decodes to 1.0 exactly on
    dates whose weekday() is 0.
    """
    e = DateEncoder(customDays=(21, ["sat", "sun", "fri"]))
    mon = DateEncoder(customDays=(21, "Monday"))
    e2 = DateEncoder(weekend=(21, 1))

    d = datetime.datetime(1988, 5, 29, 20, 0)
    self.assertTrue((e.encode(d) == e2.encode(d)).all())

    for _ in range(300):
        d = d + datetime.timedelta(days=1)
        self.assertTrue((e.encode(d) == e2.encode(d)).all())

        # Round-trip once and reuse the result for both the printout and
        # the check below.
        decoded = mon.decode(mon.encode(d))
        # Single-argument call form prints the same text on Python 2 and 3.
        print(decoded)

        # Make sure the Monday encoder fires on Mondays and only on Mondays.
        if decoded[0]["Monday"][0][0][0] == 1.0:
            self.assertEqual(d.weekday(), 0)
        else:
            self.assertNotEqual(d.weekday(), 0)
# Print the RDSE encodings of a few sample values. Each print uses a single
# pre-formatted argument so the output is identical on Python 2 and 3.
for label, value in (("3 = ", 3), ("4 = ", 4), ("5 = ", 5)):
    print("%s %s" % (label, rdse.encode(value)))
print("")
print("%s %s" % ("100 = ", rdse.encode(100)))
# The label previously read "100000 =" although the value encoded is 1000.
print("%s %s" % ("1000 =", rdse.encode(1000)))

import datetime
from nupic.encoders.date import DateEncoder

# `DateEncoder?` is IPython-only help syntax and does not parse as plain
# Python; print the class documentation instead.
print(DateEncoder.__doc__)

de = DateEncoder(season=5)
timeFormat = "%Y-%m-%d %H:%M:%S"

now = datetime.datetime.strptime("2014-05-02 13:08:58", timeFormat)
print("%s %s" % ("now = ", de.encode(now)))
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", timeFormat)
print("%s %s" % ("next month =", de.encode(nextMonth)))
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", timeFormat)
print("%s %s" % ("xmas = ", de.encode(xmas)))

from nupic.encoders.category import CategoryEncoder

categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
cat = encoder.encode("cat")
dog = encoder.encode("dog")