def testSerialization5(self):
    """Verify that saveToFile() and loadFromFile() with JSON on Predictor
    are accessible from Python.

    Fixes over the previous version: the local variable `file` shadowed the
    builtin, and the temporary file leaked if save/load raised.
    """
    inputData = SDR(1000).randomize(0.02)
    categories = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
    c1 = Predictor(steps=[1], alpha=1.0)
    c1.learn(1, inputData, categories['B'])
    filename = "Predictor_test_save.JSON"
    try:
        c1.saveToFile(filename, "JSON")
        c2 = Predictor(steps=[1], alpha=1.0)
        c2.loadFromFile(filename, "JSON")
    finally:
        # Clean up even if save/load raises, so reruns start from a clean slate.
        if os.path.exists(filename):
            os.remove(filename)
def testMultistepSingleValue(self):
    """Train repeatedly on one SDR; both horizons must become certain."""
    predictor = Predictor(steps=[1, 2])
    pattern = SDR(10)
    pattern.randomize(.2)
    for step in range(10):
        predictor.learn(step, pattern, 0)
    result = predictor.infer(10, pattern)
    # The single observed bucket must carry a probability of 100%.
    self.assertEqual(result[1], [1.])
    self.assertEqual(result[2], [1.])
def testSingleValue0Steps(self):
    """Send the same value 10 times and expect a high likelihood for the
    prediction, using 0-step-ahead prediction."""
    predictor = Predictor(steps=[0], alpha=0.5)
    pattern = SDR(10)
    pattern.randomize(.2)
    # Repeat enough times to perform inference and learn the association.
    for rec in range(10):
        predictor.learn(rec, pattern, 2)
    result = predictor.infer(10, pattern)
    self.assertGreater(result[0][2], 0.9)
def testComputeComplex(self):
    """Feed a short training sequence and check the exact 1-step PDF."""
    predictor = Predictor([1], 1.0)
    pattern = SDR(100)
    # (active bits, bucket) pairs, presented in record order.
    training = [
        ([1, 5, 9], 4),
        ([0, 6, 9, 11], 5),
        ([6, 9], 5),
        ([1, 5, 9], 4),
    ]
    for recNum, (bits, bucket) in enumerate(training):
        pattern.sparse = bits
        predictor.learn(recordNum=recNum, pattern=pattern, classification=bucket)
    pattern.sparse = [1, 5, 9]
    result = predictor.infer(pattern=pattern)
    self.assertSetEqual(set(result.keys()), {1})
    self.assertEqual(len(result[1]), 6)
    # Exact likelihoods for each of the 6 buckets, precomputed by hand.
    expected = [0.034234, 0.034234, 0.034234, 0.034234, 0.093058, 0.770004]
    for bucket, likelihood in enumerate(expected):
        self.assertAlmostEqual(result[1][bucket], likelihood, places=5)
def testMultistepSimple(self):
    """Cycle through 0..9 repeatedly; 1- and 2-step predictions must track it."""
    predictor = Predictor(steps=[1, 2], alpha=10.0)
    pattern = SDR(10)
    for rec in range(100):
        pattern.sparse = [rec % 10]
        predictor.learn(recordNum=rec, pattern=pattern, classification=(rec % 10))
    result = predictor.infer(99, pattern)
    # The last value seen was 9, so the next value is 0, and then 1.
    self.assertGreater(result[1][0], 0.99)
    for bucket in range(1, 10):
        self.assertLess(result[1][bucket], 0.01)
    self.assertGreater(result[2][1], 0.99)
    for bucket in [0] + list(range(2, 10)):
        self.assertLess(result[2][bucket], 0.01)
def testComputeInferOrLearnOnly(self):
    """Inference must work both before training (no crash, even with no data)
    and after training, and pure inference must be deterministic."""
    c = Predictor([1], 1.0)
    inp = SDR(10)
    inp.randomize(.3)
    # Interleave inference with learning; inference before/with too little
    # training data must not crash.
    c.infer(recordNum=0, pattern=inp)  # Don't crash with not enough training data.
    c.learn(recordNum=0, pattern=inp, classification=4)
    c.infer(recordNum=1, pattern=inp)  # Don't crash with not enough training data.
    c.learn(recordNum=2, pattern=inp, classification=4)
    c.learn(recordNum=3, pattern=inp, classification=4)
    # Infer only: repeated inference (no learning in between) must give
    # identical PDFs, i.e. infer() has no hidden learning side effect.
    retval1 = c.infer(recordNum=5, pattern=inp)
    retval2 = c.infer(recordNum=6, pattern=inp)
    self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))
def testComputeInferOrLearnOnly(self):
    """Inference with no (or too little) training data must return an empty
    PDF rather than crash, and inference alone must not alter the model.

    Uses assertEqual/assertNotEqual instead of assertTrue(x == []) so that
    failures report the actual value.
    """
    c = Predictor([1], 1.0)
    inp = SDR(10)
    inp.randomize(.3)
    # Inference before any learning: not enough training data -> [].
    prediction = c.infer(pattern=inp)[1]
    self.assertEqual(prediction, [])
    c.learn(recordNum=0, pattern=inp, classification=4)
    # Still not enough training data for a 1-step prediction.
    self.assertEqual(c.infer(pattern=inp)[1], [])
    c.learn(recordNum=2, pattern=inp, classification=4)
    c.learn(recordNum=3, pattern=inp, classification=4)
    # Now there is enough training data; the PDF must be non-empty.
    self.assertNotEqual(c.infer(pattern=inp)[1], [])
    # Infer only: repeated calls must be deterministic (no hidden learning).
    retval1 = c.infer(pattern=inp)
    retval2 = c.infer(pattern=inp)
    self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))
def testExampleUsage(self):
    """Documented example: classify a category, estimate a scalar value,
    and predict future elements of a sequence."""
    # Make a random SDR and associate it with a category.
    inputData = SDR(1000).randomize(0.02)
    categories = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
    clsr = Classifier()
    clsr.learn(inputData, categories['B'])
    assert numpy.argmax(clsr.infer(inputData)) == categories['B']

    # Estimate a scalar value.  The Classifier only accepts categories, so
    # real-valued inputs are put into bins (AKA buckets) by subtracting the
    # minimum value and dividing by a resolution.
    scalar = 567.8
    minimum = 500
    resolution = 10
    clsr.learn(inputData, int((scalar - minimum) / resolution))
    assert numpy.argmax(clsr.infer(inputData)) * resolution + minimum == 560

    # Predict 1 and 2 time steps into the future.
    # A sequence of 4 random SDRs, each with 1000 bits and 2% sparsity,
    # paired with category labels.
    sequence = [SDR(1000).randomize(0.02) for _ in range(4)]
    labels = [4, 5, 6, 7]

    # Make a Predictor and train it on the whole sequence.
    pred = Predictor([1, 2])
    for step, (sdr, label) in enumerate(zip(sequence, labels)):
        pred.learn(step, sdr, label)

    # Give the predictor partial information, and make predictions
    # about the future.
    pred.reset()
    A = pred.infer(0, sequence[0])
    assert numpy.argmax(A[1]) == labels[1]
    assert numpy.argmax(A[2]) == labels[2]
    B = pred.infer(1, sequence[1])
    assert numpy.argmax(B[1]) == labels[2]
    assert numpy.argmax(B[2]) == labels[3]
def testMultiStepPredictions(self):
    """Test multi-step predictions.

    The 0-step and 1-step classifiers are trained simultaneously on the
    alternating stream (SDR1, 0), (SDR2, 1), (SDR1, 0), (SDR2, 1), ...
    The 0-step classifier should learn SDR1 => 0 and SDR2 => 1, while the
    1-step classifier should learn SDR1 => 1 and SDR2 => 0.
    """
    predictor = Predictor([0, 1], 1.0)
    patternA = SDR(10)
    patternA.sparse = [1, 3, 5]
    patternB = SDR(10)
    patternB.sparse = [2, 4, 6]
    recordNum = 0
    for _ in range(100):
        for sdr, bucket in ((patternA, 0), (patternB, 1)):
            predictor.learn(recordNum, pattern=sdr, classification=bucket)
            recordNum += 1
    resultA = predictor.infer(recordNum, patternA)
    resultB = predictor.infer(recordNum, patternB)
    # 0-step classifier: each pattern maps to its own bucket.
    self.assertAlmostEqual(resultA[0][0], 1.0, places=1)
    self.assertAlmostEqual(resultA[0][1], 0.0, places=1)
    self.assertAlmostEqual(resultB[0][0], 0.0, places=1)
    self.assertAlmostEqual(resultB[0][1], 1.0, places=1)
def testSerialization2(self):
    """Verify that a Predictor survives a pickle round-trip: the restored
    copy must produce the same inference results as the original.

    Removes a commented-out debug print and replaces the index-based
    comparison loop with zip().
    """
    sdr1 = SDR(15); sdr1.sparse = [1, 5, 9]
    sdr2 = SDR(15); sdr2.sparse = [0, 6, 9, 11]
    sdr3 = SDR(15); sdr3.sparse = [6, 9]
    sdr4 = SDR(15); sdr4.sparse = [1, 5, 9]
    c1 = Predictor(steps=[1], alpha=1.0)
    c1.learn(1, pattern=sdr1, classification=4)
    c1.learn(2, pattern=sdr2, classification=5)
    c1.learn(3, pattern=sdr3, classification=5)
    c1.learn(4, pattern=sdr4, classification=4)
    c1.learn(5, pattern=sdr4, classification=4)

    c2 = pickle.loads(pickle.dumps(c1))

    result1 = c1.infer(sdr1)
    result2 = c2.infer(sdr1)
    self.assertEqual(len(result1[1]), 6)
    self.assertEqual(len(result1[1]), len(result2[1]))
    for original, restored in zip(result1[1], result2[1]):
        self.assertAlmostEqual(original, restored, places=5)
def testMissingRecords(self):
    """ Test missing record support.

    Here, we intend the classifier to learn the associations:
        [1,3,5] => bucketIdx 1
        [2,4,6] => bucketIdx 2
        [7,8,9] => don"t care

    If it doesn't pay attention to the recordNums in this test, it will learn
    the wrong associations.
    """
    c = Predictor(steps=[1], alpha=1.0)
    recordNum = 0
    inp = SDR(10)

    # Train on the alternating stream; each learn() call supplies the bucket
    # that the PREVIOUS pattern should predict (steps=[1]).
    inp.sparse = [1, 3, 5]
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1

    inp.sparse = [2, 4, 6]
    c.learn(recordNum=recordNum, pattern=inp, classification=1)
    recordNum += 1

    inp.sparse = [1, 3, 5]
    c.learn(recordNum=recordNum, pattern=inp, classification=2)
    recordNum += 1

    inp.sparse = [2, 4, 6]
    c.learn(recordNum=recordNum, pattern=inp, classification=1)
    recordNum += 1

    # -----------------------------------------------------------------------
    # At this point, we should have learned [1,3,5] => bucket 1
    #                                       [2,4,6] => bucket 2
    inp.sparse = [1, 3, 5]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=2)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertGreater(result[1][1], 0.9)
    self.assertLess(result[1][2], 0.1)

    inp.sparse = [2, 4, 6]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=1)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertLess(result[1][1], 0.1)
    self.assertGreater(result[1][2], 0.9)

    # -----------------------------------------------------------------------
    # Feed in records that skip (recordNum jumps by 2) and make sure they
    # don"t mess up what we learned.
    # If we skip a record, the CLA should NOT learn that [2,4,6] from
    # the previous learn associates with bucket 0
    recordNum += 1
    inp.sparse = [1, 3, 5]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertGreater(result[1][1], 0.9)
    self.assertLess(result[1][2], 0.1)

    # If we skip a record, the CLA should NOT learn that [1,3,5] from
    # the previous learn associates with bucket 0
    recordNum += 1
    inp.sparse = [2, 4, 6]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertLess(result[1][1], 0.1)
    self.assertGreater(result[1][2], 0.9)

    # If we skip a record, the CLA should NOT learn that [2,4,6] from
    # the previous learn associates with bucket 0
    recordNum += 1
    inp.sparse = [1, 3, 5]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertGreater(result[1][1], 0.9)
    self.assertLess(result[1][2], 0.1)
def main(parameters=default_parameters, argv=None, verbose=True):
    """Run the hot-gym prediction & anomaly-detection demo.

    Reads the input CSV, encodes each record (timestamp + consumption),
    runs the encodings through the SpatialPooler and TemporalMemory,
    predicts the consumption 1 and 5 steps ahead, and computes anomaly
    likelihoods.

    Args:
        parameters: nested dict of encoder/SP/TM/anomaly/predictor settings.
        argv: unused; kept for interface compatibility.
        verbose: print parameters and statistics, and show plots if
            matplotlib is available.

    Returns:
        The negated 5-step RMS prediction error (larger is better, which
        suits parameter optimizers that maximize the return value).
    """
    if verbose:
        import pprint
        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # Read the input file.
    records = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)  # Skip the two extra CSV header rows.
        next(reader)
        for record in reader:
            records.append(record)

    # Make the Encoders.  These will convert input data into binary representations.
    dateEncoder = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                              weekend=parameters["enc"]["time"]["weekend"])
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
    spParams = parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True
    )
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    tmParams = parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
    )
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Setup likelihood; these settings are used in NAB.
    anParams = parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len(records)))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(learningPeriod=learningPeriod,
                                        estimationSamples=probationaryPeriod - learningPeriod,
                                        reestimationPeriod=anParams["reestimationPeriod"])

    predictor = Predictor(steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1  # Bucket width for converting consumption to a category.

    # Iterate through every datum in the dataset, record the inputs & outputs.
    inputs = []
    anomaly = []
    anomalyProb = []
    predictions = {1: [], 5: []}
    for count, record in enumerate(records):

        # Convert date string into a Python datetime object.
        dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
        # Convert data value string into float.
        consumption = float(record[1])
        inputs.append(consumption)

        # Call the encoders to create bit representations for each value.
        # These are SDR objects.
        dateBits = dateEncoder.encode(dateString)
        consumptionBits = scalarEncoder.encode(consumption)

        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(encodingWidth).concatenate([consumptionBits, dateBits])
        enc_info.addData(encoding)

        # Create an SDR to represent active columns.  This will be populated by
        # the compute method below.  It must have the same dimensions as the
        # Spatial Pooler.
        activeColumns = SDR(sp.getColumnDimensions())

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        sp_info.addData(activeColumns)

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumns, learn=True)
        tm_info.addData(tm.getActiveCells().flatten())

        # Predict what will happen, and then train the predictor based on what
        # just happened.
        pdf = predictor.infer(count, tm.getActiveCells())
        for n in (1, 5):
            if pdf[n]:
                predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
            else:
                # Not enough data yet to make an n-step prediction.
                predictions[n].append(float('nan'))
        predictor.learn(count, tm.getActiveCells(), int(consumption / predictor_resolution))

        anomalyLikelihood = anomaly_history.anomalyProbability(consumption, tm.anomaly)
        anomaly.append(tm.anomaly)
        anomalyProb.append(anomalyLikelihood)

    # Print information & statistics about the state of the HTM.
    print("Encoded Input", enc_info)
    print("")
    print("Spatial Pooler Mini-Columns", sp_info)
    print(str(sp))
    print("")
    print("Temporal Memory Cells", tm_info)
    print(str(tm))
    print("")

    # Shift the predictions so that they are aligned with the input they predict.
    for n_steps, pred_list in predictions.items():
        for x in range(n_steps):
            pred_list.insert(0, float('nan'))
            pred_list.pop()

    # Calculate the predictive accuracy, Root-Mean-Squared.
    accuracy = {1: 0, 5: 0}
    accuracy_samples = {1: 0, 5: 0}
    for idx, inp in enumerate(inputs):
        for n in predictions:  # For each [N]umber of time steps ahead which was predicted.
            val = predictions[n][idx]
            if not math.isnan(val):
                accuracy[n] += (inp - val) ** 2
                accuracy_samples[n] += 1
    for n in sorted(predictions):
        accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
        print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

    # Show info about the anomaly (mean & std).
    print("Anomaly Mean", np.mean(anomaly))
    print("Anomaly Std ", np.std(anomaly))

    # Plot the Predictions and Anomalies.
    if verbose:
        try:
            import matplotlib.pyplot as plt
        except ImportError:  # Narrowed from a bare `except:`, which hid real errors.
            print("WARNING: failed to import matplotlib, plots cannot be shown.")
            return -accuracy[5]

        plt.subplot(2, 1, 1)
        plt.title("Predictions")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), predictions[1], 'blue',
                 np.arange(len(inputs)), predictions[5], 'green',)
        plt.legend(labels=('Input',
                           '1 Step Prediction, Shifted 1 step',
                           '5 Step Prediction, Shifted 5 steps'))

        plt.subplot(2, 1, 2)
        plt.title("Anomaly Score")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        inputs = np.array(inputs) / max(inputs)
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), anomaly, 'blue',)
        plt.legend(labels=('Input', 'Anomaly Score'))
        plt.show()

    return -accuracy[5]
tm.compute(activeColumns, learn=True) pdf = predictor.infer(tm.getActiveCells()) for n in (1, 5): if pdf[n]: predictions[n].append(np.argmax(pdf[n]) * predictor_resolution) else: predictions[n].append(float('nan')) anomalyLikelihood = anomaly_history.anomalyProbability(temp, tm.anomaly) anomaly.append(tm.anomaly) anomalyProb.append(anomalyLikelihood) predictor.learn(count, tm.getActiveCells(), int(temp / predictor_resolution)) print(count, " ", dateObject, " ", temp) time.sleep(5) plt.subplot(2, 1, 1) plt.plot(inputs, color='green', linestyle="solid", linewidth=2.0, label="Temp") plt.plot(predictions[1], color='red', linestyle="dotted", label="Temp Pred Next Step") plt.ylim(45.0, 65.0) plt.title("Prediction", fontsize=18) plt.legend(loc='lower left', fontsize=14) plt.subplot(2, 1, 2) plt.plot(anomaly, color='skyblue', linestyle="dotted", label="Anomaly")