def testPredictionMultipleCategories(self):
    """
    Check the predicted distribution when every sample carries two labels.

    Training associations:
      active bits [1, 3, 5] => bucketIdx 0 and 1
      active bits [2, 4, 6] => bucketIdx 2 and 3

    With a small learning rate and many repetitions, each of the two
    labelled buckets is expected to receive about half of the probability.
    """
    clsr = Classifier(0.001)

    odd_bits = SDR(10)
    odd_bits.sparse = [1, 3, 5]
    even_bits = SDR(10)
    even_bits.sparse = [2, 4, 6]

    random.seed(42)
    for _ in range(5000):
        clsr.learn(pattern=odd_bits, classification=[0, 1])
        clsr.learn(pattern=even_bits, classification=[2, 3])

    # Each pattern should split its probability evenly over its two labels.
    odd_pdf = clsr.infer(pattern=odd_bits)
    for bucket in (0, 1):
        self.assertAlmostEqual(odd_pdf[bucket], 0.5, places=1)

    even_pdf = clsr.infer(pattern=even_bits)
    for bucket in (2, 3):
        self.assertAlmostEqual(even_pdf[bucket], 0.5, places=1)
def main(parameters=default_parameters, argv=None, verbose=True):
    """
    Train a SpatialPooler + Classifier on an image dataset and score it.

    The dataset is loaded with ``load_ds('mnist_784', 10000, shape=[28, 28])``.
    Every training image is encoded, passed through the spatial pooler with
    learning enabled, and the resulting columns are taught to the classifier
    together with the label. The test images are then run with learning
    disabled and the fraction of correct argmax predictions is reported.

    Args:
        parameters: mapping holding the SpatialPooler settings read below
            (``columnDimensions``, ``potentialRadius``, ``potentialPct``,
            ``localAreaDensity``, ``stimulusThreshold``, ``synPermInactiveDec``,
            ``synPermActiveInc``, ``synPermConnected``,
            ``minPctOverlapDutyCycle``, ``dutyCyclePeriod``, ``boostStrength``).
        argv: accepted for interface compatibility; not used by this function.
        verbose: accepted for interface compatibility; not used by this
            function.

    Returns:
        The fraction of test samples classified correctly, between 0 and 1.
    """
    # Load data.
    train_labels, train_images, test_labels, test_images = load_ds(
        'mnist_784', 10000, shape=[28, 28])  # HTM: ~95.6%
    # Alternative dataset noted by the original author (HTM baseline: ~83%):
    #   load_ds('Fashion-MNIST', 10000, shape=[28, 28])

    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)

    # Setup the AI.
    enc = SDR(train_images[0].shape)
    sp = SpatialPooler(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        # Original author's note: this is important, 0 = "random" seed which
        # changes on each invocation.
        seed=0,
        spVerbosity=99,
        wrapAround=False)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    # Training Loop.
    # Iterate over the shuffled pairs directly instead of indexing them by
    # len(train_images): zip() truncates to the shorter input, so indexing by
    # the image count could run past the end of training_data.
    for img, lbl in training_data:
        encode(img, enc)
        sp.compute(enc, True, columns)
        # TODO: SDRClassifier could accept string as a label, currently must
        # be int.
        sdrc.learn(columns, lbl)

    print(str(sp))
    print(str(columns_stats))

    # Testing Loop (learning disabled in the spatial pooler).
    score = 0
    for img, lbl in test_data:
        encode(img, enc)
        sp.compute(enc, False, columns)
        if lbl == np.argmax(sdrc.infer(columns)):
            score += 1
    score = score / len(test_data)

    print('Score:', 100 * score, '%')
    return score
def testSingleValue(self):
    """Teach one pattern the same label 10 times; expect a confident prediction."""
    clsr = Classifier(alpha=0.5)

    pattern = SDR(10)
    pattern.randomize(.2)

    # Repeat the same association enough times for it to dominate the PDF.
    for _ in range(10):
        clsr.learn(pattern, 2)

    pdf = clsr.infer(pattern)
    self.assertGreater(pdf[2], 0.9)
def testSerialization4(self):
    """
    Verify that saveToFile() and loadFromFile() on a Classifier are
    accessible from Python when called without an explicit format argument
    (described by the original author as the BINARY format).

    A classifier is trained on one sample, saved, loaded into a fresh
    instance, and the loaded instance must predict the trained category.
    """
    inputData = SDR(1000).randomize(0.02)
    categories = {'A': 0, 'B': 1, 'C': 2, 'D': 3}

    c1 = Classifier()
    c1.learn(inputData, categories['B'])

    path = "Classifier_test_save.BINARY"
    try:
        c1.saveToFile(path)

        c2 = Classifier()
        c2.loadFromFile(path)

        result2 = c2.infer(inputData)
        self.assertEqual(numpy.argmax(result2), categories['B'])
    finally:
        # Always clean up the temporary file, even when a step above fails.
        # The existence check avoids masking a saveToFile() error.
        if os.path.exists(path):
            os.remove(path)
def testPredictionDistributionOverlap(self):
    """
    Check the predicted distributions when the two input SDRs overlap.

    Training associations:
      SDR1 => bucketIdx 0 (30%)
           => bucketIdx 1 (30%)
           => bucketIdx 2 (40%)
      SDR2 => bucketIdx 1 (50%)
           => bucketIdx 3 (50%)

    SDR2 is a copy of SDR1 with noise added, so the two patterns share some
    active bits (the original author states 2 bits out of 20, i.e. 10%).
    The classifier should still recover both distributions approximately.
    """
    clsr = Classifier(0.0005)

    # Generate two SDRs that share a few active bits.
    SDR1 = SDR(100)
    SDR2 = SDR(100)
    SDR1.randomize(.20)
    SDR2.setSDR(SDR1)
    SDR2.addNoise(.9)

    random.seed(42)
    for _ in range(5000):
        draw = random.random()
        label = 0 if draw < 0.3 else (1 if draw < 0.6 else 2)
        clsr.learn(SDR1, label)

        draw = random.random()
        label = 1 if draw < 0.5 else 3
        clsr.learn(SDR2, label)

    pdf1 = clsr.infer(SDR1)
    for bucket, probability in ((0, 0.3), (1, 0.3), (2, 0.4)):
        self.assertAlmostEqual(pdf1[bucket], probability, places=1)

    pdf2 = clsr.infer(SDR2)
    for bucket, probability in ((1, 0.5), (3, 0.5)):
        self.assertAlmostEqual(pdf2[bucket], probability, places=1)
def testPredictionDistribution(self):
    """
    Check that the classifier learns a label distribution per pattern.

    Training associations:
      active bits [1, 3, 5] => bucketIdx 0 (30%)
                            => bucketIdx 1 (30%)
                            => bucketIdx 2 (40%)
      active bits [2, 4, 6] => bucketIdx 1 (50%)
                            => bucketIdx 3 (50%)

    Given enough repetitions and a small learning rate, the inferred
    probabilities should be close to these frequencies.
    """
    clsr = Classifier(alpha=0.001)

    odd_bits = SDR(10)
    odd_bits.sparse = [1, 3, 5]
    even_bits = SDR(10)
    even_bits.sparse = [2, 4, 6]

    random.seed(42)
    for _ in range(5000):
        # Label for the first pattern: 0 / 1 / 2 with 30% / 30% / 40%.
        draw = random.random()
        if draw >= 0.6:
            label = 2
        elif draw >= 0.3:
            label = 1
        else:
            label = 0
        clsr.learn(pattern=odd_bits, classification=label)

        # Label for the second pattern: 1 / 3 with 50% each.
        draw = random.random()
        label = 1 if draw < 0.5 else 3
        clsr.learn(pattern=even_bits, classification=label)

    odd_pdf = clsr.infer(pattern=odd_bits)
    for bucket, probability in ((0, 0.3), (1, 0.3), (2, 0.4)):
        self.assertAlmostEqual(odd_pdf[bucket], probability, places=1)

    even_pdf = clsr.infer(pattern=even_bits)
    for bucket, probability in ((1, 0.5), (3, 0.5)):
        self.assertAlmostEqual(even_pdf[bucket], probability, places=1)
def trainNumDecoder(encoder, mini, maxi, noise):
    """
    Train a Classifier that maps encoder outputs back to their numbers.

    Every integer from ``mini`` to ``maxi`` (inclusive) is encoded and taught
    to the classifier with the number itself as the class; this sweep is
    repeated 10 times. Afterwards each number is encoded again and run
    through ``decode``; any mismatch is printed.

    Args:
        encoder: object whose ``encode(x)`` result is fed to the classifier.
        mini: smallest number to train on.
        maxi: largest number to train on (inclusive).
        noise: currently unused; the ``corruptSDR(encoded, noise)`` call
            that would consume it is disabled.

    Returns:
        The trained Classifier.
    """
    clsr = Classifier()
    numbers = range(mini, maxi + 1)

    # Sweep from the smallest to the largest number 10 times.
    for _ in range(10):
        for number in numbers:
            # The "class" associated with each encoding is the number itself.
            clsr.learn(encoder.encode(number), number)

    # Verify every number decodes back to itself.
    for number in numbers:
        decoded = decode(clsr, encoder.encode(number))
        if decoded != number:
            print("error in decode training:", decoded, "->", number)

    return clsr
def testExampleUsage(self):
    """
    Exercise the basic usage patterns of Classifier and Predictor:
    category learning, scalar estimation through bucketing, and predicting
    1 and 2 steps ahead in a short sequence.

    Uses unittest assertions rather than bare ``assert`` statements so the
    checks still run under ``python -O`` and report the compared values.
    """
    # Make a random SDR and associate it with a category.
    inputData = SDR(1000).randomize(0.02)
    categories = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
    clsr = Classifier()
    clsr.learn(inputData, categories['B'])
    self.assertEqual(numpy.argmax(clsr.infer(inputData)), categories['B'])

    # Estimate a scalar value. The Classifier only accepts categories, so
    # put real valued inputs into bins (AKA buckets) by subtracting the
    # minimum value and dividing by a resolution.
    scalar = 567.8
    minimum = 500
    resolution = 10
    clsr.learn(inputData, int((scalar - minimum) / resolution))
    self.assertEqual(
        numpy.argmax(clsr.infer(inputData)) * resolution + minimum, 560)

    # Predict 1 and 2 time steps into the future.

    # Make a sequence of 4 random SDRs, each SDR has 1000 bits and 2% sparsity.
    sequence = [SDR(1000).randomize(0.02) for _ in range(4)]

    # Make category labels for the sequence.
    labels = [4, 5, 6, 7]

    # Make a Predictor and train it, one record per time step.
    pred = Predictor([1, 2])
    for recordNum, (pattern, label) in enumerate(zip(sequence, labels)):
        pred.learn(recordNum, pattern, label)

    # Give the predictor partial information, and make predictions
    # about the future.
    pred.reset()
    A = pred.infer(0, sequence[0])
    self.assertEqual(numpy.argmax(A[1]), labels[1])
    self.assertEqual(numpy.argmax(A[2]), labels[2])

    B = pred.infer(1, sequence[1])
    self.assertEqual(numpy.argmax(B[1]), labels[2])
    self.assertEqual(numpy.argmax(B[2]), labels[3])
def testOverlapPattern(self):
    """
    Check inference on a noisy variant of a learned pattern.

    A pattern is taught label 9 twice and then perturbed with noise; the
    perturbed pattern should still strongly predict 9. After teaching the
    perturbed pattern label 2 twice, label 2 should outrank label 9.
    """
    clsr = Classifier(alpha=10.0)

    pattern = SDR(10)
    pattern.randomize(.2)

    for _ in range(2):
        clsr.learn(pattern=pattern, classification=9)

    # Perturb the pattern in place; it still overlaps the learned one, so
    # the previously learned label is expected with high likelihood.
    pattern.addNoise(.5)
    pdf = clsr.infer(pattern=pattern)
    self.assertGreater(pdf[9], 0.9)

    for _ in range(2):
        clsr.learn(pattern=pattern, classification=2)

    # Second example: the new label should now be more probable than the old.
    pdf = clsr.infer(pattern=pattern)
    self.assertGreater(pdf[2], pdf[9])
def testSerialization1(self):
    """
    Verify that a trained Classifier survives a pickle round trip.

    The original classifier's inference is checked against fixed expected
    values, and the unpickled copy must produce the same result.
    """
    SDR1 = SDR(15)
    SDR1.sparse = [1, 5, 9]
    SDR2 = SDR(15)
    SDR2.sparse = [0, 6, 9, 11]
    SDR3 = SDR(15)
    SDR3.sparse = [6, 9]
    SDR4 = SDR(15)
    SDR4.sparse = [1, 5, 9]

    original = Classifier()
    training = (
        (SDR1, 4),
        (SDR2, 5),
        (SDR3, 5),
        (SDR4, 4),
        (SDR4, 4),
    )
    for pattern, label in training:
        original.learn(pattern=pattern, classification=label)

    restored = pickle.loads(pickle.dumps(original))

    pdf_original = original.infer(SDR1)
    pdf_restored = restored.infer(SDR1)

    # Expected probabilities for buckets 0..5 after the training above.
    expected = (0.166344, 0.166344, 0.166344, 0.166344, 0.167847, 0.166777)
    self.assertEqual(len(pdf_original), 6)
    for bucket, probability in enumerate(expected):
        self.assertAlmostEqual(pdf_original[bucket], probability, places=5)

    # The unpickled classifier must agree with the original.
    self.assertEqual(len(pdf_original), len(pdf_restored))
    for before, after in zip(pdf_original, pdf_restored):
        self.assertAlmostEqual(before, after, places=5)
def testSerialization3(self):
    """
    Verify that saveToFile() and loadFromFile() on a Classifier are
    accessible from Python using the "XML" format.

    A trained classifier is saved, loaded into a fresh instance, and both
    instances must infer the same distribution for the same input.
    """
    SDR1 = SDR(15)
    SDR1.sparse = [1, 5, 9]
    SDR2 = SDR(15)
    SDR2.sparse = [0, 6, 9, 11]
    SDR3 = SDR(15)
    SDR3.sparse = [6, 9]
    SDR4 = SDR(15)
    SDR4.sparse = [1, 5, 9]

    c1 = Classifier()
    c1.learn(pattern=SDR1, classification=4)
    c1.learn(pattern=SDR2, classification=5)
    c1.learn(pattern=SDR3, classification=5)
    c1.learn(pattern=SDR4, classification=4)

    # The Classifier now has some data in it, try serialization.
    path = "Classifier_test_save.XML"
    try:
        c1.saveToFile(path, "XML")

        c2 = Classifier()
        c2.loadFromFile(path, "XML")
    finally:
        # Remove the temporary file even if saving or loading fails.
        # The existence check avoids masking a saveToFile() error.
        if os.path.exists(path):
            os.remove(path)

    result1 = c1.infer(SDR1)
    result2 = c2.infer(SDR1)
    self.assertEqual(len(result1), len(result2))
    for i in range(len(result1)):
        self.assertAlmostEqual(result1[i], result2[i], places=5)
def testPredictionDistributionContinuousLearning(self):
    """
    Test continuous learning.

    Phase 1 (10000 iterations) teaches the associations:
      SDR1 => bucketIdx 0 (30%)
           => bucketIdx 1 (30%)
           => bucketIdx 2 (40%)
      SDR2 => bucketIdx 1 (50%)
           => bucketIdx 3 (50%)

    Phase 2 (20000 iterations) trains only SDR1, with the association
    changed to:
      SDR1 => bucketIdx 0 (30%)
           => bucketIdx 1 (30%)
           => bucketIdx 3 (40%)

    The classifier should adapt to the new association for SDR1 while its
    inference for SDR2, which receives no further training, stays the same.
    """
    clsr = Classifier(0.001)

    SDR1 = SDR(10)
    SDR1.sparse = [1, 3, 5]
    SDR2 = SDR(10)
    SDR2.sparse = [2, 4, 6]

    random.seed(42)

    # Phase 1: train both patterns.
    for _ in range(10000):
        draw = random.random()
        label = 0 if draw < 0.3 else (1 if draw < 0.6 else 2)
        clsr.learn(SDR1, label)

        draw = random.random()
        label = 1 if draw < 0.5 else 3
        clsr.learn(SDR2, label)

    pdf1 = clsr.infer(SDR1)
    for bucket, probability in ((0, 0.3), (1, 0.3), (2, 0.4)):
        self.assertAlmostEqual(pdf1[bucket], probability, places=1)

    pdf2 = clsr.infer(SDR2)
    for bucket, probability in ((1, 0.5), (3, 0.5)):
        self.assertAlmostEqual(pdf2[bucket], probability, places=1)

    # Phase 2: only SDR1 is trained, and bucket 3 replaces bucket 2.
    for _ in range(20000):
        draw = random.random()
        label = 0 if draw < 0.3 else (1 if draw < 0.6 else 3)
        clsr.learn(SDR1, label)

    pdf1_new = clsr.infer(SDR1)
    for bucket, probability in ((0, 0.3), (1, 0.3), (3, 0.4)):
        self.assertAlmostEqual(pdf1_new[bucket], probability, places=1)

    # SDR2 was not trained in phase 2, so its inference must be unchanged.
    pdf2_new = clsr.infer(SDR2)
    self.assertSequenceEqual(list(pdf2), list(pdf2_new))
def main(parameters=default_parameters, argv=None, verbose=True):
    """
    Train a SpatialPooler + Classifier on MNIST and return the test score.

    The dataset directory is taken from the ``--data_dir`` command line
    option. Training draws as many random samples (with replacement) from
    the training set as there are training images; each image is binarized
    against its own mean before being fed to the spatial pooler.

    Args:
        parameters: mapping holding the SpatialPooler settings read below.
        argv: argument list handed to ``argparse``; ``None`` makes argparse
            fall back to the process arguments.
        verbose: accepted for interface compatibility; not used by this
            function.

    Returns:
        The fraction of test samples classified correctly, between 0 and 1.
    """
    default_data_dir = os.path.join(
        os.path.dirname(__file__),
        '..', '..', '..', 'build', 'ThirdParty', 'mnist_data', 'mnist-src')
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default=default_data_dir)
    args = parser.parse_args(args=argv)

    # Load data.
    train_labels, train_images, test_labels, test_images = load_mnist(
        args.data_dir)
    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)

    # Setup the AI.
    enc = SDR(train_images[0].shape)
    sp = SpatialPooler(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        seed=0,
        spVerbosity=99,
        wrapAround=False)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    # Training Loop: one random draw per training image.
    for _ in range(len(train_images)):
        img, lbl = random.choice(training_data)
        # Convert greyscale image to binary.
        enc.dense = img >= np.mean(img)
        sp.compute(enc, True, columns)
        sdrc.learn(columns, lbl)

    print(str(sp))
    print(str(columns_stats))

    # Testing Loop (learning disabled in the spatial pooler).
    correct = 0
    for img, lbl in test_data:
        # Convert greyscale image to binary.
        enc.dense = img >= np.mean(img)
        sp.compute(enc, False, columns)
        if lbl == np.argmax(sdrc.infer(columns)):
            correct += 1
    accuracy = correct / len(test_data)

    print('Score:', 100 * accuracy, '%')
    return accuracy