Python Classifier.learn Examples, htm.bindings.algorithms.Classifier.learn Python Examples

Example #1

0

Show file

    def testPredictionMultipleCategories(self):
        """Check the inferred distribution when each pattern has two labels.

        Associations being taught:
          [1,3,5] => bucketIdx 0 & 1
          [2,4,6] => bucketIdx 2 & 3

        With a small learning rate and many repetitions, each labelled
        bucket is expected to end up near probability 0.5.
        """
        classifier = Classifier(0.001)

        first = SDR(10)
        first.sparse = [1, 3, 5]
        second = SDR(10)
        second.sparse = [2, 4, 6]

        random.seed(42)
        repetitions = 5000
        for _ in range(repetitions):
            classifier.learn(pattern=first, classification=[0, 1])
            classifier.learn(pattern=second, classification=[2, 3])

        # Each pattern should split its probability across its two buckets.
        pdf_first = classifier.infer(pattern=first)
        for bucket in (0, 1):
            self.assertAlmostEqual(pdf_first[bucket], 0.5, places=1)

        pdf_second = classifier.infer(pattern=second)
        for bucket in (2, 3):
            self.assertAlmostEqual(pdf_second[bucket], 0.5, places=1)

Example #2

0

Show file

def main(parameters=default_parameters, argv=None, verbose=True):
    """Train a SpatialPooler plus Classifier on image data and score it.

    Args:
        parameters: Mapping of SpatialPooler settings; the keys read are
            the ones passed to the SpatialPooler constructor below.
        argv: Not used by this function.
        verbose: Not used by this function.

    Returns:
        The fraction of test images whose label equals the argmax of the
        classifier's inference.
    """
    # Load data.
    train_labels, train_images, test_labels, test_images = load_ds(
        'mnist_784', 10000, shape=[28, 28])  # HTM: ~95.6%
    # Alternative dataset (HTM baseline: ~83%):
    #   load_ds('Fashion-MNIST', 10000, shape=[28,28])

    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)

    # Setup the AI.
    enc = SDR(train_images[0].shape)
    sp = SpatialPooler(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        # This is important: 0 = "random" seed which changes on each
        # invocation.
        seed=0,
        spVerbosity=99,
        wrapAround=False)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    # Training Loop.  Iterate the shuffled (image, label) pairs directly
    # instead of indexing by len(train_images), so a length mismatch
    # between images and labels cannot raise IndexError.
    for img, lbl in training_data:
        encode(img, enc)
        sp.compute(enc, True, columns)
        # TODO SDRClassifier could accept string as a label, currently
        # must be int
        sdrc.learn(columns, lbl)

    print(str(sp))
    print(str(columns_stats))

    # Testing Loop
    score = 0
    for img, lbl in test_data:
        encode(img, enc)
        sp.compute(enc, False, columns)
        if lbl == np.argmax(sdrc.infer(columns)):
            score += 1
    score = score / len(test_data)

    print('Score:', 100 * score, '%')
    return score

Example #3

0

Show file

    def testSingleValue(self):
        """Learn one label ten times and expect a confident prediction."""
        clsr = Classifier(alpha=0.5)

        # One random pattern, presented repeatedly with label 2.
        pattern = SDR(10)
        pattern.randomize(.2)
        repeats = 10
        for _ in range(repeats):
            clsr.learn(pattern, 2)

        pdf = clsr.infer(pattern)
        self.assertGreater(pdf[2], 0.9)

Example #4

0

Show file

 def testSerialization4(self):
   # This test verifies that saveToFile() and loadFromFile() on Classifier,
   # called without an explicit format argument, are accessible from Python.
   inputData  = SDR( 1000 ).randomize( 0.02 )
   categories = { 'A': 0, 'B': 1, 'C': 2, 'D': 3 }
   c1 = Classifier()
   c1.learn(inputData, categories['B'] )
   path = "Classifier_test_save.BINARY"
   c1.saveToFile(path)

   # Remove the file even when loading, inference or the assertion fails,
   # so a failing run does not leave it behind.
   try:
     c2 = Classifier()
     c2.loadFromFile(path)
     result2 = c2.infer( inputData )
     self.assertEqual(numpy.argmax( result2 ), categories['B'])
   finally:
     os.remove(path)

Example #5

0

Show file

    def testPredictionDistributionOverlap(self):
        """Check learned distributions when the two input SDRs overlap.

        Associations being taught:
          SDR1    => bucketIdx 0 (30%)
                  => bucketIdx 1 (30%)
                  => bucketIdx 2 (40%)

          SDR2    => bucketIdx 1 (50%)
                  => bucketIdx 3 (50%)

        SDR1 and SDR2 are intended to overlap by 10% (2 bits out of 20).
        The classifier should still get both distributions almost right.
        """
        classifier = Classifier(0.0005)

        # The second pattern starts as a copy of the first and then has
        # noise applied; this is meant to leave the two sharing 2 bits.
        pattern_a = SDR(100)
        pattern_b = SDR(100)
        pattern_a.randomize(.20)
        pattern_b.setSDR(pattern_a)
        pattern_b.addNoise(.9)

        random.seed(42)
        for _ in range(5000):
            # One draw per pattern per iteration, pattern_a first.
            draw = random.random()
            label_a = 0 if draw < 0.3 else (1 if draw < 0.6 else 2)
            classifier.learn(pattern_a, label_a)

            draw = random.random()
            label_b = 1 if draw < 0.5 else 3
            classifier.learn(pattern_b, label_b)

        pdf_a = classifier.infer(pattern_a)
        for bucket, expected in ((0, 0.3), (1, 0.3), (2, 0.4)):
            self.assertAlmostEqual(pdf_a[bucket], expected, places=1)

        pdf_b = classifier.infer(pattern_b)
        for bucket, expected in ((1, 0.5), (3, 0.5)):
            self.assertAlmostEqual(pdf_b[bucket], expected, places=1)

Example #6

0

Show file

    def testPredictionDistribution(self):
        """Check that learned label distributions match the training mix.

        Associations being taught:
          [1,3,5] => bucketIdx 0 (30%)
                  => bucketIdx 1 (30%)
                  => bucketIdx 2 (40%)

          [2,4,6] => bucketIdx 1 (50%)
                  => bucketIdx 3 (50%)

        With a small learning rate and enough repetitions the inferred
        distribution should be close to these proportions.
        """
        classifier = Classifier(alpha=0.001)

        first = SDR(10)
        first.sparse = [1, 3, 5]
        second = SDR(10)
        second.sparse = [2, 4, 6]

        random.seed(42)
        for _ in range(5000):
            # One draw per pattern per iteration, first pattern first.
            draw = random.random()
            label_first = 0 if draw < 0.3 else (1 if draw < 0.6 else 2)
            classifier.learn(pattern=first, classification=label_first)

            draw = random.random()
            label_second = 1 if draw < 0.5 else 3
            classifier.learn(pattern=second, classification=label_second)

        pdf_first = classifier.infer(pattern=first)
        for bucket, expected in ((0, 0.3), (1, 0.3), (2, 0.4)):
            self.assertAlmostEqual(pdf_first[bucket], expected, places=1)

        pdf_second = classifier.infer(pattern=second)
        for bucket, expected in ((1, 0.5), (3, 0.5)):
            self.assertAlmostEqual(pdf_second[bucket], expected, places=1)

Example #7

0

Show file

def trainNumDecoder(encoder, mini, maxi, noise):
    """Train a Classifier that maps encoded integers back to their value.

    Every integer from ``mini`` to ``maxi`` (inclusive) is encoded with
    ``encoder.encode`` and learned with the integer itself as the class
    label, for ten passes over the range.  Each value is then decoded
    again and any mismatch is printed.

    ``noise`` is accepted but not used here; the step that would corrupt
    the encoding is disabled.

    Returns the trained Classifier.
    """
    decoder = Classifier()
    values = range(mini, maxi + 1)

    # Training: ten ascending sweeps, the number itself is the class.
    passes = 10
    for _ in range(passes):
        for value in values:
            decoder.learn(encoder.encode(value), value)

    # Verification: report every value that does not round-trip.
    for value in values:
        decoded = decode(decoder, encoder.encode(value))
        if decoded != value:
            print("error in decode training:", decoded, "->", value)
    return decoder

Example #8

0

Show file

    def testExampleUsage(self):
        # Associate one random SDR with a category and read it back.
        inputData = SDR(1000).randomize(0.02)
        categories = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
        clsr = Classifier()
        clsr.learn(inputData, categories['B'])
        best = numpy.argmax(clsr.infer(inputData))
        assert (best == categories['B'])

        # Scalar estimation: the Classifier only accepts categories, so a
        # real value is turned into a bin (AKA bucket) index by subtracting
        # the minimum and dividing by the resolution.
        scalar, minimum, resolution = 567.8, 500, 10
        bucket = int((scalar - minimum) / resolution)
        clsr.learn(inputData, bucket)
        best = numpy.argmax(clsr.infer(inputData))
        assert (best * resolution + minimum == 560)

        # Prediction 1 and 2 time steps ahead, using a sequence of four
        # random SDRs (1000 bits, 2% sparsity) with one label each.
        sequence = [SDR(1000).randomize(0.02) for _ in range(4)]
        labels = [4, 5, 6, 7]

        # Train the Predictor on the sequence, one record per step.
        pred = Predictor([1, 2])
        for step, (pattern, label) in enumerate(zip(sequence, labels)):
            pred.learn(step, pattern, label)

        # Feed partial information and check what is predicted for the
        # following steps.
        pred.reset()
        A = pred.infer(0, sequence[0])
        assert (numpy.argmax(A[1]) == labels[1])
        assert (numpy.argmax(A[2]) == labels[2])

        B = pred.infer(1, sequence[1])
        assert (numpy.argmax(B[1]) == labels[2])
        assert (numpy.argmax(B[2]) == labels[3])

Example #9

0

Show file

    def testOverlapPattern(self):
        # Teach label 9, perturb the pattern, and expect label 9 to still
        # dominate; then teach label 2 on the perturbed pattern and expect
        # it to overtake label 9.
        clsr = Classifier(alpha=10.0)
        pattern = SDR(10)
        pattern.randomize(.2)

        for _ in range(2):
            clsr.learn(pattern=pattern, classification=9)

        pattern.addNoise(.5)
        pdf = clsr.infer(pattern=pattern)

        # The noisy pattern still overlaps the learned one, so the
        # previous label should come out with high likelihood.
        self.assertGreater(pdf[9], 0.9)

        for _ in range(2):
            clsr.learn(pattern=pattern, classification=2)

        # The newly taught value should now be more probable than the old.
        pdf = clsr.infer(pattern=pattern)
        self.assertGreater(pdf[2], pdf[9])

Example #10

0

Show file

  def testSerialization1(self):
    # Verifies that a Classifier survives a pickle round trip.
    sparse_sets = ([1, 5, 9], [0, 6, 9, 11], [6, 9], [1, 5, 9])
    patterns = []
    for active in sparse_sets:
      sdr = SDR(15)
      sdr.sparse = active
      patterns.append(sdr)

    # (pattern index, label) pairs in training order; the last pattern is
    # presented twice.
    c1 = Classifier()
    for idx, label in ((0, 4), (1, 5), (2, 5), (3, 4), (3, 4)):
      c1.learn(pattern=patterns[idx], classification=label)

    c2 = pickle.loads(pickle.dumps(c1))

    result1 = c1.infer(patterns[0])
    result2 = c2.infer(patterns[0])

    # Pin the values produced by the original classifier.
    expected = (0.166344, 0.166344, 0.166344, 0.166344, 0.167847, 0.166777)
    self.assertEqual(len(result1), 6)
    for bucket, value in enumerate(expected):
      self.assertAlmostEqual(result1[bucket], value, places=5)

    # The restored classifier must give the same answer.
    self.assertEqual(len(result1), len(result2))
    for original, restored in zip(result1, result2):
      self.assertAlmostEqual(original, restored, places=5)

Example #11

0

Show file

  def testSerialization3(self):
    # This test verifies that saveToFile() and loadFromFile() on Classifier
    # are accessible from Python when the "XML" format is requested.
    SDR1 = SDR(15)
    SDR1.sparse = [1, 5, 9]
    SDR2 = SDR(15)
    SDR2.sparse = [0, 6, 9, 11]
    SDR3 = SDR(15)
    SDR3.sparse = [6, 9]
    SDR4 = SDR(15)
    SDR4.sparse = [1, 5, 9]
    c1 = Classifier()
    c1.learn(pattern=SDR1, classification=4)
    c1.learn(pattern=SDR2, classification=5)
    c1.learn(pattern=SDR3, classification=5)
    c1.learn(pattern=SDR4, classification=4)

    # The Classifier now has some data in it, try serialization.
    path = "Classifier_test_save.XML"
    c1.saveToFile(path, "XML")
    # Remove the file even if loading raises, so a failing run does not
    # leave it behind.
    try:
      c2 = Classifier()
      c2.loadFromFile(path, "XML")
    finally:
      os.remove(path)

    # The original and the reloaded classifier must infer the same PDF.
    result1 = c1.infer(SDR1)
    result2 = c2.infer(SDR1)
    self.assertEqual(len(result1), len(result2))
    for i in range(len(result1)):
      self.assertAlmostEqual(result1[i], result2[i], places=5)

Example #12

0

Show file

    def testPredictionDistributionContinuousLearning(self):
        """Check that the classifier adapts without forgetting.

        Phase 1 (10000 iterations, both patterns trained each iteration):
          SDR1    => bucketIdx 0 (30%)
                  => bucketIdx 1 (30%)
                  => bucketIdx 2 (40%)

          SDR2    => bucketIdx 1 (50%)
                  => bucketIdx 3 (50%)

        Phase 2 (20000 further iterations, SDR1 only):
          SDR1    => bucketIdx 0 (30%)
                  => bucketIdx 1 (30%)
                  => bucketIdx 3 (40%)

          No further training for SDR2.

        The classifier should pick up the new association for SDR1 while
        its inference for SDR2 stays exactly as it was after phase 1.
        """
        classifier = Classifier(0.001)
        first = SDR(10)
        first.sparse = [1, 3, 5]
        second = SDR(10)
        second.sparse = [2, 4, 6]

        random.seed(42)

        # Phase 1: one draw per pattern per iteration, first pattern first.
        for _ in range(10000):
            draw = random.random()
            label_first = 0 if draw < 0.3 else (1 if draw < 0.6 else 2)
            classifier.learn(first, label_first)

            draw = random.random()
            label_second = 1 if draw < 0.5 else 3
            classifier.learn(second, label_second)

        pdf_first = classifier.infer(first)
        for bucket, expected in ((0, 0.3), (1, 0.3), (2, 0.4)):
            self.assertAlmostEqual(pdf_first[bucket], expected, places=1)

        pdf_second = classifier.infer(second)
        for bucket, expected in ((1, 0.5), (3, 0.5)):
            self.assertAlmostEqual(pdf_second[bucket], expected, places=1)

        # Phase 2: only the first pattern is trained, and the 40% share
        # moves from bucket 2 to bucket 3.
        for _ in range(20000):
            draw = random.random()
            label_first = 0 if draw < 0.3 else (1 if draw < 0.6 else 3)
            classifier.learn(first, label_first)

        pdf_first_new = classifier.infer(first)
        for bucket, expected in ((0, 0.3), (1, 0.3), (3, 0.4)):
            self.assertAlmostEqual(pdf_first_new[bucket], expected, places=1)

        # The second pattern received no new training, so its inference
        # must be unchanged.
        pdf_second_new = classifier.infer(second)
        self.assertSequenceEqual(list(pdf_second), list(pdf_second_new))

Example #13

0

Show file

def main(parameters=default_parameters, argv=None, verbose=True):
    """Train a SpatialPooler plus Classifier on MNIST and score it.

    Args:
        parameters: Mapping of SpatialPooler settings; the keys read are
            the ones passed to the SpatialPooler constructor below.
        argv: Argument list handed to argparse; only ``--data_dir`` is
            defined.
        verbose: Not used by this function.

    Returns:
        The fraction of test images whose label equals the argmax of the
        classifier's inference.
    """
    default_data_dir = os.path.join(
        os.path.dirname(__file__), '..', '..', '..', 'build', 'ThirdParty',
        'mnist_data', 'mnist-src')
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default=default_data_dir)
    args = parser.parse_args(args=argv)

    # Load the data and shuffle both splits.
    train_labels, train_images, test_labels, test_images = load_mnist(
        args.data_dir)
    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)

    # Build the encoder SDR, the spatial pooler and the classifier.
    enc = SDR(train_images[0].shape)
    sp = SpatialPooler(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        seed=0,
        spVerbosity=99,
        wrapAround=False)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    # Training: as many samples as there are training images, each drawn
    # at random (with replacement) from the training pairs.
    for _ in range(len(train_images)):
        img, lbl = random.choice(training_data)
        # Binarize the greyscale image around its mean.
        enc.dense = img >= np.mean(img)
        sp.compute(enc, True, columns)
        sdrc.learn(columns, lbl)

    print(str(sp))
    print(str(columns_stats))

    # Testing: count the images whose most likely class is the label.
    correct = 0
    for img, lbl in test_data:
        # Binarize the greyscale image around its mean.
        enc.dense = img >= np.mean(img)
        sp.compute(enc, False, columns)
        prediction = np.argmax(sdrc.infer(columns))
        if lbl == prediction:
            correct += 1
    score = correct / len(test_data)

    print('Score:', 100 * score, '%')
    return score