def __init__(self, verbosity=1, numLabels=3,
             modelDir="ClassificationModelFingerprint",
             fingerprintType=EncoderTypes.word,
             unionSparsity=20.0):
  """Set up the kNN classifier and the Cortical.io fingerprint encoder.

  @param fingerprintType  one of EncoderTypes.document / EncoderTypes.word
  @param unionSparsity    sparsity passed through to the CioEncoder
  @raises ValueError      if fingerprintType is not a supported type
  """
  super(ClassificationModelFingerprint, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Init kNN classifier and Cortical.io encoder; need valid API key (see
  # CioEncoder init for details).
  self.classifier = KNNClassifier(k=numLabels,
                                  distanceMethod='rawOverlap',
                                  exact=False,
                                  verbosity=verbosity - 1)

  # BUG FIX: the original expression
  #   fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)
  # evaluates to `fingerprintType is False`, so invalid types were never
  # rejected. Use a membership test instead.
  if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
    raise ValueError("Invalid type of fingerprint encoding; see the "
                     "EncoderTypes class for eligible types.")

  self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                            fingerprintType=fingerprintType,
                            unionSparsity=unionSparsity)

  # SDR size comes from the encoder; w is the target number of ON bits.
  self.n = self.encoder.n
  self.w = int((self.encoder.targetSparsity / 100) * self.n)
def __init__(self,
             n=100,
             w=20,
             verbosity=1,
             numLabels=3,
             modelDir="ClassificationModelKeywords",
             classifierMetric="rawOverlap",
             k=None,
             ):
  """Exact-match kNN model over keyword SDRs of size n with w ON bits.

  @param k  number of nearest neighbors; defaults to numLabels when None.
  """
  super(ClassificationModelKeywords, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Backward compatibility to support previous odd behavior.
  # Idiom fix: compare to the None singleton with `is`, not `==`.
  if k is None:
    k = numLabels

  # We use the pctOverlapOfInput distance metric for this model so the
  # queryModel() output is consistent (i.e. 0.0-1.0). The KNN classifications
  # aren't affected b/c the raw overlap distance is still used under the hood.
  self.classifier = KNNClassifier(exact=True,
                                  distanceMethod=classifierMetric,
                                  k=k,
                                  verbosity=verbosity - 1)

  self.n = n
  self.w = w
def __init__(self, verbosity=1, numLabels=3,
             modelDir="ClassificationModelWindow", unionSparsity=0.20,
             retinaScaling=1.0, retina="en_associative", apiKey=None,
             classifierMetric="rawOverlap"):
  """Set up the kNN classifier and the Cortical.io window encoder."""
  super(ClassificationModelWindows, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Window patterns whose sparsity falls below this bound are skipped over.
  self.minSparsity = unionSparsity * 0.9

  self.classifier = KNNClassifier(k=numLabels,
                                  distanceMethod=classifierMetric,
                                  exact=False,
                                  verbosity=verbosity - 1)

  # A valid API key is required (see CioEncoder init for details).
  moduleDir = os.path.dirname(os.path.realpath(__file__))
  self.encoder = CioEncoder(retinaScaling=retinaScaling,
                            cacheDir=os.path.join(moduleDir, "CioCache"),
                            fingerprintType=EncoderTypes.word,
                            unionSparsity=unionSparsity,
                            retina=retina,
                            apiKey=apiKey)
def __init__(self, tmOverrides=None, upOverrides=None,
             classifierOverrides=None, seed=42, consoleVerbosity=0):
  # Build the three network stages -- Temporal Memory, Union Temporal
  # Pooler, kNN classifier -- each from class-level default params overlaid
  # with any caller-supplied overrides.
  # NOTE(review): consoleVerbosity is accepted but unused here -- confirm
  # whether it is consumed elsewhere.
  print "Initializing Temporal Memory..."
  params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
  params.update(tmOverrides or {})
  params["seed"] = seed
  print "params: "
  print params
  self.tm = MonitoredFastExtendedTemporalMemory(mmName="TM", **params)

  print "Initializing Union Temporal Pooler..."
  start = time.time()
  params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
  params.update(upOverrides or {})
  # The pooler consumes the TM's cell activity, so its input size and
  # potential radius are derived from the TM cell count.
  params["inputDimensions"] = [self.tm.numberOfCells()]
  params["potentialRadius"] = self.tm.numberOfCells()
  params["seed"] = seed
  self.up = MonitoredUnionTemporalPooler(mmName="UP", **params)
  elapsed = int(time.time() - start)
  print "Total time: {0:2} seconds.".format(elapsed)

  print "Initializing KNN Classifier..."
  params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
  params.update(classifierOverrides or {})
  self.classifier = KNNClassifier(**params)
def __init__(self, netInfo, options, baseline, logNames):
  """Instantiate data structures for training the input and output
  classifiers.

  Parameters:
  ------------------------------------------------------------
  netInfo:        trained network info
  options:        object containing all "command-line" options for
                    post-processing
  baseline:       dictionary of information from the corresponding baseline
                    test set if any, or None
  logNames:       Names of the available log files
  """
  # Keep references to the caller-supplied context.
  self.netInfo = netInfo
  self.options = options

  # One exact (k=1) kNN per direction, backed by sparse memory.
  knnArgs = dict(k=1, distanceNorm=1.0, distThreshold=0.0,
                 useSparseMemory=True)
  self.inputCl = KNNClassifier(**knnArgs)
  self.outputCl = KNNClassifier(**knnArgs)

  # Bookkeeping for the samples seen so far.
  self.uniqueInput = []
  self.category = []
  self.numSamples = 0
def __init__(self, netInfo, options, baseline, logNames):
  """Instantiate data structures for training the input and output
  classifiers.

  Parameters:
  ------------------------------------------------------------
  netInfo:        trained network info
  options:        object containing all "command-line" options for
                    post-processing
  baseline:       dictionary of information from the corresponding baseline
                    test set if any, or None
  logNames:       Names of the available log files
  """
  # Caller-supplied network context.
  self.netInfo = netInfo
  self.options = options

  # Exact nearest-neighbor (k=1) classifiers for the input and output
  # spaces, both over sparse memory.
  def _makeKnn():
    return KNNClassifier(k=1, distanceNorm=1.0, distThreshold=0.0,
                         useSparseMemory=True)
  self.inputCl = _makeKnn()
  self.outputCl = _makeKnn()

  # Per-sample bookkeeping.
  self.uniqueInput = []
  self.category = []
  self.numSamples = 0
def __init__(self, netInfo, options, baseline, logNames):
  """Instantiate data structures for training the input and output
  classifiers.

  Parameters:
  ------------------------------------------------------------
  netInfo:        trained network info
  options:        object containing all "command-line" options for
                    post-processing
  baseline:       dictionary of information from the corresponding baseline
                    test set if any, or None
  logNames:       Names of the available log files
  """
  # Network context.
  self.netInfo = netInfo
  self.options = options
  # NOTE(review): the docstring says baseline may be None, but this lookup
  # requires a dict -- confirm callers always supply a baseline here.
  self.trainedClassificationStats = baseline['classificationStats']

  # Running counters over the evaluated samples.
  self.numSamples = 0
  self.classificationSamples = 0
  self.classificationErrs = 0

  # We use this classifier to detect which input samples are unique. This
  # way we don't get classification accuracy skewed by multiple instances
  # of the same input.
  self.inputCl = KNNClassifier(k=1, distanceNorm=1.0, distThreshold=0.0,
                               useSparseMemory=True)

  # Records the Temporal Pooler fitness score for each sample.
  self.tpFitnessScores = []
def __init__(self,
             fingerprintType=EncoderTypes.word,
             unionSparsity=0.20,
             retinaScaling=1.0,
             retina="en_associative",
             apiKey=None,
             k=1,
             classifierMetric="rawOverlap",
             cacheRoot=None,
             **kwargs):
  """Fingerprint model: kNN over Cortical.io fingerprints.

  Extra kwargs are forwarded to the base classification model.
  @raises ValueError if fingerprintType is not a supported type.
  """
  super(ClassificationModelFingerprint, self).__init__(**kwargs)

  self.classifier = KNNClassifier(k=k,
                                  distanceMethod=classifierMetric,
                                  exact=False,
                                  verbosity=self.verbosity - 1)

  # Need a valid API key for the Cortical.io encoder (see CioEncoder
  # constructor for details).
  # BUG FIX: the original test
  #   fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)
  # reduces to `fingerprintType is False` and never fired; use membership.
  if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
    raise ValueError("Invalid type of fingerprint encoding; see the "
                     "EncoderTypes class for eligible types.")

  self.encoder = CioEncoder(retinaScaling=retinaScaling,
                            fingerprintType=fingerprintType,
                            unionSparsity=unionSparsity,
                            retina=retina,
                            apiKey=apiKey,
                            cacheDir=cacheRoot)

  # Tracks the document currently being processed, if any.
  self.currentDocument = None
def __init__(self, n=100, w=20, verbosity=1, numLabels=3):
  """Keyword model: exact-match kNN over SDRs of size n with w ON bits."""
  super(ClassificationModelKeywords, self).__init__(n, w, verbosity,
                                                    numLabels)
  knnParams = dict(exact=True,
                   distanceMethod='rawOverlap',
                   k=numLabels,
                   verbosity=verbosity - 1)
  self.classifier = KNNClassifier(**knnParams)
def testOverlapDistanceMethodBadSparsity(self):
  """Sparsity (input dimensionality) less than input array"""
  classifier = KNNClassifier(distanceMethod="rawOverlap")

  pattern = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

  # A declared dimensionality of 20 is too small: the pattern contains ON
  # bits (23, 29) outside that range, so learn() must fail.
  with self.assertRaises(RuntimeError):
    classifier.learn(pattern, 0, isSparse=20)
def testGetPartitionIdWithNoIdsAtFirst(self):
  """
  Tests that we can correctly retrieve partition Id even if the first few
  vectors do not have Ids
  """
  params = {"distanceMethod": "rawOverlap"}
  classifier = KNNClassifier(**params)

  dimensionality = 40
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)
  c = np.array([1, 2, 3, 14, 16, 19, 22, 24, 33], dtype=np.int32)
  d = np.array([2, 4, 8, 12, 14, 19, 22, 24, 33], dtype=np.int32)

  denseA = np.zeros(dimensionality)
  denseA[a] = 1.0

  denseD = np.zeros(dimensionality)
  denseD[d] = 1.0

  # The first two patterns carry no partition id.
  classifier.learn(a, 0, isSparse=dimensionality, partitionId=None)
  classifier.learn(b, 1, isSparse=dimensionality, partitionId=None)
  classifier.learn(c, 2, isSparse=dimensionality, partitionId=211)
  classifier.learn(d, 1, isSparse=dimensionality, partitionId=405)

  # Inference with partitionId excludes patterns learned under that id.
  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual.
  cat, _, _, _ = classifier.infer(denseA, partitionId=405)
  self.assertEqual(cat, 0)

  cat, _, _, _ = classifier.infer(denseD, partitionId=405)
  self.assertEqual(cat, 2)

  cat, _, _, _ = classifier.infer(denseD)
  self.assertEqual(cat, 1)
def __init__(self, verbosity=1):
  """Random-SDR model: exact-match kNN over fixed-size random SDRs."""
  super(ClassificationModelRandomSDR, self).__init__(verbosity)

  # kNN classifier notes:
  #   - pass 'distanceMethod'='rawOverlap' for overlap; Euclidean is std.
  #   - verbosity=1 enables debugging output
  #   - standard k is 1
  self.classifier = KNNClassifier(exact=True, verbosity=verbosity - 1)

  # SDR dimensions: total bits and ON bits.
  self.n, self.w = 100, 20
def testPartitionId(self):
  """Tests that partitionId properly excludes training data points"""
  params = {"distanceMethod": "rawOverlap"}
  classifier = KNNClassifier(**params)

  dimensionality = 40
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

  denseA = np.zeros(dimensionality)
  denseA[a] = 1.0
  denseB = np.zeros(dimensionality)
  denseB[b] = 1.0

  classifier.learn(a, 0, isSparse=dimensionality, partitionId=0)
  classifier.learn(b, 1, isSparse=dimensionality, partitionId=1)

  # Inference with a partitionId must exclude patterns learned under it.
  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual. Also fixed "paritionId" docstring typo.
  cat, _, _, _ = classifier.infer(denseA, partitionId=1)
  self.assertEqual(cat, 0)

  cat, _, _, _ = classifier.infer(denseA, partitionId=0)
  self.assertEqual(cat, 1)

  cat, _, _, _ = classifier.infer(denseB, partitionId=0)
  self.assertEqual(cat, 1)

  cat, _, _, _ = classifier.infer(denseB, partitionId=1)
  self.assertEqual(cat, 0)
def testOverlapDistanceMethodEmptyArray(self):
  """Tests case where pattern has no ON bits"""
  params = {"distanceMethod": "rawOverlap"}
  classifier = KNNClassifier(**params)

  dimensionality = 40
  a = np.array([], dtype=np.int32)

  # Learning an all-zero pattern still counts as one stored pattern.
  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual.
  numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
  self.assertEqual(numPatterns, 1)

  denseA = np.zeros(dimensionality)
  denseA[a] = 1.0
  cat, _, _, _ = classifier.infer(denseA)
  self.assertEqual(cat, 0)
def testExtractVectorsFromKNN(self):
  """Vectors learned by a KNN can be extracted back out unchanged."""
  # Ten random binary vectors of width 25 at roughly 10% density.
  patterns = numpy.random.rand(10, 25) < 0.1

  # Train the KNN on every pattern under a single category.
  knn = KNNClassifier()
  for row in xrange(patterns.shape[0]):
    knn.learn(patterns[row], 0)

  # The extracted sparse matrix must contain exactly the learned vectors.
  extracted = HierarchicalClustering._extractVectorsFromKNN(knn)
  self.assertEqual(sorted(extracted.todense().tolist()),
                   sorted(patterns.tolist()))
def __init__(self,
             n=100,
             w=20,
             verbosity=1,
             numLabels=3,
             modelDir="ClassificationModelKeywords",
             classifierMetric="rawOverlap",
             k=None,
             ):
  """Exact-match kNN model over keyword SDRs of size n with w ON bits.

  @param k  number of nearest neighbors; defaults to numLabels when None.
  """
  super(ClassificationModelKeywords, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Backward compatibility to support previous odd behavior.
  # Idiom fix: compare to the None singleton with `is`, not `==`.
  if k is None:
    k = numLabels

  # We use the pctOverlapOfInput distance metric for this model so the
  # queryModel() output is consistent (i.e. 0.0-1.0). The KNN classifications
  # aren't affected b/c the raw overlap distance is still used under the hood.
  self.classifier = KNNClassifier(exact=True,
                                  distanceMethod=classifierMetric,
                                  k=k,
                                  verbosity=verbosity - 1)

  self.n = n
  self.w = w
def __init__(self, netInfo, options, baseline, logNames): """ Instantiate data structures for training the input and output classifiers Parameters: ------------------------------------------------------------ netInfo: trained network info options: object containing all "command-line" options for post-processsing baseline: dictionary of information from the corresponding baseline test set if any, or None logNames: Names of the available log files """ # Save net info self.netInfo = netInfo self.options = options self.trainedClassificationStats = baseline['classificationStats'] # Init member vars self.numSamples = 0 self.classificationSamples = 0 self.classificationErrs = 0 # We use this classifier to detect which input samples are unique. This # way we don't get classification accuracy skewed by multiple instances # of the same input. self.inputCl = KNNClassifier(k=1, distanceNorm=1.0, distThreshold=0.0, useSparseMemory = True) # This array is used to recored the Temporal Pooler fitness score for each # sample. self.tpFitnessScores = []
def __init__(self, verbosity=1, numLabels=3,
             modelDir="ClassificationModelWindow", unionSparsity=0.20,
             retinaScaling=1.0, retina="en_associative", apiKey=None,
             classifierMetric="rawOverlap", cacheRoot=None):
  """Window model: kNN over Cortical.io word fingerprints.

  cacheRoot defaults to this module's directory when not given.
  """
  super(ClassificationModelWindows, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Window patterns whose sparsity falls below this bound are skipped over.
  self.minSparsity = unionSparsity * 0.9

  self.classifier = KNNClassifier(k=numLabels,
                                  distanceMethod=classifierMetric,
                                  exact=False,
                                  verbosity=verbosity - 1)

  # A valid API key is required (see CioEncoder init for details).
  if cacheRoot is None:
    cacheRoot = os.path.dirname(os.path.realpath(__file__))
  self.encoder = CioEncoder(retinaScaling=retinaScaling,
                            cacheDir=os.path.join(cacheRoot, "CioCache"),
                            fingerprintType=EncoderTypes.word,
                            unionSparsity=unionSparsity,
                            retina=retina,
                            apiKey=apiKey)
def __init__(self, verbosity=1, numLabels=3,
             modelDir="ClassificationModelFingerprint",
             fingerprintType=EncoderTypes.word,
             unionSparsity=0.20,
             retinaScaling=1.0,
             retina="en_associative",
             apiKey=None,
             classifierMetric="rawOverlap",
             cacheRoot=None):
  """Fingerprint model: kNN over Cortical.io fingerprints.

  @param fingerprintType  one of EncoderTypes.document / EncoderTypes.word
  @raises ValueError      if fingerprintType is not a supported type
  """
  super(ClassificationModelFingerprint, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Init kNN classifier and Cortical.io encoder; need valid API key (see
  # CioEncoder init for details).
  self.classifier = KNNClassifier(k=numLabels,
                                  distanceMethod=classifierMetric,
                                  exact=False,
                                  verbosity=verbosity - 1)

  # BUG FIX: the original expression
  #   fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)
  # evaluates to `fingerprintType is False`, so invalid types were never
  # rejected. Use a membership test instead.
  if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
    raise ValueError("Invalid type of fingerprint encoding; see the "
                     "EncoderTypes class for eligible types.")

  cacheRoot = cacheRoot or os.path.dirname(os.path.realpath(__file__))

  self.encoder = CioEncoder(retinaScaling=retinaScaling,
                            cacheDir=os.path.join(cacheRoot, "CioCache"),
                            fingerprintType=fingerprintType,
                            unionSparsity=unionSparsity,
                            retina=retina,
                            apiKey=apiKey)
def __init__(self,
             fingerprintType=EncoderTypes.word,
             unionSparsity=0.20,
             retinaScaling=1.0,
             retina="en_associative",
             apiKey=None,
             k=1,
             classifierMetric="rawOverlap",
             cacheRoot=None,
             **kwargs):
  """Fingerprint model: kNN over Cortical.io fingerprints.

  Extra kwargs are forwarded to the base classification model.
  @raises ValueError if fingerprintType is not a supported type.
  """
  super(ClassificationModelFingerprint, self).__init__(**kwargs)

  self.classifier = KNNClassifier(k=k,
                                  distanceMethod=classifierMetric,
                                  exact=False,
                                  verbosity=self.verbosity - 1)

  # Need a valid API key for the Cortical.io encoder (see CioEncoder
  # constructor for details).
  # BUG FIX: the original test
  #   fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)
  # reduces to `fingerprintType is False` and never fired; use membership.
  if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
    raise ValueError("Invalid type of fingerprint encoding; see the "
                     "EncoderTypes class for eligible types.")

  self.encoder = CioEncoder(retinaScaling=retinaScaling,
                            fingerprintType=fingerprintType,
                            unionSparsity=unionSparsity,
                            retina=retina,
                            apiKey=apiKey,
                            cacheDir=cacheRoot)

  # Tracks the document currently being processed, if any.
  self.currentDocument = None
def __init__(self, verbosity=1, numLabels=3,
             modelDir="ClassificationModelFingerprint",
             fingerprintType=EncoderTypes.word,
             unionSparsity=20.0):
  """Set up the kNN classifier and the Cortical.io fingerprint encoder.

  @param fingerprintType  one of EncoderTypes.document / EncoderTypes.word
  @raises ValueError      if fingerprintType is not a supported type
  """
  super(ClassificationModelFingerprint, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Init kNN classifier and Cortical.io encoder; need valid API key (see
  # CioEncoder init for details).
  self.classifier = KNNClassifier(k=numLabels,
                                  distanceMethod='rawOverlap',
                                  exact=False,
                                  verbosity=verbosity - 1)

  # BUG FIX: the original expression
  #   fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)
  # evaluates to `fingerprintType is False`, so invalid types were never
  # rejected. Use a membership test instead.
  if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
    raise ValueError("Invalid type of fingerprint encoding; see the "
                     "EncoderTypes class for eligible types.")

  self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                            fingerprintType=fingerprintType,
                            unionSparsity=unionSparsity)

  # SDR size comes from the encoder; w is the target number of ON bits.
  self.n = self.encoder.n
  self.w = int((self.encoder.targetSparsity / 100) * self.n)
def testMinSparsity(self):
  """Tests overlap distance with min sparsity"""
  # Require sparsity >= 20%
  params = {"distanceMethod": "rawOverlap", "minSparsity": 0.2}
  classifier = KNNClassifier(**params)

  dimensionality = 30
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  b = np.array([2, 4, 8, 12, 14, 18, 20, 21, 28], dtype=np.int32)

  # This has 20% sparsity and should be inserted
  c = np.array([2, 3, 8, 11, 14, 18], dtype=np.int32)

  # This has 17% sparsity and should NOT be inserted
  d = np.array([2, 3, 8, 11, 18], dtype=np.int32)

  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual.
  numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
  self.assertEqual(numPatterns, 1)

  numPatterns = classifier.learn(b, 1, isSparse=dimensionality)
  self.assertEqual(numPatterns, 2)

  numPatterns = classifier.learn(c, 1, isSparse=dimensionality)
  self.assertEqual(numPatterns, 3)

  numPatterns = classifier.learn(d, 1, isSparse=dimensionality)
  self.assertEqual(numPatterns, 3)

  # Test that inference ignores low sparsity vectors but not others
  e = np.array([2, 4, 5, 6, 8, 12, 14, 18, 20], dtype=np.int32)
  dense = np.zeros(dimensionality)
  dense[e] = 1.0
  cat, inference, _, _ = classifier.infer(dense)
  self.assertIsNotNone(cat)
  self.assertGreater(inference.sum(), 0.0)

  # This has 20% sparsity and should be used for inference
  f = np.array([2, 5, 8, 11, 14, 18], dtype=np.int32)
  dense = np.zeros(dimensionality)
  dense[f] = 1.0
  cat, inference, _, _ = classifier.infer(dense)
  self.assertIsNotNone(cat)
  self.assertGreater(inference.sum(), 0.0)

  # This has 17% sparsity and should return null inference results
  g = np.array([2, 3, 8, 11, 19], dtype=np.int32)
  dense = np.zeros(dimensionality)
  dense[g] = 1.0
  cat, inference, _, _ = classifier.infer(dense)
  self.assertIsNone(cat)
  self.assertEqual(inference.sum(), 0.0)
def __init__(self, n=100, w=20, verbosity=1, classifierMetric="rawOverlap",
             k=1, **kwargs):
  """Keyword model: exact-match kNN classifier.

  Extra kwargs are forwarded to the base classification model.
  """
  super(ClassificationModelKeywords, self).__init__(**kwargs)

  knnParams = dict(exact=True,
                   distanceMethod=classifierMetric,
                   k=k,
                   verbosity=verbosity - 1)
  self.classifier = KNNClassifier(**knnParams)

  # presumably n/w are the SDR width and ON-bit count -- TODO confirm
  self.n = n
  self.w = w
def __init__(self, netInfo, options, baseline, logNames):
  """
  Instantiate data structures for calculating the knn regression stats on
  a given data set.

  Parameters:
  ------------------------------------------------------------
  netInfo:        trained network info
  options:        object containing all "command-line" options for
                    post-processsing
  baseline:       dictionary of information from the corresponding baseline
                    test set if any, or None
  logNames:       Names of the available log files
  """
  # -----------------------------------------------------------------------
  # Get info about the network
  self.netInfo = netInfo
  self.options = options
  self.verbosity = self.options['verbosity']
  #self.verbosity = 3      # Uncomment for verbosity only in this module

  # The 'knnRegression' option carries both the compute mode ('train' or
  # 'test') and the name of the field to regress on.
  self.computeMode, self.regressionField = \
      self._getComputeModeAndRegressionField(options['knnRegression'])

  # If testing, make sure we have a trained classifier state
  # (gTrainedKNNClassifierState is module-level state populated by a prior
  # 'train' run in the same process).
  if self.computeMode == 'test':
    self.training = False
    global gTrainedKNNClassifierState
    if gTrainedKNNClassifierState is None:
      print "\nWARNING: You are using the option 'knnRegression=test,FIELD', "\
        " but haven't trained the classifier using "\
        " 'knnRegression=train,FIELD', "\
        " No regression will be performed."
    else:
      if gTrainedKNNClassifierState[
          'regressionField'] != self.regressionField:
        print "\nWARNING: You are using different regression fields for testing,"\
          " and training. No regression will be performed."
    # NOTE: trainedState may be None here; downstream code presumably
    # checks for that before regressing -- TODO confirm.
    self.trainedState = gTrainedKNNClassifierState
  else:
    self.training = True
    # Fresh training state: an exact (k=1) kNN over sparse memory, the
    # regression field name, and a category->value map filled in later.
    self.trainedState = dict()
    self.trainedState['classifier'] = KNNClassifier(
        k=1, distanceNorm=1.0, distThreshold=0.0, useSparseMemory=True)
    self.trainedState['regressionField'] = self.regressionField
    self.trainedState['categoryMap'] = []

  # -----------------------------------------------------------------------
  # Init variables
  self.sourceFieldNames = self.netInfo['encoder'].getScalarNames()
  self.numFields = len(self.sourceFieldNames)
  self.regressionFieldIdx = self.sourceFieldNames.index(
      self.regressionField)

  # Accumuated closeness scores
  self.numSamples = 0
  self.sourceClosenessSum = 0.0
  self.absSourceClosenessSum = 0.0
  self.rmseSourceClosenessSum = 0.0
def simulateCategories(numSamples=100, numDimensions=500):
  """Simulate running KNN classifier on many disjoint categories"""
  failures = ""
  LOGGER.info(
      "Testing the sparse KNN Classifier on many disjoint categories")
  knn = KNNClassifier(k=1, distanceNorm=1.0, useSparseMemory=True)

  # Train on randomly chosen categories (even values in [50, 148]).
  for _ in range(numSamples):
    cat = 2 * numpy.random.randint(0, 50) + 50
    vec = createPattern(cat, numDimensions)
    knn.learn(vec, cat)

  # Make sure every category has at least one training vector.
  for idx in range(50):
    cat = 2 * idx + 50
    vec = createPattern(cat, numDimensions)
    knn.learn(vec, cat)

  mistakes = 0
  for _ in range(numSamples):
    cat = 2 * numpy.random.randint(0, 50) + 50
    vec = createPattern(cat, numDimensions)
    inferred, _kir, _kd, _kcd = knn.infer(vec)
    if inferred != cat:
      LOGGER.info("Mistake with %s %s %s %s %s", vec[vec.nonzero()],
                  "mapped to category", inferred, "instead of category", cat)
      LOGGER.info("  %s", vec.nonzero())
      mistakes += 1
  if mistakes != 0:
    failures += "Failure in handling non-consecutive category indices\n"

  # Exercise closestTrainingPattern on a handful of random categories.
  mistakes = 0
  for _ in range(10):
    cat = 2 * numpy.random.randint(0, 50) + 50
    vec = createPattern(cat, numDimensions)
    closest = knn.closestTrainingPattern(vec, cat)
    if cat not in closest.nonzero()[0]:
      LOGGER.info("Mistake %s %s", closest.nonzero(), vec.nonzero())
      LOGGER.info("%s %s", closest[closest.nonzero()], vec[vec.nonzero()])
      mistakes += 1
  if mistakes != 0:
    failures += "Failure in closestTrainingPattern method\n"

  return failures, knn
def __init__(self, verbosity=1):
  """Fingerprint model: kNN over Cortical.io word fingerprints."""
  super(ClassificationModelFingerprint, self).__init__(verbosity)

  # Init kNN classifier and Cortical.io encoder; a valid API key is needed
  # (see CioEncoder init for details).
  self.classifier = KNNClassifier(k=1,
                                  exact=False,
                                  verbosity=verbosity - 1)
  self.encoder = CioEncoder(cacheDir="./experiments/cache")

  # SDR size comes from the encoder; w is the target number of ON bits.
  self.n = self.encoder.n
  self.w = int(self.n * (self.encoder.targetSparsity / 100))
def testOverlapDistanceMethodInconsistentDimensionality(self):
  """Inconsistent sparsity (input dimensionality)"""
  params = {"distanceMethod": "rawOverlap"}
  classifier = KNNClassifier(**params)

  dimensionality = 40
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

  # Learn with incorrect dimensionality, greater than largest ON bit, but
  # inconsistent when inferring.
  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual.
  numPatterns = classifier.learn(a, 0, isSparse=31)
  self.assertEqual(numPatterns, 1)

  denseA = np.zeros(dimensionality)
  denseA[a] = 1.0

  cat, _, _, _ = classifier.infer(denseA)
  self.assertEqual(cat, 0)
def testMinSparsity(self):
  """Tests overlap distance with min sparsity"""
  # Require sparsity >= 20%
  params = {"distanceMethod": "rawOverlap", "minSparsity": 0.2}
  classifier = KNNClassifier(**params)

  dimensionality = 30
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  b = np.array([2, 4, 8, 12, 14, 18, 20, 21, 28], dtype=np.int32)

  # This has 20% sparsity and should be inserted
  c = np.array([2, 3, 8, 11, 14, 18], dtype=np.int32)

  # This has 17% sparsity and should NOT be inserted
  d = np.array([2, 3, 8, 11, 18], dtype=np.int32)

  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual. Also normalized spacing around '='.
  numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
  self.assertEqual(numPatterns, 1)

  numPatterns = classifier.learn(b, 1, isSparse=dimensionality)
  self.assertEqual(numPatterns, 2)

  numPatterns = classifier.learn(c, 1, isSparse=dimensionality)
  self.assertEqual(numPatterns, 3)

  numPatterns = classifier.learn(d, 1, isSparse=dimensionality)
  self.assertEqual(numPatterns, 3)

  # Test that inference ignores low sparsity vectors but not others
  e = np.array([2, 4, 5, 6, 8, 12, 14, 18, 20], dtype=np.int32)
  dense = np.zeros(dimensionality)
  dense[e] = 1.0
  cat, inference, _, _ = classifier.infer(dense)
  self.assertIsNotNone(cat)
  self.assertGreater(inference.sum(), 0.0)

  # This has 20% sparsity and should be used for inference
  f = np.array([2, 5, 8, 11, 14, 18], dtype=np.int32)
  dense = np.zeros(dimensionality)
  dense[f] = 1.0
  cat, inference, _, _ = classifier.infer(dense)
  self.assertIsNotNone(cat)
  self.assertGreater(inference.sum(), 0.0)

  # This has 17% sparsity and should return null inference results
  g = np.array([2, 3, 8, 11, 19], dtype=np.int32)
  dense = np.zeros(dimensionality)
  dense[g] = 1.0
  cat, inference, _, _ = classifier.infer(dense)
  self.assertIsNone(cat)
  self.assertEqual(inference.sum(), 0.0)
def __init__(self, verbosity=1):
  """Random-SDR model: exact-match kNN over fixed-size random SDRs."""
  super(ClassificationModelRandomSDR, self).__init__(verbosity)

  # kNN classifier notes:
  #   - specify 'distanceMethod'='rawOverlap' for overlap; Euclidean is std.
  #   - verbosity=1 for debugging
  #   - standard k is 1
  knnVerbosity = verbosity - 1
  self.classifier = KNNClassifier(exact=True, verbosity=knnVerbosity)

  # SDR dimensions: total bits and ON bits.
  self.n = 100
  self.w = 20
def simulateCategories(numSamples=100, numDimensions=500):
  """Simulate running KNN classifier on many disjoint categories"""

  failures = ""
  LOGGER.info("Testing the sparse KNN Classifier on many disjoint categories")
  # Exact nearest-neighbor (k=1) over sparse memory.
  knn = KNNClassifier(k=1, distanceNorm=1.0, useSparseMemory=True)

  # Categories are even values in [50, 148]; createPattern presumably
  # embeds the category into the vector -- see createPattern for details.
  for i in range(0, numSamples):
    # select category randomly and generate vector
    c = 2*numpy.random.randint(0, 50) + 50
    v = createPattern(c, numDimensions)
    knn.learn(v, c)

  # Go through each category and ensure we have at least one from each!
  for i in range(0, 50):
    c = 2*i+50
    v = createPattern(c, numDimensions)
    knn.learn(v, c)

  # Inference pass: every random sample must map back to its category.
  errors = 0
  for i in range(0, numSamples):
    # select category randomly and generate vector
    c = 2*numpy.random.randint(0, 50) + 50
    v = createPattern(c, numDimensions)
    inferCat, _kir, _kd, _kcd = knn.infer(v)
    if inferCat != c:
      LOGGER.info("Mistake with %s %s %s %s %s", v[v.nonzero()], \
                  "mapped to category", inferCat, "instead of category", c)
      LOGGER.info("  %s", v.nonzero())
      errors += 1
  if errors != 0:
    failures += "Failure in handling non-consecutive category indices\n"

  # Test closest methods
  errors = 0
  for i in range(0, 10):
    # select category randomly and generate vector
    c = 2*numpy.random.randint(0, 50) + 50
    v = createPattern(c, numDimensions)
    p = knn.closestTrainingPattern(v, c)
    if not (c in p.nonzero()[0]):
      LOGGER.info("Mistake %s %s", p.nonzero(), v.nonzero())
      LOGGER.info("%s %s", p[p.nonzero()], v[v.nonzero()])
      errors += 1
  if errors != 0:
    failures += "Failure in closestTrainingPattern method\n"

  return failures, knn
def __init__(self, n=100, w=20, verbosity=1, numLabels=3,
             modelDir="ClassificationModelKeywords"):
  """Keyword model: exact-match kNN over SDRs of size n with w ON bits."""
  super(ClassificationModelKeywords, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # We use the pctOverlapOfInput distance metric for this model so the
  # queryModel() output is consistent (i.e. 0.0-1.0). The KNN classifications
  # aren't affected b/c the raw overlap distance is still used under the hood.
  knnVerbosity = verbosity - 1
  self.classifier = KNNClassifier(exact=True,
                                  distanceMethod="pctOverlapOfInput",
                                  k=numLabels,
                                  verbosity=knnVerbosity)

  self.n = n
  self.w = w
def __init__(self, n=100, w=20, verbosity=1, numLabels=3,
             modelDir="ClassificationModelKeywords"):
  """Keyword model: exact-match kNN over SDRs of size n with w ON bits."""
  super(ClassificationModelKeywords, self).__init__(
      n, w, verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  knnParams = dict(exact=True,
                   distanceMethod='rawOverlap',
                   k=numLabels,
                   verbosity=verbosity - 1)
  self.classifier = KNNClassifier(**knnParams)
def __init__(self, verbosity=1, numLabels=3,
             modelDir="ClassificationModelFingerprint",
             fingerprintType=EncoderTypes.word,
             unionSparsity=0.20,
             retinaScaling=1.0,
             retina="en_associative",
             apiKey=None,
             classifierMetric="rawOverlap",
             cacheRoot=None):
  """Fingerprint model: kNN over Cortical.io fingerprints.

  @param fingerprintType  one of EncoderTypes.document / EncoderTypes.word
  @raises ValueError      if fingerprintType is not a supported type
  """
  super(ClassificationModelFingerprint, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

  # Init kNN classifier and Cortical.io encoder; need valid API key (see
  # CioEncoder init for details).
  self.classifier = KNNClassifier(k=numLabels,
                                  distanceMethod=classifierMetric,
                                  exact=False,
                                  verbosity=verbosity - 1)

  # BUG FIX: the original expression
  #   fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)
  # evaluates to `fingerprintType is False`, so invalid types were never
  # rejected. Use a membership test instead.
  if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
    raise ValueError("Invalid type of fingerprint encoding; see the "
                     "EncoderTypes class for eligible types.")

  cacheRoot = cacheRoot or os.path.dirname(os.path.realpath(__file__))

  self.encoder = CioEncoder(retinaScaling=retinaScaling,
                            cacheDir=os.path.join(cacheRoot, "CioCache"),
                            fingerprintType=fingerprintType,
                            unionSparsity=unionSparsity,
                            retina=retina,
                            apiKey=apiKey)
def __init__(self, tmOverrides=None, upOverrides=None,
             classifierOverrides=None, seed=42, consoleVerbosity=0):
  # Build the three network stages -- Temporal Memory, Union Pooler, kNN
  # classifier -- each from class-level default params overlaid with any
  # caller-supplied overrides.
  # NOTE(review): consoleVerbosity is accepted but unused here -- confirm
  # whether it is consumed elsewhere.
  print "Initializing Temporal Memory..."
  params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
  params.update(tmOverrides or {})
  params["seed"] = seed
  self.tm = MonitoredFastGeneralTemporalMemory(mmName="TM", **params)

  print "Initializing Union Pooler..."
  params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
  params.update(upOverrides or {})
  # The pooler consumes the TM's cell activity, so its input size and
  # potential radius are derived from the TM cell count.
  params["inputDimensions"] = [self.tm.numberOfCells()]
  params["potentialRadius"] = self.tm.numberOfCells()
  params["seed"] = seed
  self.up = MonitoredUnionPooler(mmName="UP", **params)

  print "Initializing KNN Classifier..."
  params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
  params.update(classifierOverrides or {})
  self.classifier = KNNClassifier(**params)
def __init__(self, n=100, w=20, verbosity=1, classifierMetric="rawOverlap",
             k=1, **kwargs):
  """Keyword model: exact-match kNN classifier.

  Extra kwargs are forwarded to the base classification model.
  """
  super(ClassificationModelKeywords, self).__init__(**kwargs)

  self.classifier = KNNClassifier(exact=True,
                                  k=k,
                                  distanceMethod=classifierMetric,
                                  verbosity=verbosity - 1)

  # presumably n/w are the SDR width and ON-bit count -- TODO confirm
  self.n = n
  self.w = w
def testOverlapDistanceMethodStandardUnsorted(self):
  """If sparse representation indices are unsorted expect error."""
  classifier = KNNClassifier(distanceMethod="rawOverlap")
  width = 40

  unsortedA = np.array([29, 3, 7, 11, 13, 17, 19, 23, 1], dtype=np.int32)
  unsortedB = np.array([2, 4, 20, 12, 14, 18, 8, 28, 30], dtype=np.int32)

  # learn() requires sparse indices in ascending order.
  with self.assertRaises(RuntimeError):
    classifier.learn(unsortedA, 0, isSparse=width)

  with self.assertRaises(RuntimeError):
    classifier.learn(unsortedB, 1, isSparse=width)
def testOverlapDistanceMethod_ClassifySparse(self):
  """Learn sparse patterns, then infer with the same sparse arrays."""
  params = {"distanceMethod": "rawOverlap"}
  classifier = KNNClassifier(**params)

  dimensionality = 40
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

  classifier.learn(a, 0, isSparse=dimensionality)
  classifier.learn(b, 1, isSparse=dimensionality)

  # TODO Test case where infer is passed a sparse representation after
  # infer() has been extended to handle sparse and dense
  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual.
  cat, _, _, _ = classifier.infer(a)
  self.assertEqual(cat, 0)

  cat, _, _, _ = classifier.infer(b)
  self.assertEqual(cat, 1)
def testOverlapDistanceMethodStandard(self):
  """Tests standard learning case for raw overlap"""
  params = {"distanceMethod": "rawOverlap"}
  classifier = KNNClassifier(**params)

  dimensionality = 40
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

  # Fixed: assertEquals is a deprecated alias (removed in Python 3.12);
  # use assertEqual.
  numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
  self.assertEqual(numPatterns, 1)

  numPatterns = classifier.learn(b, 1, isSparse=dimensionality)
  self.assertEqual(numPatterns, 2)

  denseA = np.zeros(dimensionality)
  denseA[a] = 1.0
  cat, _, _, _ = classifier.infer(denseA)
  self.assertEqual(cat, 0)

  denseB = np.zeros(dimensionality)
  denseB[b] = 1.0
  cat, _, _, _ = classifier.infer(denseB)
  self.assertEqual(cat, 1)
def testSparsifyVector(self):
  """Exercise each decisional branch of KNNClassifier._sparsifyVector."""
  pattern = np.array([0, 1, 3, 7, 11], dtype=np.int32)

  def check(expected, **knnKwargs):
    # Build a norm-distance classifier with the given sparsification
    # options and compare _sparsifyVector's output against `expected`.
    clf = KNNClassifier(distanceMethod="norm", distanceNorm=2.0, **knnKwargs)
    out = clf._sparsifyVector(pattern, doWinners=True)
    self.assertTrue(np.array_equal(expected, out))

  # branch: if not self.relativeThreshold
  check(np.array([0, 1, 3, 7, 11], dtype=np.int32))

  # branch: elif self.sparseThreshold > 0
  check(np.array([0, 0, 3, 7, 11], dtype=np.int32),
        relativeThreshold=True, sparseThreshold=.2)

  # branch: if doWinners
  check(np.array([0, 0, 0, 0, 0], dtype=np.int32),
        relativeThreshold=True, sparseThreshold=.2, numWinners=2)

  # branch: do binarization
  check(np.array([0., 0., 1., 1., 1.], dtype=np.float32),
        relativeThreshold=True, sparseThreshold=.2, doBinarization=True)
class UnionTemporalPoolerExperiment(object):
  """
  This class defines a Temporal Memory-Union Temporal Pooler network and
  provides methods to run the network on data sequences.
  """

  # Construction defaults for the Temporal Memory layer; individual entries
  # may be overridden via the `tmOverrides` constructor argument.
  DEFAULT_TEMPORAL_MEMORY_PARAMS = {"columnCount": 1024,
                                    "cellsPerColumn": 8,
                                    "activationThreshold": 20,
                                    "initialPermanence": 0.5,
                                    "connectedPermanence": 0.6,
                                    "minThreshold": 20,
                                    "sampleSize": 30,
                                    "permanenceIncrement": 0.10,
                                    "permanenceDecrement": 0.02,
                                    "seed": 42,
                                    "learnOnOneCell": False}

  # Construction defaults for the Union Temporal Pooler; individual entries
  # may be overridden via the `upOverrides` constructor argument.
  DEFAULT_UNION_POOLER_PARAMS = {# Spatial Pooler Params
                                 # inputDimensions set to TM cell count
                                 # potentialRadius set to TM cell count
                                 "columnDimensions": [1024],
                                 "numActiveColumnsPerInhArea": 20,
                                 "stimulusThreshold": 0,
                                 "synPermInactiveDec": 0.01,
                                 "synPermActiveInc": 0.1,
                                 "synPermConnected": 0.1,
                                 "potentialPct": 0.5,
                                 "globalInhibition": True,
                                 "localAreaDensity": -1,
                                 "minPctOverlapDutyCycle": 0.001,
                                 "dutyCyclePeriod": 1000,
                                 "boostStrength": 10.0,
                                 "seed": 42,
                                 "spVerbosity": 0,
                                 "wrapAround": True,

                                 # Union Temporal Pooler Params
                                 "activeOverlapWeight": 1.0,
                                 "predictedActiveOverlapWeight": 10.0,
                                 "maxUnionActivity": 0.20,
                                 "exciteFunctionType": 'Fixed',
                                 "decayFunctionType": 'NoDecay'}

  # Construction defaults for the KNN classifier; individual entries may be
  # overridden via the `classifierOverrides` constructor argument.
  DEFAULT_CLASSIFIER_PARAMS = {"k": 1,
                               "distanceMethod": "rawOverlap",
                               "distThreshold": 0}


  def __init__(self, tmOverrides=None, upOverrides=None,
               classifierOverrides=None, seed=42, consoleVerbosity=0):
    """
    Build the TM -> UP -> classifier pipeline from the class defaults merged
    with the given override dicts. `seed` replaces the seed in all layers.

    NOTE(review): `consoleVerbosity` is accepted but not read anywhere in
    this constructor.
    """
    print "Initializing Temporal Memory..."
    params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
    params.update(tmOverrides or {})
    params["seed"] = seed
    print "params: "
    print params
    self.tm = MonitoredFastExtendedTemporalMemory(mmName="TM", **params)

    print "Initializing Union Temporal Pooler..."
    start = time.time()
    params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
    params.update(upOverrides or {})
    # The pooler's input space is the full set of TM cells.
    params["inputDimensions"] = [self.tm.numberOfCells()]
    params["potentialRadius"] = self.tm.numberOfCells()
    params["seed"] = seed
    self.up = MonitoredUnionTemporalPooler(mmName="UP", **params)
    elapsed = int(time.time() - start)
    print "Total time: {0:2} seconds.".format(elapsed)

    print "Initializing KNN Classifier..."
    params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
    params.update(classifierOverrides or {})
    self.classifier = KNNClassifier(**params)


  def runNetworkOnSequences(self, inputSequences, inputCategories,
                            tmLearn=True, upLearn=None, classifierLearn=False,
                            verbosity=0, progressInterval=None):
    """
    Runs Union Temporal Pooler network on specified sequence.

    @param inputSequences       One or more sequences of input patterns.
                                Each should be terminated with None.

    @param inputCategories      A sequence of category representations
                                for each element in inputSequences
                                Each should be terminated with None.

    @param tmLearn:   (bool)        Temporal Memory learning mode
    @param upLearn:   (None, bool)  Union Temporal Pooler learning mode.
                                    If None, Union Temporal Pooler will not be
                                    run.
    @param classifierLearn: (bool)  Classifier learning mode

    @param verbosity: (int)         Console output verbosity; >1 also prints
                                    each learned union SDR, >=2 prints traces.

    @param progressInterval: (int)  Interval of console progress updates
                                    in terms of timesteps.
    """
    currentTime = time.time()
    for i in xrange(len(inputSequences)):
      sensorPattern = inputSequences[i]
      inputCategory = inputCategories[i]
      self.runNetworkOnPattern(sensorPattern,
                               tmLearn=tmLearn,
                               upLearn=upLearn,
                               sequenceLabel=inputCategory)

      # None marks a sequence boundary (reset); nothing to learn there.
      if classifierLearn and sensorPattern is not None:
        unionSDR = self.up.getUnionSDR()
        upCellCount = self.up.getColumnDimensions()
        self.classifier.learn(unionSDR, inputCategory, isSparse=upCellCount)
        if verbosity > 1:
          pprint.pprint("{0} is category {1}".format(unionSDR, inputCategory))

      if progressInterval is not None and i > 0 and i % progressInterval == 0:
        elapsed = (time.time() - currentTime) / 60.0
        print ("Ran {0} / {1} elements of sequence in "
               "{2:0.2f} minutes.".format(i, len(inputSequences), elapsed))
        currentTime = time.time()
        print MonitorMixinBase.mmPrettyPrintMetrics(
          self.tm.mmGetDefaultMetrics())

    if verbosity >= 2:
      traces = self.tm.mmGetDefaultTraces(verbosity=verbosity)
      print MonitorMixinBase.mmPrettyPrintTraces(
        traces, breakOnResets=self.tm.mmGetTraceResets())

      if upLearn is not None:
        traces = self.up.mmGetDefaultTraces(verbosity=verbosity)
        print MonitorMixinBase.mmPrettyPrintTraces(
          traces, breakOnResets=self.up.mmGetTraceResets())
      print


  def runNetworkOnPattern(self, sensorPattern, tmLearn=True, upLearn=None,
                          sequenceLabel=None):
    """
    Feed one pattern through TM (and optionally UP). A None pattern resets
    both layers instead.
    """
    if sensorPattern is None:
      self.tm.reset()
      self.up.reset()
    else:
      self.tm.compute(sensorPattern,
                      learn=tmLearn,
                      sequenceLabel=sequenceLabel)

      if upLearn is not None:
        activeCells, predActiveCells, burstingCols, = self.getUnionTemporalPoolerInput(
        )
        self.up.compute(activeCells,
                        predActiveCells,
                        learn=upLearn,
                        sequenceLabel=sequenceLabel)


  def getUnionTemporalPoolerInput(self):
    """
    Gets the Union Temporal Pooler input from the Temporal Memory
    """
    # Dense binary vector over all TM cells: 1 where the cell is active.
    activeCells = numpy.zeros(self.tm.numberOfCells()).astype(realDType)
    activeCells[list(self.tm.activeCellsIndices())] = 1

    # Dense binary vector: 1 where the cell was both predicted and active.
    predictedActiveCells = numpy.zeros(
      self.tm.numberOfCells()).astype(realDType)
    predictedActiveCells[list(self.tm.predictedActiveCellsIndices())] = 1

    # Dense binary vector over columns: 1 where the column burst.
    burstingColumns = numpy.zeros(
      self.tm.numberOfColumns()).astype(realDType)
    burstingColumns[list(self.tm.unpredictedActiveColumns)] = 1

    return activeCells, predictedActiveCells, burstingColumns


  def getBurstingColumnsStats(self):
    """
    Gets statistics on the Temporal Memory's bursting columns.
    Used as a metric of Temporal Memory's learning performance.
    :return: mean, standard deviation, and max of Temporal Memory's bursting
             columns over time
    """
    traceData = self.tm.mmGetTraceUnpredictedActiveColumns().data
    resetData = self.tm.mmGetTraceResets().data
    countTrace = []
    # Skip timesteps that coincide with a reset; bursting there is expected.
    for x in xrange(len(traceData)):
      if not resetData[x]:
        countTrace.append(len(traceData[x]))

    mean = numpy.mean(countTrace)
    stdDev = numpy.std(countTrace)
    maximum = max(countTrace)
    return mean, stdDev, maximum
def testPartitionIdExcluded(self):
  """
  Tests that partitionId properly excludes training data points during
  inference.
  """
  classifier = KNNClassifier(distanceMethod="rawOverlap")
  dimensionality = 40
  sparseA = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  sparseB = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

  denseA = np.zeros(dimensionality)
  denseA[sparseA] = 1.0
  denseB = np.zeros(dimensionality)
  denseB[sparseB] = 1.0

  classifier.learn(sparseA, 0, isSparse=dimensionality, partitionId=0)
  classifier.learn(sparseB, 1, isSparse=dimensionality, partitionId=1)

  # Inference excludes patterns learned under the given partitionId: when a
  # pattern's own partition is excluded, the other stored pattern wins.
  for dense, excludedPartition, expectedCat in ((denseA, 1, 0),
                                                (denseA, 0, 1),
                                                (denseB, 0, 1),
                                                (denseB, 1, 0)):
    cat, _, _, _ = classifier.infer(dense, partitionId=excludedPartition)
    self.assertEquals(cat, expectedCat)

  # Ensure it works even if you invoke learning again. To make it a bit more
  # complex this time we insert A again but now with Id=2.
  classifier.learn(sparseA, 0, isSparse=dimensionality, partitionId=2)

  # Even though the first copy of A is excluded, the second copy of A should
  # not be ignored.
  cat, _, _, _ = classifier.infer(denseA, partitionId=0)
  self.assertEquals(cat, 0)
def simulateKMoreThanOne():
  """A small test with k=3"""
  failures = ""
  LOGGER.info("Testing the sparse KNN Classifier with k=3")
  knn = KNNClassifier(k=3)

  # Six 2-d points: the first three are learned as category 0, the last
  # three as category 1.
  samples = numpy.zeros((6, 2))
  samples[0] = [1.0, 0.0]
  samples[1] = [1.0, 0.2]
  samples[2] = [1.0, 0.2]
  samples[3] = [1.0, 2.0]
  samples[4] = [1.0, 4.0]
  samples[5] = [1.0, 4.5]
  for index in xrange(6):
    knn.learn(samples[index], 0 if index < 3 else 1)

  # With k=3 the winner is the majority category among the three nearest
  # stored points (samples[3]'s neighborhood is dominated by category 0).
  for index, expected in ((0, 0), (2, 0), (3, 0), (5, 1)):
    winner, _inferenceResult, _dist, _categoryDist = knn.infer(samples[index])
    if winner != expected:
      failures += "Inference failed with k=3\n"

  if len(failures) == 0:
    LOGGER.info("Tests passed.")

  return failures
def runTestPCAKNN(self, short = 0):
  """
  Compare a plain k-NN classifier against a PCA-projected k-NN on generated
  data; the two must agree on the winning category for every test pattern.
  """
  LOGGER.info('\nTesting PCA/k-NN classifier')
  LOGGER.info('Mode=%s', short)

  # Problem dimensions: 90% of the patterns train, the remaining 10% test.
  numDims = 10
  numClasses = 10
  k = 10
  numPatternsPerClass = 100
  numPatterns = int(.9 * numClasses * numPatternsPerClass)
  numTests = numClasses * numPatternsPerClass - numPatterns
  numSVDSamples = int(.1 * numPatterns)
  keep = 1

  train_data, train_class, test_data, test_class = pca_knn_data.generate(
      numDims, numClasses, k, numPatternsPerClass, numPatterns, numTests,
      numSVDSamples, keep)

  pca_knn = KNNClassifier(k=k, numSVDSamples=numSVDSamples, numSVDDims=keep)
  knn = KNNClassifier(k=k)

  LOGGER.info('Training PCA k-NN')
  for idx in range(numPatterns):
    knn.learn(train_data[idx], train_class[idx])
    pca_knn.learn(train_data[idx], train_class[idx])

  LOGGER.info('Testing PCA k-NN')
  # Track the plain classifier's outright errors plus every disagreement
  # between the plain and PCA-projected classifiers.
  numWinnerFailures = 0
  numInferenceFailures = 0
  numDistFailures = 0
  numAbsErrors = 0
  for idx in range(numTests):
    winner, inference, dist, categoryDist = knn.infer(test_data[idx])
    pca_winner, pca_inference, pca_dist, pca_categoryDist = pca_knn.infer(
        test_data[idx])
    if winner != test_class[idx]:
      numAbsErrors += 1
    if pca_winner != winner:
      numWinnerFailures += 1
    if (numpy.abs(pca_inference - inference) > 1e-4).any():
      numInferenceFailures += 1
    if (numpy.abs(pca_dist - dist) > 1e-4).any():
      numDistFailures += 1

  # Convert failure counts into success percentages for reporting.
  s0 = 100 * float(numTests - numAbsErrors) / float(numTests)
  s1 = 100 * float(numTests - numWinnerFailures) / float(numTests)
  s2 = 100 * float(numTests - numInferenceFailures) / float(numTests)
  s3 = 100 * float(numTests - numDistFailures) / float(numTests)

  LOGGER.info('PCA/k-NN success rate=%s%s', s0, '%')
  LOGGER.info('Winner success=%s%s', s1, '%')
  LOGGER.info('Inference success=%s%s', s2, '%')
  LOGGER.info('Distance success=%s%s', s3, '%')

  self.assertEqual(s1, 100.0, "PCA/k-NN test failed")
def testGetPartitionId(self):
  """
  Test a sequence of calls to KNN to ensure we can retrieve partition Id:
      - We first learn on some patterns (including one pattern with no
      partitionId in the middle) and test that we can retrieve Ids.
      - We then invoke inference and then check partitionId again.
      - We check incorrect indices to ensure we get an exception.
      - We check the case where the partitionId to be ignored is not in the
      list.
      - We learn on one more pattern and check partitionIds again
      - We remove rows and ensure partitionIds still work
  """
  params = {"distanceMethod": "rawOverlap"}
  classifier = KNNClassifier(**params)

  dimensionality = 40
  a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
  b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)
  c = np.array([1, 2, 3, 14, 16, 19, 22, 24, 33], dtype=np.int32)
  d = np.array([2, 4, 8, 12, 14, 19, 22, 24, 33], dtype=np.int32)
  e = np.array([1, 3, 7, 12, 14, 19, 22, 24, 33], dtype=np.int32)

  denseA = np.zeros(dimensionality)
  denseA[a] = 1.0

  # Learn four patterns; pattern c deliberately has no partitionId.
  classifier.learn(a, 0, isSparse=dimensionality, partitionId=433)
  classifier.learn(b, 1, isSparse=dimensionality, partitionId=213)
  classifier.learn(c, 1, isSparse=dimensionality, partitionId=None)
  classifier.learn(d, 1, isSparse=dimensionality, partitionId=433)

  self.assertEquals(classifier.getPartitionId(0), 433)
  self.assertEquals(classifier.getPartitionId(1), 213)
  self.assertEquals(classifier.getPartitionId(2), None)
  self.assertEquals(classifier.getPartitionId(3), 433)

  # Inference excluding partition 213 (pattern b) should still find a.
  cat, _, _, _ = classifier.infer(denseA, partitionId=213)
  self.assertEquals(cat, 0)

  # Test with patternId not in classifier
  cat, _, _, _ = classifier.infer(denseA, partitionId=666)
  self.assertEquals(cat, 0)

  # Partition Ids should be maintained after inference
  self.assertEquals(classifier.getPartitionId(0), 433)
  self.assertEquals(classifier.getPartitionId(1), 213)
  self.assertEquals(classifier.getPartitionId(2), None)
  self.assertEquals(classifier.getPartitionId(3), 433)

  # Should return exceptions if we go out of bounds
  with self.assertRaises(RuntimeError):
    classifier.getPartitionId(4)
  with self.assertRaises(RuntimeError):
    classifier.getPartitionId(-1)

  # Learn again
  classifier.learn(e, 4, isSparse=dimensionality, partitionId=413)
  self.assertEquals(classifier.getPartitionId(4), 413)

  # Test getPatternIndicesWithPartitionId
  self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                        [0, 3])
  self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(666),
                        [])
  self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                        [4])

  self.assertEquals(classifier.getNumPartitionIds(), 3)

  # Check that the full set of partition ids is what we expect
  # (a None partitionId is stored internally as np.inf).
  self.assertItemsEqual(classifier.getPartitionIdPerPattern(),
                        [433, 213, np.inf, 433, 413])
  self.assertItemsEqual(classifier.getPartitionIdList(), [433, 413, 213])

  # Remove two rows - all indices shift down
  self.assertEquals(classifier._removeRows([0, 2]), 2)
  self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                        [1])
  self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                        [2])

  # Remove another row and check number of partitions have decreased
  classifier._removeRows([0])
  self.assertEquals(classifier.getNumPartitionIds(), 2)

  # Check that the full set of partition ids is what we expect
  self.assertItemsEqual(classifier.getPartitionIdPerPattern(), [433, 413])
  self.assertItemsEqual(classifier.getPartitionIdList(), [433, 413])
def testDistanceMetrics(self):
  """
  Exercise every supported distance metric against the same two stored
  prototypes, switching the metric via attribute mutation between branches.
  """
  classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0)

  dimensionality = 40
  protoA = np.array([0, 1, 3, 7, 11], dtype=np.int32)
  protoB = np.array([20, 28, 30], dtype=np.int32)

  classifier.learn(protoA, 0, isSparse=dimensionality)
  classifier.learn(protoB, 0, isSparse=dimensionality)

  # input is an arbitrary point, close to protoA, orthogonal to protoB
  # NOTE(review): `input` shadows the builtin; left unchanged here.
  input = np.zeros(dimensionality)
  input[:4] = 1.0
  # input0 is used to test that the distance from a point to itself is 0
  input0 = np.zeros(dimensionality)
  input0[protoA] = 1.0

  # Test l2 norm metric
  _, _, dist, _ = classifier.infer(input)
  l2Distances = [0.65465367, 1.0]
  for actual, predicted in zip(l2Distances, dist):
    self.assertAlmostEqual(
      actual, predicted, places=5,
      msg="l2 distance norm is not calculated as expected.")

  _, _, dist0, _ = classifier.infer(input0)
  self.assertEqual(
    0.0, dist0[0], msg="l2 norm did not calculate 0 distance as expected.")

  # Test l1 norm metric
  classifier.distanceNorm = 1.0
  _, _, dist, _ = classifier.infer(input)
  l1Distances = [0.42857143, 1.0]
  for actual, predicted in zip(l1Distances, dist):
    self.assertAlmostEqual(
      actual, predicted, places=5,
      msg="l1 distance norm is not calculated as expected.")

  _, _, dist0, _ = classifier.infer(input0)
  self.assertEqual(
    0.0, dist0[0], msg="l1 norm did not calculate 0 distance as expected.")

  # Test raw overlap metric
  classifier.distanceMethod = "rawOverlap"
  _, _, dist, _ = classifier.infer(input)
  rawOverlaps = [1, 4]
  for actual, predicted in zip(rawOverlaps, dist):
    self.assertEqual(
      actual, predicted, msg="Raw overlap is not calculated as expected.")

  _, _, dist0, _ = classifier.infer(input0)
  self.assertEqual(
    0.0, dist0[0],
    msg="Raw overlap did not calculate 0 distance as expected.")

  # Test pctOverlapOfInput metric
  classifier.distanceMethod = "pctOverlapOfInput"
  _, _, dist, _ = classifier.infer(input)
  pctOverlaps = [0.25, 1.0]
  for actual, predicted in zip(pctOverlaps, dist):
    self.assertAlmostEqual(
      actual, predicted, places=5,
      msg="pctOverlapOfInput is not calculated as expected.")

  _, _, dist0, _ = classifier.infer(input0)
  self.assertEqual(
    0.0, dist0[0],
    msg="pctOverlapOfInput did not calculate 0 distance as expected.")

  # Test pctOverlapOfProto metric
  classifier.distanceMethod = "pctOverlapOfProto"
  _, _, dist, _ = classifier.infer(input)
  pctOverlaps = [0.40, 1.0]
  for actual, predicted in zip(pctOverlaps, dist):
    self.assertAlmostEqual(
      actual, predicted, places=5,
      msg="pctOverlapOfProto is not calculated as expected.")

  _, _, dist0, _ = classifier.infer(input0)
  self.assertEqual(
    0.0, dist0[0],
    msg="pctOverlapOfProto did not calculate 0 distance as expected.")

  # Test pctOverlapOfLarger metric
  classifier.distanceMethod = "pctOverlapOfLarger"
  _, _, dist, _ = classifier.infer(input)
  pctOverlaps = [0.40, 1.0]
  for actual, predicted in zip(pctOverlaps, dist):
    self.assertAlmostEqual(
      actual, predicted, places=5,
      msg="pctOverlapOfLarger is not calculated as expected.")

  _, _, dist0, _ = classifier.infer(input0)
  self.assertEqual(
    0.0, dist0[0],
    msg="pctOverlapOfLarger did not calculate 0 distance as expected.")
class ClassificationModelFingerprint(ClassificationModel):
  """
  Class to run the survey response classification task with Cortical.io
  fingerprint encodings.

  From the experiment runner, the methods expect to be fed one sample at a
  time.
  """

  def __init__(self, verbosity=1, numLabels=3,
               fingerprintType=EncoderTypes.document):
    """
    @param verbosity        (int)           Console verbosity; the kNN
                                            classifier gets verbosity - 1.
    @param numLabels        (int)           Number of classification labels;
                                            also used as k for the classifier.
    @param fingerprintType  (EncoderTypes)  Either EncoderTypes.document or
                                            EncoderTypes.word.
    @raises ValueError if fingerprintType is not a supported encoding.
    """
    super(ClassificationModelFingerprint, self).__init__(verbosity, numLabels)

    # Init kNN classifier and Cortical.io encoder; need valid API key (see
    # CioEncoder init for details).
    self.classifier = KNNClassifier(k=numLabels,
                                    distanceMethod='rawOverlap',
                                    exact=False,
                                    verbosity=verbosity-1)

    # BUG FIX: the previous check
    #   `fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)`
    # always evaluated to `fingerprintType is False` and could never be True,
    # so invalid encoder types slipped through silently. Use a membership test.
    if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
      raise ValueError("Invalid type of fingerprint encoding; see the "
                       "EncoderTypes class for eligible types.")

    self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                              fingerprintType=fingerprintType)
    self.n = self.encoder.n
    self.w = int((self.encoder.targetSparsity / 100) * self.n)


  def encodePattern(self, sample):
    """
    Encode an SDR of the input string by querying the Cortical.io API. If the
    client returns None, we create a random SDR with the model's dimensions n
    and w.

    @param sample     (list)        Tokenized sample, where each item is a
                                    string token.
    @return fp        (dict)        The sample text, sparsity, and bitmap.
    Example return dict:
      {
        "text": "Example text",
        "sparsity": 0.03,
        "bitmap": numpy.array([])
      }
    """
    sample = " ".join(sample)
    fpInfo = self.encoder.encode(sample)
    if fpInfo:
      # Single-term responses carry "term" instead of "text".
      fp = {"text": fpInfo["text"] if "text" in fpInfo else fpInfo["term"],
            "sparsity": fpInfo["sparsity"],
            "bitmap": numpy.array(fpInfo["fingerprint"]["positions"])}
    else:
      # Encoder gave us nothing back; fall back to a random encoding at the
      # model's target sparsity so downstream code still gets a bitmap.
      fp = {"text": sample,
            "sparsity": float(self.w) / self.n,
            "bitmap": self.encodeRandomly(sample)}

    return fp


  def resetModel(self):
    """Reset the model by clearing the classifier."""
    self.classifier.clear()


  def trainModel(self, samples, labels):
    """
    Train the classifier on the input samples and labels.

    @param samples    (list)    List of dictionaries containing the sample
                                text, sparsity, and bitmap.
    @param labels     (list)    List of numpy arrays containing the reference
                                indices for the classifications of each sample.
    """
    for sample, sampleLabels in zip(samples, labels):
      # Samples with an empty bitmap (no encoding available) can't be learned.
      if sample["bitmap"].any():
        for label in sampleLabels:
          self.classifier.learn(sample["bitmap"], label, isSparse=self.n)


  def testModel(self, sample, numLabels=3):
    """
    Test the kNN classifier on the input sample. Returns the classification
    most frequent amongst the classifications of the sample's individual
    tokens. We ignore the terms that are unclassified, picking the most
    frequent classification among those that are detected.

    @param sample         (dict)          The sample text, sparsity, and
                                          bitmap.
    @param numLabels      (int)           Number of predicted classifications.
    @return               (numpy array)   The numLabels most-frequent
                                          classifications for the data samples;
                                          values are int or empty.
    """
    (_, inferenceResult, _, _) = self.classifier.infer(
      self._densifyPattern(sample["bitmap"]))
    return self.getWinningLabels(inferenceResult, numLabels)