def testOverlapDistanceMethodBadSparsity(self):
    """Sparsity (input dimensionality) less than input array"""
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap"})
    dimensionality = 40

    pattern = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

    # A declared dimensionality of 20 is too small: the pattern contains
    # ON bits (23, 29) at or beyond that bound, so learn() must reject it.
    with self.assertRaises(RuntimeError):
        classifier.learn(pattern, 0, isSparse=20)
def simulateCategories(numSamples=100, numDimensions=500):
    """Simulate running KNN classifier on many disjoint categories"""
    failures = ""
    LOGGER.info("Testing the sparse KNN Classifier on many disjoint categories")
    knn = KNNClassifier(k=1, distanceNorm=1.0, useSparseMemory=True)

    # Train on randomly chosen categories (even ids in [50, 148]).
    for _ in range(0, numSamples):
        cat = 2 * numpy.random.randint(0, 50) + 50
        knn.learn(createPattern(cat, numDimensions), cat)

    # Go through each category and ensure we have at least one from each!
    for i in range(0, 50):
        cat = 2 * i + 50
        knn.learn(createPattern(cat, numDimensions), cat)

    # Inference pass: every random probe should map back to its category.
    errors = 0
    for _ in range(0, numSamples):
        cat = 2 * numpy.random.randint(0, 50) + 50
        v = createPattern(cat, numDimensions)
        inferCat, _kir, _kd, _kcd = knn.infer(v)
        if inferCat != cat:
            LOGGER.info("Mistake with %s %s %s %s %s", v[v.nonzero()],
                        "mapped to category", inferCat,
                        "instead of category", cat)
            LOGGER.info(" %s", v.nonzero())
            errors += 1
    if errors != 0:
        failures += "Failure in handling non-consecutive category indices\n"

    # Test closest methods
    errors = 0
    for _ in range(0, 10):
        cat = 2 * numpy.random.randint(0, 50) + 50
        v = createPattern(cat, numDimensions)
        closest = knn.closestTrainingPattern(v, cat)
        if cat not in closest.nonzero()[0]:
            LOGGER.info("Mistake %s %s", closest.nonzero(), v.nonzero())
            LOGGER.info("%s %s", closest[closest.nonzero()], v[v.nonzero()])
            errors += 1
    if errors != 0:
        failures += "Failure in closestTrainingPattern method\n"

    return failures, knn
def __init__(self, verbosity=1):
    super(ClassificationModelRandomSDR, self).__init__(verbosity)

    # Exact-match kNN classifier with defaults: standard k is 1 and the
    # distance metric is Euclidean; pass distanceMethod='rawOverlap' for
    # overlap instead. The classifier logs one level below the model.
    self.classifier = KNNClassifier(exact=True, verbosity=verbosity - 1)

    # SDR dimensions: n total bits, w active bits.
    self.n = 100
    self.w = 20
def testMinSparsity(self):
    """Tests overlap distance with min sparsity"""
    # Require sparsity >= 20%
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap",
                                  "minSparsity": 0.2})
    dimensionality = 30

    # 30%-sparse patterns: always accepted.
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 21, 28], dtype=np.int32)
    # This has 20% sparsity and should be inserted
    c = np.array([2, 3, 8, 11, 14, 18], dtype=np.int32)
    # This has 17% sparsity and should NOT be inserted
    d = np.array([2, 3, 8, 11, 18], dtype=np.int32)

    # The stored-pattern count only grows for patterns at/above threshold.
    self.assertEquals(classifier.learn(a, 0, isSparse=dimensionality), 1)
    self.assertEquals(classifier.learn(b, 1, isSparse=dimensionality), 2)
    self.assertEquals(classifier.learn(c, 1, isSparse=dimensionality), 3)
    self.assertEquals(classifier.learn(d, 1, isSparse=dimensionality), 3)

    def asDense(indices):
        # Expand a sparse index array into a dense binary vector.
        dense = np.zeros(dimensionality)
        dense[indices] = 1.0
        return dense

    # Test that inference ignores low sparsity vectors but not others
    e = np.array([2, 4, 5, 6, 8, 12, 14, 18, 20], dtype=np.int32)
    cat, inference, _, _ = classifier.infer(asDense(e))
    self.assertIsNotNone(cat)
    self.assertGreater(inference.sum(), 0.0)

    # This has 20% sparsity and should be used for inference
    f = np.array([2, 5, 8, 11, 14, 18], dtype=np.int32)
    cat, inference, _, _ = classifier.infer(asDense(f))
    self.assertIsNotNone(cat)
    self.assertGreater(inference.sum(), 0.0)

    # This has 17% sparsity and should return null inference results
    g = np.array([2, 3, 8, 11, 19], dtype=np.int32)
    cat, inference, _, _ = classifier.infer(asDense(g))
    self.assertIsNone(cat)
    self.assertEqual(inference.sum(), 0.0)
def testOverlapDistanceMethodStandardUnsorted(self):
    """If sparse representation indices are unsorted expect error."""
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap"})
    dimensionality = 40

    # Both patterns contain out-of-order indices, which learn() rejects.
    unsortedA = np.array([29, 3, 7, 11, 13, 17, 19, 23, 1], dtype=np.int32)
    unsortedB = np.array([2, 4, 20, 12, 14, 18, 8, 28, 30], dtype=np.int32)

    for category, pattern in enumerate((unsortedA, unsortedB)):
        with self.assertRaises(RuntimeError):
            classifier.learn(pattern, category, isSparse=dimensionality)
def testExtractVectorsFromKNN(self):
    # Sparse random binary vectors (~10% ON bits).
    vectors = numpy.random.rand(10, 25) < 0.1

    # Populate KNN: every row learned under category 0.
    knn = KNNClassifier()
    for row in vectors:
        knn.learn(row, 0)

    # Extract vectors from KNN and check the learned rows round-trip.
    sparseDataMatrix = HierarchicalClustering._extractVectorsFromKNN(knn)

    self.assertEqual(
        sorted(sparseDataMatrix.todense().tolist()),
        sorted(vectors.tolist())
    )
def testOverlapDistanceMethodEmptyArray(self):
    """Tests case where pattern has no ON bits"""
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap"})
    dimensionality = 40
    emptyPattern = np.array([], dtype=np.int32)

    # Learning an all-zero pattern still stores exactly one pattern.
    self.assertEquals(
        classifier.learn(emptyPattern, 0, isSparse=dimensionality), 1)

    # Inference on the corresponding dense all-zero vector recovers it.
    denseEmpty = np.zeros(dimensionality)
    denseEmpty[emptyPattern] = 1.0
    cat, _, _, _ = classifier.infer(denseEmpty)
    self.assertEquals(cat, 0)
def __init__(self, n=100, w=20, verbosity=1, classifierMetric="rawOverlap",
             k=1, **kwargs):
    super(ClassificationModelKeywords, self).__init__(**kwargs)

    # Exact-match kNN over keyword SDRs; the classifier logs one verbosity
    # level below the model itself.
    self.classifier = KNNClassifier(exact=True,
                                    distanceMethod=classifierMetric,
                                    k=k,
                                    verbosity=verbosity - 1)

    # SDR dimensions: n total bits, w active bits.
    self.n = n
    self.w = w
def __init__(self, n=100, w=20, verbosity=1, numLabels=3,
             modelDir="ClassificationModelKeywords"):
    super(ClassificationModelKeywords, self).__init__(
        n, w, verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

    # Exact-match kNN over raw overlap; k matches the number of labels so
    # inference considers one neighbor per possible label.
    self.classifier = KNNClassifier(exact=True,
                                    distanceMethod="rawOverlap",
                                    k=numLabels,
                                    verbosity=verbosity - 1)
def testOverlapDistanceMethodInconsistentDimensionality(self):
    """Inconsistent sparsity (input dimensionality)"""
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap"})
    dimensionality = 40
    pattern = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

    # Learn with incorrect dimensionality (31): it covers every ON bit
    # but is inconsistent with the dimensionality used for inference.
    self.assertEquals(classifier.learn(pattern, 0, isSparse=31), 1)

    # Inference with the larger (40-wide) dense vector still succeeds.
    densePattern = np.zeros(dimensionality)
    densePattern[pattern] = 1.0
    cat, _, _, _ = classifier.infer(densePattern)
    self.assertEquals(cat, 0)
def testOverlapDistanceMethod_ClassifySparse(self):
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap"})
    dimensionality = 40

    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    classifier.learn(a, 0, isSparse=dimensionality)
    classifier.learn(b, 1, isSparse=dimensionality)

    # TODO Test case where infer is passed a sparse representation after
    # infer() has been extended to handle sparse and dense
    for pattern, expected in ((a, 0), (b, 1)):
        cat, _, _, _ = classifier.infer(pattern)
        self.assertEquals(cat, expected)
def simulateKMoreThanOne():
    """A small test with k=3"""
    failures = ""
    LOGGER.info("Testing the sparse KNN Classifier with k=3")
    knn = KNNClassifier(k=3)

    # Six 2-d points: the first three learned as category 0, the rest as 1.
    points = numpy.zeros((6, 2))
    points[0] = [1.0, 0.0]
    points[1] = [1.0, 0.2]
    points[2] = [1.0, 0.2]
    points[3] = [1.0, 2.0]
    points[4] = [1.0, 4.0]
    points[5] = [1.0, 4.5]

    for idx in range(6):
        knn.learn(points[idx], 0 if idx < 3 else 1)

    # Probe a few training points; winner is decided by the 3 nearest.
    for idx, expected in ((0, 0), (2, 0), (3, 0), (5, 1)):
        winner, _inferenceResult, _dist, _categoryDist = knn.infer(points[idx])
        if winner != expected:
            failures += "Inference failed with k=3\n"

    if len(failures) == 0:
        LOGGER.info("Tests passed.")

    return failures
def testPartitionIdExcluded(self):
    """
    Tests that paritionId properly excludes training data points during
    inference
    """
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap"})
    dimensionality = 40

    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0
    denseB = np.zeros(dimensionality)
    denseB[b] = 1.0

    classifier.learn(a, 0, isSparse=dimensionality, partitionId=0)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=1)

    # Excluding the other pattern's partition keeps the true winner;
    # excluding the query's own partition flips the result.
    for dense, excluded, expected in ((denseA, 1, 0), (denseA, 0, 1),
                                      (denseB, 0, 1), (denseB, 1, 0)):
        cat, _, _, _ = classifier.infer(dense, partitionId=excluded)
        self.assertEquals(cat, expected)

    # Ensure it works even if you invoke learning again. To make it a bit
    # more complex this time we insert A again but now with Id=2
    classifier.learn(a, 0, isSparse=dimensionality, partitionId=2)

    # Even though first A should be ignored, the second instance of A should
    # not be ignored.
    cat, _, _, _ = classifier.infer(denseA, partitionId=0)
    self.assertEquals(cat, 0)
def __init__(self, verbosity=1, numLabels=3,
             fingerprintType=EncoderTypes.document):
    """Initialize the fingerprint classification model.

    Args:
        verbosity: logging level; the kNN classifier logs one level lower.
        numLabels: number of classification labels; also used as k.
        fingerprintType: one of EncoderTypes.document / EncoderTypes.word.

    Raises:
        ValueError: if fingerprintType is not a supported encoder type.
    """
    super(ClassificationModelFingerprint, self).__init__(verbosity, numLabels)

    # Init kNN classifier and Cortical.io encoder; need valid API key (see
    # CioEncoder init for details).
    self.classifier = KNNClassifier(k=numLabels,
                                    distanceMethod='rawOverlap',
                                    exact=False,
                                    verbosity=verbosity - 1)

    # BUG FIX: the original check, `fingerprintType is
    # (not EncoderTypes.document or not EncoderTypes.word)`, reduced to
    # `fingerprintType is False` and therefore never raised. Use a proper
    # membership test over the supported encoder types.
    if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
        raise ValueError("Invalid type of fingerprint encoding; see the "
                         "EncoderTypes class for eligible types.")

    self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                              fingerprintType=fingerprintType)
    self.n = self.encoder.n
    self.w = int((self.encoder.targetSparsity / 100) * self.n)
def __init__(self, tmOverrides=None, upOverrides=None,
             classifierOverrides=None, seed=42, consoleVerbosity=0):
    # Builds the three-stage pipeline: temporal memory -> union temporal
    # pooler -> kNN classifier. Each *Overrides dict is layered on top of
    # the corresponding class-level DEFAULT_* params; `seed` is forced into
    # both TM and UP params so runs are repeatable.
    # NOTE(review): consoleVerbosity is accepted but not used here —
    # presumably consumed elsewhere; confirm.
    print "Initializing Temporal Memory..."
    params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
    params.update(tmOverrides or {})
    params["seed"] = seed
    self.tm = MonitoredFastExtendedTemporalMemory(mmName="TM", **params)

    print "Initializing Union Temporal Pooler..."
    start = time.time()
    params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
    params.update(upOverrides or {})
    # The pooler's input is the TM's full cell population; potentialRadius
    # spans the whole input space.
    params["inputDimensions"] = [self.tm.numberOfCells()]
    params["potentialRadius"] = self.tm.numberOfCells()
    params["seed"] = seed
    self.up = MonitoredUnionTemporalPooler(mmName="UP", **params)
    elapsed = int(time.time() - start)
    print "Total time: {0:2} seconds.".format(elapsed)

    print "Initializing KNN Classifier..."
    params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
    params.update(classifierOverrides or {})
    self.classifier = KNNClassifier(**params)
def __init__(self, verbosity=1, numLabels=3,
             modelDir="ClassificationModelFingerprint",
             fingerprintType=EncoderTypes.word,
             unionSparsity=0.20,
             retinaScaling=1.0,
             retina="en_associative",
             apiKey=None,
             classifierMetric="rawOverlap",
             cacheRoot=None):
    """Initialize the fingerprint classification model.

    Args:
        verbosity: logging level; the kNN classifier logs one level lower.
        numLabels: number of classification labels; also used as k.
        modelDir: directory used by the base model for persistence.
        fingerprintType: one of EncoderTypes.document / EncoderTypes.word.
        unionSparsity, retinaScaling, retina, apiKey: CioEncoder settings.
        classifierMetric: distance metric for the kNN classifier.
        cacheRoot: root for the encoder cache; defaults to this file's dir.

    Raises:
        ValueError: if fingerprintType is not a supported encoder type.
    """
    super(ClassificationModelFingerprint, self).__init__(
        verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

    # Init kNN classifier and Cortical.io encoder; need valid API key (see
    # CioEncoder init for details).
    self.classifier = KNNClassifier(k=numLabels,
                                    distanceMethod=classifierMetric,
                                    exact=False,
                                    verbosity=verbosity - 1)

    # BUG FIX: the original check, `fingerprintType is
    # (not EncoderTypes.document or not EncoderTypes.word)`, reduced to
    # `fingerprintType is False` and therefore never raised. Use a proper
    # membership test over the supported encoder types.
    if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
        raise ValueError("Invalid type of fingerprint encoding; see the "
                         "EncoderTypes class for eligible types.")

    cacheRoot = cacheRoot or os.path.dirname(os.path.realpath(__file__))
    self.encoder = CioEncoder(retinaScaling=retinaScaling,
                              cacheDir=os.path.join(cacheRoot, "CioCache"),
                              fingerprintType=fingerprintType,
                              unionSparsity=unionSparsity,
                              retina=retina,
                              apiKey=apiKey)
def testOverlapDistanceMethodStandard(self):
    """Tests standard learning case for raw overlap"""
    classifier = KNNClassifier(**{"distanceMethod": "rawOverlap"})
    dimensionality = 40

    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    # The stored-pattern count grows by one per learned pattern.
    self.assertEquals(classifier.learn(a, 0, isSparse=dimensionality), 1)
    self.assertEquals(classifier.learn(b, 1, isSparse=dimensionality), 2)

    # The dense version of each learned pattern infers its own category.
    for sparse, expected in ((a, 0), (b, 1)):
        dense = np.zeros(dimensionality)
        dense[sparse] = 1.0
        cat, _, _, _ = classifier.infer(dense)
        self.assertEquals(cat, expected)
def __init__(self, tmOverrides=None, upOverrides=None,
             classifierOverrides=None, seed=42, consoleVerbosity=0):
    # Builds the three-stage pipeline: temporal memory -> union pooler ->
    # kNN classifier. Each *Overrides dict is layered on top of the
    # corresponding class-level DEFAULT_* params; `seed` is forced into
    # both TM and UP params so runs are repeatable.
    # NOTE(review): consoleVerbosity is accepted but not used here —
    # presumably consumed elsewhere; confirm.
    print "Initializing Temporal Memory..."
    params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
    params.update(tmOverrides or {})
    params["seed"] = seed
    self.tm = MonitoredFastGeneralTemporalMemory(mmName="TM", **params)

    print "Initializing Union Pooler..."
    params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
    params.update(upOverrides or {})
    # The pooler's input is the TM's full cell population; potentialRadius
    # spans the whole input space.
    params["inputDimensions"] = [self.tm.numberOfCells()]
    params["potentialRadius"] = self.tm.numberOfCells()
    params["seed"] = seed
    self.up = MonitoredUnionPooler(mmName="UP", **params)

    print "Initializing KNN Classifier..."
    params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
    params.update(classifierOverrides or {})
    self.classifier = KNNClassifier(**params)
def runTestKNNClassifier(self, short=0):
    """ Test the KNN classifier in this module. short can be:
        0 (short), 1 (medium), or 2 (long)
    """
    failures = ""
    if short != 2:
        numpy.random.seed(42)
    else:
        # Long mode: use a fresh time-based seed, and append it to the
        # 'seedval' file so a failing run can be reproduced later.
        seed_value = int(time.time())
        # seed_value = 1276437656
        #seed_value = 1277136651
        numpy.random.seed(seed_value)
        LOGGER.info('Seed used: %d', seed_value)
        f = open('seedval', 'a')
        f.write(str(seed_value))
        f.write('\n')
        f.close()

    failures += simulateKMoreThanOne()

    LOGGER.info("\nTesting KNN Classifier on dense patterns")
    numPatterns, numClasses = getNumTestPatterns(short)
    patterns = numpy.random.rand(numPatterns, 100)
    patternDict = dict()

    # Assume there are no repeated patterns -- if there are, then
    # numpy.random would be completely broken.
    for i in xrange(numPatterns):
        # NOTE(review): randint(0, numClasses - 1) draws from
        # [0, numClasses - 2], so the last class is never assigned —
        # confirm whether this is intentional.
        randCategory = numpy.random.randint(0, numClasses - 1)
        patternDict[i] = dict()
        patternDict[i]['pattern'] = patterns[i]
        patternDict[i]['category'] = randCategory

    LOGGER.info("\nTesting KNN Classifier with L2 norm")

    knn = KNNClassifier(k=1)
    failures += simulateClassifier(knn, patternDict, \
        "KNN Classifier with L2 norm test")

    LOGGER.info("\nTesting KNN Classifier with L1 norm")

    knnL1 = KNNClassifier(k=1, distanceNorm=1.0)
    failures += simulateClassifier(knnL1, patternDict, \
        "KNN Classifier with L1 norm test")

    # Build a second, sparse-binary pattern set, keyed by the stringified
    # pattern so duplicate patterns collapse to one entry.
    numPatterns, numClasses = getNumTestPatterns(short)
    patterns = (numpy.random.rand(numPatterns, 25) > 0.7).astype(RealNumpyDType)
    patternDict = dict()

    for i in patterns:
        iString = str(i.tolist())
        if not patternDict.has_key(iString):
            randCategory = numpy.random.randint(0, numClasses - 1)
            patternDict[iString] = dict()
            patternDict[iString]['pattern'] = i
            patternDict[iString]['category'] = randCategory

    LOGGER.info("\nTesting KNN on sparse patterns")

    knnDense = KNNClassifier(k=1)
    failures += simulateClassifier(knnDense, patternDict, \
        "KNN Classifier on sparse pattern test")

    self.assertEqual(len(failures), 0, "Tests failed: \n" + failures)

    # Long mode: record success in the same seed log.
    if short == 2:
        f = open('seedval', 'a')
        f.write('Pass\n')
        f.close()
def testGetPartitionId(self):
    """
    Test a sequence of calls to KNN to ensure we can retrieve partition Id:
    - We first learn on some patterns (including one pattern with no
      partitionId in the middle) and test that we can retrieve Ids.
    - We then invoke inference and then check partitionId again.
    - We check incorrect indices to ensure we get an exception.
    - We check the case where the partitionId to be ignored is not in the
      list.
    - We learn on one more pattern and check partitionIds again
    - We remove rows and ensure partitionIds still work
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)
    c = np.array([1, 2, 3, 14, 16, 19, 22, 24, 33], dtype=np.int32)
    d = np.array([2, 4, 8, 12, 14, 19, 22, 24, 33], dtype=np.int32)
    e = np.array([1, 3, 7, 12, 14, 19, 22, 24, 33], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    # Pattern 2 deliberately has no partition id (None).
    classifier.learn(a, 0, isSparse=dimensionality, partitionId=433)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=213)
    classifier.learn(c, 1, isSparse=dimensionality, partitionId=None)
    classifier.learn(d, 1, isSparse=dimensionality, partitionId=433)

    self.assertEquals(classifier.getPartitionId(0), 433)
    self.assertEquals(classifier.getPartitionId(1), 213)
    self.assertEquals(classifier.getPartitionId(2), None)
    self.assertEquals(classifier.getPartitionId(3), 433)

    # Excluding partition 213 (pattern b) leaves a as the winner.
    cat, _, _, _ = classifier.infer(denseA, partitionId=213)
    self.assertEquals(cat, 0)

    # Test with patternId not in classifier
    cat, _, _, _ = classifier.infer(denseA, partitionId=666)
    self.assertEquals(cat, 0)

    # Partition Ids should be maintained after inference
    self.assertEquals(classifier.getPartitionId(0), 433)
    self.assertEquals(classifier.getPartitionId(1), 213)
    self.assertEquals(classifier.getPartitionId(2), None)
    self.assertEquals(classifier.getPartitionId(3), 433)

    # Should return exceptions if we go out of bounds
    with self.assertRaises(RuntimeError):
        classifier.getPartitionId(4)
    with self.assertRaises(RuntimeError):
        classifier.getPartitionId(-1)

    # Learn again
    classifier.learn(e, 4, isSparse=dimensionality, partitionId=413)
    self.assertEquals(classifier.getPartitionId(4), 413)

    # Test getPatternIndicesWithPartitionId
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                          [0, 3])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(666),
                          [])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                          [4])

    self.assertEquals(classifier.getNumPartitionIds(), 3)

    # Check that the full set of partition ids is what we expect
    # (the None id appears as np.inf in the per-pattern list).
    self.assertItemsEqual(classifier.getPartitionIdPerPattern(),
                          [433, 213, np.inf, 433, 413])
    self.assertItemsEqual(classifier.getPartitionIdList(), [433, 413, 213])

    # Remove two rows - all indices shift down
    self.assertEquals(classifier._removeRows([0, 2]), 2)
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                          [1])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                          [2])

    # Remove another row and check number of partitions have decreased
    classifier._removeRows([0])
    self.assertEquals(classifier.getNumPartitionIds(), 2)

    # Check that the full set of partition ids is what we expect
    self.assertItemsEqual(classifier.getPartitionIdPerPattern(), [433, 413])
    self.assertItemsEqual(classifier.getPartitionIdList(), [433, 413])
def runTestPCAKNN(self, short = 0):
    # Compares a plain kNN classifier against a PCA-reduced kNN
    # (numSVDDims=keep=1) on synthetic data and requires the two to agree
    # on every winner.
    LOGGER.info('\nTesting PCA/k-NN classifier')
    LOGGER.info('Mode=%s', short)

    # 90% of the samples train, the remainder test; the SVD is fit on 10%
    # of the training set.
    numDims = 10
    numClasses = 10
    k = 10
    numPatternsPerClass = 100
    numPatterns = int(.9 * numClasses * numPatternsPerClass)
    numTests = numClasses * numPatternsPerClass - numPatterns
    numSVDSamples = int(.1 * numPatterns)
    keep = 1

    train_data, train_class, test_data, test_class = \
        pca_knn_data.generate(numDims, numClasses, k, numPatternsPerClass,
                              numPatterns, numTests, numSVDSamples, keep)

    pca_knn = KNNClassifier(k=k, numSVDSamples=numSVDSamples,
                            numSVDDims=keep)
    knn = KNNClassifier(k=k)

    LOGGER.info('Training PCA k-NN')

    for i in range(numPatterns):
        knn.learn(train_data[i], train_class[i])
        pca_knn.learn(train_data[i], train_class[i])

    LOGGER.info('Testing PCA k-NN')

    numWinnerFailures = 0
    numInferenceFailures = 0
    numDistFailures = 0
    numAbsErrors = 0

    for i in range(numTests):
        # Run both classifiers on the same test vector and tally every
        # discrepancy between them (winner, inference vector, distances).
        winner, inference, dist, categoryDist = knn.infer(test_data[i])
        pca_winner, pca_inference, pca_dist, pca_categoryDist \
            = pca_knn.infer(test_data[i])

        if winner != test_class[i]:
            numAbsErrors += 1
        if pca_winner != winner:
            numWinnerFailures += 1
        if (numpy.abs(pca_inference - inference) > 1e-4).any():
            numInferenceFailures += 1
        if (numpy.abs(pca_dist - dist) > 1e-4).any():
            numDistFailures += 1

    # Convert failure counts to success percentages.
    s0 = 100 * float(numTests - numAbsErrors) / float(numTests)
    s1 = 100 * float(numTests - numWinnerFailures) / float(numTests)
    s2 = 100 * float(numTests - numInferenceFailures) / float(numTests)
    s3 = 100 * float(numTests - numDistFailures) / float(numTests)

    LOGGER.info('PCA/k-NN success rate=%s%s', s0, '%')
    LOGGER.info('Winner success=%s%s', s1, '%')
    LOGGER.info('Inference success=%s%s', s2, '%')
    LOGGER.info('Distance success=%s%s', s3, '%')

    # Only winner agreement is a hard requirement.
    self.assertEqual(s1, 100.0, "PCA/k-NN test failed")
def testDistanceMetrics(self):
    # Exercises every supported distance metric on one classifier by
    # mutating distanceNorm / distanceMethod between inference calls.
    # The two prototypes and the query vectors stay fixed throughout, so
    # the expected distances below are metric-specific constants.
    classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0)

    dimensionality = 40
    protoA = np.array([0, 1, 3, 7, 11], dtype=np.int32)
    protoB = np.array([20, 28, 30], dtype=np.int32)

    classifier.learn(protoA, 0, isSparse=dimensionality)
    classifier.learn(protoB, 0, isSparse=dimensionality)

    # input is an arbitrary point, close to protoA, orthogonal to protoB
    input = np.zeros(dimensionality)
    input[:4] = 1.0
    # input0 is used to test that the distance from a point to itself is 0
    input0 = np.zeros(dimensionality)
    input0[protoA] = 1.0

    # Test l2 norm metric
    _, _, dist, _ = classifier.infer(input)
    l2Distances = [0.65465367, 1.0]
    for actual, predicted in zip(l2Distances, dist):
        self.assertAlmostEqual(
            actual, predicted, places=5,
            msg="l2 distance norm is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(input0)
    self.assertEqual(
        0.0, dist0[0],
        msg="l2 norm did not calculate 0 distance as expected.")

    # Test l1 norm metric
    classifier.distanceNorm = 1.0
    _, _, dist, _ = classifier.infer(input)
    l1Distances = [0.42857143, 1.0]
    for actual, predicted in zip(l1Distances, dist):
        self.assertAlmostEqual(
            actual, predicted, places=5,
            msg="l1 distance norm is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(input0)
    self.assertEqual(
        0.0, dist0[0],
        msg="l1 norm did not calculate 0 distance as expected.")

    # Test raw overlap metric
    classifier.distanceMethod = "rawOverlap"
    _, _, dist, _ = classifier.infer(input)
    rawOverlaps = [1, 4]
    for actual, predicted in zip(rawOverlaps, dist):
        self.assertEqual(
            actual, predicted,
            msg="Raw overlap is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(input0)
    self.assertEqual(
        0.0, dist0[0],
        msg="Raw overlap did not calculate 0 distance as expected.")

    # Test pctOverlapOfInput metric
    classifier.distanceMethod = "pctOverlapOfInput"
    _, _, dist, _ = classifier.infer(input)
    pctOverlaps = [0.25, 1.0]
    for actual, predicted in zip(pctOverlaps, dist):
        self.assertAlmostEqual(
            actual, predicted, places=5,
            msg="pctOverlapOfInput is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(input0)
    self.assertEqual(
        0.0, dist0[0],
        msg="pctOverlapOfInput did not calculate 0 distance as expected.")

    # Test pctOverlapOfProto metric
    classifier.distanceMethod = "pctOverlapOfProto"
    _, _, dist, _ = classifier.infer(input)
    pctOverlaps = [0.40, 1.0]
    for actual, predicted in zip(pctOverlaps, dist):
        self.assertAlmostEqual(
            actual, predicted, places=5,
            msg="pctOverlapOfProto is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(input0)
    self.assertEqual(
        0.0, dist0[0],
        msg="pctOverlapOfProto did not calculate 0 distance as expected.")

    # Test pctOverlapOfLarger metric
    classifier.distanceMethod = "pctOverlapOfLarger"
    _, _, dist, _ = classifier.infer(input)
    pctOverlaps = [0.40, 1.0]
    for actual, predicted in zip(pctOverlaps, dist):
        self.assertAlmostEqual(
            actual, predicted, places=5,
            msg="pctOverlapOfLarger is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(input0)
    self.assertEqual(
        0.0, dist0[0],
        msg="pctOverlapOfLarger did not calculate 0 distance as expected.")
def runTestKNNClassifier(self, short = 0):
    """ Test the KNN classifier in this module. short can be:
        0 (short), 1 (medium), or 2 (long)
    """
    failures = ""
    if short != 2:
        numpy.random.seed(42)
    else:
        # Long mode: use a fresh time-based seed, and append it to the
        # 'seedval' file so a failing run can be reproduced later.
        seed_value = int(time.time())
        numpy.random.seed(seed_value)
        LOGGER.info('Seed used: %d', seed_value)
        f = open('seedval', 'a')
        f.write(str(seed_value))
        f.write('\n')
        f.close()

    failures += simulateKMoreThanOne()

    LOGGER.info("\nTesting KNN Classifier on dense patterns")
    numPatterns, numClasses = getNumTestPatterns(short)
    patternSize = 100
    patterns = numpy.random.rand(numPatterns, patternSize)
    patternDict = dict()
    testDict = dict()

    # Assume there are no repeated patterns -- if there are, then
    # numpy.random would be completely broken.
    # Patterns in testDict are identical to those in patternDict but for the
    # first 2% of items.
    for i in xrange(numPatterns):
        # NOTE(review): randint(0, numClasses-1) never assigns the last
        # class; the float slice index 0.02*patternSize also relies on
        # old numpy truncation behavior — confirm both are intentional.
        patternDict[i] = dict()
        patternDict[i]['pattern'] = patterns[i]
        patternDict[i]['category'] = numpy.random.randint(0, numClasses-1)
        testDict[i] = copy.deepcopy(patternDict[i])
        testDict[i]['pattern'][:0.02*patternSize] = numpy.random.rand()
        testDict[i]['category'] = None

    LOGGER.info("\nTesting KNN Classifier with L2 norm")

    knn = KNNClassifier(k=1)
    failures += simulateClassifier(knn, patternDict, \
        "KNN Classifier with L2 norm test")

    LOGGER.info("\nTesting KNN Classifier with L1 norm")

    knnL1 = KNNClassifier(k=1, distanceNorm=1.0)
    failures += simulateClassifier(knnL1, patternDict, \
        "KNN Classifier with L1 norm test")

    # Test with exact matching classifications.
    LOGGER.info("\nTesting KNN Classifier with exact matching. For testing we "
                "slightly alter the training data and expect None to be returned for the "
                "classifications.")
    knnExact = KNNClassifier(k=1, exact=True)
    failures += simulateClassifier(knnExact,
                                   patternDict,
                                   "KNN Classifier with exact matching test",
                                   testDict=testDict)

    # Build a second, sparse-binary pattern set, keyed by the stringified
    # pattern so duplicate patterns collapse to one entry.
    numPatterns, numClasses = getNumTestPatterns(short)
    patterns = (numpy.random.rand(numPatterns, 25) > 0.7).astype(RealNumpyDType)
    patternDict = dict()

    for i in patterns:
        iString = str(i.tolist())
        if not patternDict.has_key(iString):
            randCategory = numpy.random.randint(0, numClasses-1)
            patternDict[iString] = dict()
            patternDict[iString]['pattern'] = i
            patternDict[iString]['category'] = randCategory

    LOGGER.info("\nTesting KNN on sparse patterns")

    knnDense = KNNClassifier(k=1)
    failures += simulateClassifier(knnDense, patternDict, \
        "KNN Classifier on sparse pattern test")

    self.assertEqual(len(failures), 0, "Tests failed: \n" + failures)

    # Long mode: record success in the same seed log.
    if short == 2:
        f = open('seedval', 'a')
        f.write('Pass\n')
        f.close()