Ejemplo n.º 1
0
    def testOverlapDistanceMethodBadSparsity(self):
        """Sparsity (input dimensionality) less than input array"""
        classifier = KNNClassifier(distanceMethod="rawOverlap")

        # Sparse pattern whose largest ON bits (23, 29) exceed the
        # dimensionality handed to learn() below.
        pattern = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

        # Learning with dimensionality 20 -- smaller than some ON bits --
        # must be rejected with a RuntimeError.
        with self.assertRaises(RuntimeError):
            classifier.learn(pattern, 0, isSparse=20)
Ejemplo n.º 2
0
def simulateCategories(numSamples=100, numDimensions=500):
  """Simulate running KNN classifier on many disjoint categories.

  Category labels are even numbers in [50, 148], so the indices are
  deliberately non-consecutive.  Returns (failures, knn), where failures
  is an empty string when every check passed.
  """

  failures = ""
  LOGGER.info("Testing the sparse KNN Classifier on many disjoint categories")
  knn = KNNClassifier(k=1, distanceNorm=1.0, useSparseMemory=True)

  for i in range(0, numSamples):

    # select category randomly and generate vector
    c = 2*numpy.random.randint(0, 50) + 50
    v = createPattern(c, numDimensions)
    knn.learn(v, c)

  # Go through each category and ensure we have at least one from each!
  for i in range(0, 50):
    c = 2*i+50
    v = createPattern(c, numDimensions)
    knn.learn(v, c)

  errors = 0
  for i in range(0, numSamples):

    # select category randomly and generate vector
    c = 2*numpy.random.randint(0, 50) + 50
    v = createPattern(c, numDimensions)

    # Inference should recover the category the pattern was generated from.
    inferCat, _kir, _kd, _kcd = knn.infer(v)
    if inferCat != c:
      LOGGER.info("Mistake with %s %s %s %s %s", v[v.nonzero()], \
        "mapped to category", inferCat, "instead of category", c)
      LOGGER.info("   %s", v.nonzero())
      errors += 1
  if errors != 0:
    failures += "Failure in handling non-consecutive category indices\n"

  # Test closest methods
  errors = 0
  for i in range(0, 10):

    # select category randomly and generate vector
    c = 2*numpy.random.randint(0, 50) + 50
    v = createPattern(c, numDimensions)

    # The closest stored pattern for category c should have bit c ON
    # (presumably createPattern sets bit c -- TODO confirm against its impl).
    p = knn.closestTrainingPattern(v, c)
    if not (c in p.nonzero()[0]):
      LOGGER.info("Mistake %s %s", p.nonzero(), v.nonzero())
      LOGGER.info("%s %s", p[p.nonzero()], v[v.nonzero()])
      errors += 1

  if errors != 0:
    failures += "Failure in closestTrainingPattern method\n"

  return failures, knn
Ejemplo n.º 3
0
    def __init__(self, verbosity=1):
        """Initialize the random-SDR model with a kNN classifier.

        @param verbosity (int) Logging verbosity; the classifier gets
                               verbosity - 1.
        """
        super(ClassificationModelRandomSDR, self).__init__(verbosity)

        # Init kNN classifier:
        #   NOTE(review): an earlier comment here mentioned
        #   'distanceMethod'='rawOverlap', but no distanceMethod is passed,
        #   so the classifier uses its default (Euclidean) metric.
        #   exact=True requires exact pattern matches for classification;
        #   k is left at the classifier's default.
        self.classifier = KNNClassifier(exact=True, verbosity=verbosity - 1)

        # SDR dimensions: n total bits, w active bits.
        self.n = 100
        self.w = 20
Ejemplo n.º 4
0
    def testMinSparsity(self):
        """Tests overlap distance with min sparsity"""

        # Require sparsity >= 20%
        params = {"distanceMethod": "rawOverlap", "minSparsity": 0.2}
        classifier = KNNClassifier(**params)

        dimensionality = 30
        # a and b are 30% sparse (9 of 30 bits ON) -- well above threshold.
        a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
        b = np.array([2, 4, 8, 12, 14, 18, 20, 21, 28], dtype=np.int32)

        # This has 20% sparsity and should be inserted
        c = np.array([2, 3, 8, 11, 14, 18], dtype=np.int32)

        # This has 17% sparsity and should NOT be inserted
        d = np.array([2, 3, 8, 11, 18], dtype=np.int32)

        numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
        self.assertEquals(numPatterns, 1)

        numPatterns = classifier.learn(b, 1, isSparse=dimensionality)
        self.assertEquals(numPatterns, 2)

        numPatterns = classifier.learn(c, 1, isSparse=dimensionality)
        self.assertEquals(numPatterns, 3)

        # d is below the minSparsity threshold: stored count must not grow.
        numPatterns = classifier.learn(d, 1, isSparse=dimensionality)
        self.assertEquals(numPatterns, 3)

        # Test that inference ignores low sparsity vectors but not others
        e = np.array([2, 4, 5, 6, 8, 12, 14, 18, 20], dtype=np.int32)
        dense = np.zeros(dimensionality)
        dense[e] = 1.0
        cat, inference, _, _ = classifier.infer(dense)
        self.assertIsNotNone(cat)
        self.assertGreater(inference.sum(), 0.0)

        # This has 20% sparsity and should be used for inference
        f = np.array([2, 5, 8, 11, 14, 18], dtype=np.int32)
        dense = np.zeros(dimensionality)
        dense[f] = 1.0
        cat, inference, _, _ = classifier.infer(dense)
        self.assertIsNotNone(cat)
        self.assertGreater(inference.sum(), 0.0)

        # This has 17% sparsity and should return null inference results
        g = np.array([2, 3, 8, 11, 19], dtype=np.int32)
        dense = np.zeros(dimensionality)
        dense[g] = 1.0
        cat, inference, _, _ = classifier.infer(dense)
        self.assertIsNone(cat)
        self.assertEqual(inference.sum(), 0.0)
Ejemplo n.º 5
0
  def testOverlapDistanceMethodStandardUnsorted(self):
    """If sparse representation indices are unsorted expect error."""
    classifier = KNNClassifier(distanceMethod="rawOverlap")

    dims = 40
    # ON-bit indices deliberately out of ascending order.
    unsortedPatterns = (
        np.array([29, 3, 7, 11, 13, 17, 19, 23, 1], dtype=np.int32),
        np.array([2, 4, 20, 12, 14, 18, 8, 28, 30], dtype=np.int32),
    )

    # Each unsorted pattern must be rejected with a RuntimeError.
    for category, pattern in enumerate(unsortedPatterns):
      with self.assertRaises(RuntimeError):
        classifier.learn(pattern, category, isSparse=dims)
Ejemplo n.º 6
0
  def testExtractVectorsFromKNN(self):
    """_extractVectorsFromKNN should recover exactly the learned vectors."""
    # 10 random binary vectors of width 25 with roughly 10% density.
    vectors = numpy.random.rand(10, 25) < 0.1

    # Populate KNN
    knn = KNNClassifier()
    for i in xrange(vectors.shape[0]):
      knn.learn(vectors[i], 0)

    # Extract vectors from KNN
    sparseDataMatrix = HierarchicalClustering._extractVectorsFromKNN(knn)

    # Row order is not guaranteed, so compare sorted row lists.
    self.assertEqual(
      sorted(sparseDataMatrix.todense().tolist()), 
      sorted(vectors.tolist())
    )
Ejemplo n.º 7
0
  def testOverlapDistanceMethodEmptyArray(self):
    """Tests case where pattern has no ON bits"""
    classifier = KNNClassifier(distanceMethod="rawOverlap")

    dims = 40
    emptyPattern = np.array([], dtype=np.int32)

    # An all-zero pattern is still a legal training example.
    self.assertEquals(classifier.learn(emptyPattern, 0, isSparse=dims), 1)

    # Inference on the corresponding dense all-zero vector should classify
    # back to the learned category.
    denseVector = np.zeros(dims)
    denseVector[emptyPattern] = 1.0
    winner, _, _, _ = classifier.infer(denseVector)
    self.assertEquals(winner, 0)
Ejemplo n.º 8
0
    def __init__(self,
                 n=100,
                 w=20,
                 verbosity=1,
                 classifierMetric="rawOverlap",
                 k=1,
                 **kwargs):
        """Initialize the keywords model.

        @param n                (int)  Total number of bits in the SDRs.
        @param w                (int)  Number of active bits per SDR.
        @param verbosity        (int)  Logging verbosity; the classifier
                                       gets verbosity - 1.
        @param classifierMetric (str)  distanceMethod for the kNN classifier.
        @param k                (int)  Number of nearest neighbors.
        Remaining kwargs are forwarded to the superclass.
        """

        super(ClassificationModelKeywords, self).__init__(**kwargs)

        # exact=True: classification requires exact pattern matches.
        self.classifier = KNNClassifier(exact=True,
                                        distanceMethod=classifierMetric,
                                        k=k,
                                        verbosity=verbosity - 1)

        self.n = n
        self.w = w
Ejemplo n.º 9
0
    def __init__(self,
                 verbosity=1,
                 numLabels=3,
                 modelDir="ClassificationModelKeywords"):
        """Initialize the keywords model.

        @param verbosity (int) Logging verbosity; the classifier gets
                               verbosity - 1.
        @param numLabels (int) Number of labels; also used as k.
        @param modelDir  (str) Directory used for model persistence.
        """

        # NOTE(review): `n` and `w` below are not parameters of this
        # __init__ -- presumably module-level globals; verify at the
        # original call site.
        super(ClassificationModelKeywords, self).__init__(n,
                                                          w,
                                                          verbosity=verbosity,
                                                          numLabels=numLabels,
                                                          modelDir=modelDir)

        # exact=True: classification requires exact pattern matches;
        # rawOverlap scores by shared ON bits.
        self.classifier = KNNClassifier(exact=True,
                                        distanceMethod="rawOverlap",
                                        k=numLabels,
                                        verbosity=verbosity - 1)
Ejemplo n.º 10
0
  def testOverlapDistanceMethodInconsistentDimensionality(self):
    """Inconsistent sparsity (input dimensionality)"""
    classifier = KNNClassifier(distanceMethod="rawOverlap")

    dims = 40
    pattern = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

    # Learn with dimensionality 31: valid (greater than the largest ON bit,
    # 29) but inconsistent with the dense vector used for inference below.
    self.assertEquals(classifier.learn(pattern, 0, isSparse=31), 1)

    denseVector = np.zeros(dims)
    denseVector[pattern] = 1.0

    # Inference should still recover the learned category.
    winner, _, _, _ = classifier.infer(denseVector)
    self.assertEquals(winner, 0)
Ejemplo n.º 11
0
  def testOverlapDistanceMethod_ClassifySparse(self):
    """Learned sparse patterns classify back to their own categories."""
    classifier = KNNClassifier(distanceMethod="rawOverlap")

    dimensionality = 40
    trainingPairs = [
        (np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32), 0),
        (np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32), 1),
    ]

    for pattern, category in trainingPairs:
      classifier.learn(pattern, category, isSparse=dimensionality)

    # TODO Test case where infer is passed a sparse representation after
    # infer() has been extended to handle sparse and dense
    for pattern, category in trainingPairs:
      winner, _, _, _ = classifier.infer(pattern)
      self.assertEquals(winner, category)
Ejemplo n.º 12
0
def simulateKMoreThanOne():
  """A small test with k=3"""

  failures = ""
  LOGGER.info("Testing the sparse KNN Classifier with k=3")
  knn = KNNClassifier(k=3)

  # Six 2-D training samples; the first three are labeled 0, the rest 1.
  samples = numpy.array([[1.0, 0.0],
                         [1.0, 0.2],
                         [1.0, 0.2],
                         [1.0, 2.0],
                         [1.0, 4.0],
                         [1.0, 4.5]])
  labels = (0, 0, 0, 1, 1, 1)
  for sample, label in zip(samples, labels):
    knn.learn(sample, label)

  # Spot-check inference on training samples.  Sample 3 is labeled 1 but
  # with k=3 its majority neighborhood is category 0.
  for index, expected in ((0, 0), (2, 0), (3, 0), (5, 1)):
    winner, _inferenceResult, _dist, _categoryDist = knn.infer(samples[index])
    if winner != expected:
      failures += "Inference failed with k=3\n"

  if len(failures) == 0:
    LOGGER.info("Tests passed.")

  return failures
Ejemplo n.º 13
0
  def testPartitionIdExcluded(self):
    """
    Tests that partitionId properly excludes training data points during
    inference
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    denseB = np.zeros(dimensionality)
    denseB[b] = 1.0

    classifier.learn(a, 0, isSparse=dimensionality, partitionId=0)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=1)

    # Excluding partition 1 leaves a available, so A matches itself.
    cat, _, _, _ = classifier.infer(denseA, partitionId=1)
    self.assertEquals(cat, 0)

    # Excluding partition 0 removes a itself; the nearest remaining is b.
    cat, _, _, _ = classifier.infer(denseA, partitionId=0)
    self.assertEquals(cat, 1)

    cat, _, _, _ = classifier.infer(denseB, partitionId=0)
    self.assertEquals(cat, 1)

    cat, _, _, _ = classifier.infer(denseB, partitionId=1)
    self.assertEquals(cat, 0)

    # Ensure it works even if you invoke learning again. To make it a bit more
    # complex this time we insert A again but now with Id=2
    classifier.learn(a, 0, isSparse=dimensionality, partitionId=2)

    # Even though first A should be ignored, the second instance of A should
    # not be ignored.
    cat, _, _, _ = classifier.infer(denseA, partitionId=0)
    self.assertEquals(cat, 0)
    def __init__(self,
                 verbosity=1,
                 numLabels=3,
                 fingerprintType=EncoderTypes.document):
        """Initialize the fingerprint classification model.

        @param verbosity       (int) Logging verbosity; the classifier gets
                                     verbosity - 1.
        @param numLabels       (int) Number of labels; also used as k.
        @param fingerprintType       One of EncoderTypes.document or
                                     EncoderTypes.word.
        @raises ValueError if fingerprintType is not a supported encoding.
        """
        super(ClassificationModelFingerprint,
              self).__init__(verbosity, numLabels)

        # Init kNN classifier and Cortical.io encoder; need valid API key (see
        # CioEncoder init for details).
        self.classifier = KNNClassifier(k=numLabels,
                                        distanceMethod='rawOverlap',
                                        exact=False,
                                        verbosity=verbosity - 1)

        # BUG FIX: the original check was
        #   fingerprintType is (not EncoderTypes.document or not
        #   EncoderTypes.word)
        # which compares against a boolean and can never raise.  Use a
        # proper membership test.  (Also fixed typos in the message.)
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligible types.")
        self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                                  fingerprintType=fingerprintType)
        self.n = self.encoder.n
        # w is derived from the encoder's target sparsity (a percentage).
        self.w = int((self.encoder.targetSparsity / 100) * self.n)
  def __init__(self, tmOverrides=None, upOverrides=None,
               classifierOverrides=None, seed=42, consoleVerbosity=0):
    """Build the TM -> Union Temporal Pooler -> kNN pipeline.

    Each *Overrides dict is layered on top of the corresponding DEFAULT_*
    params class attribute before construction.
    NOTE(review): consoleVerbosity is accepted but unused in this method.
    """
    print "Initializing Temporal Memory..."
    params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
    params.update(tmOverrides or {})
    params["seed"] = seed
    self.tm = MonitoredFastExtendedTemporalMemory(mmName="TM", **params)

    print "Initializing Union Temporal Pooler..."
    start = time.time()
    params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
    params.update(upOverrides or {})
    # The pooler's input space is the full set of TM cells.
    params["inputDimensions"] = [self.tm.numberOfCells()]
    params["potentialRadius"] = self.tm.numberOfCells()
    params["seed"] = seed
    self.up = MonitoredUnionTemporalPooler(mmName="UP", **params)
    elapsed = int(time.time() - start)
    print "Total time: {0:2} seconds.".format(elapsed)

    print "Initializing KNN Classifier..."
    params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
    params.update(classifierOverrides or {})
    self.classifier = KNNClassifier(**params)
Ejemplo n.º 16
0
    def __init__(self,
                 verbosity=1,
                 numLabels=3,
                 modelDir="ClassificationModelFingerprint",
                 fingerprintType=EncoderTypes.word,
                 unionSparsity=0.20,
                 retinaScaling=1.0,
                 retina="en_associative",
                 apiKey=None,
                 classifierMetric="rawOverlap",
                 cacheRoot=None):
        """Initialize the fingerprint classification model.

        @param verbosity        (int)   Logging verbosity; the classifier
                                        gets verbosity - 1.
        @param numLabels        (int)   Number of labels; also used as k.
        @param modelDir         (str)   Directory for model persistence.
        @param fingerprintType          One of EncoderTypes.document or
                                        EncoderTypes.word.
        @param unionSparsity    (float) Sparsity for the encoder's unions.
        @param retinaScaling    (float) Scale factor for the Cio retina.
        @param retina           (str)   Cortical.io retina name.
        @param apiKey           (str)   Cortical.io API key.
        @param classifierMetric (str)   distanceMethod for the classifier.
        @param cacheRoot        (str)   Root dir for the encoder cache;
                                        defaults to this module's directory.
        @raises ValueError if fingerprintType is not a supported encoding.
        """
        super(ClassificationModelFingerprint,
              self).__init__(verbosity=verbosity,
                             numLabels=numLabels,
                             modelDir=modelDir)

        # Init kNN classifier and Cortical.io encoder; need valid API key (see
        # CioEncoder init for details).
        self.classifier = KNNClassifier(k=numLabels,
                                        distanceMethod=classifierMetric,
                                        exact=False,
                                        verbosity=verbosity - 1)

        # BUG FIX: the original check was
        #   fingerprintType is (not EncoderTypes.document or not
        #   EncoderTypes.word)
        # which compares against a boolean and can never raise.  Use a
        # proper membership test.  (Also fixed typos in the message.)
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligible types.")

        # Default the fingerprint cache next to this module.
        cacheRoot = cacheRoot or os.path.dirname(os.path.realpath(__file__))

        self.encoder = CioEncoder(retinaScaling=retinaScaling,
                                  cacheDir=os.path.join(cacheRoot, "CioCache"),
                                  fingerprintType=fingerprintType,
                                  unionSparsity=unionSparsity,
                                  retina=retina,
                                  apiKey=apiKey)
Ejemplo n.º 17
0
  def testOverlapDistanceMethodStandard(self):
    """Tests standard learning case for raw overlap"""
    classifier = KNNClassifier(distanceMethod="rawOverlap")

    dims = 40
    trainingSet = [
        (np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32), 0),
        (np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32), 1),
    ]

    # Each learn() call should grow the stored-pattern count by one.
    for expectedCount, (pattern, category) in enumerate(trainingSet, start=1):
      self.assertEquals(
          classifier.learn(pattern, category, isSparse=dims), expectedCount)

    # Dense versions of the training patterns must classify back to their
    # original categories.
    for pattern, category in trainingSet:
      denseVector = np.zeros(dims)
      denseVector[pattern] = 1.0
      winner, _, _, _ = classifier.infer(denseVector)
      self.assertEquals(winner, category)
Ejemplo n.º 18
0
    def __init__(self,
                 tmOverrides=None,
                 upOverrides=None,
                 classifierOverrides=None,
                 seed=42,
                 consoleVerbosity=0):
        """Build the TM -> Union Pooler -> kNN pipeline.

        Each *Overrides dict is layered on top of the corresponding
        DEFAULT_* params class attribute before construction.
        NOTE(review): consoleVerbosity is accepted but unused here.
        """
        print "Initializing Temporal Memory..."
        params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
        params.update(tmOverrides or {})
        params["seed"] = seed
        self.tm = MonitoredFastGeneralTemporalMemory(mmName="TM", **params)

        print "Initializing Union Pooler..."
        params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
        params.update(upOverrides or {})
        # The pooler's input space is the full set of TM cells.
        params["inputDimensions"] = [self.tm.numberOfCells()]
        params["potentialRadius"] = self.tm.numberOfCells()
        params["seed"] = seed
        self.up = MonitoredUnionPooler(mmName="UP", **params)

        print "Initializing KNN Classifier..."
        params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
        params.update(classifierOverrides or {})
        self.classifier = KNNClassifier(**params)
Ejemplo n.º 19
0
    def runTestKNNClassifier(self, short=0):
        """Test the KNN classifier in this module.

        short selects run length: 0 (short), 1 (medium), or 2 (long).
        """

        failures = ""
        if short != 2:
            numpy.random.seed(42)
        else:
            # Long runs use a fresh, time-based seed and record it in a file
            # so a failing run can be reproduced later.
            seed_value = int(time.time())
            # seed_value = 1276437656
            #seed_value = 1277136651
            numpy.random.seed(seed_value)
            LOGGER.info('Seed used: %d', seed_value)
            f = open('seedval', 'a')
            f.write(str(seed_value))
            f.write('\n')
            f.close()
        failures += simulateKMoreThanOne()

        LOGGER.info("\nTesting KNN Classifier on dense patterns")
        numPatterns, numClasses = getNumTestPatterns(short)
        patterns = numpy.random.rand(numPatterns, 100)
        patternDict = dict()

        # Assume there are no repeated patterns -- if there are, then
        # numpy.random would be completely broken.
        for i in xrange(numPatterns):
            randCategory = numpy.random.randint(0, numClasses - 1)
            patternDict[i] = dict()
            patternDict[i]['pattern'] = patterns[i]
            patternDict[i]['category'] = randCategory

        LOGGER.info("\nTesting KNN Classifier with L2 norm")

        knn = KNNClassifier(k=1)
        failures += simulateClassifier(knn, patternDict, \
          "KNN Classifier with L2 norm test")

        LOGGER.info("\nTesting KNN Classifier with L1 norm")

        knnL1 = KNNClassifier(k=1, distanceNorm=1.0)
        failures += simulateClassifier(knnL1, patternDict, \
          "KNN Classifier with L1 norm test")

        # Repeat with sparse binary patterns (~30% ON bits).
        numPatterns, numClasses = getNumTestPatterns(short)
        patterns = (numpy.random.rand(numPatterns, 25) >
                    0.7).astype(RealNumpyDType)
        patternDict = dict()

        # Binary patterns can repeat, so key by the pattern's string form
        # to de-duplicate.
        for i in patterns:
            iString = str(i.tolist())
            if not patternDict.has_key(iString):
                randCategory = numpy.random.randint(0, numClasses - 1)
                patternDict[iString] = dict()
                patternDict[iString]['pattern'] = i
                patternDict[iString]['category'] = randCategory

        LOGGER.info("\nTesting KNN on sparse patterns")

        knnDense = KNNClassifier(k=1)
        failures += simulateClassifier(knnDense, patternDict, \
          "KNN Classifier on sparse pattern test")

        self.assertEqual(len(failures), 0, "Tests failed: \n" + failures)

        if short == 2:
            # Mark the recorded seed as having passed.
            f = open('seedval', 'a')
            f.write('Pass\n')
            f.close()
Ejemplo n.º 20
0
  def testGetPartitionId(self):
    """
    Test a sequence of calls to KNN to ensure we can retrieve partition Id:
        - We first learn on some patterns (including one pattern with no
          partitionId in the middle) and test that we can retrieve Ids.
        - We then invoke inference and then check partitionId again.
        - We check incorrect indices to ensure we get an exception.
        - We check the case where the partitionId to be ignored is not in
          the list.
        - We learn on one more pattern and check partitionIds again
        - We remove rows and ensure partitionIds still work
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)
    c = np.array([1, 2, 3, 14, 16, 19, 22, 24, 33], dtype=np.int32)
    d = np.array([2, 4, 8, 12, 14, 19, 22, 24, 33], dtype=np.int32)
    e = np.array([1, 3, 7, 12, 14, 19, 22, 24, 33], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    classifier.learn(a, 0, isSparse=dimensionality, partitionId=433)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=213)
    classifier.learn(c, 1, isSparse=dimensionality, partitionId=None)
    classifier.learn(d, 1, isSparse=dimensionality, partitionId=433)

    self.assertEquals(classifier.getPartitionId(0), 433)
    self.assertEquals(classifier.getPartitionId(1), 213)
    self.assertEquals(classifier.getPartitionId(2), None)
    self.assertEquals(classifier.getPartitionId(3), 433)

    # Excluding partition 213 leaves a available; A matches itself.
    cat, _, _, _ = classifier.infer(denseA, partitionId=213)
    self.assertEquals(cat, 0)

    # Test with patternId not in classifier
    cat, _, _, _ = classifier.infer(denseA, partitionId=666)
    self.assertEquals(cat, 0)

    # Partition Ids should be maintained after inference
    self.assertEquals(classifier.getPartitionId(0), 433)
    self.assertEquals(classifier.getPartitionId(1), 213)
    self.assertEquals(classifier.getPartitionId(2), None)
    self.assertEquals(classifier.getPartitionId(3), 433)

    # Should return exceptions if we go out of bounds
    with self.assertRaises(RuntimeError):
      classifier.getPartitionId(4)
    with self.assertRaises(RuntimeError):
      classifier.getPartitionId(-1)

    # Learn again
    classifier.learn(e, 4, isSparse=dimensionality, partitionId=413)
    self.assertEquals(classifier.getPartitionId(4), 413)

    # Test getPatternIndicesWithPartitionId
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                          [0, 3])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(666),
                          [])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                          [4])

    self.assertEquals(classifier.getNumPartitionIds(), 3)

    # Check that the full set of partition ids is what we expect
    # (np.inf marks the pattern learned with partitionId=None).
    self.assertItemsEqual(classifier.getPartitionIdPerPattern(),
                          [433, 213, np.inf, 433, 413])
    self.assertItemsEqual(classifier.getPartitionIdList(),[433, 413, 213])

    # Remove two rows - all indices shift down
    self.assertEquals(classifier._removeRows([0,2]), 2)
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                          [1])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                          [2])

    # Remove another row and check number of partitions have decreased
    classifier._removeRows([0])
    self.assertEquals(classifier.getNumPartitionIds(), 2)

    # Check that the full set of partition ids is what we expect
    self.assertItemsEqual(classifier.getPartitionIdPerPattern(), [433, 413])
    self.assertItemsEqual(classifier.getPartitionIdList(),[433, 413])
Ejemplo n.º 21
0
  def runTestPCAKNN(self, short = 0):
    """Compare a PCA-projected k-NN against a plain k-NN on generated data."""

    LOGGER.info('\nTesting PCA/k-NN classifier')
    LOGGER.info('Mode=%s', short)

    numDims = 10
    numClasses = 10
    k = 10
    numPatternsPerClass = 100
    # 90% of the generated patterns train; the remainder test.
    numPatterns = int(.9 * numClasses * numPatternsPerClass)
    numTests = numClasses * numPatternsPerClass - numPatterns
    numSVDSamples = int(.1 * numPatterns)
    keep = 1

    train_data, train_class, test_data, test_class = \
        pca_knn_data.generate(numDims, numClasses, k, numPatternsPerClass,
                              numPatterns, numTests, numSVDSamples, keep)

    # pca_knn projects down to `keep` SVD dimensions once numSVDSamples
    # samples have been seen.
    pca_knn = KNNClassifier(k=k,numSVDSamples=numSVDSamples,
                            numSVDDims=keep)

    knn = KNNClassifier(k=k)


    LOGGER.info('Training PCA k-NN')

    for i in range(numPatterns):
      knn.learn(train_data[i], train_class[i])
      pca_knn.learn(train_data[i], train_class[i])


    LOGGER.info('Testing PCA k-NN')

    numWinnerFailures = 0
    numInferenceFailures = 0
    numDistFailures = 0
    numAbsErrors = 0

    for i in range(numTests):

      winner, inference, dist, categoryDist = knn.infer(test_data[i])
      pca_winner, pca_inference, pca_dist, pca_categoryDist \
        = pca_knn.infer(test_data[i])

      if winner != test_class[i]:
        numAbsErrors += 1

      # The PCA classifier should agree with the plain one on the winner,
      # and (to within 1e-4) on the inference and distance arrays.
      if pca_winner != winner:
        numWinnerFailures += 1

      if (numpy.abs(pca_inference - inference) > 1e-4).any():
        numInferenceFailures += 1

      if (numpy.abs(pca_dist - dist) > 1e-4).any():
        numDistFailures += 1

    # Success rates as percentages.
    s0 = 100*float(numTests - numAbsErrors) / float(numTests)
    s1 = 100*float(numTests - numWinnerFailures) / float(numTests)
    s2 = 100*float(numTests - numInferenceFailures) / float(numTests)
    s3 = 100*float(numTests - numDistFailures) / float(numTests)

    LOGGER.info('PCA/k-NN success rate=%s%s', s0, '%')
    LOGGER.info('Winner success=%s%s', s1, '%')
    LOGGER.info('Inference success=%s%s', s2, '%')
    LOGGER.info('Distance success=%s%s', s3, '%')

    # Only perfect winner agreement is required to pass.
    self.assertEqual(s1, 100.0,
      "PCA/k-NN test failed")
Ejemplo n.º 22
0
  def testDistanceMetrics(self):
    """Check each supported distance metric against hand-computed values.

    Fixes vs. original: local `input` renamed (it shadowed the builtin),
    and the broken continuation indentation in the pctOverlapOfLarger
    assertion is repaired.  All asserted values and messages unchanged.
    """
    classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0)

    dimensionality = 40
    protoA = np.array([0, 1, 3, 7, 11], dtype=np.int32)
    protoB = np.array([20, 28, 30], dtype=np.int32)

    classifier.learn(protoA, 0, isSparse=dimensionality)
    classifier.learn(protoB, 0, isSparse=dimensionality)

    # queryPattern is an arbitrary point, close to protoA, orthogonal to
    # protoB.  (Renamed from `input`, which shadowed the builtin.)
    queryPattern = np.zeros(dimensionality)
    queryPattern[:4] = 1.0
    # exactPattern equals protoA exactly, to test that the distance from a
    # point to itself is 0.
    exactPattern = np.zeros(dimensionality)
    exactPattern[protoA] = 1.0

    # Test l2 norm metric
    _, _, dist, _ = classifier.infer(queryPattern)
    l2Distances = [0.65465367,  1.0]
    for actual, predicted in zip(l2Distances, dist):
      self.assertAlmostEqual(
        actual, predicted, places=5,
        msg="l2 distance norm is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(exactPattern)
    self.assertEqual(
      0.0, dist0[0], msg="l2 norm did not calculate 0 distance as expected.")

    # Test l1 norm metric
    classifier.distanceNorm = 1.0
    _, _, dist, _ = classifier.infer(queryPattern)
    l1Distances = [0.42857143,  1.0]
    for actual, predicted in zip(l1Distances, dist):
      self.assertAlmostEqual(
        actual, predicted, places=5,
        msg="l1 distance norm is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(exactPattern)
    self.assertEqual(
      0.0, dist0[0], msg="l1 norm did not calculate 0 distance as expected.")

    # Test raw overlap metric
    classifier.distanceMethod = "rawOverlap"
    _, _, dist, _ = classifier.infer(queryPattern)
    rawOverlaps = [1, 4]
    for actual, predicted in zip(rawOverlaps, dist):
      self.assertEqual(
        actual, predicted, msg="Raw overlap is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(exactPattern)
    self.assertEqual(
      0.0, dist0[0],
      msg="Raw overlap did not calculate 0 distance as expected.")

    # Test pctOverlapOfInput metric
    classifier.distanceMethod = "pctOverlapOfInput"
    _, _, dist, _ = classifier.infer(queryPattern)
    pctOverlaps = [0.25, 1.0]
    for actual, predicted in zip(pctOverlaps, dist):
      self.assertAlmostEqual(
        actual, predicted, places=5,
        msg="pctOverlapOfInput is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(exactPattern)
    self.assertEqual(
      0.0, dist0[0],
      msg="pctOverlapOfInput did not calculate 0 distance as expected.")

    # Test pctOverlapOfProto metric
    classifier.distanceMethod = "pctOverlapOfProto"
    _, _, dist, _ = classifier.infer(queryPattern)
    pctOverlaps = [0.40, 1.0]
    for actual, predicted in zip(pctOverlaps, dist):
      self.assertAlmostEqual(
        actual, predicted, places=5,
        msg="pctOverlapOfProto is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(exactPattern)
    self.assertEqual(
      0.0, dist0[0],
      msg="pctOverlapOfProto did not calculate 0 distance as expected.")

    # Test pctOverlapOfLarger metric
    classifier.distanceMethod = "pctOverlapOfLarger"
    _, _, dist, _ = classifier.infer(queryPattern)
    pctOverlaps = [0.40, 1.0]
    for actual, predicted in zip(pctOverlaps, dist):
      self.assertAlmostEqual(
        actual, predicted, places=5,
        msg="pctOverlapOfLarger is not calculated as expected.")

    _, _, dist0, _ = classifier.infer(exactPattern)
    self.assertEqual(
      0.0, dist0[0],
      msg="pctOverlapOfLarger did not calculate 0 distance as expected.")
Ejemplo n.º 23
0
  def runTestKNNClassifier(self, short = 0):
    """ Test the KNN classifier in this module. short can be:
        0 (short), 1 (medium), or 2 (long)
    """

    failures = ""
    if short != 2:
      numpy.random.seed(42)
    else:
      # Long runs use a fresh, time-based seed and record it in a file so a
      # failing run can be reproduced later.
      seed_value = int(time.time())
      numpy.random.seed(seed_value)
      LOGGER.info('Seed used: %d', seed_value)
      f = open('seedval', 'a')
      f.write(str(seed_value))
      f.write('\n')
      f.close()
    failures += simulateKMoreThanOne()

    LOGGER.info("\nTesting KNN Classifier on dense patterns")
    numPatterns, numClasses = getNumTestPatterns(short)
    patternSize = 100
    patterns = numpy.random.rand(numPatterns, patternSize)
    patternDict = dict()
    testDict = dict()

    # Assume there are no repeated patterns -- if there are, then
    # numpy.random would be completely broken.
    # Patterns in testDict are identical to those in patternDict but for the
    # first 2% of items.
    for i in xrange(numPatterns):
      patternDict[i] = dict()
      patternDict[i]['pattern'] = patterns[i]
      patternDict[i]['category'] = numpy.random.randint(0, numClasses-1)
      testDict[i] = copy.deepcopy(patternDict[i])
      testDict[i]['pattern'][:0.02*patternSize] = numpy.random.rand()
      testDict[i]['category'] = None

    LOGGER.info("\nTesting KNN Classifier with L2 norm")

    knn = KNNClassifier(k=1)
    failures += simulateClassifier(knn, patternDict, \
      "KNN Classifier with L2 norm test")

    LOGGER.info("\nTesting KNN Classifier with L1 norm")

    knnL1 = KNNClassifier(k=1, distanceNorm=1.0)
    failures += simulateClassifier(knnL1, patternDict, \
      "KNN Classifier with L1 norm test")

    # Test with exact matching classifications.
    LOGGER.info("\nTesting KNN Classifier with exact matching. For testing we "
      "slightly alter the training data and expect None to be returned for the "
      "classifications.")
    knnExact = KNNClassifier(k=1, exact=True)
    failures += simulateClassifier(knnExact, 
                                   patternDict,
                                   "KNN Classifier with exact matching test",
                                   testDict=testDict)

    # Repeat with sparse binary patterns (~30% ON bits).
    numPatterns, numClasses = getNumTestPatterns(short)
    patterns = (numpy.random.rand(numPatterns, 25) > 0.7).astype(RealNumpyDType)
    patternDict = dict()

    # Binary patterns can repeat, so key by the pattern's string form to
    # de-duplicate.
    for i in patterns:
      iString = str(i.tolist())
      if not patternDict.has_key(iString):
        randCategory = numpy.random.randint(0, numClasses-1)
        patternDict[iString] = dict()
        patternDict[iString]['pattern'] = i
        patternDict[iString]['category'] = randCategory

    LOGGER.info("\nTesting KNN on sparse patterns")

    knnDense = KNNClassifier(k=1)
    failures += simulateClassifier(knnDense, patternDict, \
      "KNN Classifier on sparse pattern test")

    self.assertEqual(len(failures), 0,
      "Tests failed: \n" + failures)

    if short == 2:
      # Mark the recorded seed as having passed.
      f = open('seedval', 'a')
      f.write('Pass\n')
      f.close()