def __init__(self,
                 verbosity=1,
                 numLabels=3,
                 modelDir="ClassificationModelFingerprint",
                 fingerprintType=EncoderTypes.word,
                 unionSparsity=20.0):
        """Init the kNN classifier and the Cortical.io fingerprint encoder.

        Raises ValueError unless fingerprintType is EncoderTypes.document
        or EncoderTypes.word.
        """
        super(ClassificationModelFingerprint,
              self).__init__(verbosity=verbosity,
                             numLabels=numLabels,
                             modelDir=modelDir)

        # Init kNN classifier and Cortical.io encoder; need valid API key (see
        # CioEncoder init for details).
        self.classifier = KNNClassifier(k=numLabels,
                                        distanceMethod='rawOverlap',
                                        exact=False,
                                        verbosity=verbosity - 1)

        # BUG FIX: the original `fingerprintType is (not ... or not ...)`
        # compared against a boolean, so invalid types were never rejected.
        # A membership test expresses the intended validation.
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligible types.")
        self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                                  fingerprintType=fingerprintType,
                                  unionSparsity=unionSparsity)

        # SDR sizing comes from the encoder: w is targetSparsity percent of n.
        self.n = self.encoder.n
        self.w = int((self.encoder.targetSparsity / 100) * self.n)
    def __init__(
        self,
        n=100,
        w=20,
        verbosity=1,
        numLabels=3,
        modelDir="ClassificationModelKeywords",
        classifierMetric="rawOverlap",
        k=None,
    ):
        """Keywords model: exact-match kNN over n-bit SDRs with w active bits.

        k defaults to numLabels when not given (backward compatibility).
        """
        super(ClassificationModelKeywords, self).__init__(verbosity=verbosity,
                                                          numLabels=numLabels,
                                                          modelDir=modelDir)

        # Backward compatibility to support previous odd behavior.
        # Identity test for None (PEP 8) instead of `==`.
        if k is None:
            k = numLabels

        # We use the pctOverlapOfInput distance metric for this model so the
        # queryModel() output is consistent (i.e. 0.0-1.0). The KNN classifications
        # aren't affected b/c the raw overlap distance is still used under the hood.
        self.classifier = KNNClassifier(exact=True,
                                        distanceMethod=classifierMetric,
                                        k=k,
                                        verbosity=verbosity - 1)

        self.n = n
        self.w = w
    def __init__(self,
                 verbosity=1,
                 numLabels=3,
                 modelDir="ClassificationModelWindow",
                 unionSparsity=0.20,
                 retinaScaling=1.0,
                 retina="en_associative",
                 apiKey=None,
                 classifierMetric="rawOverlap"):
        """Windowed Cortical.io model: kNN over unions of word fingerprints."""
        super(ClassificationModelWindows, self).__init__(
            verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

        # Window patterns sparser than 90% of the union sparsity get skipped.
        self.minSparsity = 0.9 * unionSparsity

        self.classifier = KNNClassifier(
            k=numLabels, distanceMethod=classifierMetric, exact=False,
            verbosity=verbosity - 1)

        # Cortical.io encoder; a valid API key is required (see CioEncoder
        # init for details). The cache lives next to this module.
        moduleDir = os.path.dirname(os.path.realpath(__file__))
        encoderArgs = {
            "retinaScaling": retinaScaling,
            "cacheDir": os.path.join(moduleDir, "CioCache"),
            "fingerprintType": EncoderTypes.word,
            "unionSparsity": unionSparsity,
            "retina": retina,
            "apiKey": apiKey,
        }
        self.encoder = CioEncoder(**encoderArgs)
Beispiel #4
0
    def __init__(self,
                 tmOverrides=None,
                 upOverrides=None,
                 classifierOverrides=None,
                 seed=42,
                 consoleVerbosity=0):
        """Build the TM -> Union Temporal Pooler -> KNN classifier pipeline.

        tmOverrides / upOverrides / classifierOverrides: dicts merged over the
        class-level DEFAULT_* parameter dicts (None means no overrides).
        seed: RNG seed applied to both the TM and the Union Pooler.
        consoleVerbosity: accepted but not used in this constructor.
        """
        print "Initializing Temporal Memory..."
        params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
        params.update(tmOverrides or {})
        params["seed"] = seed
        print "params: "
        print params
        self.tm = MonitoredFastExtendedTemporalMemory(mmName="TM", **params)

        print "Initializing Union Temporal Pooler..."
        start = time.time()
        params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
        params.update(upOverrides or {})
        # The pooler's input is the full TM cell population; every column may
        # potentially connect to all of it.
        params["inputDimensions"] = [self.tm.numberOfCells()]
        params["potentialRadius"] = self.tm.numberOfCells()
        params["seed"] = seed
        self.up = MonitoredUnionTemporalPooler(mmName="UP", **params)
        elapsed = int(time.time() - start)
        print "Total time: {0:2} seconds.".format(elapsed)

        print "Initializing KNN Classifier..."
        params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
        params.update(classifierOverrides or {})
        self.classifier = KNNClassifier(**params)
  def __init__(self, netInfo, options, baseline, logNames):
    """Set up the input/output KNN classifiers and sample bookkeeping.

    Parameters:
    ------------------------------------------------------------
    netInfo:      trained network info
    options:      object containing all "command-line" options for post-processsing
    baseline:     dictionary of information from the corresponding baseline test set
                    if any, or None
    logNames:     Names of the available log files
    """
    # Keep references to the network description and run options.
    self.netInfo = netInfo
    self.options = options

    # One single-nearest-neighbor classifier for inputs, one for outputs,
    # configured identically.
    knnArgs = dict(k=1, distanceNorm=1.0, distThreshold=0.0,
                   useSparseMemory=True)
    self.inputCl = KNNClassifier(**knnArgs)
    self.outputCl = KNNClassifier(**knnArgs)

    # Per-run accumulators.
    self.uniqueInput = []
    self.category = []
    self.numSamples = 0
    def __init__(self, netInfo, options, baseline, logNames):
        """Create the KNN classifiers used to train on inputs and outputs.

        netInfo:   trained network info
        options:   all "command-line" options for post-processing
        baseline:  baseline test-set info dictionary, or None
        logNames:  names of the available log files
        """
        # Stash the arguments we actually use.
        self.netInfo = netInfo
        self.options = options

        # Identical single-nearest-neighbor settings for both classifiers.
        commonKnnParams = {"k": 1,
                           "distanceNorm": 1.0,
                           "distThreshold": 0.0,
                           "useSparseMemory": True}
        self.inputCl = KNNClassifier(**commonKnnParams)
        self.outputCl = KNNClassifier(**commonKnnParams)

        # Bookkeeping for processed samples.
        self.uniqueInput = []
        self.category = []
        self.numSamples = 0
    def __init__(self, netInfo, options, baseline, logNames):
        """Prepare structures for measuring Temporal Pooler fitness.

        netInfo:   trained network info
        options:   all "command-line" options for post-processing
        baseline:  baseline test-set dictionary (must provide
                   'classificationStats'), or None
        logNames:  names of the available log files
        """
        # Network description, options, and the baseline classification stats.
        self.netInfo = netInfo
        self.options = options
        self.trainedClassificationStats = baseline['classificationStats']

        # Sample / error counters.
        self.numSamples = 0
        self.classificationSamples = 0
        self.classificationErrs = 0

        # Detects duplicate input samples so that repeated inputs don't skew
        # the measured classification accuracy.
        self.inputCl = KNNClassifier(k=1, distanceNorm=1.0, distThreshold=0.0,
                                     useSparseMemory=True)

        # Temporal Pooler fitness score recorded per sample.
        self.tpFitnessScores = []
    def __init__(self,
                 fingerprintType=EncoderTypes.word,
                 unionSparsity=0.20,
                 retinaScaling=1.0,
                 retina="en_associative",
                 apiKey=None,
                 k=1,
                 classifierMetric="rawOverlap",
                 cacheRoot=None,
                 **kwargs):
        """Init the kNN classifier and the Cortical.io fingerprint encoder.

        Remaining kwargs are forwarded to the base model. Raises ValueError
        unless fingerprintType is EncoderTypes.document or EncoderTypes.word.
        """
        super(ClassificationModelFingerprint, self).__init__(**kwargs)

        self.classifier = KNNClassifier(k=k,
                                        distanceMethod=classifierMetric,
                                        exact=False,
                                        verbosity=self.verbosity - 1)

        # BUG FIX: the original `fingerprintType is (not ... or not ...)`
        # compared against a boolean, so invalid types were never rejected.
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligible types.")

        # Need a valid API key for the Cortical.io encoder (see CioEncoder
        # constructor for details).
        self.encoder = CioEncoder(retinaScaling=retinaScaling,
                                  fingerprintType=fingerprintType,
                                  unionSparsity=unionSparsity,
                                  retina=retina,
                                  apiKey=apiKey,
                                  cacheDir=cacheRoot)

        # Document being accumulated across calls, if the subclass uses it.
        self.currentDocument = None
  def __init__(self, n=100, w=20, verbosity=1, numLabels=3):
    """Keyword model backed by an exact-match raw-overlap kNN."""
    super(ClassificationModelKeywords, self).__init__(n, w, verbosity,
                                                       numLabels)

    # One neighbor per label; classifier verbosity one level below the model.
    self.classifier = KNNClassifier(
        exact=True, distanceMethod='rawOverlap', k=numLabels,
        verbosity=verbosity - 1)
Beispiel #10
0
  def testOverlapDistanceMethodBadSparsity(self):
    """Sparsity (input dimensionality) less than input array"""
    classifier = KNNClassifier(distanceMethod="rawOverlap")

    pattern = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

    # Learning with a declared dimensionality (20) smaller than the highest
    # ON bits (23, 29) must fail.
    with self.assertRaises(RuntimeError):
      classifier.learn(pattern, 0, isSparse=20)
Beispiel #11
0
  def testGetPartitionIdWithNoIdsAtFirst(self):
    """
    Tests that we can correctly retrieve partition Id even if the first few
    vectors do not have Ids
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)
    c = np.array([1, 2, 3, 14, 16, 19, 22, 24, 33], dtype=np.int32)
    d = np.array([2, 4, 8, 12, 14, 19, 22, 24, 33], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    denseD = np.zeros(dimensionality)
    denseD[d] = 1.0

    # First two patterns are learned without partition ids.
    classifier.learn(a, 0, isSparse=dimensionality, partitionId=None)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=None)
    classifier.learn(c, 2, isSparse=dimensionality, partitionId=211)
    classifier.learn(d, 1, isSparse=dimensionality, partitionId=405)

    # Inferring with partitionId=405 must exclude pattern d from voting.
    # (assertEquals is a deprecated unittest alias; use assertEqual.)
    cat, _, _, _ = classifier.infer(denseA, partitionId=405)
    self.assertEqual(cat, 0)

    cat, _, _, _ = classifier.infer(denseD, partitionId=405)
    self.assertEqual(cat, 2)

    cat, _, _, _ = classifier.infer(denseD)
    self.assertEqual(cat, 1)
Beispiel #12
0
    def __init__(self, verbosity=1):
        """Random-SDR model: exact-match kNN over fixed-size random SDRs."""
        super(ClassificationModelRandomSDR, self).__init__(verbosity)

        # kNN notes: pass distanceMethod='rawOverlap' for overlap distance
        # (Euclidean is the default); k defaults to 1; classifier verbosity
        # is one level below the model's.
        self.classifier = KNNClassifier(exact=True, verbosity=verbosity - 1)

        # SDR dimensions: n total bits, w active bits.
        self.n = 100
        self.w = 20
Beispiel #13
0
    def testPartitionId(self):
        """Tests that partitionId properly excludes training data points.

        Inferring with a partitionId must ignore patterns learned under that
        same id. (assertEquals is a deprecated alias; assertEqual is used.)
        """
        params = {"distanceMethod": "rawOverlap"}
        classifier = KNNClassifier(**params)

        dimensionality = 40
        a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
        b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

        denseA = np.zeros(dimensionality)
        denseA[a] = 1.0

        denseB = np.zeros(dimensionality)
        denseB[b] = 1.0

        classifier.learn(a, 0, isSparse=dimensionality, partitionId=0)
        classifier.learn(b, 1, isSparse=dimensionality, partitionId=1)

        cat, _, _, _ = classifier.infer(denseA, partitionId=1)
        self.assertEqual(cat, 0)

        # Excluding partition 0 removes pattern a, so b's category wins.
        cat, _, _, _ = classifier.infer(denseA, partitionId=0)
        self.assertEqual(cat, 1)

        cat, _, _, _ = classifier.infer(denseB, partitionId=0)
        self.assertEqual(cat, 1)

        cat, _, _, _ = classifier.infer(denseB, partitionId=1)
        self.assertEqual(cat, 0)
Beispiel #14
0
  def testOverlapDistanceMethodEmptyArray(self):
    """Tests case where pattern has no ON bits.

    Learning and inferring an all-zero pattern must both succeed.
    (assertEquals is a deprecated unittest alias; assertEqual is used.)
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([], dtype=np.int32)

    numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
    self.assertEqual(numPatterns, 1)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0
    cat, _, _, _ = classifier.infer(denseA)
    self.assertEqual(cat, 0)
Beispiel #15
0
  def testExtractVectorsFromKNN(self):
    # 10 random binary row vectors of 25 bits, ~10% density.
    vectors = numpy.random.rand(10, 25) < 0.1

    # Populate KNN
    knn = KNNClassifier()
    for i in xrange(vectors.shape[0]):
      knn.learn(vectors[i], 0)

    # Extract vectors from KNN
    sparseDataMatrix = HierarchicalClustering._extractVectorsFromKNN(knn)

    # Row order may differ between the KNN's storage and the input, so
    # compare the sorted dense row lists instead of the raw matrices.
    self.assertEqual(
      sorted(sparseDataMatrix.todense().tolist()), 
      sorted(vectors.tolist())
    )
  def __init__(self,
               n=100,
               w=20,
               verbosity=1,
               numLabels=3,
               modelDir="ClassificationModelKeywords",
               classifierMetric="rawOverlap",
               k=None,
               ):
    """Keywords model: exact-match kNN over n-bit SDRs with w active bits.

    k defaults to numLabels when not given (backward compatibility).
    """
    super(ClassificationModelKeywords, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

    # Backward compatibility to support previous odd behavior.
    # Identity test for None (PEP 8) instead of `==`.
    if k is None:
      k = numLabels

    # We use the pctOverlapOfInput distance metric for this model so the
    # queryModel() output is consistent (i.e. 0.0-1.0). The KNN classifications
    # aren't affected b/c the raw overlap distance is still used under the hood.
    self.classifier = KNNClassifier(exact=True,
                                    distanceMethod=classifierMetric,
                                    k=k,
                                    verbosity=verbosity-1)

    self.n = n
    self.w = w
  def __init__(self, netInfo, options, baseline, logNames):
    """Prepare structures for scoring Temporal Pooler fitness.

    netInfo:   trained network info
    options:   all "command-line" options for post-processing
    baseline:  baseline test-set dictionary (must provide
               'classificationStats'), or None
    logNames:  names of the available log files
    """
    # Network description, options, and the baseline classification stats.
    self.netInfo = netInfo
    self.options = options
    self.trainedClassificationStats = baseline['classificationStats']

    # Sample / error counters.
    self.numSamples = 0
    self.classificationSamples = 0
    self.classificationErrs = 0

    # Flags duplicate input samples so that repeats of the same input don't
    # skew the measured classification accuracy.
    self.inputCl = KNNClassifier(k=1,
                                 distanceNorm=1.0,
                                 distThreshold=0.0,
                                 useSparseMemory=True)

    # Temporal Pooler fitness score recorded per sample.
    self.tpFitnessScores = []
  def __init__(self,
               verbosity=1,
               numLabels=3,
               modelDir="ClassificationModelWindow",
               unionSparsity=0.20,
               retinaScaling=1.0,
               retina="en_associative",
               apiKey=None,
               classifierMetric="rawOverlap",
               cacheRoot=None):
    """Windowed Cortical.io model: kNN over unions of word fingerprints."""
    super(ClassificationModelWindows, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

    # Window patterns sparser than 90% of the union sparsity get skipped.
    self.minSparsity = 0.9 * unionSparsity

    self.classifier = KNNClassifier(
        k=numLabels, distanceMethod=classifierMetric, exact=False,
        verbosity=verbosity - 1)

    # Cortical.io encoder; a valid API key is required (see CioEncoder init).
    # Default the cache root to this module's directory.
    cacheRoot = cacheRoot or os.path.dirname(os.path.realpath(__file__))
    encoderArgs = {
        "retinaScaling": retinaScaling,
        "cacheDir": os.path.join(cacheRoot, "CioCache"),
        "fingerprintType": EncoderTypes.word,
        "unionSparsity": unionSparsity,
        "retina": retina,
        "apiKey": apiKey,
    }
    self.encoder = CioEncoder(**encoderArgs)
  def __init__(self, tmOverrides=None, upOverrides=None,
               classifierOverrides=None, seed=42, consoleVerbosity=0):
    """Build the TM -> Union Temporal Pooler -> KNN classifier pipeline.

    tmOverrides / upOverrides / classifierOverrides: dicts merged over the
    class-level DEFAULT_* parameter dicts (None means no overrides).
    seed: RNG seed applied to both the TM and the Union Pooler.
    consoleVerbosity: accepted but not used in this constructor.
    """
    print "Initializing Temporal Memory..."
    params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
    params.update(tmOverrides or {})
    params["seed"] = seed
    print "params: "
    print params
    self.tm = MonitoredFastExtendedTemporalMemory(mmName="TM", **params)

    print "Initializing Union Temporal Pooler..."
    start = time.time()
    params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
    params.update(upOverrides or {})
    # The pooler's input is the full TM cell population; every column may
    # potentially connect to all of it.
    params["inputDimensions"] = [self.tm.numberOfCells()]
    params["potentialRadius"] = self.tm.numberOfCells()
    params["seed"] = seed
    self.up = MonitoredUnionTemporalPooler(mmName="UP", **params)
    elapsed = int(time.time() - start)
    print "Total time: {0:2} seconds.".format(elapsed)

    print "Initializing KNN Classifier..."
    params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
    params.update(classifierOverrides or {})
    self.classifier = KNNClassifier(**params)
  def __init__(self,
               verbosity=1,
               numLabels=3,
               modelDir="ClassificationModelFingerprint",
               fingerprintType=EncoderTypes.word,
               unionSparsity=0.20,
               retinaScaling=1.0,
               retina="en_associative",
               apiKey=None,
               classifierMetric="rawOverlap",
               cacheRoot=None):
    """Init the kNN classifier and the Cortical.io fingerprint encoder.

    Raises ValueError unless fingerprintType is EncoderTypes.document or
    EncoderTypes.word. cacheRoot defaults to this module's directory.
    """
    super(ClassificationModelFingerprint, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

    # Init kNN classifier and Cortical.io encoder; need valid API key (see
    # CioEncoder init for details).
    self.classifier = KNNClassifier(k=numLabels,
                                    distanceMethod=classifierMetric,
                                    exact=False,
                                    verbosity=verbosity-1)

    # BUG FIX: the original `fingerprintType is (not ... or not ...)`
    # compared against a boolean, so invalid types were never rejected.
    if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
      raise ValueError("Invalid type of fingerprint encoding; see the "
                       "EncoderTypes class for eligible types.")

    cacheRoot = cacheRoot or os.path.dirname(os.path.realpath(__file__))

    self.encoder = CioEncoder(retinaScaling=retinaScaling,
                              cacheDir=os.path.join(cacheRoot, "CioCache"),
                              fingerprintType=fingerprintType,
                              unionSparsity=unionSparsity,
                              retina=retina,
                              apiKey=apiKey)
  def __init__(self,
               fingerprintType=EncoderTypes.word,
               unionSparsity=0.20,
               retinaScaling=1.0,
               retina="en_associative",
               apiKey=None,
               k=1,
               classifierMetric="rawOverlap",
               cacheRoot=None,
               **kwargs):
    """Init the kNN classifier and the Cortical.io fingerprint encoder.

    Remaining kwargs are forwarded to the base model. Raises ValueError
    unless fingerprintType is EncoderTypes.document or EncoderTypes.word.
    """
    super(ClassificationModelFingerprint, self).__init__(**kwargs)

    self.classifier = KNNClassifier(k=k,
                                    distanceMethod=classifierMetric,
                                    exact=False,
                                    verbosity=self.verbosity-1)

    # BUG FIX: the original `fingerprintType is (not ... or not ...)`
    # compared against a boolean, so invalid types were never rejected.
    if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
      raise ValueError("Invalid type of fingerprint encoding; see the "
                       "EncoderTypes class for eligible types.")

    # Need a valid API key for the Cortical.io encoder (see CioEncoder
    # constructor for details).
    self.encoder = CioEncoder(retinaScaling=retinaScaling,
                              fingerprintType=fingerprintType,
                              unionSparsity=unionSparsity,
                              retina=retina,
                              apiKey=apiKey,
                              cacheDir=cacheRoot)

    # Document being accumulated across calls, if the subclass uses it.
    self.currentDocument = None
  def __init__(self,
               verbosity=1,
               numLabels=3,
               modelDir="ClassificationModelFingerprint",
               fingerprintType=EncoderTypes.word,
               unionSparsity=20.0):
    """Init the kNN classifier and the Cortical.io fingerprint encoder.

    Raises ValueError unless fingerprintType is EncoderTypes.document or
    EncoderTypes.word.
    """
    super(ClassificationModelFingerprint, self).__init__(
      verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

    # Init kNN classifier and Cortical.io encoder; need valid API key (see
    # CioEncoder init for details).
    self.classifier = KNNClassifier(k=numLabels,
                                    distanceMethod='rawOverlap',
                                    exact=False,
                                    verbosity=verbosity-1)

    # BUG FIX: the original `fingerprintType is (not ... or not ...)`
    # compared against a boolean, so invalid types were never rejected.
    if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
      raise ValueError("Invalid type of fingerprint encoding; see the "
                       "EncoderTypes class for eligible types.")
    self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                              fingerprintType=fingerprintType,
                              unionSparsity=unionSparsity)

    # SDR sizing comes from the encoder: w is targetSparsity percent of n.
    self.n = self.encoder.n
    self.w = int((self.encoder.targetSparsity/100)*self.n)
Beispiel #23
0
    def testMinSparsity(self):
        """Tests overlap distance with min sparsity.

        Patterns below minSparsity must be ignored for both learning and
        inference. (assertEquals is a deprecated alias; assertEqual is used.)
        """
        # Require sparsity >= 20%
        params = {"distanceMethod": "rawOverlap", "minSparsity": 0.2}
        classifier = KNNClassifier(**params)

        dimensionality = 30
        a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
        b = np.array([2, 4, 8, 12, 14, 18, 20, 21, 28], dtype=np.int32)

        # This has 20% sparsity and should be inserted
        c = np.array([2, 3, 8, 11, 14, 18], dtype=np.int32)

        # This has 17% sparsity and should NOT be inserted
        d = np.array([2, 3, 8, 11, 18], dtype=np.int32)

        numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
        self.assertEqual(numPatterns, 1)

        numPatterns = classifier.learn(b, 1, isSparse=dimensionality)
        self.assertEqual(numPatterns, 2)

        numPatterns = classifier.learn(c, 1, isSparse=dimensionality)
        self.assertEqual(numPatterns, 3)

        # d is below minSparsity, so the pattern count must not grow.
        numPatterns = classifier.learn(d, 1, isSparse=dimensionality)
        self.assertEqual(numPatterns, 3)

        # Test that inference ignores low sparsity vectors but not others
        e = np.array([2, 4, 5, 6, 8, 12, 14, 18, 20], dtype=np.int32)
        dense = np.zeros(dimensionality)
        dense[e] = 1.0
        cat, inference, _, _ = classifier.infer(dense)
        self.assertIsNotNone(cat)
        self.assertGreater(inference.sum(), 0.0)

        # This has 20% sparsity and should be used for inference
        f = np.array([2, 5, 8, 11, 14, 18], dtype=np.int32)
        dense = np.zeros(dimensionality)
        dense[f] = 1.0
        cat, inference, _, _ = classifier.infer(dense)
        self.assertIsNotNone(cat)
        self.assertGreater(inference.sum(), 0.0)

        # This has 17% sparsity and should return null inference results
        g = np.array([2, 3, 8, 11, 19], dtype=np.int32)
        dense = np.zeros(dimensionality)
        dense[g] = 1.0
        cat, inference, _, _ = classifier.infer(dense)
        self.assertIsNone(cat)
        self.assertEqual(inference.sum(), 0.0)
Beispiel #24
0
    def __init__(self,
                 n=100,
                 w=20,
                 verbosity=1,
                 classifierMetric="rawOverlap",
                 k=1,
                 **kwargs):
        """Keywords model: exact-match kNN; extra kwargs go to the base."""
        super(ClassificationModelKeywords, self).__init__(**kwargs)

        # Classifier verbosity runs one level below the model's.
        self.classifier = KNNClassifier(
            exact=True, distanceMethod=classifierMetric, k=k,
            verbosity=verbosity - 1)

        # SDR dimensions: n total bits, w active bits.
        self.n = n
        self.w = w
Beispiel #25
0
    def __init__(self, netInfo, options, baseline, logNames):
        """ Instantiate data structures for calculating the knn regression stats on
    a given data set.

    Parameters:
    ------------------------------------------------------------
    netInfo:      trained network info
    options:      object containing all "command-line" options for post-processsing
    baseline:     dictionary of information from the corresponding baseline test set
                    if any, or None
    logNames:     Names of the available log files
    """

        # -----------------------------------------------------------------------
        # Get info about the network
        self.netInfo = netInfo
        self.options = options
        self.verbosity = self.options['verbosity']
        #self.verbosity = 3    # Uncomment for verbosity only in this module

        # The 'knnRegression' option encodes both the mode and the field,
        # e.g. "train,FIELD" or "test,FIELD".
        self.computeMode, self.regressionField = \
                    self._getComputeModeAndRegressionField(options['knnRegression'])

        # If testing, make sure we have a trained classifier state
        if self.computeMode == 'test':
            self.training = False
            # Trained state is shared across instances via a module-level global.
            global gTrainedKNNClassifierState
            if gTrainedKNNClassifierState is None:
                print "\nWARNING: You are using the option 'knnRegression=test,FIELD', "\
                      " but haven't trained the classifier using "\
                      " 'knnRegression=train,FIELD', "\
                      " No regression will be performed."
            else:
                if gTrainedKNNClassifierState[
                        'regressionField'] != self.regressionField:
                    print "\nWARNING: You are using different regression fields for testing,"\
                          " and training. No regression will be performed."
            self.trainedState = gTrainedKNNClassifierState
        else:
            self.training = True
            # Fresh training state: a 1-NN classifier plus a category->value map.
            self.trainedState = dict()
            self.trainedState['classifier'] = KNNClassifier(
                k=1, distanceNorm=1.0, distThreshold=0.0, useSparseMemory=True)
            self.trainedState['regressionField'] = self.regressionField
            self.trainedState['categoryMap'] = []

        # -----------------------------------------------------------------------
        # Init variables
        self.sourceFieldNames = self.netInfo['encoder'].getScalarNames()
        self.numFields = len(self.sourceFieldNames)
        # Index of the regression field within the encoder's scalar fields.
        self.regressionFieldIdx = self.sourceFieldNames.index(
            self.regressionField)

        # Accumulated closeness scores
        self.numSamples = 0
        self.sourceClosenessSum = 0.0
        self.absSourceClosenessSum = 0.0
        self.rmseSourceClosenessSum = 0.0
Beispiel #26
0
def simulateCategories(numSamples=100, numDimensions=500):
    """Simulate running KNN classifier on many disjoint categories.

    Categories are the even numbers in [50, 148]; each training/test vector is
    generated from its category via createPattern. Returns (failures, knn)
    where failures is a newline-separated string of failure descriptions
    (empty on success).
    """
    failures = ""
    LOGGER.info(
        "Testing the sparse KNN Classifier on many disjoint categories")
    knn = KNNClassifier(k=1, distanceNorm=1.0, useSparseMemory=True)

    for i in range(0, numSamples):

        # select category randomly and generate vector
        c = 2 * numpy.random.randint(0, 50) + 50
        v = createPattern(c, numDimensions)
        knn.learn(v, c)

    # Go through each category and ensure we have at least one from each!
    for i in range(0, 50):
        c = 2 * i + 50
        v = createPattern(c, numDimensions)
        knn.learn(v, c)

    # Every fresh sample must be inferred back to its own category.
    errors = 0
    for i in range(0, numSamples):

        # select category randomly and generate vector
        c = 2 * numpy.random.randint(0, 50) + 50
        v = createPattern(c, numDimensions)

        inferCat, _kir, _kd, _kcd = knn.infer(v)
        if inferCat != c:
            LOGGER.info("Mistake with %s %s %s %s %s", v[v.nonzero()], \
              "mapped to category", inferCat, "instead of category", c)
            LOGGER.info("   %s", v.nonzero())
            errors += 1
    if errors != 0:
        failures += "Failure in handling non-consecutive category indices\n"

    # Test closest methods
    errors = 0
    for i in range(0, 10):

        # select category randomly and generate vector
        c = 2 * numpy.random.randint(0, 50) + 50
        v = createPattern(c, numDimensions)

        # The closest stored pattern for category c must include bit c.
        p = knn.closestTrainingPattern(v, c)
        if not (c in p.nonzero()[0]):
            LOGGER.info("Mistake %s %s", p.nonzero(), v.nonzero())
            LOGGER.info("%s %s", p[p.nonzero()], v[v.nonzero()])
            errors += 1

    if errors != 0:
        failures += "Failure in closestTrainingPattern method\n"

    return failures, knn
Beispiel #27
0
  def testPartitionId(self):
    """Tests that partitionId properly excludes training data points.

    Inferring with a partitionId must ignore patterns learned under that
    same id. (assertEquals is a deprecated alias; assertEqual is used.)
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    denseB = np.zeros(dimensionality)
    denseB[b] = 1.0

    classifier.learn(a, 0, isSparse=dimensionality, partitionId=0)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=1)

    cat, _, _, _ = classifier.infer(denseA, partitionId=1)
    self.assertEqual(cat, 0)

    # Excluding partition 0 removes pattern a, so b's category wins.
    cat, _, _, _ = classifier.infer(denseA, partitionId=0)
    self.assertEqual(cat, 1)

    cat, _, _, _ = classifier.infer(denseB, partitionId=0)
    self.assertEqual(cat, 1)

    cat, _, _, _ = classifier.infer(denseB, partitionId=1)
    self.assertEqual(cat, 0)
  def __init__(self, verbosity=1):
    """Fingerprint model: kNN over Cortical.io term fingerprints."""
    super(ClassificationModelFingerprint, self).__init__(verbosity)

    # kNN classifier plus the Cortical.io encoder; a valid API key is
    # required (see CioEncoder init for details).
    self.classifier = KNNClassifier(k=1, exact=False,
                                    verbosity=verbosity - 1)
    self.encoder = CioEncoder(cacheDir="./experiments/cache")

    # SDR sizing from the encoder: w is targetSparsity percent of n.
    self.n = self.encoder.n
    self.w = int((self.encoder.targetSparsity / 100) * self.n)
Beispiel #29
0
  def testOverlapDistanceMethodInconsistentDimensionality(self):
    """Inconsistent sparsity (input dimensionality).

    Learning with a smaller declared dimensionality than is later used for
    inference must still classify correctly.
    (assertEquals is a deprecated unittest alias; assertEqual is used.)
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)

    # Learn with incorrect dimensionality, greater than largest ON bit, but
    # inconsistent when inferring
    numPatterns = classifier.learn(a, 0, isSparse=31)
    self.assertEqual(numPatterns, 1)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    cat, _, _, _ = classifier.infer(denseA)
    self.assertEqual(cat, 0)
Beispiel #30
0
  def testMinSparsity(self):
    """Tests overlap distance with min sparsity.

    Patterns below minSparsity must be ignored for both learning and
    inference. (assertEquals is a deprecated alias; assertEqual is used.)
    """
    # Require sparsity >= 20%
    params = {"distanceMethod": "rawOverlap", "minSparsity": 0.2}
    classifier = KNNClassifier(**params)

    dimensionality = 30
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 21, 28], dtype=np.int32)

    # This has 20% sparsity and should be inserted
    c = np.array([2, 3, 8, 11, 14, 18], dtype=np.int32)

    # This has 17% sparsity and should NOT be inserted
    d = np.array([2, 3, 8, 11, 18], dtype=np.int32)

    numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
    self.assertEqual(numPatterns, 1)

    numPatterns = classifier.learn(b, 1, isSparse=dimensionality)
    self.assertEqual(numPatterns, 2)

    numPatterns = classifier.learn(c, 1, isSparse=dimensionality)
    self.assertEqual(numPatterns, 3)

    # d is below minSparsity, so the pattern count must not grow.
    numPatterns = classifier.learn(d, 1, isSparse=dimensionality)
    self.assertEqual(numPatterns, 3)

    # Test that inference ignores low sparsity vectors but not others
    e = np.array([2, 4, 5, 6, 8, 12, 14, 18, 20], dtype=np.int32)
    dense = np.zeros(dimensionality)
    dense[e] = 1.0
    cat, inference, _, _ = classifier.infer(dense)
    self.assertIsNotNone(cat)
    self.assertGreater(inference.sum(), 0.0)

    # This has 20% sparsity and should be used for inference
    f = np.array([2, 5, 8, 11, 14, 18], dtype=np.int32)
    dense = np.zeros(dimensionality)
    dense[f] = 1.0
    cat, inference, _, _ = classifier.infer(dense)
    self.assertIsNotNone(cat)
    self.assertGreater(inference.sum(), 0.0)

    # This has 17% sparsity and should return null inference results
    g = np.array([2, 3, 8, 11, 19], dtype=np.int32)
    dense = np.zeros(dimensionality)
    dense[g] = 1.0
    cat, inference, _, _ = classifier.infer(dense)
    self.assertIsNone(cat)
    self.assertEqual(inference.sum(), 0.0)
  def __init__(self, verbosity=1):
    """Random-SDR model: exact-match kNN over fixed-size random SDRs."""
    super(ClassificationModelRandomSDR, self).__init__(verbosity)

    # kNN notes: pass distanceMethod='rawOverlap' for overlap distance
    # (Euclidean is the default); k defaults to 1; classifier verbosity is
    # one level below the model's.
    self.classifier = KNNClassifier(exact=True, verbosity=verbosity - 1)

    # SDR dimensions: n total bits, w active bits.
    self.n = 100
    self.w = 20
Beispiel #32
0
def simulateCategories(numSamples=100, numDimensions=500):
  """Simulate running KNN classifier on many disjoint categories"""

  failures = ""
  LOGGER.info("Testing the sparse KNN Classifier on many disjoint categories")
  knn = KNNClassifier(k=1, distanceNorm=1.0, useSparseMemory=True)

  # Train on vectors drawn from randomly chosen category labels
  # (labels are of the form 2*r + 50 for r in [0, 50)).
  for _ in range(0, numSamples):
    category = 2 * numpy.random.randint(0, 50) + 50
    pattern = createPattern(category, numDimensions)
    knn.learn(pattern, category)

  # Make sure every one of the 50 categories has at least one exemplar.
  for idx in range(0, 50):
    category = 2 * idx + 50
    pattern = createPattern(category, numDimensions)
    knn.learn(pattern, category)

  # Infer on fresh random vectors and count misclassifications.
  errorCount = 0
  for _ in range(0, numSamples):
    category = 2 * numpy.random.randint(0, 50) + 50
    pattern = createPattern(category, numDimensions)

    inferredCategory, _kir, _kd, _kcd = knn.infer(pattern)
    if inferredCategory != category:
      LOGGER.info("Mistake with %s %s %s %s %s", pattern[pattern.nonzero()], \
        "mapped to category", inferredCategory, "instead of category", category)
      LOGGER.info("   %s", pattern.nonzero())
      errorCount += 1
  if errorCount != 0:
    failures += "Failure in handling non-consecutive category indices\n"

  # Exercise closestTrainingPattern on a handful of random vectors.
  errorCount = 0
  for _ in range(0, 10):
    category = 2 * numpy.random.randint(0, 50) + 50
    pattern = createPattern(category, numDimensions)

    closest = knn.closestTrainingPattern(pattern, category)
    if category not in closest.nonzero()[0]:
      LOGGER.info("Mistake %s %s", closest.nonzero(), pattern.nonzero())
      LOGGER.info("%s %s", closest[closest.nonzero()], pattern[pattern.nonzero()])
      errorCount += 1

  if errorCount != 0:
    failures += "Failure in closestTrainingPattern method\n"

  return failures, knn
    def __init__(self, n=100, w=20, verbosity=1, numLabels=3, modelDir="ClassificationModelKeywords"):
        """
        Initialize the keywords model with an exact-match kNN classifier.

        @param n         (int) Total bits in each SDR.
        @param w         (int) Number of active bits per SDR.
        @param verbosity (int) Console output level.
        @param numLabels (int) Number of classification labels; also used as k.
        @param modelDir  (str) Directory for model persistence.
        """
        super(ClassificationModelKeywords, self).__init__(
            verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

        # We use the pctOverlapOfInput distance metric for this model so the
        # queryModel() output is consistent (i.e. 0.0-1.0). The KNN classifications
        # aren't affected b/c the raw overlap distance is still used under the hood.
        self.classifier = KNNClassifier(exact=True,
                                        distanceMethod="pctOverlapOfInput",
                                        k=numLabels,
                                        verbosity=verbosity - 1)

        # SDR dimensions.
        self.n = n
        self.w = w
  def __init__(self,
               n=100,
               w=20,
               verbosity=1,
               numLabels=3,
               modelDir="ClassificationModelKeywords"):
    """
    Initialize the keywords model with an exact-match raw-overlap kNN.

    @param n         (int) Total bits in each SDR.
    @param w         (int) Number of active bits per SDR.
    @param verbosity (int) Console output level.
    @param numLabels (int) Number of classification labels; also used as k.
    @param modelDir  (str) Directory for model persistence.
    """
    super(ClassificationModelKeywords, self).__init__(
        n, w, verbosity=verbosity, numLabels=numLabels, modelDir=modelDir)

    # Exact matching with raw overlap distance; classifier logs one
    # verbosity level quieter than the model.
    self.classifier = KNNClassifier(k=numLabels,
                                    exact=True,
                                    distanceMethod='rawOverlap',
                                    verbosity=verbosity - 1)
Beispiel #35
0
    def __init__(self,
                 verbosity=1,
                 numLabels=3,
                 modelDir="ClassificationModelFingerprint",
                 fingerprintType=EncoderTypes.word,
                 unionSparsity=0.20,
                 retinaScaling=1.0,
                 retina="en_associative",
                 apiKey=None,
                 classifierMetric="rawOverlap",
                 cacheRoot=None):
        """
        Initialize the fingerprint model: a kNN classifier over Cortical.io
        fingerprint encodings.

        @param verbosity        (int)   Console output level.
        @param numLabels        (int)   Number of labels; also used as k.
        @param modelDir         (str)   Directory for model persistence.
        @param fingerprintType  (Enum)  EncoderTypes.word or EncoderTypes.document.
        @param unionSparsity    (float) Sparsity of unioned encodings.
        @param retinaScaling    (float) Scales the retina dimensions.
        @param retina           (str)   Cortical.io retina name.
        @param apiKey           (str)   Cortical.io API key.
        @param classifierMetric (str)   Distance metric for the kNN.
        @param cacheRoot        (str)   Root dir for the encoder cache; defaults
                                        to this file's directory.
        @raises ValueError if fingerprintType is not a supported encoder type.
        """
        super(ClassificationModelFingerprint,
              self).__init__(verbosity=verbosity,
                             numLabels=numLabels,
                             modelDir=modelDir)

        # Init kNN classifier and Cortical.io encoder; need valid API key (see
        # CioEncoder init for details).
        self.classifier = KNNClassifier(k=numLabels,
                                        distanceMethod=classifierMetric,
                                        exact=False,
                                        verbosity=verbosity - 1)

        # Bug fix: the original expression
        #   fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)
        # reduced to `fingerprintType is False`, so invalid types were never
        # rejected. Use a proper membership test instead.
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligible types.")

        cacheRoot = cacheRoot or os.path.dirname(os.path.realpath(__file__))

        self.encoder = CioEncoder(retinaScaling=retinaScaling,
                                  cacheDir=os.path.join(cacheRoot, "CioCache"),
                                  fingerprintType=fingerprintType,
                                  unionSparsity=unionSparsity,
                                  retina=retina,
                                  apiKey=apiKey)
Beispiel #36
0
    def __init__(self,
                 tmOverrides=None,
                 upOverrides=None,
                 classifierOverrides=None,
                 seed=42,
                 consoleVerbosity=0):
        """
        Build the Temporal Memory -> Union Pooler -> KNN classifier network.

        @param tmOverrides         (dict) Overrides merged on top of
                                          DEFAULT_TEMPORAL_MEMORY_PARAMS
        @param upOverrides         (dict) Overrides merged on top of
                                          DEFAULT_UNION_POOLER_PARAMS
        @param classifierOverrides (dict) Overrides merged on top of
                                          DEFAULT_CLASSIFIER_PARAMS
        @param seed                (int)  RNG seed forced into TM and UP params
                                          (overwrites any seed in the overrides)
        @param consoleVerbosity    (int)  Not referenced in this constructor
        """
        print "Initializing Temporal Memory..."
        params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
        params.update(tmOverrides or {})
        # Force the shared seed so TM and UP are reproducible together.
        params["seed"] = seed
        self.tm = MonitoredFastGeneralTemporalMemory(mmName="TM", **params)

        print "Initializing Union Pooler..."
        params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
        params.update(upOverrides or {})
        # The pooler's input space is the TM's full cell population.
        params["inputDimensions"] = [self.tm.numberOfCells()]
        params["potentialRadius"] = self.tm.numberOfCells()
        params["seed"] = seed
        self.up = MonitoredUnionPooler(mmName="UP", **params)

        print "Initializing KNN Classifier..."
        params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
        params.update(classifierOverrides or {})
        self.classifier = KNNClassifier(**params)
  def __init__(self,
               n=100,
               w=20,
               verbosity=1,
               classifierMetric="rawOverlap",
               k=1,
               **kwargs
               ):
    """
    Keyword-SDR classification model backed by an exact-match kNN.

    @param n                (int) Total bits in each SDR.
    @param w                (int) Number of active bits per SDR.
    @param verbosity        (int) Console output level.
    @param classifierMetric (str) Distance metric passed to the kNN.
    @param k                (int) Number of nearest neighbors.
    Any remaining kwargs are forwarded to the base class.
    """
    super(ClassificationModelKeywords, self).__init__(**kwargs)

    # Exact matching on binary SDRs; classifier logs one level quieter.
    self.classifier = KNNClassifier(k=k,
                                    exact=True,
                                    distanceMethod=classifierMetric,
                                    verbosity=verbosity - 1)

    # SDR dimensions.
    self.n = n
    self.w = w
Beispiel #38
0
  def testOverlapDistanceMethodStandardUnsorted(self):
    """If sparse representation indices are unsorted expect error."""
    classifier = KNNClassifier(distanceMethod="rawOverlap")

    dimensionality = 40
    # Both patterns deliberately have out-of-order indices.
    unsortedPatterns = [
        (np.array([29, 3, 7, 11, 13, 17, 19, 23, 1], dtype=np.int32), 0),
        (np.array([2, 4, 20, 12, 14, 18, 8, 28, 30], dtype=np.int32), 1),
    ]

    # learn() must reject sparse input whose indices are not ascending.
    for pattern, category in unsortedPatterns:
      with self.assertRaises(RuntimeError):
        classifier.learn(pattern, category, isSparse=dimensionality)
Beispiel #39
0
  def testOverlapDistanceMethod_ClassifySparse(self):
    """Sparse inputs learned with rawOverlap are classified correctly."""
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    classifier.learn(a, 0, isSparse=dimensionality)
    classifier.learn(b, 1, isSparse=dimensionality)

    # TODO Test case where infer is passed a sparse representation after
    # infer() has been extended to handle sparse and dense

    # Fix: use assertEqual; assertEquals is a deprecated unittest alias.
    cat, _, _, _ = classifier.infer(a)
    self.assertEqual(cat, 0)

    cat, _, _, _ = classifier.infer(b)
    self.assertEqual(cat, 1)
Beispiel #40
0
  def testOverlapDistanceMethodStandard(self):
    """Tests standard learning case for raw overlap"""
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    # Fix: use assertEqual; assertEquals is a deprecated unittest alias.
    numPatterns = classifier.learn(a, 0, isSparse=dimensionality)
    self.assertEqual(numPatterns, 1)

    numPatterns = classifier.learn(b, 1, isSparse=dimensionality)
    self.assertEqual(numPatterns, 2)

    # Inference currently takes dense vectors; expand each sparse pattern.
    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0
    cat, _, _, _ = classifier.infer(denseA)
    self.assertEqual(cat, 0)

    denseB = np.zeros(dimensionality)
    denseB[b] = 1.0
    cat, _, _, _ = classifier.infer(denseB)
    self.assertEqual(cat, 1)
Beispiel #41
0
  def testSparsifyVector(self):
    """Exercise every decisional branch of KNNClassifier._sparsifyVector."""
    rawPattern = np.array([0, 1, 3, 7, 11], dtype=np.int32)

    # Covers: if not self.relativeThreshold: -> input passes through unchanged
    classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0)
    sparsified = classifier._sparsifyVector(rawPattern, doWinners=True)
    self.assertTrue(np.array_equal(
      np.array([0, 1, 3, 7, 11], dtype=np.int32), sparsified))

    # Covers: elif self.sparseThreshold > 0:
    classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0,
                               relativeThreshold=True, sparseThreshold=.2)
    sparsified = classifier._sparsifyVector(rawPattern, doWinners=True)
    self.assertTrue(np.array_equal(
      np.array([0, 0, 3, 7, 11], dtype=np.int32), sparsified))

    # Covers: if doWinners: (numWinners limits surviving elements)
    classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0,
                               relativeThreshold=True, sparseThreshold=.2,
                               numWinners=2)
    sparsified = classifier._sparsifyVector(rawPattern, doWinners=True)
    self.assertTrue(np.array_equal(
      np.array([0, 0, 0, 0, 0], dtype=np.int32), sparsified))

    # Covers: the binarization step (surviving values become 1.0)
    classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0,
                               relativeThreshold=True, sparseThreshold=.2,
                               doBinarization=True)
    sparsified = classifier._sparsifyVector(rawPattern, doWinners=True)
    self.assertTrue(np.array_equal(
      np.array([0., 0., 1., 1., 1.], dtype=np.float32), sparsified))
Beispiel #42
0
class UnionTemporalPoolerExperiment(object):
    """
  This class defines a Temporal Memory-Union Temporal Pooler network and provides methods
  to run the network on data sequences.
  """

    # Temporal Memory defaults (column/cell geometry and permanence learning
    # rates); individual keys can be overridden via the tmOverrides ctor arg.
    DEFAULT_TEMPORAL_MEMORY_PARAMS = {
        "columnCount": 1024,
        "cellsPerColumn": 8,
        "activationThreshold": 20,
        "initialPermanence": 0.5,
        "connectedPermanence": 0.6,
        "minThreshold": 20,
        "sampleSize": 30,
        "permanenceIncrement": 0.10,
        "permanenceDecrement": 0.02,
        "seed": 42,
        "learnOnOneCell": False
    }

    # Union Temporal Pooler defaults; overridable via the upOverrides ctor arg.
    DEFAULT_UNION_POOLER_PARAMS = {  # Spatial Pooler Params
        # inputDimensions set to TM cell count
        # potentialRadius set to TM cell count
        "columnDimensions": [1024],
        "numActiveColumnsPerInhArea": 20,
        "stimulusThreshold": 0,
        "synPermInactiveDec": 0.01,
        "synPermActiveInc": 0.1,
        "synPermConnected": 0.1,
        "potentialPct": 0.5,
        "globalInhibition": True,
        "localAreaDensity": -1,
        "minPctOverlapDutyCycle": 0.001,
        "dutyCyclePeriod": 1000,
        "boostStrength": 10.0,
        "seed": 42,
        "spVerbosity": 0,
        "wrapAround": True,

        # Union Temporal Pooler Params
        "activeOverlapWeight": 1.0,
        "predictedActiveOverlapWeight": 10.0,
        "maxUnionActivity": 0.20,
        "exciteFunctionType": 'Fixed',
        "decayFunctionType": 'NoDecay'
    }

    # KNN classifier defaults; overridable via the classifierOverrides ctor arg.
    DEFAULT_CLASSIFIER_PARAMS = {
        "k": 1,
        "distanceMethod": "rawOverlap",
        "distThreshold": 0
    }

    def __init__(self,
                 tmOverrides=None,
                 upOverrides=None,
                 classifierOverrides=None,
                 seed=42,
                 consoleVerbosity=0):
        """
        Build the TM -> Union Temporal Pooler -> KNN classifier network.

        @param tmOverrides         (dict) Overrides for DEFAULT_TEMPORAL_MEMORY_PARAMS
        @param upOverrides         (dict) Overrides for DEFAULT_UNION_POOLER_PARAMS
        @param classifierOverrides (dict) Overrides for DEFAULT_CLASSIFIER_PARAMS
        @param seed                (int)  RNG seed forced into TM and UP params
        @param consoleVerbosity    (int)  Not referenced in this constructor
        """
        print "Initializing Temporal Memory..."
        params = dict(self.DEFAULT_TEMPORAL_MEMORY_PARAMS)
        params.update(tmOverrides or {})
        # Force the shared seed so TM and UP are reproducible together.
        params["seed"] = seed
        print "params: "
        print params
        self.tm = MonitoredFastExtendedTemporalMemory(mmName="TM", **params)

        print "Initializing Union Temporal Pooler..."
        start = time.time()
        params = dict(self.DEFAULT_UNION_POOLER_PARAMS)
        params.update(upOverrides or {})
        # The pooler's input space is the TM's full cell population.
        params["inputDimensions"] = [self.tm.numberOfCells()]
        params["potentialRadius"] = self.tm.numberOfCells()
        params["seed"] = seed
        self.up = MonitoredUnionTemporalPooler(mmName="UP", **params)
        elapsed = int(time.time() - start)
        print "Total time: {0:2} seconds.".format(elapsed)

        print "Initializing KNN Classifier..."
        params = dict(self.DEFAULT_CLASSIFIER_PARAMS)
        params.update(classifierOverrides or {})
        self.classifier = KNNClassifier(**params)

    def runNetworkOnSequences(self,
                              inputSequences,
                              inputCategories,
                              tmLearn=True,
                              upLearn=None,
                              classifierLearn=False,
                              verbosity=0,
                              progressInterval=None):
        """
    Runs Union Temporal Pooler network on specified sequence.

    @param inputSequences           One or more sequences of input patterns.
                                    Each should be terminated with None.

    @param inputCategories          A sequence of category representations
                                    for each element in inputSequences
                                    Each should be terminated with None.

    @param tmLearn:   (bool)        Temporal Memory learning mode
    @param upLearn:   (None, bool)  Union Temporal Pooler learning mode. If None,
                                    Union Temporal Pooler will not be run.
    @param classifierLearn: (bool)  Classifier learning mode

    @param progressInterval: (int)  Interval of console progress updates
                                    in terms of timesteps.
    """

        currentTime = time.time()
        for i in xrange(len(inputSequences)):
            sensorPattern = inputSequences[i]
            inputCategory = inputCategories[i]

            self.runNetworkOnPattern(sensorPattern,
                                     tmLearn=tmLearn,
                                     upLearn=upLearn,
                                     sequenceLabel=inputCategory)

            # A None sensorPattern is a sequence terminator (resets the
            # network), so it carries nothing to train the classifier on.
            if classifierLearn and sensorPattern is not None:
                unionSDR = self.up.getUnionSDR()
                upCellCount = self.up.getColumnDimensions()
                self.classifier.learn(unionSDR,
                                      inputCategory,
                                      isSparse=upCellCount)
                if verbosity > 1:
                    pprint.pprint("{0} is category {1}".format(
                        unionSDR, inputCategory))

            # Periodic progress report plus TM metrics.
            if progressInterval is not None and i > 0 and i % progressInterval == 0:
                elapsed = (time.time() - currentTime) / 60.0
                print(
                    "Ran {0} / {1} elements of sequence in "
                    "{2:0.2f} minutes.".format(i, len(inputSequences),
                                               elapsed))
                currentTime = time.time()
                print MonitorMixinBase.mmPrettyPrintMetrics(
                    self.tm.mmGetDefaultMetrics())

        if verbosity >= 2:
            traces = self.tm.mmGetDefaultTraces(verbosity=verbosity)
            print MonitorMixinBase.mmPrettyPrintTraces(
                traces, breakOnResets=self.tm.mmGetTraceResets())

            if upLearn is not None:
                traces = self.up.mmGetDefaultTraces(verbosity=verbosity)
                print MonitorMixinBase.mmPrettyPrintTraces(
                    traces, breakOnResets=self.up.mmGetTraceResets())
            print

    def runNetworkOnPattern(self,
                            sensorPattern,
                            tmLearn=True,
                            upLearn=None,
                            sequenceLabel=None):
        """
        Feed one pattern through TM (and UP, when upLearn is not None).
        A None sensorPattern resets both TM and UP instead.
        """
        if sensorPattern is None:
            self.tm.reset()
            self.up.reset()
        else:
            self.tm.compute(sensorPattern,
                            learn=tmLearn,
                            sequenceLabel=sequenceLabel)

            if upLearn is not None:
                activeCells, predActiveCells, burstingCols, = self.getUnionTemporalPoolerInput(
                )
                self.up.compute(activeCells,
                                predActiveCells,
                                learn=upLearn,
                                sequenceLabel=sequenceLabel)

    def getUnionTemporalPoolerInput(self):
        """
    Gets the Union Temporal Pooler input from the Temporal Memory
    """
        # Dense binary vector over all TM cells marking the active ones.
        activeCells = numpy.zeros(self.tm.numberOfCells()).astype(realDType)
        activeCells[list(self.tm.activeCellsIndices())] = 1

        # Dense binary vector marking cells that were predicted and then active.
        predictedActiveCells = numpy.zeros(
            self.tm.numberOfCells()).astype(realDType)
        predictedActiveCells[list(self.tm.predictedActiveCellsIndices())] = 1

        # Dense binary vector over columns marking unpredicted-active (bursting)
        # columns.
        burstingColumns = numpy.zeros(
            self.tm.numberOfColumns()).astype(realDType)
        burstingColumns[list(self.tm.unpredictedActiveColumns)] = 1

        return activeCells, predictedActiveCells, burstingColumns

    def getBurstingColumnsStats(self):
        """
    Gets statistics on the Temporal Memory's bursting columns. Used as a metric
    of Temporal Memory's learning performance.
    :return: mean, standard deviation, and max of Temporal Memory's bursting
    columns over time
    """
        traceData = self.tm.mmGetTraceUnpredictedActiveColumns().data
        resetData = self.tm.mmGetTraceResets().data
        countTrace = []
        # Skip timesteps that coincide with a sequence reset.
        for x in xrange(len(traceData)):
            if not resetData[x]:
                countTrace.append(len(traceData[x]))

        mean = numpy.mean(countTrace)
        stdDev = numpy.std(countTrace)
        maximum = max(countTrace)
        return mean, stdDev, maximum
Beispiel #43
0
  def testPartitionIdExcluded(self):
    """
    Tests that paritionId properly excludes training data points during
    inference
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    denseB = np.zeros(dimensionality)
    denseB[b] = 1.0

    classifier.learn(a, 0, isSparse=dimensionality, partitionId=0)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=1)

    # Fix: use assertEqual; assertEquals is a deprecated unittest alias.
    # Excluding partition 1 leaves pattern a -> category 0.
    cat, _, _, _ = classifier.infer(denseA, partitionId=1)
    self.assertEqual(cat, 0)

    # Excluding partition 0 removes pattern a, so b's category wins.
    cat, _, _, _ = classifier.infer(denseA, partitionId=0)
    self.assertEqual(cat, 1)

    cat, _, _, _ = classifier.infer(denseB, partitionId=0)
    self.assertEqual(cat, 1)

    cat, _, _, _ = classifier.infer(denseB, partitionId=1)
    self.assertEqual(cat, 0)

    # Ensure it works even if you invoke learning again. To make it a bit more
    # complex this time we insert A again but now with Id=2
    classifier.learn(a, 0, isSparse=dimensionality, partitionId=2)

    # Even though first A should be ignored, the second instance of A should
    # not be ignored.
    cat, _, _, _ = classifier.infer(denseA, partitionId=0)
    self.assertEqual(cat, 0)
Beispiel #44
0
def simulateKMoreThanOne():
  """A small test with k=3"""

  failures = ""
  LOGGER.info("Testing the sparse KNN Classifier with k=3")
  knn = KNNClassifier(k=3)

  # Six 2-d training points: indices 0-2 labeled category 0, 3-5 category 1.
  samples = numpy.zeros((6, 2))
  samples[0] = [1.0, 0.0]
  samples[1] = [1.0, 0.2]
  samples[2] = [1.0, 0.2]
  samples[3] = [1.0, 2.0]
  samples[4] = [1.0, 4.0]
  samples[5] = [1.0, 4.5]
  for idx in range(6):
    knn.learn(samples[idx], 0 if idx < 3 else 1)

  # Expected winners: samples 0, 2, and 3 resolve to category 0 under k=3
  # voting; sample 5 resolves to category 1.
  for sampleIdx, expectedWinner in [(0, 0), (2, 0), (3, 0), (5, 1)]:
    winner, _inferenceResult, _dist, _categoryDist = knn.infer(
        samples[sampleIdx])
    if winner != expectedWinner:
      failures += "Inference failed with k=3\n"

  if not failures:
    LOGGER.info("Tests passed.")

  return failures
Beispiel #45
0
  def runTestPCAKNN(self, short=0):
    """
    Compare a PCA-projected k-NN against a plain k-NN on generated data and
    assert that both produce the same winners.
    """
    LOGGER.info('\nTesting PCA/k-NN classifier')
    LOGGER.info('Mode=%s', short)

    # Problem sizes for the synthetic data generator.
    numDims = 10
    numClasses = 10
    k = 10
    numPatternsPerClass = 100
    numPatterns = int(.9 * numClasses * numPatternsPerClass)
    numTests = numClasses * numPatternsPerClass - numPatterns
    numSVDSamples = int(.1 * numPatterns)
    keep = 1

    train_data, train_class, test_data, test_class = \
        pca_knn_data.generate(numDims, numClasses, k, numPatternsPerClass,
                              numPatterns, numTests, numSVDSamples, keep)

    pcaKnn = KNNClassifier(k=k, numSVDSamples=numSVDSamples,
                           numSVDDims=keep)
    plainKnn = KNNClassifier(k=k)

    LOGGER.info('Training PCA k-NN')

    for idx in range(numPatterns):
      plainKnn.learn(train_data[idx], train_class[idx])
      pcaKnn.learn(train_data[idx], train_class[idx])

    LOGGER.info('Testing PCA k-NN')

    # Tally outright errors and any disagreement between the two classifiers.
    numWinnerFailures = 0
    numInferenceFailures = 0
    numDistFailures = 0
    numAbsErrors = 0

    for idx in range(numTests):
      winner, inference, dist, _categoryDist = plainKnn.infer(test_data[idx])
      pcaWinner, pcaInference, pcaDist, _pcaCategoryDist = \
          pcaKnn.infer(test_data[idx])

      if winner != test_class[idx]:
        numAbsErrors += 1

      if pcaWinner != winner:
        numWinnerFailures += 1

      if (numpy.abs(pcaInference - inference) > 1e-4).any():
        numInferenceFailures += 1

      if (numpy.abs(pcaDist - dist) > 1e-4).any():
        numDistFailures += 1

    # Convert failure counts into success percentages.
    s0 = 100 * float(numTests - numAbsErrors) / float(numTests)
    s1 = 100 * float(numTests - numWinnerFailures) / float(numTests)
    s2 = 100 * float(numTests - numInferenceFailures) / float(numTests)
    s3 = 100 * float(numTests - numDistFailures) / float(numTests)

    LOGGER.info('PCA/k-NN success rate=%s%s', s0, '%')
    LOGGER.info('Winner success=%s%s', s1, '%')
    LOGGER.info('Inference success=%s%s', s2, '%')
    LOGGER.info('Distance success=%s%s', s3, '%')

    self.assertEqual(s1, 100.0,
      "PCA/k-NN test failed")
Beispiel #46
0
  def testGetPartitionId(self):
    """
    Test a sequence of calls to KNN to ensure we can retrieve partition Id:
        - We first learn on some patterns (including one pattern with no
          partitionId in the middle) and test that we can retrieve Ids.
        - We then invoke inference and then check partitionId again.
        - We check incorrect indices to ensure we get an exception.
        - We check the case where the partitionId to be ignored is not in
          the list.
        - We learn on one more pattern and check partitionIds again
        - We remove rows and ensure partitionIds still work
    """
    params = {"distanceMethod": "rawOverlap"}
    classifier = KNNClassifier(**params)

    dimensionality = 40
    a = np.array([1, 3, 7, 11, 13, 17, 19, 23, 29], dtype=np.int32)
    b = np.array([2, 4, 8, 12, 14, 18, 20, 28, 30], dtype=np.int32)
    c = np.array([1, 2, 3, 14, 16, 19, 22, 24, 33], dtype=np.int32)
    d = np.array([2, 4, 8, 12, 14, 19, 22, 24, 33], dtype=np.int32)
    e = np.array([1, 3, 7, 12, 14, 19, 22, 24, 33], dtype=np.int32)

    denseA = np.zeros(dimensionality)
    denseA[a] = 1.0

    classifier.learn(a, 0, isSparse=dimensionality, partitionId=433)
    classifier.learn(b, 1, isSparse=dimensionality, partitionId=213)
    classifier.learn(c, 1, isSparse=dimensionality, partitionId=None)
    classifier.learn(d, 1, isSparse=dimensionality, partitionId=433)

    # Fix: use assertEqual; assertEquals is a deprecated unittest alias.
    self.assertEqual(classifier.getPartitionId(0), 433)
    self.assertEqual(classifier.getPartitionId(1), 213)
    self.assertEqual(classifier.getPartitionId(2), None)
    self.assertEqual(classifier.getPartitionId(3), 433)

    cat, _, _, _ = classifier.infer(denseA, partitionId=213)
    self.assertEqual(cat, 0)

    # Test with patternId not in classifier
    cat, _, _, _ = classifier.infer(denseA, partitionId=666)
    self.assertEqual(cat, 0)

    # Partition Ids should be maintained after inference
    self.assertEqual(classifier.getPartitionId(0), 433)
    self.assertEqual(classifier.getPartitionId(1), 213)
    self.assertEqual(classifier.getPartitionId(2), None)
    self.assertEqual(classifier.getPartitionId(3), 433)

    # Should return exceptions if we go out of bounds
    with self.assertRaises(RuntimeError):
      classifier.getPartitionId(4)
    with self.assertRaises(RuntimeError):
      classifier.getPartitionId(-1)

    # Learn again
    classifier.learn(e, 4, isSparse=dimensionality, partitionId=413)
    self.assertEqual(classifier.getPartitionId(4), 413)

    # Test getPatternIndicesWithPartitionId
    # NOTE: assertItemsEqual is the Python 2 spelling of assertCountEqual.
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                          [0, 3])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(666),
                          [])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                          [4])

    self.assertEqual(classifier.getNumPartitionIds(), 3)

    # Check that the full set of partition ids is what we expect
    self.assertItemsEqual(classifier.getPartitionIdPerPattern(),
                          [433, 213, np.inf, 433, 413])
    self.assertItemsEqual(classifier.getPartitionIdList(), [433, 413, 213])

    # Remove two rows - all indices shift down
    self.assertEqual(classifier._removeRows([0, 2]), 2)
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(433),
                          [1])
    self.assertItemsEqual(classifier.getPatternIndicesWithPartitionId(413),
                          [2])

    # Remove another row and check number of partitions have decreased
    classifier._removeRows([0])
    self.assertEqual(classifier.getNumPartitionIds(), 2)

    # Check that the full set of partition ids is what we expect
    self.assertItemsEqual(classifier.getPartitionIdPerPattern(), [433, 413])
    self.assertItemsEqual(classifier.getPartitionIdList(), [433, 413])
Beispiel #47
0
  def testDistanceMetrics(self):
    """Verify each supported distance metric against hand-computed values."""
    classifier = KNNClassifier(distanceMethod="norm", distanceNorm=2.0)

    dimensionality = 40
    protoA = np.array([0, 1, 3, 7, 11], dtype=np.int32)
    protoB = np.array([20, 28, 30], dtype=np.int32)

    classifier.learn(protoA, 0, isSparse=dimensionality)
    classifier.learn(protoB, 0, isSparse=dimensionality)

    # queryVector is an arbitrary point, close to protoA, orthogonal to protoB
    queryVector = np.zeros(dimensionality)
    queryVector[:4] = 1.0
    # protoAVector reproduces protoA exactly, to check self-distance is 0
    protoAVector = np.zeros(dimensionality)
    protoAVector[protoA] = 1.0

    def verifyMetric(expectedDistances, distMsg, zeroMsg, places=None):
      # Check distances from the query to (protoA, protoB), then check that
      # the distance from protoA to itself is exactly 0.
      _, _, dist, _ = classifier.infer(queryVector)
      for expected, computed in zip(expectedDistances, dist):
        if places is None:
          self.assertEqual(expected, computed, msg=distMsg)
        else:
          self.assertAlmostEqual(expected, computed, places=places,
                                 msg=distMsg)
      _, _, dist0, _ = classifier.infer(protoAVector)
      self.assertEqual(0.0, dist0[0], msg=zeroMsg)

    # l2 norm metric (classifier's initial configuration)
    verifyMetric([0.65465367, 1.0],
                 "l2 distance norm is not calculated as expected.",
                 "l2 norm did not calculate 0 distance as expected.",
                 places=5)

    # l1 norm metric
    classifier.distanceNorm = 1.0
    verifyMetric([0.42857143, 1.0],
                 "l1 distance norm is not calculated as expected.",
                 "l1 norm did not calculate 0 distance as expected.",
                 places=5)

    # raw overlap metric (integer counts, so exact equality)
    classifier.distanceMethod = "rawOverlap"
    verifyMetric([1, 4],
                 "Raw overlap is not calculated as expected.",
                 "Raw overlap did not calculate 0 distance as expected.")

    # pctOverlapOfInput metric
    classifier.distanceMethod = "pctOverlapOfInput"
    verifyMetric([0.25, 1.0],
                 "pctOverlapOfInput is not calculated as expected.",
                 "pctOverlapOfInput did not calculate 0 distance as expected.",
                 places=5)

    # pctOverlapOfProto metric
    classifier.distanceMethod = "pctOverlapOfProto"
    verifyMetric([0.40, 1.0],
                 "pctOverlapOfProto is not calculated as expected.",
                 "pctOverlapOfProto did not calculate 0 distance as expected.",
                 places=5)

    # pctOverlapOfLarger metric
    classifier.distanceMethod = "pctOverlapOfLarger"
    verifyMetric([0.40, 1.0],
                 "pctOverlapOfLarger is not calculated as expected.",
                 "pctOverlapOfLarger did not calculate 0 distance as expected.",
                 places=5)
class ClassificationModelFingerprint(ClassificationModel):
    """
  Class to run the survey response classification task with Cortical.io
  fingerprint encodings.

  From the experiment runner, the methods expect to be fed one sample at a time.
  """
    def __init__(self,
                 verbosity=1,
                 numLabels=3,
                 fingerprintType=EncoderTypes.document):
        """
    Initialize the fingerprint model: a kNN classifier plus a Cortical.io
    encoder.

    @param verbosity       (int)           Verbosity level; passed through to
                                           the base model and (minus one) to
                                           the kNN classifier.
    @param numLabels       (int)           Number of classification labels;
                                           also used as k for the classifier.
    @param fingerprintType (EncoderTypes)  Either EncoderTypes.document or
                                           EncoderTypes.word.
    @raises ValueError if fingerprintType is not one of the two supported
    encoder types.
    """
        # Fail fast on a bad encoding type before building any components.
        # BUG FIX: the original condition
        #   `fingerprintType is (not EncoderTypes.document or not EncoderTypes.word)`
        # reduced to `fingerprintType is False` and could never trigger; the
        # intended check is membership in the set of valid types.
        if fingerprintType not in (EncoderTypes.document, EncoderTypes.word):
            raise ValueError("Invalid type of fingerprint encoding; see the "
                             "EncoderTypes class for eligible types.")

        super(ClassificationModelFingerprint,
              self).__init__(verbosity, numLabels)

        # Init kNN classifier and Cortical.io encoder; need valid API key (see
        # CioEncoder init for details).
        self.classifier = KNNClassifier(k=numLabels,
                                        distanceMethod='rawOverlap',
                                        exact=False,
                                        verbosity=verbosity - 1)

        self.encoder = CioEncoder(cacheDir="./fluent/experiments/cioCache",
                                  fingerprintType=fingerprintType)
        # Model dimensions come from the encoder: n total bits, w active bits
        # derived from the encoder's target sparsity percentage.
        self.n = self.encoder.n
        self.w = int((self.encoder.targetSparsity / 100) * self.n)

    def encodePattern(self, sample):
        """
    Encode the sample as an SDR by querying the Cortical.io API. When the
    client returns nothing, fall back to a random SDR built with the model's
    dimensions n and w.

    @param sample     (list)            Tokenized sample, where each item is a
                                        string token.
    @return           (dict)            The sample text, sparsity, and bitmap.
    Example return dict:
      {
        "text": "Example text",
        "sparsity": 0.03,
        "bitmap": numpy.array([])
      }
    """
        joinedText = " ".join(sample)
        fpInfo = self.encoder.encode(joinedText)

        if not fpInfo:
            # Encoder gave us nothing back; substitute a random encoding so
            # downstream code always receives a usable pattern.
            return {
                "text": joinedText,
                "sparsity": float(self.w) / self.n,
                "bitmap": self.encodeRandomly(joinedText),
            }

        # Word-level responses label the text under "term" rather than "text".
        textKey = "text" if "text" in fpInfo else "term"
        return {
            "text": fpInfo[textKey],
            "sparsity": fpInfo["sparsity"],
            "bitmap": numpy.array(fpInfo["fingerprint"]["positions"]),
        }

    def resetModel(self):
        """Forget all learned patterns by clearing out the kNN classifier."""
        self.classifier.clear()

    def trainModel(self, samples, labels):
        """
    Train the kNN classifier on the given samples and their labels.

    @param samples    (list)          List of dictionaries containing the
                                      sample text, sparsity, and bitmap.
    @param labels     (list)          List of numpy arrays containing the
                                      reference indices for the classifications
                                      of each sample.
    """
        for samplePattern, refIndices in zip(samples, labels):
            bitmap = samplePattern["bitmap"]
            # Skip samples whose encoding has no active bits.
            if not bitmap.any():
                continue
            for refIdx in refIndices:
                # isSparse takes the input width n, telling the classifier the
                # bitmap is a sparse list of active indices.
                self.classifier.learn(bitmap, refIdx, isSparse=self.n)

    def testModel(self, sample, numLabels=3):
        """
    Test the kNN classifier on the input sample. Returns the classification most
    frequent amongst the classifications of the sample's individual tokens.
    Unclassified terms are ignored; the most frequent classification among
    those detected wins.

    @param sample         (dict)          The sample text, sparsity, and bitmap.
    @param numLabels      (int)           Number of predicted classifications.
    @return               (numpy array)   The numLabels most-frequent
                                          classifications for the data samples;
                                          values are int or empty.
    """
        densePattern = self._densifyPattern(sample["bitmap"])
        # infer() returns (winner, inferenceResult, dist, categoryDist); only
        # the per-category inference result is needed here.
        inferenceResult = self.classifier.infer(densePattern)[1]
        return self.getWinningLabels(inferenceResult, numLabels)