def testEquals(self): r1 = Random(42) v1 = r1.getReal64() r2 = Random(42) v2 = r2.getReal64() self.assertEquals(v1, v2) self.assertEquals(r1, r2)
def testEquals(self): r1 = Random(42) v1 = r1.getReal64() i1 = r1.getUInt32() r2 = Random(42) v2 = r2.getReal64() i2 = r2.getUInt32() self.assertEqual(v1, v2) self.assertEqual(r1, r2) self.assertEqual(i1, i2)
def init(): global numColumns, numInputs, inputDimensions, columnDimensions, connectedSynapses global numColumns inputDimensions = [32, 32] columnDimensions = [64, 64] initConnectedPct = 0.5 # Check the matrix SM_01_32_32 inputDimensions = numpy.array(inputDimensions, ndmin=1) columnDimensions = numpy.array(columnDimensions, ndmin=1) numColumns = columnDimensions.prod() numInputs = inputDimensions.prod() potentialPools = SparseBinaryMatrix(numInputs) potentialPools.resize(numColumns, numInputs) permances = SparseMatrix(numColumns, numInputs) random = NupicRandom() tieBreaker = 0.01*numpy.array([random.getReal64() for i in xrange(numColumns)]) connectedSynapses = SparseBinaryMatrix(numInputs) connectedSynapses.resize(numColumns, numInputs) connectedCounts = numpy.zeros(numColumns, dtype=realDType) potentialPools.replaceSparseRow(0, numpy.array([0, 1], dtype='int'))
def _orderForCoordinate(cls, coordinate): """ Returns the order for a coordinate. @param coordinate (numpy.array) Coordinate @return (float) A value in the interval [0, 1), representing the order of the coordinate """ seed = cls._hashCoordinate(coordinate) rng = Random(seed) return rng.getReal64()
class PatternMachine(object): """ Base pattern machine class. """ def __init__(self, n, w, num=100, seed=42): """ @param n (int) Number of available bits in pattern @param w (int/list) Number of on bits in pattern If list, each pattern will have a `w` randomly selected from the list. @param num (int) Number of available patterns """ # Save member variables self._n = n self._w = w self._num = num # Initialize member variables self._random = Random(seed) self._patterns = dict() self._generate() def get(self, number): """ Return a pattern for a number. @param number (int) Number of pattern @return (set) Indices of on bits """ if not number in self._patterns: raise IndexError("Invalid number") return self._patterns[number] def addNoise(self, bits, amount): """ Add noise to pattern. @param bits (set) Indices of on bits @param amount (float) Probability of switching an on bit with a random bit @return (set) Indices of on bits in noisy pattern """ newBits = set() for bit in bits: if self._random.getReal64() < amount: newBits.add(self._random.getUInt32(self._n)) else: newBits.add(bit) return newBits def numbersForBit(self, bit): """ Return the set of pattern numbers that match a bit. @param bit (int) Index of bit @return (set) Indices of numbers """ if bit >= self._n: raise IndexError("Invalid bit") numbers = set() for index, pattern in self._patterns.items(): if bit in pattern: numbers.add(index) return numbers def numberMapForBits(self, bits): """ Return a map from number to matching on bits, for all numbers that match a set of bits. @param bits (set) Indices of bits @return (dict) Mapping from number => on bits. """ numberMap = dict() for bit in bits: numbers = self.numbersForBit(bit) for number in numbers: if not number in numberMap: numberMap[number] = set() numberMap[number].add(bit) return numberMap def prettyPrintPattern(self, bits, verbosity=1): """ Pretty print a pattern. @param bits (set) Indices of on bits @param verbosity (int) Verbosity level @return (string) Pretty-printed text """ numberMap = self.numberMapForBits(bits) text = "" numberList = [] numberItems = sorted(iter(numberMap.items()), key=lambda number_bits: len(number_bits[1]), reverse=True) for number, bits in numberItems: if verbosity > 2: strBits = [str(n) for n in bits] numberText = "{0} (bits: {1})".format(number, ",".join(strBits)) elif verbosity > 1: numberText = "{0} ({1} bits)".format(number, len(bits)) else: numberText = str(number) numberList.append(numberText) text += "[{0}]".format(", ".join(numberList)) return text def _generate(self): """ Generates set of random patterns. """ candidates = np.array(list(range(self._n)), np.uint32) for i in range(self._num): self._random.shuffle(candidates) pattern = candidates[0:self._getW()] self._patterns[i] = set(pattern) def _getW(self): """ Gets a value of `w` for use in generating a pattern. """ w = self._w if type(w) is list: return w[self._random.getUInt32(len(w))] else: return w
class PatternMachine(object): """ Base pattern machine class. """ def __init__(self, n, w, num=100, seed=42): """ @param n (int) Number of available bits in pattern @param w (int/list) Number of on bits in pattern If list, each pattern will have a `w` randomly selected from the list. @param num (int) Number of available patterns """ # Save member variables self._n = n self._w = w self._num = num # Initialize member variables self._random = Random(seed) self._patterns = dict() self._generate() def get(self, number): """ Return a pattern for a number. @param number (int) Number of pattern @return (set) Indices of on bits """ if not number in self._patterns: raise IndexError("Invalid number") return self._patterns[number] def addNoise(self, bits, amount): """ Add noise to pattern. @param bits (set) Indices of on bits @param amount (float) Probability of switching an on bit with a random bit @return (set) Indices of on bits in noisy pattern """ newBits = set() for bit in bits: if self._random.getReal64() < amount: newBits.add(self._random.getUInt32(self._n)) else: newBits.add(bit) return newBits def numbersForBit(self, bit): """ Return the set of pattern numbers that match a bit. @param bit (int) Index of bit @return (set) Indices of numbers """ if bit >= self._n: raise IndexError("Invalid bit") numbers = set() for index, pattern in self._patterns.iteritems(): if bit in pattern: numbers.add(index) return numbers def numberMapForBits(self, bits): """ Return a map from number to matching on bits, for all numbers that match a set of bits. @param bits (set) Indices of bits @return (dict) Mapping from number => on bits. """ numberMap = dict() for bit in bits: numbers = self.numbersForBit(bit) for number in numbers: if not number in numberMap: numberMap[number] = set() numberMap[number].add(bit) return numberMap def prettyPrintPattern(self, bits, verbosity=1): """ Pretty print a pattern. @param bits (set) Indices of on bits @param verbosity (int) Verbosity level @return (string) Pretty-printed text """ numberMap = self.numberMapForBits(bits) text = "" numberList = [] numberItems = sorted(numberMap.iteritems(), key=lambda (number, bits): len(bits), reverse=True) for number, bits in numberItems: if verbosity > 2: strBits = [str(n) for n in bits] numberText = "{0} (bits: {1})".format(number, ",".join(strBits)) elif verbosity > 1: numberText = "{0} ({1} bits)".format(number, len(bits)) else: numberText = str(number) numberList.append(numberText) text += "[{0}]".format(", ".join(numberList)) return text def _generate(self): """ Generates set of random patterns. """ candidates = np.array(range(self._n), np.uint32) for i in xrange(self._num): self._random.shuffle(candidates) pattern = candidates[0:self._getW()] self._patterns[i] = set(pattern) def _getW(self): """ Gets a value of `w` for use in generating a pattern. """ w = self._w if type(w) is list: return w[self._random.getUInt32(len(w))] else: return w
class KNNClassifierRegion(PyRegion): """ KNNClassifierRegion implements the k Nearest Neighbor classification algorithm. By default it will implement vanilla 1-nearest neighbor using the L2 (Euclidean) distance norm. There are options for using different norms as well as various ways of sparsifying the input. Note: categories are ints >= 0. """ __VERSION__ = 1 @classmethod def getSpec(cls): ns = dict( description=KNNClassifierRegion.__doc__, singleNodeOnly=True, inputs=dict( categoryIn=dict( description='Vector of categories of the input sample', dataType='Real32', count=0, required=True, regionLevel=True, isDefaultInput=False, requireSplitterMap=False), bottomUpIn=dict( description='Belief values over children\'s groups', dataType='Real32', count=0, required=True, regionLevel=False, isDefaultInput=True, requireSplitterMap=False), partitionIn=dict( description='Partition ID of the input sample', dataType='Real32', count=0, required=True, regionLevel=True, isDefaultInput=False, requireSplitterMap=False), auxDataIn=dict( description='Auxiliary data from the sensor', dataType='Real32', count=0, required=False, regionLevel=True, isDefaultInput=False, requireSplitterMap=False) ), outputs=dict( categoriesOut=dict( description='A vector representing, for each category ' 'index, the likelihood that the input to the node belongs ' 'to that category based on the number of neighbors of ' 'that category that are among the nearest K.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=True), bestPrototypeIndices=dict( description='A vector that lists, in descending order of ' 'the match, the positions of the prototypes ' 'that best match the input pattern.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=False), categoryProbabilitiesOut=dict( description='A vector representing, for each category ' 'index, the probability that the input to the node belongs ' 'to that category based on the distance to the nearest ' 'neighbor of each category.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=True), ), parameters=dict( learningMode=dict( description='Boolean (0/1) indicating whether or not a region ' 'is in learning mode.', dataType='UInt32', count=1, constraints='bool', defaultValue=1, accessMode='ReadWrite'), inferenceMode=dict( description='Boolean (0/1) indicating whether or not a region ' 'is in inference mode.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='ReadWrite'), acceptanceProbability=dict( description='During learning, inputs are learned with ' 'probability equal to this parameter. ' 'If set to 1.0, the default, ' 'all inputs will be considered ' '(subject to other tests).', dataType='Real32', count=1, constraints='', defaultValue=1.0, #accessMode='Create'), accessMode='ReadWrite'), # and Create too confusion=dict( description='Confusion matrix accumulated during inference. ' 'Reset with reset(). This is available to Python ' 'client code only.', dataType='Handle', count=2, constraints='', defaultValue=None, accessMode='Read'), activeOutputCount=dict( description='The number of active elements in the ' '"categoriesOut" output.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Read'), categoryCount=dict( description='An integer indicating the number of ' 'categories that have been learned', dataType='UInt32', count=1, constraints='', defaultValue=None, accessMode='Read'), patternCount=dict( description='Number of patterns learned by the classifier.', dataType='UInt32', count=1, constraints='', defaultValue=None, accessMode='Read'), patternMatrix=dict( description='The actual patterns learned by the classifier, ' 'returned as a matrix.', dataType='Handle', count=1, constraints='', defaultValue=None, accessMode='Read'), k=dict( description='The number of nearest neighbors to use ' 'during inference.', dataType='UInt32', count=1, constraints='', defaultValue=1, accessMode='Create'), maxCategoryCount=dict( description='The maximal number of categories the ' 'classifier will distinguish between.', dataType='UInt32', count=1, constraints='', defaultValue=2, accessMode='Create'), distanceNorm=dict( description='The norm to use for a distance metric (i.e., ' 'the "p" in Lp-norm)', dataType='Real32', count=1, constraints='', defaultValue=2.0, accessMode='ReadWrite'), #accessMode='Create'), distanceMethod=dict( description='Method used to compute distances between inputs and' 'prototypes. Possible options are norm, rawOverlap, ' 'pctOverlapOfLarger, and pctOverlapOfProto', dataType="Byte", count=0, constraints='enum: norm, rawOverlap, pctOverlapOfLarger, ' 'pctOverlapOfProto, pctOverlapOfInput', defaultValue='norm', accessMode='ReadWrite'), outputProbabilitiesByDist=dict( description='If True, categoryProbabilitiesOut is the probability of ' 'each category based on the distance to the nearest neighbor of ' 'each category. If False, categoryProbabilitiesOut is the ' 'percentage of neighbors among the top K that are of each category.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), distThreshold=dict( description='Distance Threshold. If a pattern that ' 'is less than distThreshold apart from ' 'the input pattern already exists in the ' 'KNN memory, then the input pattern is ' 'not added to KNN memory.', dataType='Real32', count=1, constraints='', defaultValue=0.0, accessMode='ReadWrite'), inputThresh=dict( description='Input binarization threshold, used if ' '"doBinarization" is True.', dataType='Real32', count=1, constraints='', defaultValue=0.5, accessMode='Create'), doBinarization=dict( description='Whether or not to binarize the input vectors.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), useSparseMemory=dict( description='A boolean flag that determines whether or ' 'not the KNNClassifier will use sparse Memory', dataType='UInt32', count=1, constraints='', defaultValue=1, accessMode='Create'), sparseThreshold=dict( description='If sparse memory is used, input variables ' 'whose absolute value is less than this ' 'threshold will be stored as zero', dataType='Real32', count=1, constraints='', defaultValue=0.0, accessMode='Create'), relativeThreshold=dict( description='Whether to multiply sparseThreshold by max value ' ' in input', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), winnerCount=dict( description='Only this many elements of the input are ' 'stored. All elements are stored if 0.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), doSphering=dict( description='A boolean indicating whether or not data should' 'be "sphered" (i.e. each dimension should be normalized such' 'that its mean and variance are zero and one, respectively.) This' ' sphering normalization would be performed after all training ' 'samples had been received but before inference was performed. ' 'The dimension-specific normalization constants would then ' ' be applied to all future incoming vectors prior to performing ' ' conventional NN inference.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), SVDSampleCount=dict( description='If not 0, carries out SVD transformation after ' 'that many samples have been seen.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), SVDDimCount=dict( description='Number of dimensions to keep after SVD if greater ' 'than 0. If set to -1 it is considered unspecified. ' 'If set to 0 it is consider "adaptive" and the number ' 'is chosen automatically.', dataType='Int32', count=1, constraints='', defaultValue=-1, accessMode='Create'), fractionOfMax=dict( description='The smallest singular value which is retained ' 'as a fraction of the largest singular value. This is ' 'used only if SVDDimCount==0 ("adaptive").', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), useAuxiliary=dict( description='Whether or not the classifier should use auxiliary ' 'input data.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), justUseAuxiliary=dict( description='Whether or not the classifier should ONLUY use the ' 'auxiliary input data.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), clVerbosity=dict( description='An integer that controls the verbosity level, ' '0 means no verbose output, increasing integers ' 'provide more verbosity.', dataType='UInt32', count=1, constraints='', defaultValue=0 , accessMode='ReadWrite'), doSelfValidation=dict( description='A boolean flag that determines whether or' 'not the KNNClassifier should perform partitionID-based' 'self-validation during the finishLearning() step.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), keepAllDistances=dict( description='Whether to store all the protoScores in an array, ' 'rather than just the ones for the last inference. ' 'When this parameter is changed from True to False, ' 'all the scores are discarded except for the most ' 'recent one.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), replaceDuplicates=dict( description='A boolean flag that determines whether or' 'not the KNNClassifier should replace duplicates' 'during learning. This should be on when online' 'learning.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), cellsPerCol=dict( description='If >= 1, we assume the input is organized into columns, ' 'in the same manner as the temporal pooler AND ' 'whenever we store a new prototype, we only store the ' 'start cell (first cell) in any column which is bursting.' 'colum ', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), maxStoredPatterns=dict( description='Limits the maximum number of the training patterns ' 'stored. When KNN learns in a fixed capacity mode, ' 'the unused patterns are deleted once the number ' 'of stored patterns is greater than maxStoredPatterns' 'columns. [-1 is no limit] ', dataType='Int32', count=1, constraints='', defaultValue=-1, accessMode='Create'), ), commands=dict() ) return ns def __init__(self, maxCategoryCount=0, bestPrototypeIndexCount=0, outputProbabilitiesByDist=False, k=1, distanceNorm=2.0, distanceMethod='norm', distThreshold=0.0, doBinarization=False, inputThresh=0.500, useSparseMemory=True, sparseThreshold=0.0, relativeThreshold=False, winnerCount=0, acceptanceProbability=1.0, seed=42, doSphering=False, SVDSampleCount=0, SVDDimCount=0, fractionOfMax=0, useAuxiliary=0, justUseAuxiliary=0, clVerbosity=0, doSelfValidation=False, replaceDuplicates=False, cellsPerCol=0, maxStoredPatterns=-1 ): self.version = KNNClassifierRegion.__VERSION__ # Convert various arguments to match the expectation # of the KNNClassifier if SVDSampleCount == 0: SVDSampleCount = None if SVDDimCount == -1: SVDDimCount = None elif SVDDimCount == 0: SVDDimCount = 'adaptive' if fractionOfMax == 0: fractionOfMax = None if useAuxiliary == 0: useAuxiliary = False if justUseAuxiliary == 0: justUseAuxiliary = False # KNN Parameters self.knnParams = dict( k=k, distanceNorm=distanceNorm, distanceMethod=distanceMethod, distThreshold=distThreshold, doBinarization=doBinarization, binarizationThreshold=inputThresh, useSparseMemory=useSparseMemory, sparseThreshold=sparseThreshold, relativeThreshold=relativeThreshold, numWinners=winnerCount, numSVDSamples=SVDSampleCount, numSVDDims=SVDDimCount, fractionOfMax=fractionOfMax, verbosity=clVerbosity, replaceDuplicates=replaceDuplicates, cellsPerCol=cellsPerCol, maxStoredPatterns=maxStoredPatterns ) # Initialize internal structures self.outputProbabilitiesByDist = outputProbabilitiesByDist self.learningMode = True self.inferenceMode = False self._epoch = 0 self.acceptanceProbability = acceptanceProbability self._rgen = Random(seed) self.confusion = numpy.zeros((1, 1)) self.keepAllDistances = False self._protoScoreCount = 0 self._useAuxiliary = useAuxiliary self._justUseAuxiliary = justUseAuxiliary # Sphering normalization self._doSphering = doSphering self._normOffset = None self._normScale = None self._samples = None self._labels = None # Debugging self.verbosity = clVerbosity # Boolean controlling whether or not the # region should perform partitionID-based # self-validation during the finishLearning() # step. self.doSelfValidation = doSelfValidation # Taps self._tapFileIn = None self._tapFileOut = None self._initEphemerals() self.maxStoredPatterns = maxStoredPatterns self.maxCategoryCount = maxCategoryCount self._bestPrototypeIndexCount = bestPrototypeIndexCount def _getEphemeralAttributes(self): """ List of attributes to not save with serialized state. """ return ['_firstComputeCall', '_accuracy', '_protoScores', '_categoryDistances'] def _initEphemerals(self): """ Initialize attributes that are not saved with the checkpoint. """ self._firstComputeCall = True self._accuracy = None self._protoScores = None self._categoryDistances = None self._knn = KNNClassifier.KNNClassifier(**self.knnParams) for x in ('_partitions', '_useAuxialiary', '_doSphering', '_scanInfo', '_protoScores', 'doSelfValidation'): if not hasattr(self, x): setattr(self, x, None) def __setstate__(self, state): """Set state from serialized state.""" if 'version' not in state: self.__dict__.update(state) elif state['version'] == 1: knnState = state['_knn_state'] del state['_knn_state'] self.__dict__.update(state) self._initEphemerals() self._knn.__setstate__(knnState) else: raise RuntimeError("Invalid KNNClassifierRegion version for __setstate__") # Set to current version self.version = KNNClassifierRegion.__VERSION__ def __getstate__(self): """Get serializable state.""" state = self.__dict__.copy() state['_knn_state'] = self._knn.__getstate__() del state['_knn'] for field in self._getEphemeralAttributes(): del state[field] return state def initialize(self, dims, splitterMaps): assert tuple(dims) == (1,) * len(dims) def _getActiveOutputCount(self): if self._knn._categoryList: return int(max(self._knn._categoryList)+1) else: return 0 activeOutputCount = property(fget=_getActiveOutputCount) def _getSeenCategoryCount(self): return len(set(self._knn._categoryList)) categoryCount = property(fget=_getSeenCategoryCount) def _getPatternMatrix(self): if self._knn._M is not None: return self._knn._M else: return self._knn._Memory def _getAccuracy(self): n = self.confusion.shape[0] assert n == self.confusion.shape[1], "Confusion matrix is non-square." return self.confusion[range(n), range(n)].sum(), self.confusion.sum() accuracy = property(fget=_getAccuracy) def clear(self): self._knn.clear() def getAlgorithmInstance(self): """Returns instance of the underlying KNNClassifier algorithm object.""" return self._knn def getParameter(self, name, index=-1): """ Get the value of the parameter. @param name -- the name of the parameter to retrieve, as defined by the Node Spec. """ if name == "patternCount": return self._knn._numPatterns elif name == "patternMatrix": return self._getPatternMatrix() elif name == "k": return self._knn.k elif name == "distanceNorm": return self._knn.distanceNorm elif name == "distanceMethod": return self._knn.distanceMethod elif name == "distThreshold": return self._knn.distThreshold elif name == "inputThresh": return self._knn.binarizationThreshold elif name == "doBinarization": return self._knn.doBinarization elif name == "useSparseMemory": return self._knn.useSparseMemory elif name == "sparseThreshold": return self._knn.sparseThreshold elif name == "winnerCount": return self._knn.numWinners elif name == "relativeThreshold": return self._knn.relativeThreshold elif name == "SVDSampleCount": v = self._knn.numSVDSamples return v if v is not None else 0 elif name == "SVDDimCount": v = self._knn.numSVDDims return v if v is not None else 0 elif name == "fractionOfMax": v = self._knn.fractionOfMax return v if v is not None else 0 elif name == "useAuxiliary": return self._useAuxiliary elif name == "justUseAuxiliary": return self._justUseAuxiliary elif name == "doSphering": return self._doSphering elif name == "cellsPerCol": return self._knn.cellsPerCol elif name == "maxStoredPatterns": return self.maxStoredPatterns elif name == 'categoryRecencyList': return self._knn._categoryRecencyList else: # If any spec parameter name is the same as an attribute, this call # will get it automatically, e.g. self.learningMode return PyRegion.getParameter(self, name, index) def setParameter(self, name, index, value): """ Set the value of the parameter. @param name -- the name of the parameter to update, as defined by the Node Spec. @param value -- the value to which the parameter is to be set. """ if name == "learningMode": if int(value) and not self.learningMode: self._restartLearning() self.learningMode = bool(int(value)) self._epoch = 0 elif name == "inferenceMode": self._epoch = 0 if int(value) and not self.inferenceMode: self._finishLearning() self.inferenceMode = bool(int(value)) elif name == "distanceNorm": self._knn.distanceNorm = value elif name == "distanceMethod": self._knn.distanceMethod = value elif name == "keepAllDistances": self.keepAllDistances = bool(value) if not self.keepAllDistances: # Discard all distances except the latest if self._protoScores is not None and self._protoScores.shape[0] > 1: self._protoScores = self._protoScores[-1,:] if self._protoScores is not None: self._protoScoreCount = 1 else: self._protoScoreCount = 0 elif name == "clVerbosity": self.verbosity = value self._knn.verbosity = value elif name == "doSelfValidation": self.doSelfValidation = value else: return PyRegion.setParameter(self, name, index, value) def reset(self): self.confusion = numpy.zeros((1, 1)) def doInference(self, activeInput): """Explicitly run inference on a vector that is passed in and return the category id. Useful for debugging.""" prediction, inference, allScores = self._knn.infer(activeInput) return inference def enableTap(self, tapPath): """ Begin writing output tap files. @param tapPath -- base name of the output tap files to write. """ self._tapFileIn = open(tapPath + '.in', 'w') self._tapFileOut = open(tapPath + '.out', 'w') def disableTap(self): """Disable writing of output tap files. """ if self._tapFileIn is not None: self._tapFileIn.close() self._tapFileIn = None if self._tapFileOut is not None: self._tapFileOut.close() self._tapFileOut = None def handleLogInput(self, inputs): """Write inputs to output tap file.""" if self._tapFileIn is not None: for input in inputs: for k in range(len(input)): print >> self._tapFileIn, input[k], print >> self._tapFileIn def handleLogOutput(self, output): """Write outputs to output tap file.""" #raise Exception('MULTI-LINE DUMMY\nMULTI-LINE DUMMY') if self._tapFileOut is not None: for k in range(len(output)): print >> self._tapFileOut, output[k], print >> self._tapFileOut def _storeSample(self, inputVector, trueCatIndex, partition=0): """ Store a training sample and associated category label """ # If this is the first sample, then allocate a numpy array # of the appropriate size in which to store all samples. if self._samples is None: self._samples = numpy.zeros((0, len(inputVector)), dtype=RealNumpyDType) assert self._labels is None self._labels = [] # Add the sample vector and category lable self._samples = numpy.concatenate((self._samples, numpy.atleast_2d(inputVector)), axis=0) self._labels += [trueCatIndex] # Add the parition ID if self._partitions is None: self._partitions = [] if partition is None: partition = 0 self._partitions += [partition] def compute(self, inputs, outputs): """ Process one input sample. This method is called by the runtime engine. NOTE: the number of input categories may vary, but the array size is fixed to the max number of categories allowed (by a lower region), so "unused" indices of the input category array are filled with -1s. TODO: confusion matrix does not support multi-label classification """ #raise Exception('MULTI-LINE DUMMY\nMULTI-LINE DUMMY') #For backward compatibility if self._useAuxiliary is None: self._useAuxiliary = False # If the first time being called, then print potential warning messsages if self._firstComputeCall: self._firstComputeCall = False if self._useAuxiliary: #print "\n Auxiliary input stream from Image Sensor enabled." if self._justUseAuxiliary == True: print " Warning: You have chosen to ignore the image data and instead just use the auxiliary data stream." # Format inputs #childInputs = [x.wvector(0) for x in inputs["bottomUpIn"]] #inputVector = numpy.concatenate([x.array() for x in childInputs]) inputVector = inputs['bottomUpIn'] # Look for auxiliary input if self._useAuxiliary==True: #auxVector = inputs['auxDataIn'][0].wvector(0).array() auxVector = inputs['auxDataIn'] if auxVector.dtype != numpy.float32: raise RuntimeError, "KNNClassifierRegion expects numpy.float32 for the auxiliary data vector" if self._justUseAuxiliary == True: #inputVector = inputs['auxDataIn'][0].wvector(0).array() inputVector = inputs['auxDataIn'] else: #inputVector = numpy.concatenate([inputVector, inputs['auxDataIn'][0].wvector(0).array()]) inputVector = numpy.concatenate([inputVector, inputs['auxDataIn']]) # Logging #self.handleLogInput(childInputs) self.handleLogInput([inputVector]) # Read the category. assert "categoryIn" in inputs, "No linked category input." categories = inputs['categoryIn'] # Read the partition ID. if "partitionIn" in inputs: assert len(inputs["partitionIn"]) == 1, "Must have exactly one link to partition input." #partInput = inputs["partitionIn"][0].wvector() partInput = inputs['partitionIn'] assert len(partInput) == 1, "Partition input element count must be exactly 1." partition = int(partInput[0]) else: partition = None # --------------------------------------------------------------------- # Inference (can be done simultaneously with learning) if self.inferenceMode: categoriesOut = outputs['categoriesOut'] probabilitiesOut = outputs['categoryProbabilitiesOut'] # If we are sphering, then apply normalization if self._doSphering: inputVector = (inputVector + self._normOffset) * self._normScale nPrototypes = 0 if "bestPrototypeIndices" in outputs: #bestPrototypeIndicesOut = outputs["bestPrototypeIndices"].wvector() bestPrototypeIndicesOut = outputs["bestPrototypeIndices"] nPrototypes = len(bestPrototypeIndicesOut) winner, inference, protoScores, categoryDistances = \ self._knn.infer(inputVector, partitionId=partition) if not self.keepAllDistances: self._protoScores = protoScores else: # Keep all prototype scores in an array if self._protoScores is None: self._protoScores = numpy.zeros((1, protoScores.shape[0]), protoScores.dtype) self._protoScores[0,:] = protoScores#.reshape(1, protoScores.shape[0]) self._protoScoreCount = 1 else: if self._protoScoreCount == self._protoScores.shape[0]: # Double the size of the array newProtoScores = numpy.zeros((self._protoScores.shape[0] * 2, self._protoScores.shape[1]), self._protoScores.dtype) newProtoScores[:self._protoScores.shape[0],:] = self._protoScores self._protoScores = newProtoScores # Store the new prototype score self._protoScores[self._protoScoreCount,:] = protoScores self._protoScoreCount += 1 self._categoryDistances = categoryDistances # -------------------------------------------------------------------- # Compute the probability of each category if self.outputProbabilitiesByDist: scores = 1.0 - self._categoryDistances else: scores = inference # Probability is simply the scores/scores.sum() total = scores.sum() if total == 0: numScores = len(scores) probabilities = numpy.ones(numScores) / numScores else: probabilities = scores / total #print "probabilities:", probabilities #import pdb; pdb.set_trace() # ------------------------------------------------------------------- # Fill the output vectors with our results nout = min(len(categoriesOut), len(inference)) categoriesOut.fill(0) categoriesOut[0:nout] = inference[0:nout] probabilitiesOut.fill(0) probabilitiesOut[0:nout] = probabilities[0:nout] if self.verbosity >= 1: print "KNNRegion: categoriesOut: ", categoriesOut[0:nout] print "KNNRegion: probabilitiesOut: ", probabilitiesOut[0:nout] if self._scanInfo is not None: self._scanResults = [tuple(inference[:nout])] # Update the stored confusion matrix. for category in categories: if category >= 0: dims = max(category+1, len(inference)) oldDims = len(self.confusion) if oldDims < dims: confusion = numpy.zeros((dims, dims)) confusion[0:oldDims, 0:oldDims] = self.confusion self.confusion = confusion self.confusion[inference.argmax(), category] += 1 # Calculate the best prototype indices if nPrototypes > 1: bestPrototypeIndicesOut.fill(0) if categoryDistances is not None: indices = categoryDistances.argsort() nout = min(len(indices), nPrototypes) bestPrototypeIndicesOut[0:nout] = indices[0:nout] elif nPrototypes == 1: if (categoryDistances is not None) and len(categoryDistances): bestPrototypeIndicesOut[0] = categoryDistances.argmin() else: bestPrototypeIndicesOut[0] = 0 # Logging self.handleLogOutput(inference) # --------------------------------------------------------------------- # Learning mode if self.learningMode: if (self.acceptanceProbability < 1.0) and \ (self._rgen.getReal64() > self.acceptanceProbability): pass else: # Accept the input for category in categories: if category >= 0: # category values of -1 are to be skipped (they are non-categories) if self._doSphering: # If we are sphering, then we can't provide the data to the KNN # library until we have computed per-dimension normalization # constants. So instead, we'll just store each training sample. self._storeSample(inputVector, category, partition) else: # Pass the raw training sample directly to the KNN library. self._knn.learn(inputVector, category, partition) self._epoch += 1 def getCategoryList(self): """ Public API for returning the category list This is a required API of the NearestNeighbor inspector. It returns an array which has one entry per stored prototype. The value of the entry is the category # of that stored prototype. """ return self._knn._categoryList def removeCategory(self, categoryToRemove): return self._knn.removeCategory(categoryToRemove) def getLatestDistances(self): """ Public API for returning the full scores (distance to each prototype) from the last compute() inference call. This is a required API of the NearestNeighbor inspector. It returns an array which has one entry per stored prototype. The value of the entry is distance of the most recenty inferred input from the stored prototype. """ if self._protoScores is not None: if self.keepAllDistances: return self._protoScores[self._protoScoreCount - 1,:] else: return self._protoScores else: return None def getAllDistances(self): """ Return all the prototype distances from all computes available. Like getLatestDistances, but returns all the scores if more than one set is available. getLatestDistances will always just return one set of scores. """ if self._protoScores is None: return None return self._protoScores[:self._protoScoreCount, :] def calculateProbabilities(self): # Get the scores, from 0 to 1 scores = 1.0 - self._categoryDistances # Probability is simply the score/scores.sum() total = scores.sum() if total == 0: numScores = len(scores) return numpy.ones(numScores) / numScores return scores / total def _restartLearning(self): """ Currently, we allow learning mode to be "re-started" after being ended, but only if PCA and sphering (if any) operations have already been completed (for the sake of simplicity.) """ self._knn.restartLearning() def _finishLearning(self): """Does nothing. Kept here for API compatibility """ if self._doSphering: self._finishSphering() self._knn.finishLearning() # Compute leave-one-out validation accuracy if # we actually received non-trivial partition info self._accuracy = None if self.doSelfValidation: #partitions = self._knn._partitionIdList #if len(set(partitions)) > 1: if self._knn._partitionIdArray is not None: numSamples, numCorrect = self._knn.leaveOneOutTest() if numSamples: self._accuracy = float(numCorrect) / float(numSamples) print "Leave-one-out validation: %d of %d correct ==> %.3f%%" % \ (numCorrect, numSamples, self._accuracy * 100.0) def _finishSphering(self): """ Compute normalization constants for each feature dimension based on the collected training samples. Then normalize our training samples using these constants (so that each input dimension has mean and variance of zero and one, respectively.) Then feed these "sphered" training samples into the underlying SVM model. """ # If we are sphering our data, we need to compute the # per-dimension normalization constants # First normalize the means (to zero) self._normOffset = self._samples.mean(axis=0) * -1.0 self._samples += self._normOffset # Now normalize the variances (to one). However, we need to be # careful because the variance could conceivably be zero for one # or more dimensions. variance = self._samples.var(axis=0) variance[numpy.where(variance == 0.0)] = 1.0 self._normScale = 1.0 / numpy.sqrt(variance) self._samples *= self._normScale # Now feed each "sphered" sample into the SVM library for sampleIndex in range(len(self._labels)): self._knn.learn(self._samples[sampleIndex], self._labels[sampleIndex], self._partitions[sampleIndex]) def _arraysToLists(self, samplesArray, labelsArray): labelsList = list(labelsArray) samplesList = [[float(y) for y in x] for x in [list(x) for x in samplesArray]] return samplesList, labelsList def getOutputElementCount(self, name): """This method will be called only when the node is used in nuPIC 2""" if name == 'categoriesOut': return self.maxCategoryCount elif name == 'categoryProbabilitiesOut': return self.maxCategoryCount elif name == 'bestPrototypeIndices': return self._bestPrototypeIndexCount if self._bestPrototypeIndexCount else 0 else: raise Exception('Unknown output: ' + name)
class KNNClassifierRegion(PyRegion): """ KNNClassifierRegion implements the k Nearest Neighbor classification algorithm. By default it will implement vanilla 1-nearest neighbor using the L2 (Euclidean) distance norm. There are options for using different norms as well as various ways of sparsifying the input. Note: categories are ints >= 0. """ __VERSION__ = 1 @classmethod def getSpec(cls): ns = dict( description=KNNClassifierRegion.__doc__, singleNodeOnly=True, inputs=dict( categoryIn=dict( description='Vector of zero or more category indices for this input' 'sample. -1 implies no category.', dataType='Real32', count=0, required=True, regionLevel=True, isDefaultInput=False, requireSplitterMap=False), bottomUpIn=dict( description='Belief values over children\'s groups', dataType='Real32', count=0, required=True, regionLevel=False, isDefaultInput=True, requireSplitterMap=False), partitionIn=dict( description='Partition ID of the input sample', dataType='Real32', count=0, required=True, regionLevel=True, isDefaultInput=False, requireSplitterMap=False), auxDataIn=dict( description='Auxiliary data from the sensor', dataType='Real32', count=0, required=False, regionLevel=True, isDefaultInput=False, requireSplitterMap=False) ), outputs=dict( categoriesOut=dict( description='A vector representing, for each category ' 'index, the likelihood that the input to the node belongs ' 'to that category based on the number of neighbors of ' 'that category that are among the nearest K.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=True), bestPrototypeIndices=dict( description='A vector that lists, in descending order of ' 'the match, the positions of the prototypes ' 'that best match the input pattern.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=False), categoryProbabilitiesOut=dict( description='A vector representing, for each category ' 'index, the probability that the input to the node belongs ' 'to that category based on the distance to the nearest ' 'neighbor of each category.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=True), ), parameters=dict( learningMode=dict( description='Boolean (0/1) indicating whether or not a region ' 'is in learning mode.', dataType='UInt32', count=1, constraints='bool', defaultValue=1, accessMode='ReadWrite'), inferenceMode=dict( description='Boolean (0/1) indicating whether or not a region ' 'is in inference mode.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='ReadWrite'), acceptanceProbability=dict( description='During learning, inputs are learned with ' 'probability equal to this parameter. ' 'If set to 1.0, the default, ' 'all inputs will be considered ' '(subject to other tests).', dataType='Real32', count=1, constraints='', defaultValue=1.0, #accessMode='Create'), accessMode='ReadWrite'), # and Create too confusion=dict( description='Confusion matrix accumulated during inference. ' 'Reset with reset(). This is available to Python ' 'client code only.', dataType='Handle', count=2, constraints='', defaultValue=None, accessMode='Read'), activeOutputCount=dict( description='The number of active elements in the ' '"categoriesOut" output.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Read'), categoryCount=dict( description='An integer indicating the number of ' 'categories that have been learned', dataType='UInt32', count=1, constraints='', defaultValue=None, accessMode='Read'), patternCount=dict( description='Number of patterns learned by the classifier.', dataType='UInt32', count=1, constraints='', defaultValue=None, accessMode='Read'), patternMatrix=dict( description='The actual patterns learned by the classifier, ' 'returned as a matrix.', dataType='Handle', count=1, constraints='', defaultValue=None, accessMode='Read'), k=dict( description='The number of nearest neighbors to use ' 'during inference.', dataType='UInt32', count=1, constraints='', defaultValue=1, accessMode='Create'), maxCategoryCount=dict( description='The maximal number of categories the ' 'classifier will distinguish between.', dataType='UInt32', count=1, constraints='', defaultValue=2, accessMode='Create'), distanceNorm=dict( description='The norm to use for a distance metric (i.e., ' 'the "p" in Lp-norm)', dataType='Real32', count=1, constraints='', defaultValue=2.0, accessMode='ReadWrite'), #accessMode='Create'), distanceMethod=dict( description='Method used to compute distances between inputs and' 'prototypes. Possible options are norm, rawOverlap, ' 'pctOverlapOfLarger, and pctOverlapOfProto', dataType="Byte", count=0, constraints='enum: norm, rawOverlap, pctOverlapOfLarger, ' 'pctOverlapOfProto, pctOverlapOfInput', defaultValue='norm', accessMode='ReadWrite'), outputProbabilitiesByDist=dict( description='If True, categoryProbabilitiesOut is the probability of ' 'each category based on the distance to the nearest neighbor of ' 'each category. If False, categoryProbabilitiesOut is the ' 'percentage of neighbors among the top K that are of each category.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), distThreshold=dict( description='Distance Threshold. If a pattern that ' 'is less than distThreshold apart from ' 'the input pattern already exists in the ' 'KNN memory, then the input pattern is ' 'not added to KNN memory.', dataType='Real32', count=1, constraints='', defaultValue=0.0, accessMode='ReadWrite'), inputThresh=dict( description='Input binarization threshold, used if ' '"doBinarization" is True.', dataType='Real32', count=1, constraints='', defaultValue=0.5, accessMode='Create'), doBinarization=dict( description='Whether or not to binarize the input vectors.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), useSparseMemory=dict( description='A boolean flag that determines whether or ' 'not the KNNClassifier will use sparse Memory', dataType='UInt32', count=1, constraints='', defaultValue=1, accessMode='Create'), minSparsity=dict( description="If useSparseMemory is set, only vectors with sparsity" " >= minSparsity will be stored during learning. A value" " of 0.0 implies all vectors will be stored. A value of" " 0.1 implies only vectors with at least 10% sparsity" " will be stored", dataType='Real32', count=1, constraints='', defaultValue=0.0, accessMode='ReadWrite'), sparseThreshold=dict( description='If sparse memory is used, input variables ' 'whose absolute value is less than this ' 'threshold will be stored as zero', dataType='Real32', count=1, constraints='', defaultValue=0.0, accessMode='Create'), relativeThreshold=dict( description='Whether to multiply sparseThreshold by max value ' ' in input', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), winnerCount=dict( description='Only this many elements of the input are ' 'stored. All elements are stored if 0.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), doSphering=dict( description='A boolean indicating whether or not data should' 'be "sphered" (i.e. each dimension should be normalized such' 'that its mean and variance are zero and one, respectively.) This' ' sphering normalization would be performed after all training ' 'samples had been received but before inference was performed. ' 'The dimension-specific normalization constants would then ' ' be applied to all future incoming vectors prior to performing ' ' conventional NN inference.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), SVDSampleCount=dict( description='If not 0, carries out SVD transformation after ' 'that many samples have been seen.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), SVDDimCount=dict( description='Number of dimensions to keep after SVD if greater ' 'than 0. If set to -1 it is considered unspecified. ' 'If set to 0 it is consider "adaptive" and the number ' 'is chosen automatically.', dataType='Int32', count=1, constraints='', defaultValue=-1, accessMode='Create'), fractionOfMax=dict( description='The smallest singular value which is retained ' 'as a fraction of the largest singular value. This is ' 'used only if SVDDimCount==0 ("adaptive").', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), useAuxiliary=dict( description='Whether or not the classifier should use auxiliary ' 'input data.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), justUseAuxiliary=dict( description='Whether or not the classifier should ONLUY use the ' 'auxiliary input data.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), clVerbosity=dict( description='An integer that controls the verbosity level, ' '0 means no verbose output, increasing integers ' 'provide more verbosity.', dataType='UInt32', count=1, constraints='', defaultValue=0 , accessMode='ReadWrite'), keepAllDistances=dict( description='Whether to store all the protoScores in an array, ' 'rather than just the ones for the last inference. ' 'When this parameter is changed from True to False, ' 'all the scores are discarded except for the most ' 'recent one.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), replaceDuplicates=dict( description='A boolean flag that determines whether or' 'not the KNNClassifier should replace duplicates' 'during learning. This should be on when online' 'learning.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), cellsPerCol=dict( description='If >= 1, we assume the input is organized into columns, ' 'in the same manner as the temporal pooler AND ' 'whenever we store a new prototype, we only store the ' 'start cell (first cell) in any column which is bursting.' 'colum ', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), maxStoredPatterns=dict( description='Limits the maximum number of the training patterns ' 'stored. When KNN learns in a fixed capacity mode, ' 'the unused patterns are deleted once the number ' 'of stored patterns is greater than maxStoredPatterns' 'columns. [-1 is no limit] ', dataType='Int32', count=1, constraints='', defaultValue=-1, accessMode='Create'), ), commands=dict() ) return ns def __init__(self, maxCategoryCount=0, bestPrototypeIndexCount=0, outputProbabilitiesByDist=False, k=1, distanceNorm=2.0, distanceMethod='norm', distThreshold=0.0, doBinarization=False, inputThresh=0.500, useSparseMemory=True, sparseThreshold=0.0, relativeThreshold=False, winnerCount=0, acceptanceProbability=1.0, seed=42, doSphering=False, SVDSampleCount=0, SVDDimCount=0, fractionOfMax=0, useAuxiliary=0, justUseAuxiliary=0, clVerbosity=0, replaceDuplicates=False, cellsPerCol=0, maxStoredPatterns=-1, minSparsity=0.0 ): self.version = KNNClassifierRegion.__VERSION__ # Convert various arguments to match the expectation # of the KNNClassifier if SVDSampleCount == 0: SVDSampleCount = None if SVDDimCount == -1: SVDDimCount = None elif SVDDimCount == 0: SVDDimCount = 'adaptive' if fractionOfMax == 0: fractionOfMax = None if useAuxiliary == 0: useAuxiliary = False if justUseAuxiliary == 0: justUseAuxiliary = False # KNN Parameters self.knnParams = dict( k=k, distanceNorm=distanceNorm, distanceMethod=distanceMethod, distThreshold=distThreshold, doBinarization=doBinarization, binarizationThreshold=inputThresh, useSparseMemory=useSparseMemory, sparseThreshold=sparseThreshold, relativeThreshold=relativeThreshold, numWinners=winnerCount, numSVDSamples=SVDSampleCount, numSVDDims=SVDDimCount, fractionOfMax=fractionOfMax, verbosity=clVerbosity, replaceDuplicates=replaceDuplicates, cellsPerCol=cellsPerCol, maxStoredPatterns=maxStoredPatterns, minSparsity=minSparsity ) # Initialize internal structures self.outputProbabilitiesByDist = outputProbabilitiesByDist self.learningMode = True self.inferenceMode = False self._epoch = 0 self.acceptanceProbability = acceptanceProbability self._rgen = Random(seed) self.confusion = numpy.zeros((1, 1)) self.keepAllDistances = False self._protoScoreCount = 0 self._useAuxiliary = useAuxiliary self._justUseAuxiliary = justUseAuxiliary # Sphering normalization self._doSphering = doSphering self._normOffset = None self._normScale = None self._samples = None self._labels = None # Debugging self.verbosity = clVerbosity # Taps self._tapFileIn = None self._tapFileOut = None self._initEphemerals() self.maxStoredPatterns = maxStoredPatterns self.maxCategoryCount = maxCategoryCount self._bestPrototypeIndexCount = bestPrototypeIndexCount def _getEphemeralAttributes(self): """ List of attributes to not save with serialized state. """ return ['_firstComputeCall', '_accuracy', '_protoScores', '_categoryDistances'] def _initEphemerals(self): """ Initialize attributes that are not saved with the checkpoint. """ self._firstComputeCall = True self._accuracy = None self._protoScores = None self._categoryDistances = None self._knn = KNNClassifier.KNNClassifier(**self.knnParams) for x in ('_partitions', '_useAuxiliary', '_doSphering', '_scanInfo', '_protoScores'): if not hasattr(self, x): setattr(self, x, None) def __setstate__(self, state): """Set state from serialized state.""" if 'version' not in state: self.__dict__.update(state) elif state['version'] == 1: # Backward compatibility if "doSelfValidation" in state: state.pop("doSelfValidation") knnState = state['_knn_state'] del state['_knn_state'] self.__dict__.update(state) self._initEphemerals() self._knn.__setstate__(knnState) else: raise RuntimeError("Invalid KNNClassifierRegion version for __setstate__") # Set to current version self.version = KNNClassifierRegion.__VERSION__ def __getstate__(self): """Get serializable state.""" state = self.__dict__.copy() state['_knn_state'] = self._knn.__getstate__() del state['_knn'] for field in self._getEphemeralAttributes(): del state[field] return state def initialize(self, dims, splitterMaps): assert tuple(dims) == (1,) * len(dims) def _getActiveOutputCount(self): if self._knn._categoryList: return int(max(self._knn._categoryList)+1) else: return 0 activeOutputCount = property(fget=_getActiveOutputCount) def _getSeenCategoryCount(self): return len(set(self._knn._categoryList)) categoryCount = property(fget=_getSeenCategoryCount) def _getPatternMatrix(self): if self._knn._M is not None: return self._knn._M else: return self._knn._Memory def _getAccuracy(self): n = self.confusion.shape[0] assert n == self.confusion.shape[1], "Confusion matrix is non-square." return self.confusion[range(n), range(n)].sum(), self.confusion.sum() accuracy = property(fget=_getAccuracy) def clear(self): self._knn.clear() def getAlgorithmInstance(self): """Returns instance of the underlying KNNClassifier algorithm object.""" return self._knn def getParameter(self, name, index=-1): """ Get the value of the parameter. @param name -- the name of the parameter to retrieve, as defined by the Node Spec. """ if name == "patternCount": return self._knn._numPatterns elif name == "patternMatrix": return self._getPatternMatrix() elif name == "k": return self._knn.k elif name == "distanceNorm": return self._knn.distanceNorm elif name == "distanceMethod": return self._knn.distanceMethod elif name == "distThreshold": return self._knn.distThreshold elif name == "inputThresh": return self._knn.binarizationThreshold elif name == "doBinarization": return self._knn.doBinarization elif name == "useSparseMemory": return self._knn.useSparseMemory elif name == "sparseThreshold": return self._knn.sparseThreshold elif name == "winnerCount": return self._knn.numWinners elif name == "relativeThreshold": return self._knn.relativeThreshold elif name == "SVDSampleCount": v = self._knn.numSVDSamples return v if v is not None else 0 elif name == "SVDDimCount": v = self._knn.numSVDDims return v if v is not None else 0 elif name == "fractionOfMax": v = self._knn.fractionOfMax return v if v is not None else 0 elif name == "useAuxiliary": return self._useAuxiliary elif name == "justUseAuxiliary": return self._justUseAuxiliary elif name == "doSphering": return self._doSphering elif name == "cellsPerCol": return self._knn.cellsPerCol elif name == "maxStoredPatterns": return self.maxStoredPatterns elif name == 'categoryRecencyList': return self._knn._categoryRecencyList else: # If any spec parameter name is the same as an attribute, this call # will get it automatically, e.g. self.learningMode return PyRegion.getParameter(self, name, index) def setParameter(self, name, index, value): """ Set the value of the parameter. @param name -- the name of the parameter to update, as defined by the Node Spec. @param value -- the value to which the parameter is to be set. """ if name == "learningMode": self.learningMode = bool(int(value)) self._epoch = 0 elif name == "inferenceMode": self._epoch = 0 if int(value) and not self.inferenceMode: self._finishLearning() self.inferenceMode = bool(int(value)) elif name == "distanceNorm": self._knn.distanceNorm = value elif name == "distanceMethod": self._knn.distanceMethod = value elif name == "keepAllDistances": self.keepAllDistances = bool(value) if not self.keepAllDistances: # Discard all distances except the latest if self._protoScores is not None and self._protoScores.shape[0] > 1: self._protoScores = self._protoScores[-1,:] if self._protoScores is not None: self._protoScoreCount = 1 else: self._protoScoreCount = 0 elif name == "clVerbosity": self.verbosity = value self._knn.verbosity = value else: return PyRegion.setParameter(self, name, index, value) def reset(self): self.confusion = numpy.zeros((1, 1)) def doInference(self, activeInput): """Explicitly run inference on a vector that is passed in and return the category id. Useful for debugging.""" prediction, inference, allScores = self._knn.infer(activeInput) return inference def enableTap(self, tapPath): """ Begin writing output tap files. @param tapPath -- base name of the output tap files to write. """ self._tapFileIn = open(tapPath + '.in', 'w') self._tapFileOut = open(tapPath + '.out', 'w') def disableTap(self): """Disable writing of output tap files. """ if self._tapFileIn is not None: self._tapFileIn.close() self._tapFileIn = None if self._tapFileOut is not None: self._tapFileOut.close() self._tapFileOut = None def handleLogInput(self, inputs): """Write inputs to output tap file.""" if self._tapFileIn is not None: for input in inputs: for k in range(len(input)): print >> self._tapFileIn, input[k], print >> self._tapFileIn def handleLogOutput(self, output): """Write outputs to output tap file.""" #raise Exception('MULTI-LINE DUMMY\nMULTI-LINE DUMMY') if self._tapFileOut is not None: for k in range(len(output)): print >> self._tapFileOut, output[k], print >> self._tapFileOut def _storeSample(self, inputVector, trueCatIndex, partition=0): """ Store a training sample and associated category label """ # If this is the first sample, then allocate a numpy array # of the appropriate size in which to store all samples. if self._samples is None: self._samples = numpy.zeros((0, len(inputVector)), dtype=RealNumpyDType) assert self._labels is None self._labels = [] # Add the sample vector and category lable self._samples = numpy.concatenate((self._samples, numpy.atleast_2d(inputVector)), axis=0) self._labels += [trueCatIndex] # Add the partition ID if self._partitions is None: self._partitions = [] if partition is None: partition = 0 self._partitions += [partition] def compute(self, inputs, outputs): """ Process one input sample. This method is called by the runtime engine. NOTE: the number of input categories may vary, but the array size is fixed to the max number of categories allowed (by a lower region), so "unused" indices of the input category array are filled with -1s. TODO: confusion matrix does not support multi-label classification """ #raise Exception('MULTI-LINE DUMMY\nMULTI-LINE DUMMY') #For backward compatibility if self._useAuxiliary is None: self._useAuxiliary = False # If the first time being called, then print potential warning messsages if self._firstComputeCall: self._firstComputeCall = False if self._useAuxiliary: #print "\n Auxiliary input stream from Image Sensor enabled." if self._justUseAuxiliary == True: print " Warning: You have chosen to ignore the image data and instead just use the auxiliary data stream." # Format inputs #childInputs = [x.wvector(0) for x in inputs["bottomUpIn"]] #inputVector = numpy.concatenate([x.array() for x in childInputs]) inputVector = inputs['bottomUpIn'] # Look for auxiliary input if self._useAuxiliary==True: #auxVector = inputs['auxDataIn'][0].wvector(0).array() auxVector = inputs['auxDataIn'] if auxVector.dtype != numpy.float32: raise RuntimeError, "KNNClassifierRegion expects numpy.float32 for the auxiliary data vector" if self._justUseAuxiliary == True: #inputVector = inputs['auxDataIn'][0].wvector(0).array() inputVector = inputs['auxDataIn'] else: #inputVector = numpy.concatenate([inputVector, inputs['auxDataIn'][0].wvector(0).array()]) inputVector = numpy.concatenate([inputVector, inputs['auxDataIn']]) # Logging #self.handleLogInput(childInputs) self.handleLogInput([inputVector]) # Read the category. assert "categoryIn" in inputs, "No linked category input." categories = inputs['categoryIn'] # Read the partition ID. if "partitionIn" in inputs: assert len(inputs["partitionIn"]) == 1, "Must have exactly one link to partition input." partInput = inputs['partitionIn'] assert len(partInput) == 1, "Partition input element count must be exactly 1." partition = int(partInput[0]) else: partition = None # --------------------------------------------------------------------- # Inference (can be done simultaneously with learning) if self.inferenceMode: categoriesOut = outputs['categoriesOut'] probabilitiesOut = outputs['categoryProbabilitiesOut'] # If we are sphering, then apply normalization if self._doSphering: inputVector = (inputVector + self._normOffset) * self._normScale nPrototypes = 0 if "bestPrototypeIndices" in outputs: #bestPrototypeIndicesOut = outputs["bestPrototypeIndices"].wvector() bestPrototypeIndicesOut = outputs["bestPrototypeIndices"] nPrototypes = len(bestPrototypeIndicesOut) winner, inference, protoScores, categoryDistances = \ self._knn.infer(inputVector, partitionId=partition) if not self.keepAllDistances: self._protoScores = protoScores else: # Keep all prototype scores in an array if self._protoScores is None: self._protoScores = numpy.zeros((1, protoScores.shape[0]), protoScores.dtype) self._protoScores[0,:] = protoScores#.reshape(1, protoScores.shape[0]) self._protoScoreCount = 1 else: if self._protoScoreCount == self._protoScores.shape[0]: # Double the size of the array newProtoScores = numpy.zeros((self._protoScores.shape[0] * 2, self._protoScores.shape[1]), self._protoScores.dtype) newProtoScores[:self._protoScores.shape[0],:] = self._protoScores self._protoScores = newProtoScores # Store the new prototype score self._protoScores[self._protoScoreCount,:] = protoScores self._protoScoreCount += 1 self._categoryDistances = categoryDistances # -------------------------------------------------------------------- # Compute the probability of each category if self.outputProbabilitiesByDist: scores = 1.0 - self._categoryDistances else: scores = inference # Probability is simply the scores/scores.sum() total = scores.sum() if total == 0: numScores = len(scores) probabilities = numpy.ones(numScores) / numScores else: probabilities = scores / total # ------------------------------------------------------------------- # Fill the output vectors with our results nout = min(len(categoriesOut), len(inference)) categoriesOut.fill(0) categoriesOut[0:nout] = inference[0:nout] probabilitiesOut.fill(0) probabilitiesOut[0:nout] = probabilities[0:nout] if self.verbosity >= 1: print "KNNRegion: categoriesOut: ", categoriesOut[0:nout] print "KNNRegion: probabilitiesOut: ", probabilitiesOut[0:nout] if self._scanInfo is not None: self._scanResults = [tuple(inference[:nout])] # Update the stored confusion matrix. for category in categories: if category >= 0: dims = max(category+1, len(inference)) oldDims = len(self.confusion) if oldDims < dims: confusion = numpy.zeros((dims, dims)) confusion[0:oldDims, 0:oldDims] = self.confusion self.confusion = confusion self.confusion[inference.argmax(), category] += 1 # Calculate the best prototype indices if nPrototypes > 1: bestPrototypeIndicesOut.fill(0) if categoryDistances is not None: indices = categoryDistances.argsort() nout = min(len(indices), nPrototypes) bestPrototypeIndicesOut[0:nout] = indices[0:nout] elif nPrototypes == 1: if (categoryDistances is not None) and len(categoryDistances): bestPrototypeIndicesOut[0] = categoryDistances.argmin() else: bestPrototypeIndicesOut[0] = 0 # Logging self.handleLogOutput(inference) # --------------------------------------------------------------------- # Learning mode if self.learningMode: if (self.acceptanceProbability < 1.0) and \ (self._rgen.getReal64() > self.acceptanceProbability): pass else: # Accept the input for category in categories: if category >= 0: # category values of -1 are to be skipped (they are non-categories) if self._doSphering: # If we are sphering, then we can't provide the data to the KNN # library until we have computed per-dimension normalization # constants. So instead, we'll just store each training sample. self._storeSample(inputVector, category, partition) else: # Pass the raw training sample directly to the KNN library. self._knn.learn(inputVector, category, partition) self._epoch += 1 def getCategoryList(self): """ Public API for returning the category list This is a required API of the NearestNeighbor inspector. It returns an array which has one entry per stored prototype. The value of the entry is the category # of that stored prototype. """ return self._knn._categoryList def removeCategory(self, categoryToRemove): return self._knn.removeCategory(categoryToRemove) def getLatestDistances(self): """ Public API for returning the full scores (distance to each prototype) from the last compute() inference call. This is a required API of the NearestNeighbor inspector. It returns an array which has one entry per stored prototype. The value of the entry is distance of the most recenty inferred input from the stored prototype. """ if self._protoScores is not None: if self.keepAllDistances: return self._protoScores[self._protoScoreCount - 1,:] else: return self._protoScores else: return None def getAllDistances(self): """ Return all the prototype distances from all computes available. Like getLatestDistances, but returns all the scores if more than one set is available. getLatestDistances will always just return one set of scores. """ if self._protoScores is None: return None return self._protoScores[:self._protoScoreCount, :] def calculateProbabilities(self): # Get the scores, from 0 to 1 scores = 1.0 - self._categoryDistances # Probability is simply the score/scores.sum() total = scores.sum() if total == 0: numScores = len(scores) return numpy.ones(numScores) / numScores return scores / total def _finishLearning(self): """Does nothing. Kept here for API compatibility """ if self._doSphering: self._finishSphering() self._knn.finishLearning() # Compute leave-one-out validation accuracy if # we actually received non-trivial partition info self._accuracy = None def _finishSphering(self): """ Compute normalization constants for each feature dimension based on the collected training samples. Then normalize our training samples using these constants (so that each input dimension has mean and variance of zero and one, respectively.) Then feed these "sphered" training samples into the underlying SVM model. """ # If we are sphering our data, we need to compute the # per-dimension normalization constants # First normalize the means (to zero) self._normOffset = self._samples.mean(axis=0) * -1.0 self._samples += self._normOffset # Now normalize the variances (to one). However, we need to be # careful because the variance could conceivably be zero for one # or more dimensions. variance = self._samples.var(axis=0) variance[numpy.where(variance == 0.0)] = 1.0 self._normScale = 1.0 / numpy.sqrt(variance) self._samples *= self._normScale # Now feed each "sphered" sample into the SVM library for sampleIndex in range(len(self._labels)): self._knn.learn(self._samples[sampleIndex], self._labels[sampleIndex], self._partitions[sampleIndex]) def _arraysToLists(self, samplesArray, labelsArray): labelsList = list(labelsArray) samplesList = [[float(y) for y in x] for x in [list(x) for x in samplesArray]] return samplesList, labelsList def getOutputElementCount(self, name): """This method will be called only when the node is used in nuPIC 2""" if name == 'categoriesOut': return self.maxCategoryCount elif name == 'categoryProbabilitiesOut': return self.maxCategoryCount elif name == 'bestPrototypeIndices': return self._bestPrototypeIndexCount if self._bestPrototypeIndexCount else 0 else: raise Exception('Unknown output: ' + name)