def testWriteRead(self):
    """Round-trip a trained classifier through capnp and compare both copies.

    A classifier is trained on one record, serialized to a temporary file,
    and deserialized into a second instance. The test then checks that the
    persisted state matches attribute by attribute and that both instances
    produce the same output for an identical follow-up record.
    """
    c1 = CLAClassifier([1], 0.1, 0.1, 0)

    # Create a vector of input bit indices
    input1 = [1, 5, 9]
    c1.compute(recordNum=0,
               patternNZ=input1,
               classification={'bucketIdx': 4, 'actValue': 34.7},
               learn=True, infer=True)

    proto1 = ClaClassifier_capnp.ClaClassifierProto.new_message()
    c1.write(proto1)

    # Write the proto to a temp file and read it back into a new proto.
    # The classifier is rebuilt while the file is still open so that a
    # lazily-read message never touches a closed file.
    with tempfile.TemporaryFile() as f:
        proto1.write(f)
        f.seek(0)
        proto2 = ClaClassifier_capnp.ClaClassifierProto.read(f)

        # Load the deserialized proto
        c2 = CLAClassifier.read(proto2)

    self.assertEqual(c1.steps, c2.steps)
    self.assertAlmostEqual(c1.alpha, c2.alpha)
    self.assertAlmostEqual(c1.actValueAlpha, c2.actValueAlpha)
    self.assertEqual(c1._learnIteration, c2._learnIteration)
    self.assertEqual(c1._recordNumMinusLearnIteration,
                     c2._recordNumMinusLearnIteration)
    self.assertEqual(c1._patternNZHistory, c2._patternNZHistory)

    # Compare the key sets rather than the key lists: dictionary ordering is
    # not part of the serialization contract.
    self.assertEqual(set(c1._activeBitHistory.keys()),
                     set(c2._activeBitHistory.keys()))
    for historyKey in c1._activeBitHistory:
        c1BitHistory = c1._activeBitHistory[historyKey]
        c2BitHistory = c2._activeBitHistory[historyKey]
        for attrName in ('_id', '_stats', '_lastTotalUpdate',
                         '_learnIteration'):
            self.assertEqual(getattr(c1BitHistory, attrName),
                             getattr(c2BitHistory, attrName))

    self.assertEqual(c1._maxBucketIdx, c2._maxBucketIdx)
    self.assertEqual(len(c1._actualValues), len(c2._actualValues))
    for value1, value2 in zip(c1._actualValues, c2._actualValues):
        self.assertAlmostEqual(value1, value2, 5)
    self.assertEqual(c1._version, c2._version)
    self.assertEqual(c1.verbosity, c2.verbosity)

    result1 = c1.compute(recordNum=1,
                         patternNZ=input1,
                         classification={'bucketIdx': 4, 'actValue': 34.7},
                         learn=True, infer=True)
    result2 = c2.compute(recordNum=1,
                         patternNZ=input1,
                         classification={'bucketIdx': 4, 'actValue': 34.7},
                         learn=True, infer=True)

    self.assertEqual(set(result1.keys()), set(result2.keys()))
    for key in result1:
        # Each result array is checked over its own full length, so a
        # truncated or padded array on either side is reported.
        self.assertEqual(len(result1[key]), len(result2[key]))
        for value1, value2 in zip(result1[key], result2[key]):
            self.assertAlmostEqual(value1, value2, 5)
class CLAClassifierDiff(object):
    """Classifier-like object that diffs the output from different classifiers.

    Instances of each version of the CLA classifier are created and each call
    to compute is passed to each version of the classifier. The results are
    diffed to make sure there are no differences.

    Optionally, the classifiers can be serialized and deserialized after a
    specified number of calls to compute to ensure that serialization does not
    cause discrepancies between the results.

    TODO: Check internal state as well.
    TODO: Provide option to write output to a file.
    TODO: Provide record differences without throwing an exception.
    """

    __VERSION__ = 'CLAClassifierDiffV1'

    def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0,
                 callsPerSerialize=CALLS_PER_SERIALIZE):
        """Create one Python and one C++ classifier with identical settings.

        :param steps: prediction steps forwarded to both classifiers
        :param alpha: forwarded to both classifiers
        :param actValueAlpha: forwarded to both classifiers
        :param verbosity: forwarded to both classifiers
        :param callsPerSerialize: both classifiers are pickled and unpickled
            every time the number of compute calls is a multiple of this value
        """
        self._claClassifier = CLAClassifier(steps, alpha, actValueAlpha,
                                            verbosity)
        self._fastCLAClassifier = FastCLAClassifier(steps, alpha,
                                                    actValueAlpha, verbosity)
        self._calls = 0
        self._callsPerSerialize = callsPerSerialize

    def compute(self, recordNum, patternNZ, classification, learn, infer):
        """Run both classifiers on the same record and assert equal output.

        :returns: the result of the Python classifier
        :raises AssertionError: if the two results differ in type, keys,
            per-key length, or any value (numeric values are compared with an
            absolute tolerance of 1e-7 when ``classification['actValue']`` is
            a real number, exactly otherwise)
        """
        result1 = self._claClassifier.compute(recordNum, patternNZ,
                                              classification, learn, infer)
        result2 = self._fastCLAClassifier.compute(recordNum, patternNZ,
                                                  classification, learn, infer)
        self._calls += 1

        # Check if it is time to serialize and deserialize.
        if self._calls % self._callsPerSerialize == 0:
            self._claClassifier = pickle.loads(
                pickle.dumps(self._claClassifier))
            self._fastCLAClassifier = pickle.loads(
                pickle.dumps(self._fastCLAClassifier))

        # Assert both results are the same type.
        assert type(result1) is type(result2)

        # Assert that the keys match.
        assert set(result1.keys()) == set(result2.keys()), (
            "diff detected: "
            "py result=%s, C++ result=%s" % (result1, result2))

        # Assert that the values match.
        for key, pyValues in result1.items():
            cppValues = result2[key]
            assert type(pyValues) is type(cppValues)

            # Without this check a longer C++ result would pass unnoticed and
            # a shorter one would surface as an IndexError instead of a diff.
            assert len(pyValues) == len(cppValues), (
                'Python CLAClassifier has %d values and C++ FastCLAClassifier '
                'has %d values for key %s.'
                % (len(pyValues), len(cppValues), str(key)))

            for pyValue, cppValue in zip(pyValues, cppValues):
                if isinstance(classification['actValue'], numbers.Real):
                    assert abs(float(pyValue) - float(cppValue)) < 0.0000001, (
                        'Python CLAClassifier has value %f and C++ '
                        'FastCLAClassifier has value %f.'
                        % (pyValue, cppValue))
                else:
                    assert pyValue == cppValue, (
                        'Python CLAClassifier has value %s and C++ '
                        'FastCLAClassifier has value %s.'
                        % (str(pyValue), str(cppValue)))

        return result1
def initializeClassifiers(Nelements, encoder):
    """Build a CLA and an SDR classifier primed with the highest bucket.

    Both classifiers are created for 0-step prediction and are each fed one
    record (record number 0) whose bucket index and actual value are both
    ``Nelements - 1``.

    :param Nelements: number of elements; ``Nelements - 1`` is the value that
        gets encoded and used as bucket index
    :param encoder: object whose ``encode`` output marks the active bits
    :returns: tuple ``(claClassifier, sdrClassifier)``
    """
    cla = CLAClassifier(steps=[0])
    nn_classifier = SDRClassifier(steps=[0], alpha=0.1)

    topBucket = Nelements - 1
    activeBits = list(numpy.where(encoder.encode(topBucket))[0])
    target = {'bucketIdx': topBucket, 'actValue': topBucket}

    # feed in the pattern with the highest bucket index
    for classifier in (cla, nn_classifier):
        classifier.compute(0, activeBits, target, learn=True, infer=True)

    return cla, nn_classifier
def initializeClassifiers(Nelements, encoder):
    """Create a CLAClassifier and an SDRClassifier and show each one record.

    The single training record uses ``Nelements - 1`` as the encoded value,
    the bucket index, and the actual value, and is presented as record 0 with
    learning and inference enabled.

    :param Nelements: number of elements covered by the encoder
    :param encoder: encoder used to obtain the active bits of the record
    :returns: ``(claClassifier, sdrClassifier)``
    """
    claClassifier = CLAClassifier(steps=[0])
    sdrClassifier = SDRClassifier(steps=[0], alpha=0.1)

    highest = Nelements - 1
    encoding = encoder.encode(highest)
    patternNZ = list(numpy.where(encoding)[0])
    classification = {'bucketIdx': highest, 'actValue': highest}

    # feed in the pattern with the highest bucket index
    claClassifier.compute(0, patternNZ, classification,
                          learn=True, infer=True)
    sdrClassifier.compute(0, patternNZ, classification,
                          learn=True, infer=True)

    return claClassifier, sdrClassifier
class Model(object):
    """Encoders -> spatial pooler -> temporal memory -> classifier pipeline."""

    def __init__(self, params):
        """
        :param params: A dict of modelParams in the format
            {'modelParams': {'clParams': {'alpha': float, 'steps': '1,2,3'},
                             'sensorParams': {'encoders': {...}},
                             'spParams': {...}, 'tpParams': {...},
                             'predictedField': str}}

        NOTE: ``params['modelParams']['spParams']['inputWidth']`` is
        overwritten in place with the summed width of all encoders.
        """
        modelParams = params['modelParams']

        # Encoder entries set to None are skipped; 'type' selects the encoder
        # class and, like 'fieldname', is not forwarded to its constructor.
        self._encoders = {}
        for field, args in modelParams['sensorParams']['encoders'].items():
            if args is None:
                continue
            encoderClass = getattr(nupic.encoders, args['type'])
            encoderArgs = dict((arg, val) for arg, val in args.items()
                               if arg not in ('type', 'fieldname'))
            self._encoders[field] = encoderClass(**encoderArgs)

        self.predicted_field = modelParams['predictedField']

        modelParams['spParams']['inputWidth'] = sum(
            encoder.getWidth() for encoder in self._encoders.values())
        self.sp = SpatialPooler(**modelParams['spParams'])
        self.sp.initialize(None, None)

        self.tm = TemporalMemory(**modelParams['tpParams'])
        self.tm.initialize(None, None)

        self.classifier = CLAClassifier(**modelParams['clParams'])

        columnCount = modelParams['spParams']['columnCount']
        self.spOutputs = {
            'bottomUpOut': np.zeros(columnCount, dtype=np.float32),
            'anomalyScore': np.zeros(columnCount, dtype=np.float32)
        }
        self.tmOutputs = {
            'bottomUpOut': np.zeros(
                modelParams['tpParams']['columnCount'] *
                modelParams['tpParams']['cellsPerColumn'],
                dtype=np.float32)
        }
        self.recordNum = 0

    def encode(self, inputs):
        """
        :param inputs: dict of input names to their values
        :return: encoded inputs concatenated
        """
        return np.concatenate([
            encoder.encode(inputs[name])
            for name, encoder in self._encoders.items()
        ])

    def run(self, inputs):
        """
        Runs a single timestep

        :param inputs: a dict mapping input names to their values
        :return: a dict of predictions, as returned by the classifier
        """
        # Advance the record counter exactly once per timestep. Advancing it
        # twice makes consecutive records appear two steps apart to the
        # classifier, which misaligns its n-step history lookup.
        self.recordNum += 1

        encodings = self.encode(inputs)
        predictedValue = inputs[self.predicted_field]
        bucketIdx = self._encoders[self.predicted_field].getBucketIndices(
            predictedValue)[0]

        self.sp.compute({'bottomUpIn': encodings}, self.spOutputs)
        self.tm.compute({'bottomUpIn': self.spOutputs['bottomUpOut']},
                        self.tmOutputs)

        # The classifier expects the indices of the active cells, not the
        # dense 0/1 output vector.
        patternNZ = self.tmOutputs['bottomUpOut'].nonzero()[0]
        classification = {'bucketIdx': bucketIdx, 'actValue': predictedValue}
        return self.classifier.compute(self.recordNum, patternNZ,
                                       classification, True, True)
class Sensor(Node):
    """
    A super class only to group properties related to sensors.

    A sensor reads records from a data source (currently a file), turns each
    record into an array of bits (directly for HTM-formatted input, through an
    encoder for raw input) and keeps the predictions made for its values.
    """

    #region Constructor

    def __init__(self, parentNode, name):
        """
        Initializes a new instance of this class.
        """

        Node.__init__(self, parentNode, name, NodeType.sensor)

        #region Instance fields

        # An array of the bit objects that compose the current output of this
        # node.
        self.bits = []

        # Type of the data source (File or Database)
        self.dataSourceType = DataSourceType.file

        # The input file name to be handled. Returns the input file name only
        # if it is in the project directory, full path otherwise.
        self.fileName = ''

        # File stream to handle the file.
        self._file = None

        # Connection string of the database.
        self.databaseConnectionString = ""

        # Target table of the database.
        self.databaseTable = ''

        # Target field of the database table.
        self.databaseField = ''

        # Format of the node (HTM or raw data)
        self.inputFormat = InputFormat.htm

        # Data type of the raw input
        self.inputRawDataType = InputRawDataType.string

        # Optional encoder to convert raw data to htm input and vice-versa.
        self.encoder = None

        # Module name which encoder class is imported.
        self.encoderModule = ""

        # Class name which encode or decode values.
        self.encoderClass = ""

        # Parameters passed to the encoder class constructor.
        self.encoderParams = ""

        # Method used to get predicted values and their probabilities.
        self.predictionsMethod = PredictionsMethod.reconstruction

        # Switch for classification learning
        self.enableClassificationLearning = True

        # Switch for classification inference
        self.enableClassificationInference = True

        # Raw value encoded to network.
        self.currentValue = [None] * maxPreviousSteps

        # Raw value decoded from network.
        self.predictedValues = [None] * maxPreviousSteps

        #endregion

        #region Statistics properties

        self.statsPrecisionRate = 0.

        #endregion

    #endregion

    #region Methods

    def getBit(self, x, y):
        """
        Return the bit located at given position
        """

        # NOTE(review): bits are looked up in row-major order here, while
        # initialize() creates them with x as the outer loop - confirm that
        # the x/y stored on each bit is meant to differ for non-square sensors.
        bit = self.bits[(y * self.width) + x]

        return bit

    def initialize(self):
        """
        Initialize this node.

        Creates the bit objects, opens the input file (checking that its
        record size or the encoder size fits the sensor) and creates the
        classifier. A warning box is shown and the method returns early when
        the file is missing or the sizes do not fit.
        """

        Node.initialize(self)

        # Initialize input bits
        self.bits = []
        for x in range(self.width):
            for y in range(self.height):
                bit = Bit()
                bit.x = x
                bit.y = y
                self.bits.append(bit)

        if self.dataSourceType == DataSourceType.file:
            # Initialize this node opening the file and place cursor on the
            # first record.

            # If file name provided is a relative path, use project file path
            if self.fileName != '' and os.path.dirname(self.fileName) == '':
                fullFileName = os.path.join(
                    os.path.dirname(Global.project.fileName), self.fileName)
            else:
                fullFileName = self.fileName

            # Open file
            if not os.path.isfile(fullFileName):
                QtGui.QMessageBox.warning(
                    None, "Warning",
                    "Input stream file '" + fullFileName +
                    "' was not found or specified.",
                    QtGui.QMessageBox.Ok)
                return

            if self.inputFormat == InputFormat.htm:
                self._file = open(fullFileName, "rb")

                # Get dimensions of the record
                width = 0
                height = 0
                while True:
                    # Read next character
                    character = self._file.read(1)

                    # Check if character is 'return' and not a number, i.e. if
                    # the first record was read. Reaching the end of the file
                    # ('' is returned) also ends the first record.
                    if character == '\r':
                        character = self._file.read(1)
                    if character == '\n' or character == '':
                        break

                    # Pass over the line until find a 'return' character in
                    # order to get the width. The end-of-file test keeps this
                    # loop from spinning forever on a file whose last row has
                    # no line terminator.
                    width = 0
                    while character != '\n' and character != '':
                        width += 1
                        character = self._file.read(1)
                        if character == '\r':
                            character = self._file.read(1)

                    # Increments height
                    height += 1

                # If current file record dimensions is not the same to sensor
                # size then warn the user. The numbers are converted
                # explicitly because concatenating them to the message
                # directly raises a TypeError.
                if self.width != width or self.height != height:
                    QtGui.QMessageBox.warning(
                        None, "Warning",
                        "'" + self.name + "': File input size (" +
                        str(width) + " x " + str(height) +
                        ") is different from sensor size (" +
                        str(self.width) + " x " + str(self.height) + ").",
                        QtGui.QMessageBox.Ok)
                    return

                # Put the pointer back to initial position
                self._file.seek(0)
            elif self.inputFormat == InputFormat.raw:
                self._file = open(fullFileName)

                # Create an instance class for an encoder given its module,
                # class and constructor params
                self.encoder = getInstantiatedClass(
                    self.encoderModule, self.encoderClass, self.encoderParams)

                # If encoder size does not fit in sensor size then warn the
                # user
                encoderSize = self.encoder.getWidth()
                sensorSize = self.width * self.height
                if encoderSize > sensorSize:
                    QtGui.QMessageBox.warning(
                        None, "Warning",
                        "'" + self.name + "': Encoder size (" +
                        str(encoderSize) +
                        ") is different from sensor size (" +
                        str(self.width) + " x " + str(self.height) + " = " +
                        str(sensorSize) + ").",
                        QtGui.QMessageBox.Ok)
                    return

        elif self.dataSourceType == DataSourceType.database:
            pass

        # Create Classifier instance with appropriate parameters
        self.minProbabilityThreshold = 0.0001
        self.steps = list(range(1, maxFutureSteps + 1))
        self.classifier = CLAClassifier(steps=self.steps)

    def nextStep(self):
        """
        Performs actions related to time step progression.
        """

        # Update states machine by remove the first element and add a new
        # element in the end
        if self.inputFormat == InputFormat.raw:
            if len(self.currentValue) > maxPreviousSteps:
                del self.currentValue[0]
                del self.predictedValues[0]
            self.currentValue.append(None)
            self.predictedValues.append(None)

        Node.nextStep(self)
        for bit in self.bits:
            bit.nextStep()

        # Get record value from data source
        recordValue = None
        if self.dataSourceType == DataSourceType.file:
            recordValue = self.__getNextFileRecord()
        elif self.dataSourceType == DataSourceType.database:
            pass

        # Handle the value according to its type
        self._output = []
        if self.inputFormat == InputFormat.htm:
            # Initialize the array for representing the current record
            self._output = recordValue
        elif self.inputFormat == InputFormat.raw:
            # Convert the value to its respective data type
            rawValue = None
            if self.inputRawDataType == InputRawDataType.boolean:
                # NOTE(review): any non-empty text, including "False" or "0",
                # converts to True here - confirm the expected file content.
                rawValue = bool(recordValue)
            elif self.inputRawDataType == InputRawDataType.integer:
                rawValue = int(recordValue)
            elif self.inputRawDataType == InputRawDataType.decimal:
                rawValue = float(recordValue)
            elif self.inputRawDataType == InputRawDataType.dateTime:
                rawValue = datetime.datetime.strptime(recordValue,
                                                      "%m/%d/%y %H:%M")
            elif self.inputRawDataType == InputRawDataType.string:
                rawValue = str(recordValue)
            self.currentValue[maxPreviousSteps - 1] = rawValue

            # Pass raw value to encoder and get its respective array
            self._output = self.encoder.encode(rawValue)

        # Update sensor bits
        for i, value in enumerate(self._output):
            self.bits[i].isActive[maxPreviousSteps - 1] = bool(value > 0.)

        # Mark falsely predicted bits
        for bit in self.bits:
            if (bit.isPredicted[maxPreviousSteps - 2] and
                    not bit.isActive[maxPreviousSteps - 1]):
                bit.isFalselyPredicted[maxPreviousSteps - 1] = True

    def getPredictions(self):
        """
        Get the predictions after an iteration.

        Only raw input is handled. The predictions of the current step are
        stored in ``self.predictedValues[maxPreviousSteps - 1]`` as a dict
        keyed by the number of steps ahead.
        """

        if self.inputFormat != InputFormat.raw:
            return

        if self.predictionsMethod == PredictionsMethod.reconstruction:
            # Prepare list with predictions to be classified
            # This list has 1 for every predicted bit and 0 otherwise
            output = numpy.array([
                1 if bit.isPredicted[maxPreviousSteps - 1] else 0
                for bit in self.bits])

            # Decode output and create predictions list
            fieldsDict, fieldsOrder = self.encoder.decode(output)
            self.predictedValues[maxPreviousSteps - 1] = dict()
            predictions = []
            if len(fieldsOrder) > 0:
                fieldName = fieldsOrder[0]
                predictedLabels = fieldsDict[fieldName][1].split(', ')
                predictedValues = fieldsDict[fieldName][0]
                for i in range(len(predictedLabels)):
                    predictions.append([predictedValues[i],
                                        predictedLabels[i]])

            self.predictedValues[maxPreviousSteps - 1][1] = predictions

        elif self.predictionsMethod == PredictionsMethod.classification:
            # A classification involves estimate which are the likely values
            # to occurs in the next time step.

            # Prepare list with predictions to be classified
            # This list contains the indexes of all bits that are active
            patternNZ = [
                i for i, bit in enumerate(self.bits)
                if bit.isActive[maxPreviousSteps - 1]]

            # Get the bucket index of the current value at the encoder
            actualValue = self.currentValue[maxPreviousSteps - 1]
            bucketIdx = self.encoder.getBucketIndices(actualValue)[0]

            # Perform classification
            clasResults = self.classifier.compute(
                recordNum=Global.currStep,
                patternNZ=patternNZ,
                classification={'bucketIdx': bucketIdx,
                                'actValue': actualValue},
                learn=self.enableClassificationLearning,
                infer=self.enableClassificationInference)

            self.predictedValues[maxPreviousSteps - 1] = dict()
            for step in self.steps:

                # Calculate probability for each predicted value
                predictions = dict()
                for (actValue, prob) in zip(clasResults['actualValues'],
                                            clasResults[step]):
                    if actValue in predictions:
                        predictions[actValue] += prob
                    else:
                        predictions[actValue] = prob

                # Remove predictions with low probabilities. A snapshot of the
                # items is iterated because entries are deleted on the way;
                # at least one prediction always survives.
                maxVal = (None, None)
                for (actValue, prob) in list(predictions.items()):
                    if len(predictions) <= 1:
                        break
                    if maxVal[0] is None or prob >= maxVal[1]:
                        if (maxVal[0] is not None and
                                maxVal[1] < self.minProbabilityThreshold):
                            del predictions[maxVal[0]]
                        maxVal = (actValue, prob)
                    elif prob < self.minProbabilityThreshold:
                        del predictions[actValue]

                # Sort the list of values from more probable to less probable
                # values and decrease the list length to max predictions per
                # step limit
                predictions = sorted(predictions.items(),
                                     key=operator.itemgetter(1),
                                     reverse=True)
                predictions = predictions[:maxFutureSteps]

                self.predictedValues[maxPreviousSteps - 1][step] = predictions

    def calculateStatistics(self):
        """
        Calculate statistics after an iteration.
        """

        if Global.currStep > 0:
            precision = 0.

            if self.inputFormat == InputFormat.htm:
                # Calculate the prediction precision comparing with bits are
                # equal between the predicted array and the active array
                # The prediction precision is the percentage of shared bits
                # over the sum of all bits
                numSharedBitStates = 0
                numNonSharedBitStates = 0
                for bit in self.bits:
                    wasPredicted = bit.isPredicted[maxPreviousSteps - 2]
                    isActive = bit.isActive[maxPreviousSteps - 1]
                    if wasPredicted or isActive:
                        if wasPredicted == isActive:
                            numSharedBitStates += 1
                        else:
                            numNonSharedBitStates += 1

                # When no bit was predicted or active there is nothing to
                # compare; precision stays 0 instead of dividing by zero.
                numBitStates = numNonSharedBitStates + numSharedBitStates
                if numBitStates > 0:
                    precision = (numSharedBitStates /
                                 float(numBitStates)) * 100

            elif self.inputFormat == InputFormat.raw:
                # Calculate the prediction precision comparing if the current
                # value is in the range of any prediction.
                predictions = self.predictedValues[maxPreviousSteps - 2][1]
                for predictedValue in predictions:
                    lowerBound = 0.
                    upperBound = 0.
                    value = predictedValue[0]
                    if (self.predictionsMethod ==
                            PredictionsMethod.reconstruction):
                        lowerBound = math.floor(value[0])
                        upperBound = math.ceil(value[1])
                    elif (self.predictionsMethod ==
                            PredictionsMethod.classification):
                        lowerBound = math.floor(value)
                        upperBound = math.ceil(value)
                    current = self.currentValue[maxPreviousSteps - 1]
                    if lowerBound <= current <= upperBound:
                        precision = 100.
                        break

            # The precision rate is the average of the precision calculated in
            # every step
            self.statsPrecisionRate = (self.statsPrecisionRate + precision) / 2
        else:
            self.statsPrecisionRate = 0.

        for bit in self.bits:
            bit.calculateStatistics()

    def __getNextFileRecord(self):
        """
        Get the next record from file. If file end is reached then start
        reading from scratch again.

        Returns a numpy array of 0./1. values for HTM input, or the next line
        without its line terminator for raw input. Raises Exception when an
        HTM record is malformed.
        """

        recordValue = None

        # If end of file was reached then place cursor on the first byte again
        if self._file.tell() == os.fstat(self._file.fileno()).st_size:
            self._file.seek(0)

        if self.inputFormat == InputFormat.htm:

            # Start reading from last position
            outputList = []
            for _ in range(self.height):
                for _ in range(self.width):
                    character = self._file.read(1)
                    if character == '1':
                        outputList.append(1.)
                    elif character == '0':
                        outputList.append(0.)
                    else:
                        raise Exception("Invalid file format.")

                # Check if next char is a 'return', i.e. the row end
                character = self._file.read(1)
                if character == '\r':
                    character = self._file.read(1)
                if character != '\n':
                    raise Exception("Invalid file format.")

            # Check if next char is a 'return' character, i.e. the record end.
            # read() signals the end of the file with an empty string (never
            # -1), which is accepted so the last record needs no blank line
            # after it.
            character = self._file.read(1)
            if character == '\r':
                character = self._file.read(1)
            if character != '\n' and character != '':
                raise Exception("Invalid file format.")

            # Return the output list as record value
            recordValue = numpy.array(outputList)

        elif self.inputFormat == InputFormat.raw:

            # Return the raw value as record value
            recordValue = self._file.readline()
            recordValue = recordValue.rstrip('\r\n')

        return recordValue

    #endregion
class SpatialPoolerAgent(Agent): """ agent that uses CAM (content-addresable memory; uses SpatialPooler to make abstractions and generalizations of inputs to learn actions. Can be trained in both supervised and unsupervised ways. Uses utility encoder with feedback = 1, to remember 1 step -> start={stateA, actionA} , score(start)==score after applying actionA""" def __init__(self, numFields): a=dict() t=[] # target super(SpatialPoolerAgent, self).__init__(a, t, listMemFields=["score", "visited"], name='SPlearner') self.me['x']=0 self.me['y']=0 self.me['steps']=0 self.enc = UtilEncoder(length=numFields, minval=0, maxval=100, scoreMin=0, scoreMax=100, scoreResolution=0.1) self.enc.setEvaluationFn(evalFn) # spatial pooler self.sp = SP( inputDimensions = [self.enc._offset], columnDimensions = [1024], potentialRadius = 15, potentialPct = 0.5, globalInhibition = True, localAreaDensity = -1.0, numActiveColumnsPerInhArea = 5, stimulusThreshold=0, synPermInactiveDec=0.01, synPermActiveInc = 0.1, synPermConnected = 0.20, minPctOverlapDutyCycle = 0.1, minPctActiveDutyCycle = 0.1, dutyCyclePeriod = 10, maxBoost = 10.0, seed = -1, spVerbosity = 2,) self.cls = Clas() # classifier def testSP(self): dataSize = 5 totalPatterns = 5 patterns=[] for i in xrange(0,totalPatterns): patterns.append(numpy.random.randint(0,2,dataSize).tolist()) # generate input patterns # SP learn patterns for _ in xrange(0,10): #learn-repeate for pattern in patterns: ret = numpy.zeros(1024) print "input=", pattern enc = self.enc.encode(pattern) print "encoded=",enc self.sp.compute(enc,True, ret) nz = numpy.nonzero(ret)[0].tolist() print len(nz) score = self.enc.getScoreIN(pattern) buckets = self.enc.getBucketIndices({'simpleUtility' : pattern,'utility' : score}) print self.enc.getScalarNames(), buckets print self.cls.compute(recordNum=1, patternNZ=nz, classification={'bucketIdx': buckets[0], 'actValue': score }, learn=True, infer=True) print "Test" for pattern in patterns: ret = numpy.zeros(1024) enc = 
self.enc.encode(pattern) self.sp.compute(enc,False, ret) nz = numpy.nonzero(ret)[0].tolist() score = self.enc.getScoreIN(pattern) buckets = self.enc.getBucketIndices({'simpleUtility' : pattern,'utility' : score}) print self.cls.compute(recordNum=1, patternNZ=nz, classification={'bucketIdx': buckets[0], 'actValue': None }, learn=False, infer=True)