class Sensor(Node):
    """
    A super class only to group properties related to sensors.

    A sensor reads records from a data source (file or database), encodes the
    raw field values into an HTM bit array via a multi-encoder, and exposes
    per-step predictions either by decoding predicted bits (reconstruction)
    or by running a classifier (classification).
    """

    #region Constructor

    def __init__(self, name):
        """
        Initializes a new instance of this class.

        name: node name passed through to the Node base class.
        """

        Node.__init__(self, name, NodeType.sensor)

        #region Instance fields

        self.bits = []
        """An array of the bit objects that compose the current output of this node."""

        self.dataSource = None
        """Data source which provides records to fed into a region."""

        self.dataSourceType = DataSourceType.file
        """Type of the data source (File or Database)"""

        self.fileName = ''
        """The input file name to be handled. Returns the input file name only if it is in the project directory, full path otherwise."""

        self.databaseConnectionString = ""
        """Connection string of the database."""

        self.databaseTable = ''
        """Target table of the database."""

        self.encoder = None
        """Multi-encoder which concatenate sub-encodings to convert raw data to htm input and vice-versa."""

        self.encodings = []
        """List of sub-encodings that handles the input from database"""

        self.predictionsMethod = PredictionsMethod.reconstruction
        """Method used to get predicted values and their probabilities."""

        self.enableClassificationLearning = True
        """Switch for classification learning"""

        self.enableClassificationInference = True
        """Switch for classification inference"""

        #endregion

        #region Statistics properties

        self.statsPrecisionRate = 0.

        #endregion

        #endregion

    #region Methods

    def getBit(self, x, y):
        """
        Return the bit located at given position
        """
        # NOTE(review): initialize() fills self.bits x-major (x outer loop,
        # y inner loop), i.e. index = (x * height) + y, while this lookup uses
        # (y * width) + x. The two layouts only coincide when width == height
        # (or callers swap arguments) — TODO confirm intended layout.
        bit = self.bits[(y * self.width) + x]

        return bit

    def initialize(self):
        """
        Initialize this node.

        Creates the input bit grid, opens the configured data source, builds
        the multi-encoder from the configured sub-encodings, and warns if the
        encoder output does not fit within the sensor's bit grid.
        Returns True on success; returns None (falsy) when the input file is
        missing or the encoder is larger than the sensor.
        """

        Node.initialize(self)

        # Initialize input bits
        self.bits = []
        for x in range(self.width):
            for y in range(self.height):
                bit = Bit()
                bit.x = x
                bit.y = y
                self.bits.append(bit)

        if self.dataSourceType == DataSourceType.file:
            """
            Initialize this node opening the file and place cursor on the first record.
            """

            # If file name provided is a relative path, use project file path
            if self.fileName != '' and os.path.dirname(self.fileName) == '':
                fullFileName = os.path.dirname(Global.project.fileName) + '/' + self.fileName
            else:
                fullFileName = self.fileName

            # Check if file really exists
            if not os.path.isfile(fullFileName):
                QtGui.QMessageBox.warning(None, "Warning", "Input stream file '" + fullFileName + "' was not found or specified.", QtGui.QMessageBox.Ok)
                return

            # Create a data source to read the file
            self.dataSource = FileRecordStream(fullFileName)
        elif self.dataSourceType == DataSourceType.database:
            # Database support not implemented yet.
            pass

        self.encoder = MultiEncoder()
        for encoding in self.encodings:
            encoding.initialize()

            # Create an instance class for an encoder given its module, class and constructor params
            encoding.encoder = getInstantiatedClass(encoding.encoderModule, encoding.encoderClass, encoding.encoderParams)

            # Take the first part of encoder field name as encoder name
            # Ex: timestamp_weekend.weekend => timestamp_weekend
            encoding.encoder.name = encoding.encoderFieldName.split('.')[0]

            # Add sub-encoder to multi-encoder list
            self.encoder.addEncoder(encoding.dataSourceFieldName, encoding.encoder)

        # If encoder size is not the same as sensor size then warn the user
        encoderSize = self.encoder.getWidth()
        sensorSize = self.width * self.height
        if encoderSize > sensorSize:
            QtGui.QMessageBox.warning(None, "Warning", "'" + self.name + "': Encoder size (" + str(encoderSize) + ") is different from sensor size (" + str(self.width) + " x " + str(self.height) + " = " + str(sensorSize) + ").", QtGui.QMessageBox.Ok)
            return

        return True

    def nextStep(self):
        """
        Performs actions related to time step progression.

        Rotates all per-encoding state machines, reads the next record from
        the data source (rewinding at end of stream), encodes it into the
        sensor bit array, and records current raw values per encoding.
        """

        # Update state machines by removing the first element and adding a new element at the end
        for encoding in self.encodings:
            encoding.currentValue.rotate()
            if encoding.enableInference:
                encoding.predictedValues.rotate()
                encoding.bestPredictedValue.rotate()

        Node.nextStep(self)
        for bit in self.bits:
            bit.nextStep()

        # Get record value from data source
        # If the last record was reached just rewind it
        data = self.dataSource.getNextRecordDict()
        if not data:
            self.dataSource.rewind()
            data = self.dataSource.getNextRecordDict()

        # Pass raw values to encoder and get a concatenated array
        outputArray = numpy.zeros(self.encoder.getWidth())
        self.encoder.encodeIntoArray(data, outputArray)

        # Get values obtained from the data source.
        outputValues = self.encoder.getScalars(data)

        # Get raw values and respective encoded bit array for each field
        # NOTE(review): prevOffset is assigned but never used in this method.
        prevOffset = 0
        for i in range(len(self.encodings)):
            encoding = self.encodings[i]

            # Convert the value to its respective data type
            currValue = outputValues[i]
            if encoding.encoderFieldDataType == FieldDataType.boolean:
                currValue = bool(currValue)
            elif encoding.encoderFieldDataType == FieldDataType.integer:
                currValue = int(currValue)
            elif encoding.encoderFieldDataType == FieldDataType.decimal:
                currValue = float(currValue)
            elif encoding.encoderFieldDataType == FieldDataType.dateTime:
                currValue = dateutil.parser.parse(str(currValue))
            elif encoding.encoderFieldDataType == FieldDataType.string:
                currValue = str(currValue)
            encoding.currentValue.setForCurrStep(currValue)

        # Update sensor bits
        for i in range(len(outputArray)):
            if outputArray[i] > 0.:
                self.bits[i].isActive.setForCurrStep(True)
            else:
                self.bits[i].isActive.setForCurrStep(False)

        # Mark falsely predicted bits (predicted last step but not active now)
        for bit in self.bits:
            if bit.isPredicted.atPreviousStep() and not bit.isActive.atCurrStep():
                bit.isFalselyPredicted.setForCurrStep(True)

        self._output = outputArray

    def getPredictions(self):
        """
        Get the predictions after an iteration.

        Reconstruction method: decodes the currently-predicted bit pattern
        back through the multi-encoder into value ranges per field.
        Classification method: feeds the active bit pattern of each encoding
        to its classifier and collects per-step value probabilities.
        """

        if self.predictionsMethod == PredictionsMethod.reconstruction:

            # Prepare list with predictions to be classified
            # This list contains the indexes of all bits that are predicted
            output = []
            for i in range(len(self.bits)):
                if self.bits[i].isPredicted.atCurrStep():
                    output.append(1)
                else:
                    output.append(0)
            output = numpy.array(output)

            # Decode output and create predictions list
            fieldsDict, fieldsOrder = self.encoder.decode(output)
            for encoding in self.encodings:
                if encoding.enableInference:
                    predictions = []
                    encoding.predictedValues.setForCurrStep(dict())

                    # If encoder field name was returned by decode(), assign the predictions to it
                    if encoding.encoderFieldName in fieldsOrder:
                        predictedLabels = fieldsDict[encoding.encoderFieldName][1].split(', ')
                        predictedValues = fieldsDict[encoding.encoderFieldName][0]
                        for i in range(len(predictedLabels)):
                            predictions.append([predictedValues[i], predictedLabels[i]])

                    # Reconstruction only yields 1-step-ahead predictions (key 1)
                    encoding.predictedValues.atCurrStep()[1] = predictions

                    # Get the predicted value with the biggest probability to happen
                    if len(predictions) > 0:
                        bestPredictionRange = predictions[0][0]
                        # NOTE(review): 'min'/'max' shadow the builtins within this scope.
                        min = bestPredictionRange[0]
                        max = bestPredictionRange[1]
                        # Best value is the midpoint of the decoded range
                        bestPredictedValue = (min + max) / 2.0
                        encoding.bestPredictedValue.setForCurrStep(bestPredictedValue)

        elif self.predictionsMethod == PredictionsMethod.classification:
            # A classification involves estimating which values are likely to occur in the next time step.

            offset = 0
            for encoding in self.encodings:
                encoderWidth = encoding.encoder.getWidth()
                if encoding.enableInference:
                    # Prepare list with predictions to be classified
                    # This list contains the indexes of all bits that are predicted
                    # NOTE(review): this iterates range(offset, encoderWidth); for any
                    # encoding after the first it looks like it should be
                    # range(offset, offset + encoderWidth) — TODO confirm.
                    patternNZ = []
                    for i in range(offset, encoderWidth):
                        if self.bits[i].isActive.atCurrStep():
                            patternNZ.append(i)

                    # Get the bucket index of the current value at the encoder
                    actualValue = encoding.currentValue.atCurrStep()
                    bucketIdx = encoding.encoder.getBucketIndices(actualValue)[0]

                    # Perform classification
                    clasResults = encoding.classifier.compute(recordNum=Global.currStep, patternNZ=patternNZ, classification={'bucketIdx': bucketIdx, 'actValue': actualValue}, learn=self.enableClassificationLearning, infer=self.enableClassificationInference)

                    encoding.predictedValues.setForCurrStep(dict())
                    for step in encoding.steps:

                        # Calculate probability for each predicted value
                        predictions = dict()
                        for (actValue, prob) in zip(clasResults['actualValues'], clasResults[step]):
                            if actValue in predictions:
                                predictions[actValue] += prob
                            else:
                                predictions[actValue] = prob

                        # Remove predictions with low probabilities
                        # (Python 2 dict.items() returns a list, so deleting
                        # entries while looping here is safe.)
                        maxVal = (None, None)
                        for (actValue, prob) in predictions.items():
                            if len(predictions) <= 1:
                                break
                            if maxVal[0] is None or prob >= maxVal[1]:
                                if maxVal[0] is not None and maxVal[1] < encoding.minProbabilityThreshold:
                                    del predictions[maxVal[0]]
                                maxVal = (actValue, prob)
                            elif prob < encoding.minProbabilityThreshold:
                                del predictions[actValue]

                        # Sort the list of values from more probable to less probable values
                        # and decrease the list length to max predictions per step limit
                        # NOTE(review): maxFutureSteps is a bare name — presumably a
                        # module-level constant in this file; verify it is defined.
                        predictions = sorted(predictions.iteritems(), key=operator.itemgetter(1), reverse=True)
                        predictions = predictions[:maxFutureSteps]

                        encoding.predictedValues.atCurrStep()[step] = predictions

                    # Get the predicted value with the biggest probability to happen
                    # (assumes step 1 is always present in encoding.steps)
                    bestPredictedValue = encoding.predictedValues.atCurrStep()[1][0][0]
                    encoding.bestPredictedValue.setForCurrStep(bestPredictedValue)

                offset += encoderWidth

    def calculateStatistics(self):
        """
        Calculate statistics after an iteration.

        Scores the previous step's 1-step-ahead predictions against the
        current value and folds the result into statsPrecisionRate.
        """
        if Global.currStep > 0:
            precision = 0.

            # Calculate the prediction precision comparing if the current value is in the range of any prediction.
            for encoding in self.encodings:
                if encoding.enableInference:
                    predictions = encoding.predictedValues.atPreviousStep()[1]
                    for predictedValue in predictions:
                        # NOTE(review): 'min'/'max' shadow the builtins within this scope.
                        min = None
                        max = None
                        value = predictedValue[0]
                        if self.predictionsMethod == PredictionsMethod.reconstruction:
                            # Reconstruction predicts a [min, max] range
                            min = value[0]
                            max = value[1]
                        elif self.predictionsMethod == PredictionsMethod.classification:
                            # Classification predicts a single value
                            min = value
                            max = value
                        if isinstance(min, (int, long, float, complex)) and isinstance(max, (int, long, float, complex)):
                            # Widen numeric bounds to whole numbers before comparing
                            min = math.floor(min)
                            max = math.ceil(max)
                        if min <= encoding.currentValue.atCurrStep() <= max:
                            precision = 100.
                            break

            # NOTE(review): this is an exponentially weighted average (recent
            # steps dominate), not the arithmetic mean the comment below implies.
            # The precision rate is the average of the precision calculated in every step
            self.statsPrecisionRate = (self.statsPrecisionRate + precision) / 2
        else:
            self.statsPrecisionRate = 0.

        for bit in self.bits:
            bit.calculateStatistics()
def testSimpleMulticlassNetworkPY(self):
    """
    Train a RecordSensor -> py.SDRClassifierRegion network on 8 fake
    single-category records, then replay the same records with learning off
    and assert the classifier infers every category (100% accuracy).
    """
    # Setup data record stream of fake data (with three categories)
    filename = _getTempFileName()
    fields = [("timestamp", "datetime", "T"),
              ("value", "float", ""),
              ("reset", "int", "R"),
              ("sid", "int", "S"),
              ("categories", "list", "C")]
    records = (
        [datetime(day=1, month=3, year=2010), 0.0, 1, 0, "0"],
        [datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1"],
        [datetime(day=3, month=3, year=2010), 0.0, 0, 0, "0"],
        [datetime(day=4, month=3, year=2010), 1.0, 0, 0, "1"],
        [datetime(day=5, month=3, year=2010), 0.0, 0, 0, "0"],
        [datetime(day=6, month=3, year=2010), 1.0, 0, 0, "1"],
        [datetime(day=7, month=3, year=2010), 0.0, 0, 0, "0"],
        [datetime(day=8, month=3, year=2010), 1.0, 0, 0, "1"])
    dataSource = FileRecordStream(streamID=filename, write=True, fields=fields)
    for r in records:
        dataSource.appendRecord(list(r))

    # Create the network and get region instances.
    net = Network()
    net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
    net.addRegion("classifier", "py.SDRClassifierRegion",
                  "{steps: '0', alpha: 0.001, implementation: 'py'}")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="dataOut", destInput="bottomUpIn")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="categoryOut", destInput="categoryIn")
    sensor = net.regions["sensor"]
    classifier = net.regions["classifier"]

    # Setup sensor region encoder and data stream.
    dataSource.close()
    dataSource = FileRecordStream(filename)
    sensorRegion = sensor.getSelf()
    sensorRegion.encoder = MultiEncoder()
    sensorRegion.encoder.addEncoder(
        "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
    sensorRegion.dataSource = dataSource

    # Get ready to run.
    net.initialize()

    # Train the network (by default learning is ON in the classifier, but
    # assert anyway) and then turn off learning and turn on inference mode.
    self.assertEqual(classifier.getParameter("learningMode"), 1)
    net.run(8)

    # Test the network on the same data as it trained on; should classify with
    # 100% accuracy.
    classifier.setParameter("inferenceMode", 1)
    classifier.setParameter("learningMode", 0)

    # Assert learning is OFF and that the classifier learned the dataset.
    self.assertEqual(classifier.getParameter("learningMode"), 0,
                     "Learning mode is not turned off.")
    self.assertEqual(classifier.getParameter("inferenceMode"), 1,
                     "Inference mode is not turned on.")

    # make sure we can access all the parameters with getParameter
    self.assertEqual(classifier.getParameter("maxCategoryCount"), 2000)
    self.assertAlmostEqual(float(classifier.getParameter("alpha")), 0.001)
    self.assertEqual(int(classifier.getParameter("steps")), 0)
    self.assertTrue(classifier.getParameter("implementation") == "py")
    self.assertEqual(classifier.getParameter("verbosity"), 0)

    expectedCats = ([0.0], [1.0], [0.0], [1.0], [0.0], [1.0], [0.0], [1.0],)
    dataSource.rewind()
    for i in xrange(8):
        net.run(1)
        inferredCats = classifier.getOutputData("categoriesOut")
        # Fixed typo in assertion message ("Classififer" -> "Classifier").
        self.assertSequenceEqual(expectedCats[i], inferredCats.tolist(),
                                 "Classifier did not infer expected category "
                                 "for record number {}.".format(i))

    # Close data stream, delete file.
    dataSource.close()
    os.remove(filename)
def testSimpleMulticlassNetworkPY(self):
    """
    Train a RecordSensor -> py.SDRClassifierRegion network on 8 fake
    single-category records, then replay the same records with learning off
    and assert the classifier infers every category (100% accuracy).
    """
    # Setup data record stream of fake data (with three categories)
    filename = _getTempFileName()
    fields = [("timestamp", "datetime", "T"),
              ("value", "float", ""),
              ("reset", "int", "R"),
              ("sid", "int", "S"),
              ("categories", "list", "C")]
    records = (
        [datetime(day=1, month=3, year=2010), 0.0, 1, 0, "0"],
        [datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1"],
        [datetime(day=3, month=3, year=2010), 0.0, 0, 0, "0"],
        [datetime(day=4, month=3, year=2010), 1.0, 0, 0, "1"],
        [datetime(day=5, month=3, year=2010), 0.0, 0, 0, "0"],
        [datetime(day=6, month=3, year=2010), 1.0, 0, 0, "1"],
        [datetime(day=7, month=3, year=2010), 0.0, 0, 0, "0"],
        [datetime(day=8, month=3, year=2010), 1.0, 0, 0, "1"])
    dataSource = FileRecordStream(streamID=filename, write=True, fields=fields)
    for r in records:
        dataSource.appendRecord(list(r))

    # Create the network and get region instances.
    net = Network()
    net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
    net.addRegion("classifier", "py.SDRClassifierRegion",
                  "{steps: '0', alpha: 0.001, implementation: 'py'}")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="dataOut", destInput="bottomUpIn")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="categoryOut", destInput="categoryIn")
    sensor = net.regions["sensor"]
    classifier = net.regions["classifier"]

    # Setup sensor region encoder and data stream.
    dataSource.close()
    dataSource = FileRecordStream(filename)
    sensorRegion = sensor.getSelf()
    sensorRegion.encoder = MultiEncoder()
    sensorRegion.encoder.addEncoder(
        "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
    sensorRegion.dataSource = dataSource

    # Get ready to run.
    net.initialize()

    # Train the network (by default learning is ON in the classifier, but
    # assert anyway) and then turn off learning and turn on inference mode.
    self.assertEqual(classifier.getParameter("learningMode"), 1)
    net.run(8)

    # Test the network on the same data as it trained on; should classify with
    # 100% accuracy.
    classifier.setParameter("inferenceMode", 1)
    classifier.setParameter("learningMode", 0)

    # Assert learning is OFF and that the classifier learned the dataset.
    self.assertEqual(classifier.getParameter("learningMode"), 0,
                     "Learning mode is not turned off.")
    self.assertEqual(classifier.getParameter("inferenceMode"), 1,
                     "Inference mode is not turned on.")

    # make sure we can access all the parameters with getParameter
    self.assertEqual(classifier.getParameter("maxCategoryCount"), 2000)
    self.assertAlmostEqual(float(classifier.getParameter("alpha")), 0.001)
    self.assertEqual(int(classifier.getParameter("steps")), 0)
    self.assertTrue(classifier.getParameter("implementation") == "py")
    self.assertEqual(classifier.getParameter("verbosity"), 0)

    expectedCats = ([0.0], [1.0], [0.0], [1.0], [0.0], [1.0], [0.0], [1.0],)
    dataSource.rewind()
    for i in xrange(8):
        net.run(1)
        inferredCats = classifier.getOutputData("categoriesOut")
        # Fixed typo in assertion message ("Classififer" -> "Classifier").
        self.assertSequenceEqual(
            expectedCats[i], inferredCats.tolist(),
            "Classifier did not infer expected category "
            "for record number {}.".format(i))

    # Close data stream, delete file.
    dataSource.close()
    os.remove(filename)
def testSimpleMulticlassNetwork(self):
    """
    Train a RecordSensor -> py.KNNClassifierRegion network on 8 fake records
    with multi-category labels, then replay the same records with learning
    off and assert the classifier infers the expected per-category
    probabilities (100% accuracy).
    """
    # Setup data record stream of fake data (with three categories)
    filename = _getTempFileName()
    fields = [("timestamp", "datetime", "T"),
              ("value", "float", ""),
              ("reset", "int", "R"),
              ("sid", "int", "S"),
              ("categories", "list", "C")]
    records = (
        [datetime(day=1, month=3, year=2010), 0.0, 1, 0, ""],
        [datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1 2"],
        [datetime(day=3, month=3, year=2010), 1.0, 0, 0, "1 2"],
        [datetime(day=4, month=3, year=2010), 2.0, 0, 0, "0"],
        [datetime(day=5, month=3, year=2010), 3.0, 0, 0, "1 2"],
        [datetime(day=6, month=3, year=2010), 5.0, 0, 0, "1 2"],
        [datetime(day=7, month=3, year=2010), 8.0, 0, 0, "0"],
        [datetime(day=8, month=3, year=2010), 13.0, 0, 0, "1 2"])
    dataSource = FileRecordStream(streamID=filename, write=True, fields=fields)
    for r in records:
        dataSource.appendRecord(list(r))

    # Create the network and get region instances.
    net = Network()
    net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
    net.addRegion("classifier", "py.KNNClassifierRegion",
                  "{'k': 2,'distThreshold': 0,'maxCategoryCount': 3}")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="dataOut", destInput="bottomUpIn")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="categoryOut", destInput="categoryIn")
    sensor = net.regions["sensor"]
    classifier = net.regions["classifier"]

    # Setup sensor region encoder and data stream.
    dataSource.close()
    dataSource = FileRecordStream(filename)
    sensorRegion = sensor.getSelf()
    sensorRegion.encoder = MultiEncoder()
    sensorRegion.encoder.addEncoder(
        "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
    sensorRegion.dataSource = dataSource

    # Get ready to run.
    net.initialize()

    # Train the network (by default learning is ON in the classifier, but
    # assert anyway) and then turn off learning and turn on inference mode.
    self.assertEqual(classifier.getParameter("learningMode"), 1)
    net.run(8)
    classifier.setParameter("inferenceMode", 1)
    classifier.setParameter("learningMode", 0)

    # Assert learning is OFF and that the classifier learned the dataset.
    self.assertEqual(classifier.getParameter("learningMode"), 0,
                     "Learning mode is not turned off.")
    self.assertEqual(classifier.getParameter("inferenceMode"), 1,
                     "Inference mode is not turned on.")
    self.assertEqual(classifier.getParameter("categoryCount"), 3,
                     "The classifier should count three total categories.")
    # classifier learns 12 patterns b/c there are 12 categories amongst the
    # records:
    self.assertEqual(classifier.getParameter("patternCount"), 12,
                     "The classifier should've learned 12 samples in total.")

    # Test the network on the same data as it trained on; should classify with
    # 100% accuracy.
    expectedCats = ([0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.5, 0.5, 0.0],
                    [0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.5, 0.5, 0.0],
                    [0.0, 0.5, 0.5])
    dataSource.rewind()
    for i in xrange(8):
        net.run(1)
        inferredCats = classifier.getOutputData("categoriesOut")
        # Fixed typos in assertion message ("Classififer" -> "Classifier",
        # "probabilites" -> "probabilities").
        self.assertSequenceEqual(
            expectedCats[i], inferredCats.tolist(),
            "Classifier did not infer expected category probabilities for "
            "record number {}.".format(i))

    # Close data stream, delete file.
    dataSource.close()
    os.remove(filename)
def testSimpleMulticlassNetwork(self):
    """
    Train a RecordSensor -> py.KNNClassifierRegion network on 8 fake records
    with multi-category labels, then replay the same records with learning
    off and assert the classifier infers the expected per-category
    probabilities (100% accuracy).
    """
    # Setup data record stream of fake data (with three categories)
    filename = _getTempFileName()
    fields = [("timestamp", "datetime", "T"),
              ("value", "float", ""),
              ("reset", "int", "R"),
              ("sid", "int", "S"),
              ("categories", "list", "C")]
    records = (
        [datetime(day=1, month=3, year=2010), 0.0, 1, 0, ""],
        [datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1 2"],
        [datetime(day=3, month=3, year=2010), 1.0, 0, 0, "1 2"],
        [datetime(day=4, month=3, year=2010), 2.0, 0, 0, "0"],
        [datetime(day=5, month=3, year=2010), 3.0, 0, 0, "1 2"],
        [datetime(day=6, month=3, year=2010), 5.0, 0, 0, "1 2"],
        [datetime(day=7, month=3, year=2010), 8.0, 0, 0, "0"],
        [datetime(day=8, month=3, year=2010), 13.0, 0, 0, "1 2"])
    dataSource = FileRecordStream(streamID=filename, write=True, fields=fields)
    for r in records:
        dataSource.appendRecord(list(r))

    # Create the network and get region instances.
    net = Network()
    net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
    net.addRegion("classifier", "py.KNNClassifierRegion",
                  "{'k': 2,'distThreshold': 0,'maxCategoryCount': 3}")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="dataOut", destInput="bottomUpIn")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="categoryOut", destInput="categoryIn")
    sensor = net.regions["sensor"]
    classifier = net.regions["classifier"]

    # Setup sensor region encoder and data stream.
    dataSource.close()
    dataSource = FileRecordStream(filename)
    sensorRegion = sensor.getSelf()
    sensorRegion.encoder = MultiEncoder()
    sensorRegion.encoder.addEncoder(
        "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
    sensorRegion.dataSource = dataSource

    # Get ready to run.
    net.initialize()

    # Train the network (by default learning is ON in the classifier, but
    # assert anyway) and then turn off learning and turn on inference mode.
    self.assertEqual(classifier.getParameter("learningMode"), 1)
    net.run(8)
    classifier.setParameter("inferenceMode", 1)
    classifier.setParameter("learningMode", 0)

    # Assert learning is OFF and that the classifier learned the dataset.
    self.assertEqual(classifier.getParameter("learningMode"), 0,
                     "Learning mode is not turned off.")
    self.assertEqual(classifier.getParameter("inferenceMode"), 1,
                     "Inference mode is not turned on.")
    self.assertEqual(
        classifier.getParameter("categoryCount"), 3,
        "The classifier should count three total categories.")
    # classifier learns 12 patterns b/c there are 12 categories amongst the
    # records:
    self.assertEqual(
        classifier.getParameter("patternCount"), 12,
        "The classifier should've learned 12 samples in total.")

    # Test the network on the same data as it trained on; should classify with
    # 100% accuracy.
    expectedCats = ([0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.5, 0.5, 0.0],
                    [0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.5, 0.5, 0.0],
                    [0.0, 0.5, 0.5])
    dataSource.rewind()
    for i in xrange(8):
        net.run(1)
        inferredCats = classifier.getOutputData("categoriesOut")
        # Fixed typos in assertion message ("Classififer" -> "Classifier",
        # "probabilites" -> "probabilities").
        self.assertSequenceEqual(
            expectedCats[i], inferredCats.tolist(),
            "Classifier did not infer expected category probabilities for "
            "record number {}.".format(i))

    # Close data stream, delete file.
    dataSource.close()
    os.remove(filename)
class Sensor(Node):
    """
    A super class only to group properties related to sensors.

    A sensor reads records from a data source (file or database), encodes the
    raw field values into an HTM bit array via a multi-encoder, and exposes
    per-step predictions either by decoding predicted bits (reconstruction)
    or by running a classifier (classification).
    """

    #region Constructor

    def __init__(self, name):
        """
        Initializes a new instance of this class.

        name: node name passed through to the Node base class.
        """

        Node.__init__(self, name, NodeType.sensor)

        #region Instance fields

        self.bits = []
        """An array of the bit objects that compose the current output of this node."""

        self.dataSource = None
        """Data source which provides records to fed into a region."""

        self.dataSourceType = DataSourceType.file
        """Type of the data source (File or Database)"""

        self.fileName = ''
        """The input file name to be handled. Returns the input file name only if it is in the project directory, full path otherwise."""

        self.databaseConnectionString = ""
        """Connection string of the database."""

        self.databaseTable = ''
        """Target table of the database."""

        self.encoder = None
        """Multi-encoder which concatenate sub-encodings to convert raw data to htm input and vice-versa."""

        self.encodings = []
        """List of sub-encodings that handles the input from database"""

        self.predictionsMethod = PredictionsMethod.reconstruction
        """Method used to get predicted values and their probabilities."""

        self.enableClassificationLearning = True
        """Switch for classification learning"""

        self.enableClassificationInference = True
        """Switch for classification inference"""

        #endregion

        #region Statistics properties

        self.statsPrecisionRate = 0.

        #endregion

        #endregion

    #region Methods

    def getBit(self, x, y):
        """
        Return the bit located at given position
        """
        # NOTE(review): initialize() fills self.bits x-major (x outer loop,
        # y inner loop), i.e. index = (x * height) + y, while this lookup uses
        # (y * width) + x. The two layouts only coincide when width == height
        # (or callers swap arguments) — TODO confirm intended layout.
        bit = self.bits[(y * self.width) + x]

        return bit

    def initialize(self):
        """
        Initialize this node.

        Creates the input bit grid, opens the configured data source, builds
        the multi-encoder from the configured sub-encodings, and warns if the
        encoder output does not fit within the sensor's bit grid.
        Returns True on success; returns None (falsy) when the input file is
        missing or the encoder is larger than the sensor.
        """

        Node.initialize(self)

        # Initialize input bits
        self.bits = []
        for x in range(self.width):
            for y in range(self.height):
                bit = Bit()
                bit.x = x
                bit.y = y
                self.bits.append(bit)

        if self.dataSourceType == DataSourceType.file:
            """
            Initialize this node opening the file and place cursor on the first record.
            """

            # If file name provided is a relative path, use project file path
            if self.fileName != '' and os.path.dirname(self.fileName) == '':
                fullFileName = os.path.dirname(
                    Global.project.fileName) + '/' + self.fileName
            else:
                fullFileName = self.fileName

            # Check if file really exists
            if not os.path.isfile(fullFileName):
                QtGui.QMessageBox.warning(
                    None, "Warning", "Input stream file '" + fullFileName +
                    "' was not found or specified.", QtGui.QMessageBox.Ok)
                return

            # Create a data source to read the file
            self.dataSource = FileRecordStream(fullFileName)
        elif self.dataSourceType == DataSourceType.database:
            # Database support not implemented yet.
            pass

        self.encoder = MultiEncoder()
        for encoding in self.encodings:
            encoding.initialize()

            # Create an instance class for an encoder given its module, class and constructor params
            encoding.encoder = getInstantiatedClass(encoding.encoderModule,
                                                    encoding.encoderClass,
                                                    encoding.encoderParams)

            # Take the first part of encoder field name as encoder name
            # Ex: timestamp_weekend.weekend => timestamp_weekend
            encoding.encoder.name = encoding.encoderFieldName.split('.')[0]

            # Add sub-encoder to multi-encoder list
            self.encoder.addEncoder(encoding.dataSourceFieldName,
                                    encoding.encoder)

        # If encoder size is not the same as sensor size then warn the user
        encoderSize = self.encoder.getWidth()
        sensorSize = self.width * self.height
        if encoderSize > sensorSize:
            QtGui.QMessageBox.warning(
                None, "Warning", "'" + self.name + "': Encoder size (" +
                str(encoderSize) + ") is different from sensor size (" +
                str(self.width) + " x " + str(self.height) + " = " +
                str(sensorSize) + ").", QtGui.QMessageBox.Ok)
            return

        return True

    def nextStep(self):
        """
        Performs actions related to time step progression.

        Rotates all per-encoding state machines, reads the next record from
        the data source (rewinding at end of stream), encodes it into the
        sensor bit array, and records current raw values per encoding.
        """

        # Update state machines by removing the first element and adding a new element at the end
        for encoding in self.encodings:
            encoding.currentValue.rotate()
            if encoding.enableInference:
                encoding.predictedValues.rotate()
                encoding.bestPredictedValue.rotate()

        Node.nextStep(self)
        for bit in self.bits:
            bit.nextStep()

        # Get record value from data source
        # If the last record was reached just rewind it
        data = self.dataSource.getNextRecordDict()
        if not data:
            self.dataSource.rewind()
            data = self.dataSource.getNextRecordDict()

        # Pass raw values to encoder and get a concatenated array
        outputArray = numpy.zeros(self.encoder.getWidth())
        self.encoder.encodeIntoArray(data, outputArray)

        # Get values obtained from the data source.
        outputValues = self.encoder.getScalars(data)

        # Get raw values and respective encoded bit array for each field
        # NOTE(review): prevOffset is assigned but never used in this method.
        prevOffset = 0
        for i in range(len(self.encodings)):
            encoding = self.encodings[i]

            # Convert the value to its respective data type
            currValue = outputValues[i]
            if encoding.encoderFieldDataType == FieldDataType.boolean:
                currValue = bool(currValue)
            elif encoding.encoderFieldDataType == FieldDataType.integer:
                currValue = int(currValue)
            elif encoding.encoderFieldDataType == FieldDataType.decimal:
                currValue = float(currValue)
            elif encoding.encoderFieldDataType == FieldDataType.dateTime:
                currValue = dateutil.parser.parse(str(currValue))
            elif encoding.encoderFieldDataType == FieldDataType.string:
                currValue = str(currValue)
            encoding.currentValue.setForCurrStep(currValue)

        # Update sensor bits
        for i in range(len(outputArray)):
            if outputArray[i] > 0.:
                self.bits[i].isActive.setForCurrStep(True)
            else:
                self.bits[i].isActive.setForCurrStep(False)

        # Mark falsely predicted bits (predicted last step but not active now)
        for bit in self.bits:
            if bit.isPredicted.atPreviousStep(
            ) and not bit.isActive.atCurrStep():
                bit.isFalselyPredicted.setForCurrStep(True)

        self._output = outputArray

    def getPredictions(self):
        """
        Get the predictions after an iteration.

        Reconstruction method: decodes the currently-predicted bit pattern
        back through the multi-encoder into value ranges per field.
        Classification method: feeds the active bit pattern of each encoding
        to its classifier and collects per-step value probabilities.
        """

        if self.predictionsMethod == PredictionsMethod.reconstruction:

            # Prepare list with predictions to be classified
            # This list contains the indexes of all bits that are predicted
            output = []
            for i in range(len(self.bits)):
                if self.bits[i].isPredicted.atCurrStep():
                    output.append(1)
                else:
                    output.append(0)
            output = numpy.array(output)

            # Decode output and create predictions list
            fieldsDict, fieldsOrder = self.encoder.decode(output)
            for encoding in self.encodings:
                if encoding.enableInference:
                    predictions = []
                    encoding.predictedValues.setForCurrStep(dict())

                    # If encoder field name was returned by decode(), assign the predictions to it
                    if encoding.encoderFieldName in fieldsOrder:
                        predictedLabels = fieldsDict[
                            encoding.encoderFieldName][1].split(', ')
                        predictedValues = fieldsDict[
                            encoding.encoderFieldName][0]
                        for i in range(len(predictedLabels)):
                            predictions.append(
                                [predictedValues[i], predictedLabels[i]])

                    # Reconstruction only yields 1-step-ahead predictions (key 1)
                    encoding.predictedValues.atCurrStep()[1] = predictions

                    # Get the predicted value with the biggest probability to happen
                    if len(predictions) > 0:
                        bestPredictionRange = predictions[0][0]
                        # NOTE(review): 'min'/'max' shadow the builtins within this scope.
                        min = bestPredictionRange[0]
                        max = bestPredictionRange[1]
                        # Best value is the midpoint of the decoded range
                        bestPredictedValue = (min + max) / 2.0
                        encoding.bestPredictedValue.setForCurrStep(
                            bestPredictedValue)

        elif self.predictionsMethod == PredictionsMethod.classification:
            # A classification involves estimating which values are likely to occur in the next time step.

            offset = 0
            for encoding in self.encodings:
                encoderWidth = encoding.encoder.getWidth()
                if encoding.enableInference:
                    # Prepare list with predictions to be classified
                    # This list contains the indexes of all bits that are predicted
                    # NOTE(review): this iterates range(offset, encoderWidth); for any
                    # encoding after the first it looks like it should be
                    # range(offset, offset + encoderWidth) — TODO confirm.
                    patternNZ = []
                    for i in range(offset, encoderWidth):
                        if self.bits[i].isActive.atCurrStep():
                            patternNZ.append(i)

                    # Get the bucket index of the current value at the encoder
                    actualValue = encoding.currentValue.atCurrStep()
                    bucketIdx = encoding.encoder.getBucketIndices(
                        actualValue)[0]

                    # Perform classification
                    clasResults = encoding.classifier.compute(
                        recordNum=Global.currStep,
                        patternNZ=patternNZ,
                        classification={
                            'bucketIdx': bucketIdx,
                            'actValue': actualValue
                        },
                        learn=self.enableClassificationLearning,
                        infer=self.enableClassificationInference)

                    encoding.predictedValues.setForCurrStep(dict())
                    for step in encoding.steps:

                        # Calculate probability for each predicted value
                        predictions = dict()
                        for (actValue,
                             prob) in zip(clasResults['actualValues'],
                                          clasResults[step]):
                            if actValue in predictions:
                                predictions[actValue] += prob
                            else:
                                predictions[actValue] = prob

                        # Remove predictions with low probabilities
                        # (Python 2 dict.items() returns a list, so deleting
                        # entries while looping here is safe.)
                        maxVal = (None, None)
                        for (actValue, prob) in predictions.items():
                            if len(predictions) <= 1:
                                break
                            if maxVal[0] is None or prob >= maxVal[1]:
                                if maxVal[0] is not None and maxVal[
                                        1] < encoding.minProbabilityThreshold:
                                    del predictions[maxVal[0]]
                                maxVal = (actValue, prob)
                            elif prob < encoding.minProbabilityThreshold:
                                del predictions[actValue]

                        # Sort the list of values from more probable to less probable values
                        # and decrease the list length to max predictions per step limit
                        # NOTE(review): maxFutureSteps is a bare name — presumably a
                        # module-level constant in this file; verify it is defined.
                        predictions = sorted(predictions.iteritems(),
                                             key=operator.itemgetter(1),
                                             reverse=True)
                        predictions = predictions[:maxFutureSteps]

                        encoding.predictedValues.atCurrStep(
                        )[step] = predictions

                    # Get the predicted value with the biggest probability to happen
                    # (assumes step 1 is always present in encoding.steps)
                    bestPredictedValue = encoding.predictedValues.atCurrStep(
                    )[1][0][0]
                    encoding.bestPredictedValue.setForCurrStep(
                        bestPredictedValue)

                offset += encoderWidth

    def calculateStatistics(self):
        """
        Calculate statistics after an iteration.

        Scores the previous step's 1-step-ahead predictions against the
        current value and folds the result into statsPrecisionRate.
        """
        if Global.currStep > 0:
            precision = 0.

            # Calculate the prediction precision comparing if the current value is in the range of any prediction.
            for encoding in self.encodings:
                if encoding.enableInference:
                    predictions = encoding.predictedValues.atPreviousStep()[1]
                    for predictedValue in predictions:
                        # NOTE(review): 'min'/'max' shadow the builtins within this scope.
                        min = None
                        max = None
                        value = predictedValue[0]
                        if self.predictionsMethod == PredictionsMethod.reconstruction:
                            # Reconstruction predicts a [min, max] range
                            min = value[0]
                            max = value[1]
                        elif self.predictionsMethod == PredictionsMethod.classification:
                            # Classification predicts a single value
                            min = value
                            max = value
                        if isinstance(
                                min,
                                (int, long, float, complex)) and isinstance(
                                    max, (int, long, float, complex)):
                            # Widen numeric bounds to whole numbers before comparing
                            min = math.floor(min)
                            max = math.ceil(max)
                        if min <= encoding.currentValue.atCurrStep() <= max:
                            precision = 100.
                            break

            # NOTE(review): this is an exponentially weighted average (recent
            # steps dominate), not the arithmetic mean the comment below implies.
            # The precision rate is the average of the precision calculated in every step
            self.statsPrecisionRate = (self.statsPrecisionRate +
                                       precision) / 2
        else:
            self.statsPrecisionRate = 0.

        for bit in self.bits:
            bit.calculateStatistics()

    #endregion