Example #1
0
class Sensor(Node):
  """
  A super class only to group properties related to sensors.
  """

  #region Constructor

  def __init__(self, name):
    """
    Initializes a new instance of this class.
    """

    Node.__init__(self, name, NodeType.sensor)

    #region Instance fields

    self.bits = []
    """An array of the bit objects that compose the current output of this node."""

    self.dataSource = None
    """Data source which provides records to fed into a region."""

    self.dataSourceType = DataSourceType.file
    """Type of the data source (File or Database)"""

    self.fileName = ''
    """The input file name to be handled. Returns the input file name only if it is in the project directory, full path otherwise."""

    self.databaseConnectionString = ""
    """Connection string of the database."""

    self.databaseTable = ''
    """Target table of the database."""

    self.encoder = None
    """Multi-encoder which concatenate sub-encodings to convert raw data to htm input and vice-versa."""

    self.encodings = []
    """List of sub-encodings that handles the input from database"""

    self.predictionsMethod = PredictionsMethod.reconstruction
    """Method used to get predicted values and their probabilities."""

    self.enableClassificationLearning = True
    """Switch for classification learning"""

    self.enableClassificationInference = True
    """Switch for classification inference"""

    #endregion

    #region Statistics properties

    self.statsPrecisionRate = 0.

    #endregion

  #endregion

  #region Methods

  def getBit(self, x, y):
    """
    Return the bit object stored at grid position (x, y).

    Bits are kept in a flat list in row-major order, so the index is
    derived from the sensor width.
    """

    return self.bits[x + (y * self.width)]

  def initialize(self):
    """
    Initialize this node.

    Creates the input bit grid, opens the configured data source and builds
    the multi-encoder from the configured sub-encodings.

    Returns True on success, False when the input file is missing or the
    encoder needs more bits than the sensor grid provides (a warning dialog
    is shown in both cases).
    """

    Node.initialize(self)

    # Initialize input bits, one Bit object per (x, y) cell of the grid.
    self.bits = []
    for x in range(self.width):
      for y in range(self.height):
        bit = Bit()
        bit.x = x
        bit.y = y
        self.bits.append(bit)

    if self.dataSourceType == DataSourceType.file:
      # Open the input file and place the cursor on the first record.

      # If the file name provided is a relative path, resolve it against
      # the project file's directory.
      if self.fileName != '' and os.path.dirname(self.fileName) == '':
        fullFileName = os.path.dirname(Global.project.fileName) + '/' + self.fileName
      else:
        fullFileName = self.fileName

      # Check that the file really exists before handing it to the record
      # stream; warn and abort initialization otherwise.
      if not os.path.isfile(fullFileName):
        QtGui.QMessageBox.warning(None, "Warning", "Input stream file '" + fullFileName + "' was not found or specified.", QtGui.QMessageBox.Ok)
        return False

      # Create a data source to read the file.
      self.dataSource = FileRecordStream(fullFileName)

    elif self.dataSourceType == DataSourceType.database:
      # Database sources are not implemented yet.
      pass

    self.encoder = MultiEncoder()
    for encoding in self.encodings:
      encoding.initialize()

      # Create an instance class for an encoder given its module, class and constructor params
      encoding.encoder = getInstantiatedClass(encoding.encoderModule, encoding.encoderClass, encoding.encoderParams)

      # Take the first part of encoder field name as encoder name
      # Ex: timestamp_weekend.weekend => timestamp_weekend
      encoding.encoder.name = encoding.encoderFieldName.split('.')[0]

      # Add sub-encoder to multi-encoder list
      self.encoder.addEncoder(encoding.dataSourceFieldName, encoding.encoder)

    # If the encoder is larger than the sensor grid, warn and abort.
    # NOTE(review): only the "larger than" case is rejected even though the
    # message says "different from" — a smaller encoder is accepted and
    # simply leaves trailing sensor bits unused; confirm this is intended.
    encoderSize = self.encoder.getWidth()
    sensorSize = self.width * self.height
    if encoderSize > sensorSize:
      QtGui.QMessageBox.warning(None, "Warning", "'" + self.name + "': Encoder size (" + str(encoderSize) + ") is different from sensor size (" + str(self.width) + " x " + str(self.height) + " = " + str(sensorSize) + ").", QtGui.QMessageBox.Ok)
      return False

    return True

  def nextStep(self):
    """
    Performs actions related to time step progression.

    Advances the per-encoding and per-bit state machines, reads the next
    record from the data source (rewinding at the end of the stream),
    encodes it into the sensor's bit array and records the raw value of
    each encoding for the current step.
    """

    # Update states machine by remove the first element and add a new element in the end
    for encoding in self.encodings:
      encoding.currentValue.rotate()
      if encoding.enableInference:
        encoding.predictedValues.rotate()
        encoding.bestPredictedValue.rotate()

    Node.nextStep(self)
    for bit in self.bits:
      bit.nextStep()

    # Get record value from data source
    # If the last record was reached just rewind it
    data = self.dataSource.getNextRecordDict()
    if not data:
      self.dataSource.rewind()
      data = self.dataSource.getNextRecordDict()

    # Pass raw values to encoder and get a concatenated array
    outputArray = numpy.zeros(self.encoder.getWidth())
    self.encoder.encodeIntoArray(data, outputArray)

    # Get values obtained from the data source.
    outputValues = self.encoder.getScalars(data)

    # Get raw values and respective encoded bit array for each field
    # NOTE(review): prevOffset is assigned but never used in this method.
    prevOffset = 0
    for i in range(len(self.encodings)):
      encoding = self.encodings[i]

      # Convert the value to its respective data type
      currValue = outputValues[i]
      if encoding.encoderFieldDataType == FieldDataType.boolean:
        currValue = bool(currValue)
      elif encoding.encoderFieldDataType == FieldDataType.integer:
        currValue = int(currValue)
      elif encoding.encoderFieldDataType == FieldDataType.decimal:
        currValue = float(currValue)
      elif encoding.encoderFieldDataType == FieldDataType.dateTime:
        currValue = dateutil.parser.parse(str(currValue))
      elif encoding.encoderFieldDataType == FieldDataType.string:
        currValue = str(currValue)
      encoding.currentValue.setForCurrStep(currValue)

    # Update sensor bits: a bit is active when its encoded output is non-zero
    for i in range(len(outputArray)):
      if outputArray[i] > 0.:
        self.bits[i].isActive.setForCurrStep(True)
      else:
        self.bits[i].isActive.setForCurrStep(False)

    # Mark falsely predicted bits: predicted at the previous step but not
    # active now.
    for bit in self.bits:
      if bit.isPredicted.atPreviousStep() and not bit.isActive.atCurrStep():
        bit.isFalselyPredicted.setForCurrStep(True)

    self._output = outputArray

  def getPredictions(self):
    """
    Get the predictions after an iteration.

    Depending on self.predictionsMethod, either decodes the predicted bit
    pattern back through the multi-encoder (reconstruction) or runs each
    encoding's classifier on the active bits (classification). Results are
    stored per encoding in predictedValues (keyed by step, key 1 = one step
    ahead) and bestPredictedValue.
    """

    if self.predictionsMethod == PredictionsMethod.reconstruction:

      # Prepare list with predictions to be classified
      # This list contains the indexes of all bits that are predicted
      output = []
      for i in range(len(self.bits)):
        if self.bits[i].isPredicted.atCurrStep():
          output.append(1)
        else:
          output.append(0)
      output = numpy.array(output)

      # Decode output and create predictions list
      fieldsDict, fieldsOrder = self.encoder.decode(output)
      for encoding in self.encodings:
        if encoding.enableInference:
          predictions = []
          encoding.predictedValues.setForCurrStep(dict())

          # If encoder field name was returned by decode(), assign the predictions to it
          if encoding.encoderFieldName in fieldsOrder:
            predictedLabels = fieldsDict[encoding.encoderFieldName][1].split(', ')
            predictedValues = fieldsDict[encoding.encoderFieldName][0]
            for i in range(len(predictedLabels)):
              predictions.append([predictedValues[i], predictedLabels[i]])

          encoding.predictedValues.atCurrStep()[1] = predictions

          # Get the predicted value with the biggest probability to happen
          # NOTE(review): 'min' and 'max' shadow the builtins here.
          if len(predictions) > 0:
            bestPredictionRange = predictions[0][0]
            min = bestPredictionRange[0]
            max = bestPredictionRange[1]
            bestPredictedValue = (min + max) / 2.0
            encoding.bestPredictedValue.setForCurrStep(bestPredictedValue)

    elif self.predictionsMethod == PredictionsMethod.classification:
      # A classification involves estimate which are the likely values to occurs in the next time step.

      offset = 0
      for encoding in self.encodings:
        encoderWidth = encoding.encoder.getWidth()

        if encoding.enableInference:
          # Prepare list with predictions to be classified
          # This list contains the indexes of all bits that are predicted
          # NOTE(review): the range ends at encoderWidth, not
          # offset + encoderWidth — verify this is intended for encodings
          # after the first one.
          patternNZ = []
          for i in range(offset, encoderWidth):
            if self.bits[i].isActive.atCurrStep():
              patternNZ.append(i)

          # Get the bucket index of the current value at the encoder
          actualValue = encoding.currentValue.atCurrStep()
          bucketIdx = encoding.encoder.getBucketIndices(actualValue)[0]

          # Perform classification
          clasResults = encoding.classifier.compute(recordNum=Global.currStep, patternNZ=patternNZ, classification={'bucketIdx': bucketIdx, 'actValue': actualValue}, learn=self.enableClassificationLearning, infer=self.enableClassificationInference)

          encoding.predictedValues.setForCurrStep(dict())
          for step in encoding.steps:

            # Calculate probability for each predicted value
            predictions = dict()
            for (actValue, prob) in zip(clasResults['actualValues'], clasResults[step]):
              if actValue in predictions:
                predictions[actValue] += prob
              else:
                predictions[actValue] = prob

            # Remove predictions with low probabilities
            # NOTE(review): deleting from 'predictions' while iterating
            # .items() relies on Python 2 semantics (items() returns a
            # list); this would raise RuntimeError on Python 3.
            maxVal = (None, None)
            for (actValue, prob) in predictions.items():
              if len(predictions) <= 1:
                break
              if maxVal[0] is None or prob >= maxVal[1]:
                if maxVal[0] is not None and maxVal[1] < encoding.minProbabilityThreshold:
                  del predictions[maxVal[0]]
                maxVal = (actValue, prob)
              elif prob < encoding.minProbabilityThreshold:
                del predictions[actValue]

            # Sort the list of values from more probable to less probable values
            # an decrease the list length to max predictions per step limit
            # NOTE(review): 'maxFutureSteps' is not defined in this scope —
            # presumably a module-level constant or a missing 'encoding.'
            # prefix; confirm. 'iteritems' is Python 2 only.
            predictions = sorted(predictions.iteritems(), key=operator.itemgetter(1), reverse=True)
            predictions = predictions[:maxFutureSteps]

            encoding.predictedValues.atCurrStep()[step] = predictions

          # Get the predicted value with the biggest probability to happen
          bestPredictedValue = encoding.predictedValues.atCurrStep()[1][0][0]
          encoding.bestPredictedValue.setForCurrStep(bestPredictedValue)

        offset += encoderWidth

  def calculateStatistics(self):
    """
    Update the sensor statistics after an iteration.

    A step scores 100% precision when the current value of any
    inference-enabled encoding falls inside the range of one of the
    predictions made at the previous step; the running precision rate is a
    moving average of the per-step scores.
    """

    if Global.currStep > 0:
      stepPrecision = 0.

      # Check whether the current value lies within the range of any
      # prediction made at the previous step.
      for encoding in self.encodings:
        if not encoding.enableInference:
          continue
        for prediction in encoding.predictedValues.atPreviousStep()[1]:
          value = prediction[0]
          lowerBound = None
          upperBound = None
          if self.predictionsMethod == PredictionsMethod.reconstruction:
            lowerBound, upperBound = value[0], value[1]
          elif self.predictionsMethod == PredictionsMethod.classification:
            lowerBound = upperBound = value
          # Widen numeric bounds to whole numbers before comparing.
          if isinstance(lowerBound, (int, long, float, complex)) and isinstance(upperBound, (int, long, float, complex)):
            lowerBound = math.floor(lowerBound)
            upperBound = math.ceil(upperBound)
          if lowerBound <= encoding.currentValue.atCurrStep() <= upperBound:
            stepPrecision = 100.
            break

      # Fold this step's precision into the running average.
      self.statsPrecisionRate = (self.statsPrecisionRate + stepPrecision) / 2
    else:
      self.statsPrecisionRate = 0.

    for bit in self.bits:
      bit.calculateStatistics()
  def testSimpleMulticlassNetworkPY(self):
    """
    End-to-end test of a sensor -> SDRClassifierRegion (python
    implementation) network on a tiny two-category stream: train on eight
    records, switch to inference mode, check classifier parameters are
    readable via getParameter(), and verify the classifier reproduces the
    training categories.
    """

    # Setup data record stream of fake data (with three categories)
    filename = _getTempFileName()
    fields = [("timestamp", "datetime", "T"),
              ("value", "float", ""),
              ("reset", "int", "R"),
              ("sid", "int", "S"),
              ("categories", "list", "C")]
    records = (
      [datetime(day=1, month=3, year=2010), 0.0, 1, 0, "0"],
      [datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1"],
      [datetime(day=3, month=3, year=2010), 0.0, 0, 0, "0"],
      [datetime(day=4, month=3, year=2010), 1.0, 0, 0, "1"],
      [datetime(day=5, month=3, year=2010), 0.0, 0, 0, "0"],
      [datetime(day=6, month=3, year=2010), 1.0, 0, 0, "1"],
      [datetime(day=7, month=3, year=2010), 0.0, 0, 0, "0"],
      [datetime(day=8, month=3, year=2010), 1.0, 0, 0, "1"])
    dataSource = FileRecordStream(streamID=filename, write=True, fields=fields)
    for r in records:
      dataSource.appendRecord(list(r))

    # Create the network and get region instances.
    net = Network()
    net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
    net.addRegion("classifier", "py.SDRClassifierRegion",
                  "{steps: '0', alpha: 0.001, implementation: 'py'}")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="dataOut", destInput="bottomUpIn")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput="categoryOut", destInput="categoryIn")
    sensor = net.regions["sensor"]
    classifier = net.regions["classifier"]

    # Setup sensor region encoder and data stream.
    # Reopen the file in read mode and attach it to the sensor region.
    dataSource.close()
    dataSource = FileRecordStream(filename)
    sensorRegion = sensor.getSelf()
    sensorRegion.encoder = MultiEncoder()
    sensorRegion.encoder.addEncoder(
      "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
    sensorRegion.dataSource = dataSource

    # Get ready to run.
    net.initialize()

    # Train the network (by default learning is ON in the classifier, but assert
    # anyway) and then turn off learning and turn on inference mode.
    self.assertEqual(classifier.getParameter("learningMode"), 1)
    net.run(8)

    # Test the network on the same data as it trained on; should classify with
    # 100% accuracy.
    classifier.setParameter("inferenceMode", 1)
    classifier.setParameter("learningMode", 0)

    # Assert learning is OFF and that the classifier learned the dataset.
    self.assertEqual(classifier.getParameter("learningMode"), 0,
                     "Learning mode is not turned off.")
    self.assertEqual(classifier.getParameter("inferenceMode"), 1,
                     "Inference mode is not turned on.")

    # make sure we can access all the parameters with getParameter
    self.assertEqual(classifier.getParameter("maxCategoryCount"), 2000)
    self.assertAlmostEqual(float(classifier.getParameter("alpha")), 0.001)
    self.assertEqual(int(classifier.getParameter("steps")), 0)
    self.assertTrue(classifier.getParameter("implementation") == "py")
    self.assertEqual(classifier.getParameter("verbosity"), 0)


    # Replay the stream and check the inferred category for every record.
    # NOTE(review): "Classififer" in the message below is a typo for
    # "Classifier" (runtime string, left unchanged here).
    expectedCats = ([0.0], [1.0], [0.0], [1.0], [0.0], [1.0], [0.0], [1.0],)
    dataSource.rewind()
    for i in xrange(8):
      net.run(1)
      inferredCats = classifier.getOutputData("categoriesOut")
      self.assertSequenceEqual(expectedCats[i], inferredCats.tolist(),
                               "Classififer did not infer expected category "
                               "for record number {}.".format(i))
    # Close data stream, delete file.
    dataSource.close()
    os.remove(filename)
    def testSimpleMulticlassNetworkPY(self):
        """
        Auto-reformatted duplicate of the sensor -> SDRClassifierRegion
        smoke test that appears earlier in this file: train on eight
        two-category records, switch to inference mode, verify classifier
        parameters, and check the inferred category for every record.
        """

        # Setup data record stream of fake data (with three categories)
        filename = _getTempFileName()
        fields = [("timestamp", "datetime", "T"), ("value", "float", ""),
                  ("reset", "int", "R"), ("sid", "int", "S"),
                  ("categories", "list", "C")]
        records = ([datetime(day=1, month=3, year=2010), 0.0, 1, 0, "0"], [
            datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1"
        ], [datetime(day=3, month=3, year=2010), 0.0, 0, 0,
            "0"], [datetime(day=4, month=3, year=2010), 1.0, 0, 0,
                   "1"], [datetime(day=5, month=3, year=2010), 0.0, 0, 0, "0"],
                   [datetime(day=6, month=3, year=2010), 1.0, 0, 0, "1"
                    ], [datetime(day=7, month=3, year=2010), 0.0, 0, 0, "0"],
                   [datetime(day=8, month=3, year=2010), 1.0, 0, 0, "1"])
        dataSource = FileRecordStream(streamID=filename,
                                      write=True,
                                      fields=fields)
        for r in records:
            dataSource.appendRecord(list(r))

        # Create the network and get region instances.
        net = Network()
        net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
        net.addRegion("classifier", "py.SDRClassifierRegion",
                      "{steps: '0', alpha: 0.001, implementation: 'py'}")
        net.link("sensor",
                 "classifier",
                 "UniformLink",
                 "",
                 srcOutput="dataOut",
                 destInput="bottomUpIn")
        net.link("sensor",
                 "classifier",
                 "UniformLink",
                 "",
                 srcOutput="categoryOut",
                 destInput="categoryIn")
        sensor = net.regions["sensor"]
        classifier = net.regions["classifier"]

        # Setup sensor region encoder and data stream.
        # Reopen the file in read mode and attach it to the sensor region.
        dataSource.close()
        dataSource = FileRecordStream(filename)
        sensorRegion = sensor.getSelf()
        sensorRegion.encoder = MultiEncoder()
        sensorRegion.encoder.addEncoder(
            "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
        sensorRegion.dataSource = dataSource

        # Get ready to run.
        net.initialize()

        # Train the network (by default learning is ON in the classifier, but assert
        # anyway) and then turn off learning and turn on inference mode.
        self.assertEqual(classifier.getParameter("learningMode"), 1)
        net.run(8)

        # Test the network on the same data as it trained on; should classify with
        # 100% accuracy.
        classifier.setParameter("inferenceMode", 1)
        classifier.setParameter("learningMode", 0)

        # Assert learning is OFF and that the classifier learned the dataset.
        self.assertEqual(classifier.getParameter("learningMode"), 0,
                         "Learning mode is not turned off.")
        self.assertEqual(classifier.getParameter("inferenceMode"), 1,
                         "Inference mode is not turned on.")

        # make sure we can access all the parameters with getParameter
        self.assertEqual(classifier.getParameter("maxCategoryCount"), 2000)
        self.assertAlmostEqual(float(classifier.getParameter("alpha")), 0.001)
        self.assertEqual(int(classifier.getParameter("steps")), 0)
        self.assertTrue(classifier.getParameter("implementation") == "py")
        self.assertEqual(classifier.getParameter("verbosity"), 0)

        # Replay the stream and check the inferred category for every record.
        expectedCats = (
            [0.0],
            [1.0],
            [0.0],
            [1.0],
            [0.0],
            [1.0],
            [0.0],
            [1.0],
        )
        dataSource.rewind()
        for i in xrange(8):
            net.run(1)
            inferredCats = classifier.getOutputData("categoriesOut")
            self.assertSequenceEqual(
                expectedCats[i], inferredCats.tolist(),
                "Classififer did not infer expected category "
                "for record number {}.".format(i))
        # Close data stream, delete file.
        dataSource.close()
        os.remove(filename)
Example #4
0
  def testSimpleMulticlassNetwork(self):
    """
    End-to-end test of a sensor -> KNNClassifierRegion network on a small
    multi-category stream (records may carry several categories, e.g.
    "1 2"): train on eight records, switch to inference mode, then verify
    the pattern/category counts and the per-record category probabilities.
    """

    # Setup data record stream of fake data (with three categories)
    filename = _getTempFileName()
    fields = [("timestamp", "datetime", "T"),
              ("value", "float", ""),
              ("reset", "int", "R"),
              ("sid", "int", "S"),
              ("categories", "list", "C")]
    records = (
      [datetime(day=1, month=3, year=2010), 0.0, 1, 0, ""],
      [datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1 2"],
      [datetime(day=3, month=3, year=2010), 1.0, 0, 0, "1 2"],
      [datetime(day=4, month=3, year=2010), 2.0, 0, 0, "0"],
      [datetime(day=5, month=3, year=2010), 3.0, 0, 0, "1 2"],
      [datetime(day=6, month=3, year=2010), 5.0, 0, 0, "1 2"],
      [datetime(day=7, month=3, year=2010), 8.0, 0, 0, "0"],
      [datetime(day=8, month=3, year=2010), 13.0, 0, 0, "1 2"])
    dataSource = FileRecordStream(streamID=filename, write=True, fields=fields)
    for r in records:
      dataSource.appendRecord(list(r))

    # Create the network and get region instances.
    net = Network()
    net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
    net.addRegion("classifier","py.KNNClassifierRegion",
                  "{'k': 2,'distThreshold': 0,'maxCategoryCount': 3}")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput = "dataOut", destInput = "bottomUpIn")
    net.link("sensor", "classifier", "UniformLink", "",
             srcOutput = "categoryOut", destInput = "categoryIn")
    sensor = net.regions["sensor"]
    classifier = net.regions["classifier"]
    
    # Setup sensor region encoder and data stream.
    # Reopen the file in read mode and attach it to the sensor region.
    dataSource.close()
    dataSource = FileRecordStream(filename)
    sensorRegion = sensor.getSelf()
    sensorRegion.encoder = MultiEncoder()
    sensorRegion.encoder.addEncoder(
        "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
    sensorRegion.dataSource = dataSource
    
    # Get ready to run.
    net.initialize()

    # Train the network (by default learning is ON in the classifier, but assert
    # anyway) and then turn off learning and turn on inference mode.
    self.assertEqual(classifier.getParameter("learningMode"), 1)
    net.run(8)
    classifier.setParameter("inferenceMode", 1)
    classifier.setParameter("learningMode", 0)

    # Assert learning is OFF and that the classifier learned the dataset.
    self.assertEqual(classifier.getParameter("learningMode"), 0,
        "Learning mode is not turned off.")
    self.assertEqual(classifier.getParameter("inferenceMode"), 1,
        "Inference mode is not turned on.")
    self.assertEqual(classifier.getParameter("categoryCount"), 3,
        "The classifier should count three total categories.")
    # classififer learns 12 patterns b/c there are 12 categories amongst the
    # records:
    self.assertEqual(classifier.getParameter("patternCount"), 12,
        "The classifier should've learned 12 samples in total.")

    # Test the network on the same data as it trained on; should classify with
    # 100% accuracy.
    expectedCats = ([0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.5, 0.5, 0.0],
                    [0.0, 0.5, 0.5],
                    [0.0, 0.5, 0.5],
                    [0.5, 0.5, 0.0],
                    [0.0, 0.5, 0.5])
    dataSource.rewind()
    for i in xrange(8):
      net.run(1)
      inferredCats = classifier.getOutputData("categoriesOut")
      self.assertSequenceEqual(expectedCats[i], inferredCats.tolist(),
          "Classififer did not infer expected category probabilites for record "
          "number {}.".format(i))
    
    # Close data stream, delete file.
    dataSource.close()
    os.remove(filename)
Example #5
0
    def testSimpleMulticlassNetwork(self):
        """
        Auto-reformatted duplicate of the sensor -> KNNClassifierRegion
        multi-category test that appears earlier in this file: train on
        eight records, switch to inference mode, then verify the
        pattern/category counts and per-record category probabilities.
        """

        # Setup data record stream of fake data (with three categories)
        filename = _getTempFileName()
        fields = [("timestamp", "datetime", "T"), ("value", "float", ""),
                  ("reset", "int", "R"), ("sid", "int", "S"),
                  ("categories", "list", "C")]
        records = ([datetime(day=1, month=3, year=2010), 0.0, 1, 0, ""], [
            datetime(day=2, month=3, year=2010), 1.0, 0, 0, "1 2"
        ], [datetime(day=3, month=3, year=2010), 1.0, 0, 0,
            "1 2"], [datetime(day=4, month=3, year=2010), 2.0, 0, 0, "0"], [
                datetime(day=5, month=3, year=2010), 3.0, 0, 0, "1 2"
            ], [datetime(day=6, month=3, year=2010), 5.0, 0, 0,
                "1 2"], [datetime(day=7, month=3, year=2010), 8.0, 0, 0, "0"],
                   [datetime(day=8, month=3, year=2010), 13.0, 0, 0, "1 2"])
        dataSource = FileRecordStream(streamID=filename,
                                      write=True,
                                      fields=fields)
        for r in records:
            dataSource.appendRecord(list(r))

        # Create the network and get region instances.
        net = Network()
        net.addRegion("sensor", "py.RecordSensor", "{'numCategories': 3}")
        net.addRegion("classifier", "py.KNNClassifierRegion",
                      "{'k': 2,'distThreshold': 0,'maxCategoryCount': 3}")
        net.link("sensor",
                 "classifier",
                 "UniformLink",
                 "",
                 srcOutput="dataOut",
                 destInput="bottomUpIn")
        net.link("sensor",
                 "classifier",
                 "UniformLink",
                 "",
                 srcOutput="categoryOut",
                 destInput="categoryIn")
        sensor = net.regions["sensor"]
        classifier = net.regions["classifier"]

        # Setup sensor region encoder and data stream.
        # Reopen the file in read mode and attach it to the sensor region.
        dataSource.close()
        dataSource = FileRecordStream(filename)
        sensorRegion = sensor.getSelf()
        sensorRegion.encoder = MultiEncoder()
        sensorRegion.encoder.addEncoder(
            "value", ScalarEncoder(21, 0.0, 13.0, n=256, name="value"))
        sensorRegion.dataSource = dataSource

        # Get ready to run.
        net.initialize()

        # Train the network (by default learning is ON in the classifier, but assert
        # anyway) and then turn off learning and turn on inference mode.
        self.assertEqual(classifier.getParameter("learningMode"), 1)
        net.run(8)
        classifier.setParameter("inferenceMode", 1)
        classifier.setParameter("learningMode", 0)

        # Assert learning is OFF and that the classifier learned the dataset.
        self.assertEqual(classifier.getParameter("learningMode"), 0,
                         "Learning mode is not turned off.")
        self.assertEqual(classifier.getParameter("inferenceMode"), 1,
                         "Inference mode is not turned on.")
        self.assertEqual(
            classifier.getParameter("categoryCount"), 3,
            "The classifier should count three total categories.")
        # classififer learns 12 patterns b/c there are 12 categories amongst the
        # records:
        self.assertEqual(
            classifier.getParameter("patternCount"), 12,
            "The classifier should've learned 12 samples in total.")

        # Test the network on the same data as it trained on; should classify with
        # 100% accuracy.
        expectedCats = ([0.0, 0.5, 0.5], [0.0, 0.5, 0.5], [0.0, 0.5, 0.5],
                        [0.5, 0.5, 0.0], [0.0, 0.5,
                                          0.5], [0.0, 0.5,
                                                 0.5], [0.5, 0.5,
                                                        0.0], [0.0, 0.5, 0.5])
        dataSource.rewind()
        for i in xrange(8):
            net.run(1)
            inferredCats = classifier.getOutputData("categoriesOut")
            self.assertSequenceEqual(
                expectedCats[i], inferredCats.tolist(),
                "Classififer did not infer expected category probabilites for record "
                "number {}.".format(i))

        # Close data stream, delete file.
        dataSource.close()
        os.remove(filename)
Example #6
0
class Sensor(Node):
    """
	A super class only to group properties related to sensors.
	"""

    #region Constructor

    def __init__(self, name):
        """
        Initialize a new sensor node with the given name.
        """

        Node.__init__(self, name, NodeType.sensor)

        #region Instance fields

        # Bit objects composing the current output of this node.
        self.bits = []

        # Record provider feeding this region (created in initialize()).
        self.dataSource = None

        # Whether records come from a file or a database.
        self.dataSourceType = DataSourceType.file

        # Input file name; relative names are resolved against the project
        # directory, otherwise a full path is expected.
        self.fileName = ''

        # Connection string of the database.
        self.databaseConnectionString = ""

        # Target table of the database.
        self.databaseTable = ''

        # Multi-encoder that concatenates the sub-encodings to convert raw
        # data to HTM input and vice-versa.
        self.encoder = None

        # Sub-encodings handling each field of the input.
        self.encodings = []

        # Strategy used to obtain predicted values and their probabilities.
        self.predictionsMethod = PredictionsMethod.reconstruction

        # Switches for classification learning and inference.
        self.enableClassificationLearning = True
        self.enableClassificationInference = True

        #endregion

        #region Statistics properties

        self.statsPrecisionRate = 0.

        #endregion

    #endregion

    #region Methods

    def getBit(self, x, y):
        """
        Return the bit located at the given (x, y) position.
        """

        # Bits are addressed row-major: each row spans `width` entries.
        return self.bits[x + (y * self.width)]

    def initialize(self):
        """
        Initialize this node.

        Allocates the sensor's bit grid, opens the configured data source and
        builds the multi-encoder from the configured sub-encodings.

        Returns:
            True on success; False (after showing a warning dialog) when the
            input file is missing or the encoded input does not fit the sensor.
        """

        Node.initialize(self)

        # Initialize input bits in row-major order so that the flat index
        # (y * width) + x used by getBit() addresses the bit whose coordinates
        # are stored here. (The previous column-major fill only matched
        # getBit() for square sensors.)
        self.bits = []
        for y in range(self.height):
            for x in range(self.width):
                bit = Bit()
                bit.x = x
                bit.y = y
                self.bits.append(bit)

        if self.dataSourceType == DataSourceType.file:
            # Open the file and place the cursor on the first record.

            # If file name provided is a relative path, use project file path
            if self.fileName != '' and os.path.dirname(self.fileName) == '':
                fullFileName = os.path.dirname(
                    Global.project.fileName) + '/' + self.fileName
            else:
                fullFileName = self.fileName

            # Check if file really exists
            if not os.path.isfile(fullFileName):
                QtGui.QMessageBox.warning(
                    None, "Warning", "Input stream file '" + fullFileName +
                    "' was not found or specified.", QtGui.QMessageBox.Ok)
                return False

            # Create a data source for read the file
            self.dataSource = FileRecordStream(fullFileName)

        elif self.dataSourceType == DataSourceType.database:
            # Database sources are not implemented yet.
            pass

        self.encoder = MultiEncoder()
        for encoding in self.encodings:
            encoding.initialize()

            # Create an instance class for an encoder given its module, class and constructor params
            encoding.encoder = getInstantiatedClass(encoding.encoderModule,
                                                    encoding.encoderClass,
                                                    encoding.encoderParams)

            # Take the first part of encoder field name as encoder name
            # Ex: timestamp_weekend.weekend => timestamp_weekend
            encoding.encoder.name = encoding.encoderFieldName.split('.')[0]

            # Add sub-encoder to multi-encoder list
            self.encoder.addEncoder(encoding.dataSourceFieldName,
                                    encoding.encoder)

        # The concatenated encoding must fit into the sensor's bit grid.
        encoderSize = self.encoder.getWidth()
        sensorSize = self.width * self.height
        if encoderSize > sensorSize:
            QtGui.QMessageBox.warning(
                None, "Warning",
                "'" + self.name + "': Encoder size (" + str(encoderSize) +
                ") is different from sensor size (" + str(self.width) + " x " +
                str(self.height) + " = " + str(sensorSize) + ").",
                QtGui.QMessageBox.Ok)
            return False

        return True

    def nextStep(self):
        """
        Advance the sensor one time step: read the next record from the data
        source, encode it, and refresh the bits' states.
        """

        # Slide every encoding's state machines one step forward.
        for encoding in self.encodings:
            encoding.currentValue.rotate()
            if encoding.enableInference:
                encoding.predictedValues.rotate()
                encoding.bestPredictedValue.rotate()

        Node.nextStep(self)
        for bit in self.bits:
            bit.nextStep()

        # Fetch the next record, rewinding the source when it is exhausted.
        data = self.dataSource.getNextRecordDict()
        if not data:
            self.dataSource.rewind()
            data = self.dataSource.getNextRecordDict()

        # Encode the raw record into one concatenated bit array.
        outputArray = numpy.zeros(self.encoder.getWidth())
        self.encoder.encodeIntoArray(data, outputArray)

        # Scalar representation of the record, one entry per sub-encoding.
        outputValues = self.encoder.getScalars(data)

        # Store each field's raw value, coerced to its declared data type.
        for idx, encoding in enumerate(self.encodings):
            rawValue = outputValues[idx]
            fieldType = encoding.encoderFieldDataType
            if fieldType == FieldDataType.boolean:
                rawValue = bool(rawValue)
            elif fieldType == FieldDataType.integer:
                rawValue = int(rawValue)
            elif fieldType == FieldDataType.decimal:
                rawValue = float(rawValue)
            elif fieldType == FieldDataType.dateTime:
                rawValue = dateutil.parser.parse(str(rawValue))
            elif fieldType == FieldDataType.string:
                rawValue = str(rawValue)
            encoding.currentValue.setForCurrStep(rawValue)

        # Refresh the active state of every sensor bit.
        for idx, value in enumerate(outputArray):
            self.bits[idx].isActive.setForCurrStep(bool(value > 0.))

        # Flag bits that were predicted last step but did not become active.
        for bit in self.bits:
            wasPredicted = bit.isPredicted.atPreviousStep()
            if wasPredicted and not bit.isActive.atCurrStep():
                bit.isFalselyPredicted.setForCurrStep(True)

        self._output = outputArray

    def getPredictions(self):
        """
        Get the predictions after an iteration.

        Fills each inference-enabled encoding's `predictedValues` for the
        current step (a dict mapping step -> list of [value, probability/label]
        pairs) and its `bestPredictedValue`, using either the reconstruction
        or the classification method.

        NOTE: this module targets Python 2 (`iteritems` below; `xrange` and
        `long` elsewhere in this file).
        """

        if self.predictionsMethod == PredictionsMethod.reconstruction:

            # Prepare list with predictions to be classified
            # This list contains the indexes of all bits that are predicted
            output = []
            for i in range(len(self.bits)):
                if self.bits[i].isPredicted.atCurrStep():
                    output.append(1)
                else:
                    output.append(0)
            output = numpy.array(output)

            # Decode output and create predictions list
            fieldsDict, fieldsOrder = self.encoder.decode(output)
            for encoding in self.encodings:
                if encoding.enableInference:
                    predictions = []
                    encoding.predictedValues.setForCurrStep(dict())

                    # If encoder field name was returned by decode(), assign the predictions to it
                    if encoding.encoderFieldName in fieldsOrder:
                        predictedLabels = fieldsDict[
                            encoding.encoderFieldName][1].split(', ')
                        predictedValues = fieldsDict[
                            encoding.encoderFieldName][0]
                        for i in range(len(predictedLabels)):
                            predictions.append(
                                [predictedValues[i], predictedLabels[i]])

                    # Reconstruction only predicts one step ahead (key 1).
                    encoding.predictedValues.atCurrStep()[1] = predictions

                    # Get the predicted value with the biggest probability to happen
                    # NOTE(review): assumes decode() orders ranges by
                    # likelihood, so the first prediction is taken as best
                    # (midpoint of its [min, max] range) — confirm.
                    if len(predictions) > 0:
                        bestPredictionRange = predictions[0][0]
                        min = bestPredictionRange[0]
                        max = bestPredictionRange[1]
                        bestPredictedValue = (min + max) / 2.0
                        encoding.bestPredictedValue.setForCurrStep(
                            bestPredictedValue)

        elif self.predictionsMethod == PredictionsMethod.classification:
            # A classification involves estimate which are the likely values to occurs in the next time step.

            # `offset` tracks where each encoding's bits start in the
            # concatenated sensor output.
            offset = 0
            for encoding in self.encodings:
                encoderWidth = encoding.encoder.getWidth()

                if encoding.enableInference:
                    # Prepare list with predictions to be classified
                    # This list contains the indexes of all bits that are predicted
                    # NOTE(review): the range ends at `encoderWidth`, not
                    # `offset + encoderWidth` — for any encoding after the
                    # first this looks wrong; confirm intended behavior.
                    patternNZ = []
                    for i in range(offset, encoderWidth):
                        if self.bits[i].isActive.atCurrStep():
                            patternNZ.append(i)

                    # Get the bucket index of the current value at the encoder
                    actualValue = encoding.currentValue.atCurrStep()
                    bucketIdx = encoding.encoder.getBucketIndices(
                        actualValue)[0]

                    # Perform classification
                    clasResults = encoding.classifier.compute(
                        recordNum=Global.currStep,
                        patternNZ=patternNZ,
                        classification={
                            'bucketIdx': bucketIdx,
                            'actValue': actualValue
                        },
                        learn=self.enableClassificationLearning,
                        infer=self.enableClassificationInference)

                    encoding.predictedValues.setForCurrStep(dict())
                    for step in encoding.steps:

                        # Calculate probability for each predicted value
                        # (the same actual value can appear more than once).
                        predictions = dict()
                        for (actValue,
                             prob) in zip(clasResults['actualValues'],
                                          clasResults[step]):
                            if actValue in predictions:
                                predictions[actValue] += prob
                            else:
                                predictions[actValue] = prob

                        # Remove predictions with low probabilities, always
                        # keeping at least one entry and the current maximum.
                        # (Deleting while iterating is safe here only because
                        # Python 2 dict.items() returns a list snapshot.)
                        maxVal = (None, None)
                        for (actValue, prob) in predictions.items():
                            if len(predictions) <= 1:
                                break
                            if maxVal[0] is None or prob >= maxVal[1]:
                                if maxVal[0] is not None and maxVal[
                                        1] < encoding.minProbabilityThreshold:
                                    del predictions[maxVal[0]]
                                maxVal = (actValue, prob)
                            elif prob < encoding.minProbabilityThreshold:
                                del predictions[actValue]

                        # Sort the list of values from more probable to less probable values
                        # and decrease the list length to max predictions per step limit
                        predictions = sorted(predictions.iteritems(),
                                             key=operator.itemgetter(1),
                                             reverse=True)
                        # NOTE(review): `maxFutureSteps` is not defined in this
                        # method (not a local, a parameter, or a self
                        # attribute) — presumably a module-level setting;
                        # confirm it exists where this class is defined.
                        predictions = predictions[:maxFutureSteps]

                        encoding.predictedValues.atCurrStep(
                        )[step] = predictions

                    # Get the predicted value with the biggest probability to happen
                    # (first entry of the sorted 1-step-ahead predictions).
                    bestPredictedValue = encoding.predictedValues.atCurrStep(
                    )[1][0][0]
                    encoding.bestPredictedValue.setForCurrStep(
                        bestPredictedValue)

                offset += encoderWidth

    def calculateStatistics(self):
        """
        Calculate statistics after an iteration.

        Updates `statsPrecisionRate` with a running average of the per-step
        precision: a step counts as 100% precise when the current value of an
        inference-enabled encoding falls within the range of any prediction
        made at the previous step; otherwise 0%.
        """

        if Global.currStep > 0:
            precision = 0.

            # Calculate the prediction precision comparing if the current value is in the range of any prediction.
            for encoding in self.encodings:
                if encoding.enableInference:
                    # Predictions made at the previous step for 1 step ahead.
                    predictions = encoding.predictedValues.atPreviousStep()[1]
                    for predictedValue in predictions:
                        # `lower`/`upper` instead of `min`/`max` to avoid
                        # shadowing the builtins.
                        lower = None
                        upper = None
                        value = predictedValue[0]
                        if self.predictionsMethod == PredictionsMethod.reconstruction:
                            # Reconstruction predicts a [min, max] range.
                            lower = value[0]
                            upper = value[1]
                        elif self.predictionsMethod == PredictionsMethod.classification:
                            # Classification predicts a single value.
                            lower = value
                            upper = value
                        # Widen numeric bounds to whole numbers before the
                        # containment test. (`long` is intentional: this file
                        # targets Python 2.)
                        if isinstance(
                                lower,
                            (int, long, float, complex)) and isinstance(
                                upper, (int, long, float, complex)):
                            lower = math.floor(lower)
                            upper = math.ceil(upper)
                        if lower <= encoding.currentValue.atCurrStep() <= upper:
                            precision = 100.
                            break

            # The precision rate is the average of the precision calculated in every step
            self.statsPrecisionRate = (self.statsPrecisionRate + precision) / 2
        else:
            self.statsPrecisionRate = 0.

        for bit in self.bits:
            bit.calculateStatistics()

    #endregion