Example #1
def initializeClassifiers(Nelements, encoder):
  claClassifier = CLAClassifier(steps=[0])

  sdrClassifier = SDRClassifier(steps=[0], alpha=0.1)

  patternNZ = list(numpy.where(encoder.encode(Nelements - 1))[0])
  classification = {'bucketIdx': Nelements - 1, 'actValue': Nelements - 1}

  # feed in the pattern with the highest bucket index
  claRetval = claClassifier.compute(0, patternNZ, classification,
                                    learn=True, infer=True)
  sdrRetval = sdrClassifier.compute(0, patternNZ, classification,
                                    learn=True, infer=True)
  return claClassifier, sdrClassifier
Example #2
def initializeClassifiers(Nelements, encoder):
  cla = CLAClassifier(steps=[0])

  nn_classifier = SDRClassifier(steps=[0], alpha=0.1)

  patternNZ = list(numpy.where(encoder.encode(Nelements-1))[0])
  classification = {'bucketIdx': Nelements-1, 'actValue': Nelements-1}

  # feed in the pattern with the highest bucket index
  claRetval = cla.compute(0, patternNZ, classification,
                           learn=True, infer=True)
  nnRetval = nn_classifier.compute(0, patternNZ, classification,
                                    learn=True, infer=True)
  return cla, nn_classifier
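The two helpers above build a CLAClassifier and an SDRClassifier side by side and prime them with the pattern for the highest bucket index. A minimal sketch of a driver follows; the import paths and encoder parameters are assumptions and may differ between NuPIC versions.

# Hypothetical driver for initializeClassifiers; module paths and encoder
# parameters are assumptions, not taken from the examples above.
import numpy
from nupic.encoders.scalar import ScalarEncoder
from nupic.algorithms.CLAClassifier import CLAClassifier
from nupic.algorithms.sdr_classifier import SDRClassifier

Nelements = 10
# Encode the scalars 0..Nelements-1 as 21-of-100 bit patterns.
encoder = ScalarEncoder(w=21, minval=0, maxval=Nelements, n=100, forced=True)
claClassifier, sdrClassifier = initializeClassifiers(Nelements, encoder)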
Example #3
class CLAClassifierDiff(object):
  """Classifier-like object that diffs the output from different classifiers.

  Instances of each version of the CLA classifier are created and each call to
  compute is passed to each version of the classifier. The results are diffed
  to make sure that there are no differences.

  Optionally, the classifiers can be serialized and deserialized after a
  specified number of calls to compute to ensure that serialization does not
  cause discrepancies between the results.

  TODO: Check internal state as well.
  TODO: Provide option to write output to a file.
  TODO: Provide an option to record differences without throwing an exception.
  """


  __VERSION__ = 'CLAClassifierDiffV1'


  def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0,
               callsPerSerialize=CALLS_PER_SERIALIZE):
    self._claClassifier = CLAClassifier(steps, alpha, actValueAlpha, verbosity)
    self._fastCLAClassifier = FastCLAClassifier(steps, alpha, actValueAlpha,
                                                verbosity)
    self._calls = 0
    self._callsPerSerialize = callsPerSerialize


  def compute(self, recordNum, patternNZ, classification, learn, infer):
    result1 = self._claClassifier.compute(recordNum, patternNZ, classification,
                                          learn, infer)
    result2 = self._fastCLAClassifier.compute(recordNum, patternNZ,
                                              classification, learn, infer)
    self._calls += 1
    # Check if it is time to serialize and deserialize.
    if self._calls % self._callsPerSerialize == 0:
      self._claClassifier = pickle.loads(pickle.dumps(self._claClassifier))
      self._fastCLAClassifier = pickle.loads(pickle.dumps(
          self._fastCLAClassifier))
    # Assert both results are the same type.
    assert type(result1) == type(result2)
    # Assert that the keys match.
    assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
      "py result=%s, C++ result=%s" % (result1, result2)
    # Assert that the values match.
    for k, l in result1.iteritems():
      assert type(l) == type(result2[k])
      for i in xrange(len(l)):
        if isinstance(classification['actValue'], numbers.Real):
          assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, (
              'Python CLAClassifier has value %f and C++ FastCLAClassifier has '
              'value %f.' % (l[i], result2[k][i]))
        else:
          assert l[i] == result2[k][i], (
              'Python CLAClassifier has value %s and C++ FastCLAClassifier has '
              'value %s.' % (str(l[i]), str(result2[k][i])))
    return result1
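Per the docstring, every compute() call is forwarded to both the pure-Python CLAClassifier and the C++ FastCLAClassifier, and the assertions fire on any mismatch. A hedged sketch of driving this wrapper (the input patterns below are fabricated for illustration):

# Illustrative harness for CLAClassifierDiff; patternNZ values are made up.
diffClassifier = CLAClassifierDiff(steps=(1,), callsPerSerialize=10)
for recordNum in xrange(100):
  patternNZ = [recordNum % 50, 50 + (recordNum % 50)]  # fabricated active bits
  classification = {'bucketIdx': recordNum % 5, 'actValue': float(recordNum % 5)}
  # Raises AssertionError if the two implementations ever disagree; every
  # tenth call also round-trips both classifiers through pickle.
  result = diffClassifier.compute(recordNum, patternNZ, classification,
                                  learn=True, infer=True)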
Example #4
  def testWriteRead(self):
    c1 = CLAClassifier([1], 0.1, 0.1, 0)

    # Create a vector of input bit indices
    input1 = [1, 5, 9]
    result = c1.compute(recordNum=0,
                        patternNZ=input1,
                        classification={'bucketIdx': 4, 'actValue': 34.7},
                        learn=True, infer=True)

    proto1 = ClaClassifier_capnp.ClaClassifierProto.new_message()
    c1.write(proto1)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = ClaClassifier_capnp.ClaClassifierProto.read(f)

    # Load the deserialized proto
    c2 = CLAClassifier.read(proto2)

    self.assertEqual(c1.steps, c2.steps)
    self.assertAlmostEqual(c1.alpha, c2.alpha)
    self.assertAlmostEqual(c1.actValueAlpha, c2.actValueAlpha)
    self.assertEqual(c1._learnIteration, c2._learnIteration)
    self.assertEqual(c1._recordNumMinusLearnIteration, c2._recordNumMinusLearnIteration)
    self.assertEqual(c1._patternNZHistory, c2._patternNZHistory)
    self.assertEqual(c1._activeBitHistory.keys(), c2._activeBitHistory.keys())
    for bit, nSteps in c1._activeBitHistory.keys():
      c1BitHistory = c1._activeBitHistory[(bit, nSteps)]
      c2BitHistory = c2._activeBitHistory[(bit, nSteps)]
      self.assertEqual(c1BitHistory._id, c2BitHistory._id)
      self.assertEqual(c1BitHistory._stats, c2BitHistory._stats)
      self.assertEqual(c1BitHistory._lastTotalUpdate, c2BitHistory._lastTotalUpdate)
      self.assertEqual(c1BitHistory._learnIteration, c2BitHistory._learnIteration)
    self.assertEqual(c1._maxBucketIdx, c2._maxBucketIdx)
    self.assertEqual(len(c1._actualValues), len(c2._actualValues))
    for i in xrange(len(c1._actualValues)):
      self.assertAlmostEqual(c1._actualValues[i], c2._actualValues[i], 5)
    self.assertEqual(c1._version, c2._version)
    self.assertEqual(c1.verbosity, c2.verbosity)

    result1 = c1.compute(recordNum=1,
                         patternNZ=input1,
                         classification={'bucketIdx': 4, 'actValue': 34.7},
                         learn=True, infer=True)
    result2 = c2.compute(recordNum=1,
                         patternNZ=input1,
                         classification={'bucketIdx': 4, 'actValue': 34.7},
                         learn=True, infer=True)

    self.assertEqual(result1.keys(), result2.keys())
    for key in result1.keys():
      for i in xrange(len(c1._actualValues)):
        self.assertAlmostEqual(result1[key][i], result2[key][i], 5)
Example #5
 def create(*args, **kwargs):
   impl = kwargs.pop('implementation', None)
   if impl is None:
     impl = Configuration.get('nupic.opf.claClassifier.implementation')
   if impl == 'py':
     return CLAClassifier(*args, **kwargs)
   elif impl == 'cpp':
     return FastCLAClassifier(*args, **kwargs)
   elif impl == 'diff':
     return CLAClassifierDiff(*args, **kwargs)
   else:
     raise ValueError('Invalid classifier implementation (%r). Value must be '
                      '"py", "cpp", or "diff".' % impl)
Example #6
 def read(proto):
   """
   proto: CLAClassifierRegionProto capnproto object
   """
   impl = proto.classifierImp
   if impl == 'py':
     return CLAClassifier.read(proto.claClassifier)
   elif impl == 'cpp':
     return FastCLAClassifier.read(proto.claClassifier)
   elif impl == 'diff':
     raise NotImplementedError("CLAClassifierDiff.read not implemented")
   else:
     raise ValueError('Invalid classifier implementation (%r). Value must be '
                      '"py" or "cpp".' % impl)
Example #7
 def read(proto):
     """
 proto: CLAClassifierRegionProto capnproto object
 """
     impl = proto.classifierImp
     if impl == 'py':
         return CLAClassifier.read(proto.claClassifier)
     elif impl == 'cpp':
         return FastCLAClassifier.read(proto.claClassifier)
     elif impl == 'diff':
         raise NotImplementedError("CLAClassifierDiff.read not implemented")
     else:
         raise ValueError(
             'Invalid classifier implementation (%r). Value must be '
             '"py" or "cpp".' % impl)
Example #8
    def initialize(self):
        """
    Initialize this node.
    """

        # Create Classifier instance with appropriate parameters
        self.minProbabilityThreshold = 0.0001
        self.steps = []
        for step in range(maxFutureSteps):
            self.steps.append(step + 1)
        self.classifier = CLAClassifier(steps=self.steps)

        # Increase history according to inference flag
        if self.enableInference:
            maxLen = maxPreviousStepsWithInference
            self.bestPredictedValue = MachineState(0, maxLen)
        else:
            maxLen = maxPreviousSteps
        self.currentValue = MachineState(0, maxLen)
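Because the classifier is constructed with steps=[1, 2, ..., maxFutureSteps], each compute() result is keyed by step number. A hedged sketch of reading multi-step predictions out of such a result (classifier and steps stand in for the self.classifier and self.steps built above; the inputs are illustrative):

# Illustrative only: interpreting a multi-step compute() result.
results = classifier.compute(recordNum=0, patternNZ=[1, 5, 9],
                             classification={'bucketIdx': 0, 'actValue': 0.0},
                             learn=True, infer=True)
for step in steps:
  # results[step][i] is the probability that results['actualValues'][i]
  # occurs 'step' time steps ahead.
  pairs = zip(results['actualValues'], results[step])
  bestValue, bestProb = max(pairs, key=lambda pair: pair[1])
  print "step %d: predicted %s with probability %f" % (step, bestValue, bestProb)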
Example #9
class SpatialPoolerAgent(Agent):
  """ agent that uses CAM (content-addresable memory; 
      uses SpatialPooler to make abstractions and generalizations of inputs to learn actions. 
      Can be trained in both supervised and unsupervised ways. 
      Uses utility encoder with feedback = 1, to remember 1 step -> start={stateA, actionA} , score(start)==score after applying actionA"""

  
  def __init__(self, numFields):
    a=dict()
    t=[]  # target
    super(SpatialPoolerAgent, self).__init__(a, t, listMemFields=["score", "visited"], name='SPlearner')
    self.me['x']=0
    self.me['y']=0
    self.me['steps']=0
    self.enc = UtilEncoder(length=numFields, minval=0, maxval=100, scoreMin=0, scoreMax=100, scoreResolution=0.1)
    self.enc.setEvaluationFn(evalFn)
    # spatial pooler
    self.sp = SP(
        inputDimensions = [self.enc._offset],
        columnDimensions = [1024],
        potentialRadius = 15,
        potentialPct = 0.5,
        globalInhibition = True,
        localAreaDensity = -1.0,
        numActiveColumnsPerInhArea = 5,
        stimulusThreshold=0,
        synPermInactiveDec=0.01,
        synPermActiveInc = 0.1,
        synPermConnected = 0.20,
        minPctOverlapDutyCycle = 0.1,
        minPctActiveDutyCycle = 0.1,
        dutyCyclePeriod = 10,
        maxBoost = 10.0,
        seed = -1,
        spVerbosity = 2,)
    self.cls = Clas() # classifier 


  def testSP(self):
    dataSize = 5
    totalPatterns = 5
    patterns=[]
    for i in xrange(0,totalPatterns):
      patterns.append(numpy.random.randint(0,2,dataSize).tolist()) # generate input patterns

    # SP learn patterns
    for _ in xrange(0, 10):  # learn-repeat
      for pattern in patterns:
        ret = numpy.zeros(1024)
        print "input=", pattern
        enc = self.enc.encode(pattern)
        print "encoded=", enc
        self.sp.compute(enc, True, ret)
        nz = numpy.nonzero(ret)[0].tolist()
        print len(nz)
        score = self.enc.getScoreIN(pattern)
        buckets = self.enc.getBucketIndices({'simpleUtility': pattern, 'utility': score})
        print self.enc.getScalarNames(), buckets
        print self.cls.compute(recordNum=1, patternNZ=nz, classification={'bucketIdx': buckets[0], 'actValue': score}, learn=True, infer=True)

    print "Test"
    for pattern in patterns:
      ret = numpy.zeros(1024)
      enc = self.enc.encode(pattern)
      self.sp.compute(enc,False, ret)
      nz = numpy.nonzero(ret)[0].tolist()
      score = self.enc.getScoreIN(pattern)
      buckets = self.enc.getBucketIndices({'simpleUtility' : pattern,'utility' : score})
      print self.cls.compute(recordNum=1, patternNZ=nz, classification={'bucketIdx': buckets[0], 'actValue': None }, learn=False, infer=True)
Example #10
File: snippet.py, Project: szabo92/gistable
                       maxBoost=10.0,
                       seed=42,
                       spVerbosity=0)

    tm = TemporalMemory(columnDimensions=(20, ),
                        cellsPerColumn=6,
                        initialPermanence=0.2,
                        connectedPermanence=0.8,
                        minThreshold=5,
                        maxNewSynapseCount=6,
                        permanenceDecrement=0.1,
                        permanenceIncrement=0.1,
                        activationThreshold=4)

    classifier = CLAClassifier(steps=[1],
                               alpha=0.1,
                               actValueAlpha=0.3,
                               verbosity=0)

    sp.printParameters()
    print ""

    layer = Layer(encoder, sp, tm, classifier)

    firstWeek = 0

    i = 1
    for x in range(2000):
        if i == 1:
            tm.reset()
            if firstWeek == 0 and layer.getWeeksAnomaly() > 0 and layer.getWeeksAnomaly() < 7.0:
Example #11
class Sensor(Node):
	"""
	A super class only to group properties related to sensors.
	"""

	#region Constructor

	def __init__(self, parentNode, name):
		"""
		Initializes a new instance of this class.
		"""

		Node.__init__(self, parentNode, name, NodeType.sensor)

		#region Instance fields

		self.bits = []
		"""An array of the bit objects that compose the current output of this node."""

		self.dataSourceType = DataSourceType.file
		"""Type of the data source (File or Database)"""

		self.fileName = ''
		"""The input file name to be handled. Returns the input file name only if it is in the project directory, full path otherwise."""

		self._file = None
		"""File stream to handle the file."""

		self.databaseConnectionString = ""
		"""Connection string of the database."""

		self.databaseTable = ''
		"""Target table of the database."""

		self.databaseField = ''
		"""Target field of the database table."""

		self.inputFormat = InputFormat.htm
		"""Format of the node (HTM or raw data)"""

		self.inputRawDataType = InputRawDataType.string
		"""Data type of the raw input"""

		self.encoder = None
		"""Optional encoder to convert raw data to htm input and vice-versa."""

		self.encoderModule = ""
		"""Module name which encoder class is imported."""

		self.encoderClass = ""
		"""Class name which encode or decode values."""

		self.encoderParams = ""
		"""Parameters passed to the encoder class constructor."""

		self.predictionsMethod = PredictionsMethod.reconstruction
		"""Method used to get predicted values and their probabilities."""

		self.enableClassificationLearning = True
		"""Switch for classification learning"""

		self.enableClassificationInference = True
		"""Switch for classification inference"""

		self.currentValue = [None] * maxPreviousSteps
		"""Raw value encoded to network."""

		self.predictedValues = [None] * maxPreviousSteps
		"""Raw value decoded from network."""

		#endregion

		#region Statistics properties

		self.statsPrecisionRate = 0.

		#endregion

	#endregion

	#region Methods

	def getBit(self, x, y):
		"""
		Return the bit located at given position
		"""

		bit = self.bits[(y * self.width) + x]

		return bit

	def initialize(self):
		"""
		Initialize this node.
		"""

		Node.initialize(self)

		# Initialize input bits
		self.bits = []
		for x in range(self.width):
			for y in range(self.height):
				bit = Bit()
				bit.x = x
				bit.y = y
				self.bits.append(bit)

		if self.dataSourceType == DataSourceType.file:
			"""
		Initialize this node by opening the file and placing the cursor on the first record.
			"""

			# If file name provided is a relative path, use project file path
			if self.fileName != '' and os.path.dirname(self.fileName) == '':
				fullFileName = os.path.dirname(Global.project.fileName) + '/' + self.fileName
			else:
				fullFileName = self.fileName

			# Open file
			if not os.path.isfile(fullFileName):
				QtGui.QMessageBox.warning(None, "Warning", "Input stream file '" + fullFileName + "' was not found or specified.", QtGui.QMessageBox.Ok)
				return

			if self.inputFormat == InputFormat.htm:
				self._file = open(fullFileName, "rb")

				# Get dimensions of the record
				width = 0
				height = 0
				character = 0
				while True:
					# Read next character
					character = self._file.read(1)

					# Check if character is 'return' and not a number, i.e. if the first record was read
					if character == '\r':
						character = self._file.read(1)
					if character == '\n':
						break

					# Scan the line until a newline character is found, in order to get the width
					width = 0
					while character != '\n':
						width += 1
						character = self._file.read(1)
						if character == '\r':
							character = self._file.read(1)

					# Increments height
					height += 1

				# If the current file record dimensions do not match the sensor size, warn and abort
				if self.width != width or self.height != height:
					QtGui.QMessageBox.warning(None, "Warning", "'" + self.name + "': File input size (" + str(width) + " x " + str(height) + ") is different from sensor size (" + str(self.width) + " x " + str(self.height) + ").", QtGui.QMessageBox.Ok)
					return

				# Put the pointer back to initial position
				self._file.seek(0)
			elif self.inputFormat == InputFormat.raw:
				self._file = open(fullFileName)

				# Create an instance class for an encoder given its module, class and constructor params
				self.encoder = getInstantiatedClass(self.encoderModule, self.encoderClass, self.encoderParams)

				# If the encoder size exceeds the sensor size, warn and abort
				encoderSize = self.encoder.getWidth()
				sensorSize = self.width * self.height
				if encoderSize > sensorSize:
					QtGui.QMessageBox.warning(None, "Warning", "'" + self.name + "': Encoder size (" + str(encoderSize) + ") is different from sensor size (" + str(self.width) + " x " + str(self.height) + " = " + str(sensorSize) + ").", QtGui.QMessageBox.Ok)
					return

		elif self.dataSourceType == DataSourceType.database:
			pass

		# Create Classifier instance with appropriate parameters
		self.minProbabilityThreshold = 0.0001
		self.steps = []
		for step in range(maxFutureSteps):
			self.steps.append(step+1)
		self.classifier = CLAClassifier(steps=self.steps)

	def nextStep(self):
		"""
		Performs actions related to time step progression.
		"""

		# Update the state machine by removing the first element and appending a new element at the end
		if self.inputFormat == InputFormat.raw:
			if len(self.currentValue) > maxPreviousSteps:
				self.currentValue.remove(self.currentValue[0])
				self.predictedValues.remove(self.predictedValues[0])
			self.currentValue.append(None)
			self.predictedValues.append(None)

		Node.nextStep(self)
		for bit in self.bits:
			bit.nextStep()

		# Get record value from data source
		recordValue = None
		if self.dataSourceType == DataSourceType.file:
			recordValue = self.__getNextFileRecord()
		elif self.dataSourceType == DataSourceType.database:
			pass

		# Handle the value according to its type
		self._output = []
		if self.inputFormat == InputFormat.htm:

			# Initialize the array for representing the current record
			self._output = recordValue
		elif self.inputFormat == InputFormat.raw:

			# Convert the value to its respective data type
			rawValue = None
			if self.inputRawDataType == InputRawDataType.boolean:
				rawValue = bool(recordValue)
			elif self.inputRawDataType == InputRawDataType.integer:
				rawValue = int(recordValue)
			elif self.inputRawDataType == InputRawDataType.decimal:
				rawValue = float(recordValue)
			elif self.inputRawDataType == InputRawDataType.dateTime:
				rawValue = datetime.datetime.strptime(recordValue, "%m/%d/%y %H:%M")
			elif self.inputRawDataType == InputRawDataType.string:
				rawValue = str(recordValue)
			self.currentValue[maxPreviousSteps - 1] = rawValue

			# Pass raw value to encoder and get its respective array
			self._output = self.encoder.encode(rawValue)

		# Update sensor bits
		for i in range(len(self._output)):
			if self._output[i] > 0.:
				self.bits[i].isActive[maxPreviousSteps - 1] = True
			else:
				self.bits[i].isActive[maxPreviousSteps - 1] = False

		# Mark falsely predicted bits
		for bit in self.bits:
			if bit.isPredicted[maxPreviousSteps - 2] and not bit.isActive[maxPreviousSteps - 1]:
				bit.isFalselyPredicted[maxPreviousSteps - 1] = True

	def getPredictions(self):
		"""
		Get the predictions after an iteration.
		"""

		if self.inputFormat == InputFormat.raw:

			if self.predictionsMethod == PredictionsMethod.reconstruction:

				# Prepare list with predictions to be classified
				# This list contains the indexes of all bits that are predicted
				output = []
				for i in range(len(self.bits)):
					if self.bits[i].isPredicted[maxPreviousSteps - 1]:
						output.append(1)
					else:
						output.append(0)
				output = numpy.array(output)

				# Decode output and create predictions list
				fieldsDict, fieldsOrder = self.encoder.decode(output)
				self.predictedValues[maxPreviousSteps - 1] = dict()
				predictions = []
				if len(fieldsOrder) > 0:
					fieldName = fieldsOrder[0]
					predictedLabels = fieldsDict[fieldName][1].split(', ')
					predictedValues = fieldsDict[fieldName][0]
					for i in range(len(predictedLabels)):
						predictions.append([predictedValues[i], predictedLabels[i]])

				self.predictedValues[maxPreviousSteps - 1][1] = predictions

			elif self.predictionsMethod == PredictionsMethod.classification:
				# Classification estimates which values are likely to occur in the next time step.

				# Prepare list with predictions to be classified
				# This list contains the indexes of all bits that are predicted
				patternNZ = []
				for i in range(len(self.bits)):
					if self.bits[i].isActive[maxPreviousSteps - 1]:
						patternNZ.append(i)

				# Get the bucket index of the current value at the encoder
				actualValue = self.currentValue[maxPreviousSteps - 1]
				bucketIdx = self.encoder.getBucketIndices(actualValue)[0]

				# Perform classification
				clasResults = self.classifier.compute(recordNum=Global.currStep, patternNZ=patternNZ, classification={'bucketIdx': bucketIdx, 'actValue': actualValue}, learn=self.enableClassificationLearning, infer=self.enableClassificationInference)

				self.predictedValues[maxPreviousSteps - 1] = dict()
				for step in self.steps:

					# Calculate probability for each predicted value
					predictions = dict()
					for (actValue, prob) in zip(clasResults['actualValues'], clasResults[step]):
						if actValue in predictions:
							predictions[actValue] += prob
						else:
							predictions[actValue] = prob

					# Remove predictions with low probabilities
					maxVal = (None, None)
					for (actValue, prob) in predictions.items():
						if len(predictions) <= 1:
							break
						if maxVal[0] is None or prob >= maxVal[1]:
							if maxVal[0] is not None and maxVal[1] < self.minProbabilityThreshold:
								del predictions[maxVal[0]]
							maxVal = (actValue, prob)
						elif prob < self.minProbabilityThreshold:
							del predictions[actValue]

					# Sort the list of values from most probable to least probable
					# and truncate it to the max predictions per step limit
					predictions = sorted(predictions.iteritems(), key=operator.itemgetter(1), reverse=True)
					predictions = predictions[:maxFutureSteps]

					self.predictedValues[maxPreviousSteps - 1][step] = predictions

	def calculateStatistics(self):
		"""
		Calculate statistics after an iteration.
		"""

		if Global.currStep > 0:
			precision = 0.

			if self.inputFormat == InputFormat.htm:
				# Calculate the prediction precision by comparing which bits match between the predicted array and the active array
				# The prediction precision is the percentage of shared bits over the sum of all bits
				numSharedBitStates = 0
				numNonSharedBitStates = 0
				for bit in self.bits:
					if bit.isPredicted[maxPreviousSteps - 2] or bit.isActive[maxPreviousSteps - 1]:
						if bit.isPredicted[maxPreviousSteps - 2] == bit.isActive[maxPreviousSteps - 1]:
							numSharedBitStates += 1
						else:
							numNonSharedBitStates += 1
				precision = (numSharedBitStates / float(numNonSharedBitStates + numSharedBitStates)) * 100

			elif self.inputFormat == InputFormat.raw:
				# Calculate the prediction precision comparing if the current value is in the range of any prediction.
				predictions = self.predictedValues[maxPreviousSteps - 2][1]
				for predictedValue in predictions:
					min = 0.
					max = 0.
					value = predictedValue[0]
					if self.predictionsMethod == PredictionsMethod.reconstruction:
						min = math.floor(value[0])
						max = math.ceil(value[1])
					elif self.predictionsMethod == PredictionsMethod.classification:
						min = math.floor(value)
						max = math.ceil(value)
					if min <= self.currentValue[maxPreviousSteps - 1] <= max:
						precision = 100.
						break

			# The precision rate is the average of the precision calculated in every step
			self.statsPrecisionRate = (self.statsPrecisionRate + precision) / 2
		else:
			self.statsPrecisionRate = 0.

		for bit in self.bits:
			bit.calculateStatistics()

	def __getNextFileRecord(self):
		"""
		Get the next record from file.
		If file end is reached then start reading from scratch again.
		"""

		recordValue = None

		# If end of file was reached then place cursor on the first byte again
		if self._file.tell() == os.fstat(self._file.fileno()).st_size:
			self._file.seek(0)

		if self.inputFormat == InputFormat.htm:

			# Start reading from last position
			outputList = []
			character = 0
			for y in range(self.height):
				for x in range(self.width):
					character = self._file.read(1)
					if character == '1':
						outputList.append(1.)
					elif character == '0':
						outputList.append(0.)
					else:
						raise Exception("Invalid file format.")

				# Check if next char is a 'return', i.e. the row end
				character = self._file.read(1)
				if character == '\r':
					character = self._file.read(1)
				if character != '\n':
					raise Exception("Invalid file format.")

			# Check if next char is a 'return' character, i.e. the record end
			character = self._file.read(1)
			if character == '\r':
				character = self._file.read(1)
			if character != '\n' and character != '':
				raise Exception("Invalid file format.")

			# Return the output list as record value
			recordValue = numpy.array(outputList)

		elif self.inputFormat == InputFormat.raw:

			# Return the raw value as record value
			recordValue = self._file.readline()
			recordValue = recordValue.rstrip('\r\n').rstrip('\n')

		return recordValue

	#endregion
Example #12
class Model(object):
    def __init__(self, params):
        """

        :param params: A dict of modelParams in the format
         {'clParams':{'alpha':float,'steps':'1,2,3'},
          'sensorParams':{'encoders':{}
        """

        modelParams = params['modelParams']
        self._encoders = {
            field: getattr(nupic.encoders, args['type'])(**dict(
                (arg, val) for arg, val in args.items()
                if arg not in ['type', 'fieldname']))
            for field, args in modelParams['sensorParams']['encoders'].items()
            if args is not None
        }

        self.predicted_field = modelParams['predictedField']
        modelParams['spParams']['inputWidth'] = sum(
            map(lambda x: x.getWidth(), self._encoders.values()))
        self.sp = SpatialPooler(**modelParams['spParams'])
        self.sp.initialize(None, None)
        self.tm = TemporalMemory(**modelParams['tpParams'])
        self.tm.initialize(None, None)
        self.classifier = CLAClassifier(**modelParams['clParams'])

        self.spOutputs = {
            'bottomUpOut':
            np.zeros(modelParams['spParams']['columnCount'], dtype=np.float32),
            'anomalyScore':
            np.zeros(modelParams['spParams']['columnCount'], dtype=np.float32)
        }
        self.tmOutputs = {
            'bottomUpOut':
            np.zeros(modelParams['tpParams']['columnCount'] *
                     modelParams['tpParams']['cellsPerColumn'],
                     dtype=np.float32)
        }

        self.recordNum = 0

    def encode(self, inputs):
        """

        :param inputs: dict of input names to their values
        inputs
        :return: encoded inputs concatenated
        """
        return np.concatenate([
            encoder.encode(inputs[name])
            for name, encoder in self._encoders.iteritems()
        ])

    def run(self, inputs):
        """
        Runs a single timestep
        :param inputs: a dict mapping input names to their values
        :return: a dict of predictions
        """
        self.recordNum += 1
        encodings = self.encode(inputs)
        predictedValue = inputs[self.predicted_field]
        bucketIdx = self._encoders[self.predicted_field].getBucketIndices(
            predictedValue)[0]

        self.sp.compute({'bottomUpIn': encodings}, self.spOutputs)
        self.tm.compute({'bottomUpIn': self.spOutputs['bottomUpOut']},
                        self.tmOutputs)

        return self.classifier.compute(self.recordNum,
                                       self.tmOutputs['bottomUpOut'], {
                                           'bucketIdx': bucketIdx,
                                           'actValue': predictedValue
                                       }, True, True)
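The constructor docstring only sketches the expected parameter layout. Below is a hedged, illustrative params dict that satisfies the lookups performed in __init__() and run(); the field name, encoder type, and sizes are invented for the example, and the spParams/tpParams shown are incomplete (the real SpatialPooler and TemporalMemory constructors take many more arguments).

# Illustrative params for Model(); keys mirror the lookups in __init__/run,
# values are made up and incomplete.
params = {
    'modelParams': {
        'predictedField': 'consumption',
        'sensorParams': {
            'encoders': {
                'consumption': {'type': 'ScalarEncoder', 'fieldname': 'consumption',
                                'name': 'consumption', 'w': 21, 'n': 100,
                                'minval': 0.0, 'maxval': 100.0},
            },
        },
        'spParams': {'columnCount': 2048},  # plus the remaining SP kwargs
        'tpParams': {'columnCount': 2048, 'cellsPerColumn': 32},  # plus TM kwargs
        'clParams': {'steps': [1], 'alpha': 0.001},
    },
}
model = Model(params)
predictions = model.run({'consumption': 42.0})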