def init():
	global numColumns, numInputs, inputDimensions, columnDimensions, connectedSynapses

	inputDimensions = [32, 32]
	columnDimensions = [64, 64]
	initConnectedPct = 0.5

	# Check the matrix SM_01_32_32

	inputDimensions = numpy.array(inputDimensions, ndmin=1)
	columnDimensions = numpy.array(columnDimensions, ndmin=1)
	numColumns = columnDimensions.prod()
	numInputs = inputDimensions.prod()

	potentialPools = SparseBinaryMatrix(numInputs)
	potentialPools.resize(numColumns, numInputs)
	permanences = SparseMatrix(numColumns, numInputs)

	random = NupicRandom()
	tieBreaker = 0.01*numpy.array([random.getReal64() for i in xrange(numColumns)])

	connectedSynapses = SparseBinaryMatrix(numInputs)
	connectedSynapses.resize(numColumns, numInputs)

	connectedCounts = numpy.zeros(numColumns, dtype=realDType)

	potentialPools.replaceSparseRow(0, numpy.array([0, 1], dtype='int'))
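
A minimal driver for the snippet above might look like the following sketch. The import paths and the `realDType` alias are assumptions about this nupic-style codebase; the printed counts simply follow from the 64x64 column and 32x32 input dimensions used above.

import numpy
from nupic.bindings.math import (SparseMatrix, SparseBinaryMatrix,
                                 Random as NupicRandom)

realDType = numpy.float32  # assumed alias; the real module defines its own

init()
print numColumns, numInputs  # 4096 1024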
Example #2
    def __init__(self, width, seed):
        # Arbitrary value that's compatible with UInt32 in the proto schema
        # for testing serialization of python-native property
        self.width = width

        # For testing serialization of object implemented in the extension
        self.rand = Random(seed)
 def testEquals(self):
   r1 = Random(42)
   v1 = r1.getReal64()
   r2 = Random(42)
   v2 = r2.getReal64()
   self.assertEqual(v1, v2)
   self.assertEqual(r1, r2)
Example #4
  def testNupicRandomPickling(self):
    """Test pickling / unpickling of NuPIC randomness."""

    # Simple test: make sure that dumping / loading works...
    r = Random(42)
    pickledR = pickle.dumps(r)

    test1 = [r.getUInt32() for _ in xrange(10)]
    r = pickle.loads(pickledR)
    test2 = [r.getUInt32() for _ in xrange(10)]

    self.assertEqual(test1, test2,
                     "Simple NuPIC random pickle/unpickle failed.")

    # A little trickier: dump / load _after_ some numbers have been generated
    # (in the first test).  Things should still work...
    # ...the idea of this test is to make sure that the pickle code isn't just
    # saving the initial seed...
    pickledR = pickle.dumps(r)

    test3 = [r.getUInt32() for _ in xrange(10)]
    r = pickle.loads(pickledR)
    test4 = [r.getUInt32() for _ in xrange(10)]

    self.assertEqual(
        test3, test4,
        "NuPIC random pickle/unpickle didn't work for saving later state.")

    self.assertNotEqual(test1, test3,
                        "NuPIC random gave the same result twice?!?")
Example #5
  def testShuffleEmpty(self):
    r = Random(42)
    arr = numpy.zeros([0], dtype="uint32")

    r.shuffle(arr)

    self.assertEqual(arr.size, 0)
def main():
  """Measure serialization performance of Random
  """
  r = Random(42)

  # Measure serialization
  startSerializationTime = time.time()

  for i in range(_SERIALIZATION_LOOPS):
    r.saveToFile("RandonSerialization.stream")

  elapsedSerializationTime = time.time() - startSerializationTime

  # Measure deserialization
  startDeserializationTime = time.time()

  for _ in range(_DESERIALIZATION_LOOPS):
    r.loadFromFile("RandonSerialization.stream")

  elapsedDeserializationTime = time.time() - startDeserializationTime

  # Print report
  print(_SERIALIZATION_LOOPS, "Serialization loops in", \
        elapsedSerializationTime, "seconds.")
  print("\t", elapsedSerializationTime/_SERIALIZATION_LOOPS, "seconds per loop.")

  print(_DESERIALIZATION_LOOPS, "Deserialization loops in", \
        elapsedDeserializationTime, "seconds.")
  print("\t", elapsedDeserializationTime/_DESERIALIZATION_LOOPS, "seconds per loop.")
Example #8
 def _seed(self, seed=-1):
   """
   Initialize the random seed
   """
   if seed != -1:
     self.random = NupicRandom(seed)
   else:
     self.random = NupicRandom()
Example #9
    def testSampleNone(self):
        r = Random(42)
        population = numpy.array([1, 2, 3, 4], dtype="uint32")

        # Just make sure there is no exception thrown.
        choices = r.sample(population, 0)

        self.assertEqual(len(choices), 0)
Example #10
    def testSample(self):
        r = Random(42)
        population = numpy.array([1, 2, 3, 4], dtype="uint32")

        choices = r.sample(population, 2)

        self.assertEqual(choices[0], 2)
        self.assertEqual(choices[1], 1)
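
These examples exercise two call styles: the count form above returns a fresh array, while the buffer form (Example #13 below) fills a caller-provided array in place. A side-by-side sketch (import path assumed):

import numpy
from nupic.bindings.math import Random  # assumed import path

r = Random(42)
population = numpy.array([1, 2, 3, 4], dtype="uint32")

picked = r.sample(population, 2)       # count form: returns a new array
buf = numpy.zeros([2], dtype="uint32")
r.sample(population, buf)              # buffer form: fills buf in place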
Example #11
 def _seed(self, seed=-1):
   """
   Initialize the random seed
   """
   if seed != -1:
     self.random = NupicRandom(seed)
   else:
     self.random = NupicRandom()
Example #12
    def __init__(self,
                 columnDimensions=(2048, ),
                 cellsPerColumn=32,
                 activationThreshold=13,
                 initialPermanence=0.21,
                 connectedPermanence=0.50,
                 minThreshold=10,
                 maxNewSynapseCount=20,
                 permanenceIncrement=0.10,
                 permanenceDecrement=0.10,
                 predictedSegmentDecrement=0.0,
                 maxSegmentsPerCell=255,
                 maxSynapsesPerSegment=255,
                 seed=42,
                 **kwargs):
        # Error checking
        if not len(columnDimensions):
            raise ValueError(
                "Number of column dimensions must be greater than 0")

        if cellsPerColumn <= 0:
            raise ValueError(
                "Number of cells per column must be greater than 0")

        if minThreshold > activationThreshold:
            raise ValueError(
                "The min threshold can't be greater than the activation threshold"
            )

        # TODO: Validate all parameters (and add validation tests)

        # Save member variables
        self.columnDimensions = columnDimensions
        self.cellsPerColumn = cellsPerColumn
        self.activationThreshold = activationThreshold
        self.initialPermanence = initialPermanence
        self.connectedPermanence = connectedPermanence
        self.minThreshold = minThreshold
        self.maxNewSynapseCount = maxNewSynapseCount
        self.permanenceIncrement = permanenceIncrement
        self.permanenceDecrement = permanenceDecrement
        self.predictedSegmentDecrement = predictedSegmentDecrement
        self.maxSegmentsPerCell = maxSegmentsPerCell
        self.maxSynapsesPerSegment = maxSynapsesPerSegment

        # Initialize member variables
        self.connections = self.connectionsFactory(self.numberOfCells())
        self._random = Random(seed)
        self.activeCells = []
        self.winnerCells = []
        self.activeSegments = []
        self.matchingSegments = []

        self.numActiveConnectedSynapsesForSegment = []
        self.numActivePotentialSynapsesForSegment = []

        self.iteration = 0
        self.lastUsedIterationForSegment = []
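
Instantiating the class above is straightforward once the full implementation (including `connectionsFactory` and `numberOfCells`, which are defined elsewhere in the class) is importable; a sketch:

tm = TemporalMemory(columnDimensions=(2048,),
                    cellsPerColumn=32,
                    activationThreshold=13,
                    seed=42)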
Example #13
  def testSample(self):
    r = Random(42)
    population = numpy.array([1, 2, 3, 4], dtype="uint32")
    choices = numpy.zeros([2], dtype="uint32")

    r.sample(population, choices)

    self.assertEqual(choices[0], 2)
    self.assertEqual(choices[1], 4)
Example #14
  def testSampleNone(self):
    r = Random(42)
    population = numpy.array([1, 2, 3, 4], dtype="uint32")
    choices = numpy.zeros([0], dtype="uint32")

    # Just make sure there is no exception thrown.
    r.sample(population, choices)

    self.assertEqual(choices.size, 0)
Example #15
    def __init__(self,
                 columnDimensions=(2048, ),
                 cellsPerColumn=32,
                 activationThreshold=13,
                 initialPermanence=0.21,
                 connectedPermanence=0.50,
                 minThreshold=10,
                 maxNewSynapseCount=20,
                 permanenceIncrement=0.10,
                 permanenceDecrement=0.10,
                 predictedSegmentDecrement=0.0,
                 seed=42):
        """
    @param columnDimensions          (list)  Dimensions of the column space
    @param cellsPerColumn            (int)   Number of cells per column
    @param activationThreshold       (int)   If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active.
    @param initialPermanence         (float) Initial permanence of a new synapse.
    @param connectedPermanence       (float) If the permanence value for a synapse is greater than this value, it is said to be connected.
    @param minThreshold              (int)   If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column.
    @param maxNewSynapseCount        (int)   The maximum number of synapses added to a segment during learning.
    @param permanenceIncrement       (float) Amount by which permanences of synapses are incremented during learning.
    @param permanenceDecrement       (float) Amount by which permanences of synapses are decremented during learning.
    @param predictedSegmentDecrement (float) Amount by which active permanences of synapses of previously predicted but inactive segments are decremented.
    @param seed                      (int)   Seed for the random number generator.
    """
        # Error checking
        if not len(columnDimensions):
            raise ValueError(
                "Number of column dimensions must be greater than 0")

        if not cellsPerColumn > 0:
            raise ValueError(
                "Number of cells per column must be greater than 0")

        # TODO: Validate all parameters (and add validation tests)

        # Save member variables
        self.columnDimensions = columnDimensions
        self.cellsPerColumn = cellsPerColumn
        self.activationThreshold = activationThreshold
        self.initialPermanence = initialPermanence
        self.connectedPermanence = connectedPermanence
        self.minThreshold = minThreshold
        self.maxNewSynapseCount = maxNewSynapseCount
        self.permanenceIncrement = permanenceIncrement
        self.permanenceDecrement = permanenceDecrement
        self.predictedSegmentDecrement = predictedSegmentDecrement
        # Initialize member variables
        self.connections = Connections(self.numberOfCells())
        self._random = Random(seed)

        self.activeCells = set()
        self.predictiveCells = set()
        self.activeSegments = set()
        self.winnerCells = set()
        self.matchingSegments = set()
        self.matchingCells = set()
Example #17
    def testSample(self):
        r = Random(42)
        population = numpy.array([1, 2, 3, 4], dtype="uint32")
        choices = numpy.zeros([2], dtype="uint32")

        r.sample(population, choices)

        self.assertEqual(choices[0], 1)
        self.assertEqual(choices[1], 3)
  def testShuffle(self):
    r = Random(42)
    arr = numpy.array([1, 2, 3, 4], dtype="uint32")

    r.shuffle(arr)

    self.assertEqual(arr[0], 2)
    self.assertEqual(arr[1], 1)
    self.assertEqual(arr[2], 4)
    self.assertEqual(arr[3], 3)
    def __init__(self, dataWidth, randomSeed):
        if dataWidth <= 0:
            raise ValueError("Parameter dataWidth must be > 0")

        # Arbitrary value that's compatible with UInt32 in the proto schema
        # for testing serialization of python-native property
        self._dataWidth = dataWidth

        # For testing serialization of object implemented in the extension
        self._rand = Random(randomSeed)
Example #20
  def testShuffle(self):
    r = Random(42)
    arr = numpy.array([1, 2, 3, 4], dtype="uint32")

    r.shuffle(arr)

    self.assertEqual(arr[0], 3)
    self.assertEqual(arr[1], 4)
    self.assertEqual(arr[2], 2)
    self.assertEqual(arr[3], 1)
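
Both shuffle tests mutate the array in place, and the seed fully determines the permutation; the two tests expect different orderings presumably because they target different library versions. A minimal reproduction (import path assumed):

import numpy
from nupic.bindings.math import Random  # assumed import path

r = Random(42)
arr = numpy.array([1, 2, 3, 4], dtype="uint32")
r.shuffle(arr)                       # in place; permutation fixed by the seed
assert sorted(arr) == [1, 2, 3, 4]   # same elements, new order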
Example #21
    def setRandomSeed(self, seed):
        """
    Reset the nupic random generator. This is necessary to reset random seed to
    generate new sequences.

    @param seed       (int)
        Seed for nupic.bindings.Random.
    """
        self.seed = seed
        self._random = Random()
        self._random.setSeed(seed)
Example #22
  def _orderForCoordinate(cls, coordinate):
    """
    Returns the order for a coordinate.

    @param coordinate (numpy.array) Coordinate
    @return (float) A value in the interval [0, 1), representing the
                    order of the coordinate
    """
    seed = cls._hashCoordinate(coordinate)
    rng = Random(seed)
    return rng.getReal64()
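
Both coordinate helpers rely on the companion `cls._hashCoordinate`, which turns a coordinate into a deterministic 64-bit seed. A sketch of the idea (the encoder's actual implementation may differ):

import hashlib

def _hashCoordinate(coordinate):
  """Hash a coordinate into a deterministic 64-bit seed (sketch)."""
  coordinateStr = ",".join(str(v) for v in coordinate)
  return int(hashlib.md5(coordinateStr.encode("utf-8")).hexdigest(), 16) % (2 ** 64)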
Example #23
    def _bitForCoordinate(cls, coordinate, n):
        """
    Maps the coordinate to a bit in the SDR.

    @param coordinate (numpy.array) Coordinate
    @param n (int) The number of available bits in the SDR
    @return (int) The index to a bit in the SDR
    """
        seed = cls._hashCoordinate(coordinate)
        rng = Random(seed)
        return rng.getUInt32(n)
Example #24
    def _orderForCoordinate(cls, coordinate):
        """
    Returns the order for a coordinate.

    @param coordinate (numpy.array) Coordinate
    @return (float) A value in the interval [0, 1), representing the
                    order of the coordinate
    """
        seed = cls._hashCoordinate(coordinate)
        rng = Random(seed)
        return rng.getReal64()
Example #25
  def __init__(self,
               patternMachine,
               seed=42):
    """
    @param patternMachine (PatternMachine) Pattern machine instance
    """
    # Save member variables
    self.patternMachine = patternMachine

    # Initialize member variables
    self._random = Random(seed)
Example #26
  def _bitForCoordinate(cls, coordinate, n):
    """
    Maps the coordinate to a bit in the SDR.

    @param coordinate (numpy.array) Coordinate
    @param n (int) The number of available bits in the SDR
    @return (int) The index to a bit in the SDR
    """
    seed = cls._hashCoordinate(coordinate)
    rng = Random(seed)
    return rng.getUInt32(n)
Example #27
    def read(cls, proto):
        """
    Reads deserialized data from proto object

    @param proto (DynamicStructBuilder) Proto object

    @return (TemporalMemory) TemporalMemory instance
    """
        tm = object.__new__(cls)

        tm.columnDimensions = list(proto.columnDimensions)
        tm.cellsPerColumn = int(proto.cellsPerColumn)
        tm.activationThreshold = int(proto.activationThreshold)
        tm.initialPermanence = proto.initialPermanence
        tm.connectedPermanence = proto.connectedPermanence
        tm.minThreshold = int(proto.minThreshold)
        tm.maxNewSynapseCount = int(proto.maxNewSynapseCount)
        tm.permanenceIncrement = proto.permanenceIncrement
        tm.permanenceDecrement = proto.permanenceDecrement
        tm.predictedSegmentDecrement = proto.predictedSegmentDecrement

        tm.connections = Connections.read(proto.connections)
        tm._random = Random()
        tm._random.read(proto.random)

        tm.activeCells = set([int(x) for x in proto.activeCells])
        tm.predictiveCells = set([int(x) for x in proto.predictiveCells])
        tm.activeSegments = set([int(x) for x in proto.activeSegments])
        tm.winnerCells = set([int(x) for x in proto.winnerCells])
        tm.matchingSegments = set([int(x) for x in proto.matchingSegments])
        tm.matchingCells = set([int(x) for x in proto.matchingCells])

        return tm
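
Round-tripping presumably pairs this with a matching `write` method on the same class; a sketch (`TemporalMemoryProto` stands for the capnp schema this codebase assumes, and `write` is the assumed counterpart of `read`):

proto = TemporalMemoryProto.new_message()
tm.write(proto)                                   # assumed counterpart
restored = TemporalMemory.read(proto.as_reader())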
Example #28
    def read(cls, proto):
        """ Reads deserialized data from proto object

    @param proto (DynamicStructBuilder) Proto object

    @return (TemporalMemory) TemporalMemory instance
    """
        tm = object.__new__(cls)

        # capnp fails to save a tuple, so proto.columnDimensions was forced to
        # serialize as a list.  We prefer a tuple, however, because columnDimensions
        # should be regarded as immutable.
        tm.columnDimensions = tuple(proto.columnDimensions)
        tm.cellsPerColumn = int(proto.cellsPerColumn)
        tm.activationThreshold = int(proto.activationThreshold)
        tm.initialPermanence = proto.initialPermanence
        tm.connectedPermanence = proto.connectedPermanence
        tm.minThreshold = int(proto.minThreshold)
        tm.maxNewSynapseCount = int(proto.maxNewSynapseCount)
        tm.permanenceIncrement = proto.permanenceIncrement
        tm.permanenceDecrement = proto.permanenceDecrement
        tm.predictedSegmentDecrement = proto.predictedSegmentDecrement

        tm.connections = Connections.read(proto.connections)
        #pylint: disable=W0212
        tm._random = Random()
        tm._random.read(proto.random)
        #pylint: enable=W0212

        tm.activeCells = [int(x) for x in proto.activeCells]
        tm.winnerCells = [int(x) for x in proto.winnerCells]

        flatListLength = tm.connections.segmentFlatListLength()
        tm.numActiveConnectedSynapsesForSegment = [0] * flatListLength
        tm.numActivePotentialSynapsesForSegment = [0] * flatListLength

        tm.activeSegments = []
        tm.matchingSegments = []

        for i in xrange(len(proto.activeSegmentOverlaps)):
            protoSegmentOverlap = proto.activeSegmentOverlaps[i]

            segment = tm.connections.getSegment(protoSegmentOverlap.cell,
                                                protoSegmentOverlap.segment)
            tm.activeSegments.append(segment)

            overlap = protoSegmentOverlap.overlap
            tm.numActiveConnectedSynapsesForSegment[segment.flatIdx] = overlap

        for i in xrange(len(proto.matchingSegmentOverlaps)):
            protoSegmentOverlap = proto.matchingSegmentOverlaps[i]

            segment = tm.connections.getSegment(protoSegmentOverlap.cell,
                                                protoSegmentOverlap.segment)
            tm.matchingSegments.append(segment)

            overlap = protoSegmentOverlap.overlap
            tm.numActivePotentialSynapsesForSegment[segment.flatIdx] = overlap

        return tm
  def testSampleWrongDimensionsChoices(self):
    """Check that passing a multi-dimensional array throws a ValueError."""
    r = Random(42)
    population = numpy.array([1, 2, 3, 4], dtype="uint32")
    choices = numpy.zeros([2, 2], dtype="uint32")

    self.assertRaises(ValueError, r.sample, population, choices)
  def testSamplePopulationTooSmall(self):
    r = Random(42)
    population = numpy.array([1, 2, 3, 4], dtype="uint32")
    choices = numpy.zeros([5], dtype="uint32")

    self.assertRaises(
        ValueError, r.sample, population, choices)
Example #31
  def __init__(self,
               columnDimensions=(2048,),
               cellsPerColumn=32,
               activationThreshold=13,
               learningRadius=2048,
               initialPermanence=0.21,
               connectedPermanence=0.50,
               minThreshold=10,
               maxNewSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               seed=42):
    """
    @param columnDimensions    (list)  Dimensions of the column space
    @param cellsPerColumn      (int)   Number of cells per column
    @param activationThreshold (int)   If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active.
    @param learningRadius      (int)   Radius around cell from which it can sample to form distal dendrite connections.
    @param initialPermanence   (float) Initial permanence of a new synapse.
    @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected.
    @param minThreshold        (int)   If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column.
    @param maxNewSynapseCount  (int)   The maximum number of synapses added to a segment during learning.
    @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning.
    @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning.
    @param seed                (int)   Seed for the random number generator.
    """
    # TODO: Validate all parameters (and add validation tests)

    # Initialize member variables
    self.connections = Connections(columnDimensions, cellsPerColumn)
    self._random = Random(seed)

    self.activeCells = set()
    self.predictiveCells = set()
    self.activeSegments = set()
    self.numActiveSynapsesForSegment = dict()
    self.winnerCells = set()

    # Save member variables
    self.activationThreshold = activationThreshold
    self.learningRadius = learningRadius
    self.initialPermanence = initialPermanence
    self.connectedPermanence = connectedPermanence
    self.minThreshold = minThreshold
    self.maxNewSynapseCount = maxNewSynapseCount
    self.permanenceIncrement = permanenceIncrement
    self.permanenceDecrement = permanenceDecrement
Example #32
  def __setstate__(self, state):
    self.__dict__.update(state)

    # Initialize self.random as an instance of NupicRandom derived from the
    # previous numpy random state
    randomState = state["random"]
    if isinstance(randomState, numpy.random.mtrand.RandomState):
      self.random = NupicRandom(randomState.randint(sys.maxsize))
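
The isinstance branch exists so that old pickles, whose "random" entry was a numpy `RandomState`, still load. Exercising it directly (a sketch; `obj` stands in for any instance carrying this `__setstate__`):

legacyState = {"random": numpy.random.RandomState(seed=7)}
obj.__setstate__(legacyState)
assert isinstance(obj.random, NupicRandom)  # converted on load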
Example #33
  def __setstate__(self, state):
    self.__dict__.update(state)

    # Initialize self.random as an instance of NupicRandom derived from the
    # previous numpy random state
    randomState = state["random"]
    if isinstance(randomState, numpy.random.mtrand.RandomState):
      self.random = NupicRandom(randomState.randint(sys.maxint))
Example #34
  def __init__(self,
               trnCellShape=(32, 32),
               relayCellShape=(32, 32),
               inputShape=(32, 32),
               l6CellCount=1024,
               trnThreshold=10,
               relayThreshold=1,
               seed=42):
    """

    :param trnCellShape:
      a 2D shape for the TRN

    :param relayCellShape:
      a 2D shape for the relay cells

    :param l6CellCount:
      number of L6 cells

    :param trnThreshold:
      dendritic threshold for TRN cells. This is the min number of active L6
      cells on a dendrite for the TRN cell to recognize a pattern on that
      dendrite.

    :param relayThreshold:
      dendritic threshold for relay cells. This is the min number of active TRN
      cells on a dendrite for the relay cell to recognize a pattern on that
      dendrite.

    :param seed:
        Seed for the random number generator.
    """

    self.trnCellShape = trnCellShape
    self.trnWidth = trnCellShape[0]
    self.trnHeight = trnCellShape[1]
    self.relayCellShape = relayCellShape
    self.relayWidth = relayCellShape[0]
    self.relayHeight = relayCellShape[1]
    self.l6CellCount = l6CellCount
    self.trnThreshold = trnThreshold
    self.relayThreshold = relayThreshold
    self.inputShape = inputShape
    self.seed = seed
    self.rng = Random(seed)
    self.trnActivationThreshold = 5

    self.trnConnections = SparseMatrixConnections(
      trnCellShape[0]*trnCellShape[1], l6CellCount)

    self.relayConnections = SparseMatrixConnections(
      relayCellShape[0]*relayCellShape[1],
      trnCellShape[0]*trnCellShape[1])

    # Initialize/reset variables that are updated with calls to compute
    self.reset()

    self._initializeTRNToRelayCellConnections()
 def testEquals(self):
   r1 = Random(42)
   v1 = r1.getReal64()
   i1 = r1.getUInt32()
   r2 = Random(42)
   v2 = r2.getReal64()
   i2 = r2.getUInt32()
   self.assertEqual(v1, v2)
   self.assertEqual(r1, r2)
   self.assertEqual(i1, i2)
Example #36
    def __init__(self, n, w, num=100, seed=42):
        """
    @param n   (int)      Number of available bits in pattern
    @param w   (int/list) Number of on bits in pattern
                          If list, each pattern will have a `w` randomly
                          selected from the list.
    @param num (int)      Number of available patterns
    @param seed (int)     Seed for the random number generator
    """
        # Save member variables
        self._n = n
        self._w = w
        self._num = num

        # Initialize member variables
        self._random = Random(seed)
        self._patterns = dict()

        self._generate()
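
Once constructed, the machine above serves patterns by index; a usage sketch (`get` is the accessor the surrounding test framework is assumed to provide):

patterns = PatternMachine(n=1024, w=20, num=100, seed=42)
bits = patterns.get(0)  # a set of 20 on-bit indices out of 1024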
Example #37
    def testSampleSequenceRaisesTypeError(self):
        """Check that passing lists throws a TypeError.

    This behavior may change if sample is extended to understand sequences.
    """
        r = Random(42)
        population = [1, 2, 3, 4]

        self.assertRaises(TypeError, r.sample, population, 2)
Example #38
class PythonDummyRegion(object):
    """This class represents a serializable object that contains both native
  python properties as well as properties implemented in an extension.
  """
    def __init__(self, width, seed):
        # Arbitrary value that's compatible with UInt32 in the proto schema
        # for testing serialization of python-native property
        self.width = width

        # For testing serialization of object implemented in the extension
        self.rand = Random(seed)

    def write(self, proto):
        """ Serialize this instance into PyRegionProto builder. Emulates `PyRegion.write`.

    NOTE Called from nupic.bindings extension.

    :param proto: PyRegionProto builder
    """
        regionImpl = proto.regionImpl.as_struct(PythonDummyRegionProto)
        self.writeToProto(regionImpl)

    def writeToProto(self, proto):
        """ Serialize this instance into PythonDummyRegionProto builder

    :param proto: PythonDummyRegionProto builder
    """
        proto.width = self.width
        proto.random = self.rand.writeOut()

    @classmethod
    def read(cls, proto):
        """Deserialize from the given PyRegionProto reader. Emulates `PyRegion.read`.

    NOTE Called from nupic.bindings extension.

    :param proto: PyRegionProto reader

    :returns: Instance of PythonDummyRegion initialized from proto
    """
        regionImpl = proto.regionImpl.as_struct(PythonDummyRegionProto)
        return cls.readFromProto(regionImpl)

    @classmethod
    def readFromProto(cls, proto):
        """Deserialize from the given PythonDummyRegionProto reader

    :param proto: PythonDummyRegionProto reader

    :returns: Instance of PythonDummyRegion initialized from proto
    """
        obj = object.__new__(cls)
        obj.width = proto.width
        obj.rand = Random.readIn(proto.random)

        return obj
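
A proto round trip for the region above might look like this sketch (`PyRegionProto` comes from the schemas this test assumes):

proto = PyRegionProto.new_message()
region = PythonDummyRegion(width=8, seed=99)
region.write(proto)
restored = PythonDummyRegion.read(proto.as_reader())
assert restored.width == region.width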
  def __init__(self, dataWidth, randomSeed):
    if dataWidth <= 0:
      raise ValueError("Parameter dataWidth must be > 0")

    # Arbitrary value that's compatible with UInt32 in the proto schema
    # for testing serialization of python-native property
    self._dataWidth = dataWidth

    # For testing serialization of object implemented in the extension
    self._rand = Random(randomSeed)
Example #40
  def __init__(self,
               columnDimensions=(2048,),
               cellsPerColumn=32,
               activationThreshold=13,
               initialPermanence=0.21,
               connectedPermanence=0.50,
               minThreshold=10,
               maxNewSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               predictedSegmentDecrement = 0.004,
               seed=42):
    """
    @param columnDimensions          (list)  Dimensions of the column space
    @param cellsPerColumn            (int)   Number of cells per column
    @param activationThreshold       (int)   If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active.
    @param initialPermanence         (float) Initial permanence of a new synapse.
    @param connectedPermanence       (float) If the permanence value for a synapse is greater than this value, it is said to be connected.
    @param minThreshold              (int)   If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column.
    @param maxNewSynapseCount        (int)   The maximum number of synapses added to a segment during learning.
    @param permanenceIncrement       (float) Amount by which permanences of synapses are incremented during learning.
    @param permanenceDecrement       (float) Amount by which permanences of synapses are decremented during learning.
    @param predictedSegmentDecrement (float) Amount by which active permanences of synapses of previously predicted but inactive segments are decremented.
    @param seed                      (int)   Seed for the random number generator.
    """
    # Error checking
    if not len(columnDimensions):
      raise ValueError("Number of column dimensions must be greater than 0")

    if not cellsPerColumn > 0:
      raise ValueError("Number of cells per column must be greater than 0")

    # TODO: Validate all parameters (and add validation tests)

    # Save member variables
    self.columnDimensions = columnDimensions
    self.cellsPerColumn = cellsPerColumn
    self.activationThreshold = activationThreshold
    self.initialPermanence = initialPermanence
    self.connectedPermanence = connectedPermanence
    self.minThreshold = minThreshold
    self.maxNewSynapseCount = maxNewSynapseCount
    self.permanenceIncrement = permanenceIncrement
    self.permanenceDecrement = permanenceDecrement
    self.predictedSegmentDecrement = predictedSegmentDecrement
    # Initialize member variables
    self.connections = Connections(self.numberOfCells())
    self._random = Random(seed)

    self.activeCells = set()
    self.predictiveCells = set()
    self.activeSegments = set()
    self.winnerCells = set()
    self.matchingSegments = set()
    self.matchingCells = set()
Example #41
  def __init__(self,
               columnDimensions=(2048,),
               cellsPerColumn=32,
               activationThreshold=13,
               initialPermanence=0.21,
               connectedPermanence=0.50,
               minThreshold=10,
               maxNewSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               predictedSegmentDecrement=0.0,
               maxSegmentsPerCell=255,
               maxSynapsesPerSegment=255,
               seed=42,
               **kwargs):
    # Error checking
    if not len(columnDimensions):
      raise ValueError("Number of column dimensions must be greater than 0")

    if cellsPerColumn <= 0:
      raise ValueError("Number of cells per column must be greater than 0")

    if minThreshold > activationThreshold:
      raise ValueError(
        "The min threshold can't be greater than the activation threshold")

    # TODO: Validate all parameters (and add validation tests)

    # Save member variables
    self.columnDimensions = columnDimensions
    self.cellsPerColumn = cellsPerColumn
    self.activationThreshold = activationThreshold
    self.initialPermanence = initialPermanence
    self.connectedPermanence = connectedPermanence
    self.minThreshold = minThreshold
    self.maxNewSynapseCount = maxNewSynapseCount
    self.permanenceIncrement = permanenceIncrement
    self.permanenceDecrement = permanenceDecrement
    self.predictedSegmentDecrement = predictedSegmentDecrement
    self.maxSegmentsPerCell = maxSegmentsPerCell
    self.maxSynapsesPerSegment = maxSynapsesPerSegment

    # Initialize member variables
    self.connections = self.connectionsFactory(self.numberOfCells())
    self._random = Random(seed)
    self.activeCells = []
    self.winnerCells = []
    self.activeSegments = []
    self.matchingSegments = []

    self.numActiveConnectedSynapsesForSegment = []
    self.numActivePotentialSynapsesForSegment = []

    self.iteration = 0
    self.lastUsedIterationForSegment = []
Example #42
  def __init__(self,
               patternMachine,
               seed=42):
    """
    @param patternMachine (PatternMachine) Pattern machine instance
    """
    # Save member variables
    self.patternMachine = patternMachine

    # Initialize member variables
    self._random = Random(seed)
Example #43
    def setRandomSeed(self, seed):
        """
    Reset the nupic random generator. This is necessary to reset random seed to
    generate new sequences.

    @param seed       (int)
        Seed for nupic.bindings.Random.
    """
        self.seed = seed
        self._random = Random()
        self._random.setSeed(seed)
Example #44
    def readFromProto(cls, proto):
        """Deserialize from the given PythonDummyRegionProto reader

    :param proto: PythonDummyRegionProto reader

    :returns: Instance of PythonDummyRegion initialized from proto
    """
        obj = object.__new__(cls)
        obj.width = proto.width
        obj.rand = Random.readIn(proto.random)

        return obj
  def testNupicRandomPickling(self):
    """Test pickling / unpickling of NuPIC randomness."""

    # Simple test: make sure that dumping / loading works...
    r = Random(42)
    pickledR = pickle.dumps(r)

    test1 = [r.getUInt32() for _ in range(10)]
    r = pickle.loads(pickledR)
    test2 = [r.getUInt32() for _ in range(10)]

    self.assertEqual(test1, test2,
                     "Simple NuPIC random pickle/unpickle failed.")

    # A little trickier: dump / load _after_ some numbers have been generated
    # (in the first test).  Things should still work...
    # ...the idea of this test is to make sure that the pickle code isn't just
    # saving the initial seed...
    pickledR = pickle.dumps(r)

    test3 = [r.getUInt32() for _ in range(10)]
    r = pickle.loads(pickledR)
    test4 = [r.getUInt32() for _ in range(10)]

    self.assertEqual(
        test3, test4,
        "NuPIC random pickle/unpickle didn't work for saving later state.")

    self.assertNotEqual(test1, test3,
                        "NuPIC random gave the same result twice?!?")
def main():
    """Measure serialization performance of Random
  """
    r = Random(42)

    # Measure serialization
    startSerializationTime = time.time()

    for i in range(_SERIALIZATION_LOOPS):
        r.saveToFile("RandonSerialization.stream")

    elapsedSerializationTime = time.time() - startSerializationTime

    # Measure deserialization
    startDeserializationTime = time.time()

    for _ in range(_DESERIALIZATION_LOOPS):
        r.loadFromFile("RandonSerialization.stream")

    elapsedDeserializationTime = time.time() - startDeserializationTime

    # Print report
    print(_SERIALIZATION_LOOPS, "Serialization loops in", \
          elapsedSerializationTime, "seconds.")
    print("\t", elapsedSerializationTime / _SERIALIZATION_LOOPS,
          "seconds per loop.")

    print(_DESERIALIZATION_LOOPS, "Deserialization loops in", \
          elapsedDeserializationTime, "seconds.")
    print("\t", elapsedDeserializationTime / _DESERIALIZATION_LOOPS,
          "seconds per loop.")
  def __init__(self,
               columnDimensions=(2048,),
               basalInputDimensions=(),
               apicalInputDimensions=(),
               cellsPerColumn=32,
               activationThreshold=13,
               initialPermanence=0.21,
               connectedPermanence=0.50,
               minThreshold=10,
               sampleSize=20,
               permanenceIncrement=0.1,
               permanenceDecrement=0.1,
               predictedSegmentDecrement=0.0,
               maxNewSynapseCount=None,
               maxSynapsesPerSegment=-1,
               maxSegmentsPerCell=None,
               seed=42):

    self.columnDimensions = columnDimensions
    self.numColumns = self._numPoints(columnDimensions)
    self.basalInputDimensions = basalInputDimensions
    self.apicalInputDimensions = apicalInputDimensions

    self.cellsPerColumn = cellsPerColumn
    self.initialPermanence = initialPermanence
    self.connectedPermanence = connectedPermanence
    self.minThreshold = minThreshold

    self.sampleSize = sampleSize
    if maxNewSynapseCount is not None:
      print "Parameter 'maxNewSynapseCount' is deprecated. Use 'sampleSize'."
      self.sampleSize = maxNewSynapseCount

    if maxSegmentsPerCell is not None:
      print "Warning: ignoring parameter 'maxSegmentsPerCell'"

    self.permanenceIncrement = permanenceIncrement
    self.permanenceDecrement = permanenceDecrement
    self.predictedSegmentDecrement = predictedSegmentDecrement
    self.activationThreshold = activationThreshold
    self.maxSynapsesPerSegment = maxSynapsesPerSegment

    self.basalConnections = SparseMatrixConnections(
      self.numColumns*cellsPerColumn, self._numPoints(basalInputDimensions))
    self.apicalConnections = SparseMatrixConnections(
      self.numColumns*cellsPerColumn, self._numPoints(apicalInputDimensions))
    self.rng = Random(seed)
    self.activeCells = EMPTY_UINT_ARRAY
    self.winnerCells = EMPTY_UINT_ARRAY
    self.prevPredictedCells = EMPTY_UINT_ARRAY
Example #49
  def __init__(self,
               columnDimensions=(2048,),
               cellsPerColumn=32,
               activationThreshold=13,
               learningRadius=2048,
               initialPermanence=0.21,
               connectedPermanence=0.50,
               minThreshold=10,
               maxNewSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               seed=42):
    """
    @param columnDimensions    (list)  Dimensions of the column space
    @param cellsPerColumn      (int)   Number of cells per column
    @param activationThreshold (int)   If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active.
    @param learningRadius      (int)   Radius around cell from which it can sample to form distal dendrite connections.
    @param initialPermanence   (float) Initial permanence of a new synapse.
    @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected.
    @param minThreshold        (int)   If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column.
    @param maxNewSynapseCount  (int)   The maximum number of synapses added to a segment during learning.
    @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning.
    @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning.
    @param seed                (int)   Seed for the random number generator.
    """
    # TODO: Validate all parameters (and add validation tests)

    # Initialize member variables
    self.connections = Connections(columnDimensions, cellsPerColumn)
    self._random = Random(seed)

    self.activeCells = set()
    self.predictiveCells = set()
    self.activeSegments = set()
    self.winnerCells = set()

    # Save member variables
    self.activationThreshold = activationThreshold
    self.learningRadius = learningRadius
    self.initialPermanence = initialPermanence
    self.connectedPermanence = connectedPermanence
    self.minThreshold = minThreshold
    self.maxNewSynapseCount = maxNewSynapseCount
    self.permanenceIncrement = permanenceIncrement
    self.permanenceDecrement = permanenceDecrement
Example #50
  def __init__(self,
               n,
               w,
               num=100,
               seed=42):
    """
    @param n   (int)      Number of available bits in pattern
    @param w   (int/list) Number of on bits in pattern
                          If list, each pattern will have a `w` randomly
                          selected from the list.
    @param num (int)      Number of available patterns
    @param seed (int)     Seed for the random number generator
    """
    # Save member variables
    self._n = n
    self._w = w
    self._num = num

    # Initialize member variables
    self._random = Random(seed)
    self._patterns = dict()

    self._generate()
Example #51
class TemporalMemory(object):
  """
  Class implementing the Temporal Memory algorithm.
  """

  def __init__(self,
               columnDimensions=(2048,),
               cellsPerColumn=32,
               activationThreshold=13,
               learningRadius=2048,
               initialPermanence=0.21,
               connectedPermanence=0.50,
               minThreshold=10,
               maxNewSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               seed=42):
    """
    @param columnDimensions    (list)  Dimensions of the column space
    @param cellsPerColumn      (int)   Number of cells per column
    @param activationThreshold (int)   If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active.
    @param learningRadius      (int)   Radius around cell from which it can sample to form distal dendrite connections.
    @param initialPermanence   (float) Initial permanence of a new synapse.
    @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected.
    @param minThreshold        (int)   If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column.
    @param maxNewSynapseCount  (int)   The maximum number of synapses added to a segment during learning.
    @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning.
    @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning.
    @param seed                (int)   Seed for the random number generator.
    """
    # Error checking
    if not len(columnDimensions):
      raise ValueError("Number of column dimensions must be greater than 0")

    if not cellsPerColumn > 0:
      raise ValueError("Number of cells per column must be greater than 0")

    # TODO: Validate all parameters (and add validation tests)

    # Save member variables
    self.columnDimensions = columnDimensions
    self.cellsPerColumn = cellsPerColumn
    self.activationThreshold = activationThreshold
    self.learningRadius = learningRadius
    self.initialPermanence = initialPermanence
    self.connectedPermanence = connectedPermanence
    self.minThreshold = minThreshold
    self.maxNewSynapseCount = maxNewSynapseCount
    self.permanenceIncrement = permanenceIncrement
    self.permanenceDecrement = permanenceDecrement

    # Initialize member variables
    self.connections = Connections(self.numberOfCells())
    self._random = Random(seed)

    self.activeCells = set()
    self.predictiveCells = set()
    self.activeSegments = set()
    self.winnerCells = set()


  # ==============================
  # Main functions
  # ==============================

  def compute(self, activeColumns, learn=True):
    """
    Feeds input record through TM, performing inference and learning.
    Updates member variables with new state.

    @param activeColumns (set)  Indices of active columns in `t`
    @param learn         (bool) Whether or not learning is enabled
    """
    (activeCells,
     winnerCells,
     activeSegments,
     predictiveCells,
     predictedColumns) = self.computeFn(activeColumns,
                                        self.predictiveCells,
                                        self.activeSegments,
                                        self.activeCells,
                                        self.winnerCells,
                                        self.connections,
                                        learn=learn)

    self.activeCells = activeCells
    self.winnerCells = winnerCells
    self.activeSegments = activeSegments
    self.predictiveCells = predictiveCells


  def computeFn(self,
                activeColumns,
                prevPredictiveCells,
                prevActiveSegments,
                prevActiveCells,
                prevWinnerCells,
                connections,
                learn=True):
    """
    'Functional' version of compute.
    Returns new state.

    @param activeColumns       (set)         Indices of active columns in `t`
    @param prevPredictiveCells (set)         Indices of predictive cells in `t-1`
    @param prevActiveSegments  (set)         Indices of active segments in `t-1`
    @param prevActiveCells     (set)         Indices of active cells in `t-1`
    @param prevWinnerCells     (set)         Indices of winner cells in `t-1`
    @param connections         (Connections) Connectivity of layer
    @param learn               (bool)        Whether or not learning is enabled

    @return (tuple) Contains:
                      `activeCells`      (set),
                      `winnerCells`      (set),
                      `activeSegments`   (set),
                      `predictiveCells`  (set),
                      `predictedColumns` (set)
    """
    activeCells = set()
    winnerCells = set()

    (_activeCells,
     _winnerCells,
     predictedColumns) = self.activateCorrectlyPredictiveCells(
       prevPredictiveCells,
       activeColumns)

    activeCells.update(_activeCells)
    winnerCells.update(_winnerCells)

    (_activeCells,
     _winnerCells,
     learningSegments) = self.burstColumns(activeColumns,
                                           predictedColumns,
                                           prevActiveCells,
                                           prevWinnerCells,
                                           connections)

    activeCells.update(_activeCells)
    winnerCells.update(_winnerCells)

    if learn:
      self.learnOnSegments(prevActiveSegments,
                           learningSegments,
                           prevActiveCells,
                           winnerCells,
                           prevWinnerCells,
                           connections)

    (activeSegments,
     predictiveCells) = self.computePredictiveCells(activeCells, connections)

    return (activeCells,
            winnerCells,
            activeSegments,
            predictiveCells,
            predictedColumns)


  def reset(self):
    """
    Indicates the start of a new sequence. Resets sequence state of the TM.
    """
    self.activeCells = set()
    self.predictiveCells = set()
    self.activeSegments = set()
    self.winnerCells = set()


  # ==============================
  # Phases
  # ==============================

  def activateCorrectlyPredictiveCells(self,
                                       prevPredictiveCells,
                                       activeColumns):
    """
    Phase 1: Activate the correctly predictive cells.

    Pseudocode:

      - for each prev predictive cell
        - if in active column
          - mark it as active
          - mark it as winner cell
          - mark column as predicted

    @param prevPredictiveCells (set) Indices of predictive cells in `t-1`
    @param activeColumns       (set) Indices of active columns in `t`

    @return (tuple) Contains:
                      `activeCells`      (set),
                      `winnerCells`      (set),
                      `predictedColumns` (set)
    """
    activeCells = set()
    winnerCells = set()
    predictedColumns = set()

    for cell in prevPredictiveCells:
      column = self.columnForCell(cell)

      if column in activeColumns:
        activeCells.add(cell)
        winnerCells.add(cell)
        predictedColumns.add(column)

    return activeCells, winnerCells, predictedColumns


  def burstColumns(self,
                   activeColumns,
                   predictedColumns,
                   prevActiveCells,
                   prevWinnerCells,
                   connections):
    """
    Phase 2: Burst unpredicted columns.

    Pseudocode:

      - for each unpredicted active column
        - mark all cells as active
        - mark the best matching cell as winner cell
          - (learning)
            - if it has no matching segment
              - (optimization) if there are prev winner cells
                - add a segment to it
            - mark the segment as learning

    @param activeColumns                   (set)         Indices of active columns in `t`
    @param predictedColumns                (set)         Indices of predicted columns in `t`
    @param prevActiveCells                 (set)         Indices of active cells in `t-1`
    @param prevWinnerCells                 (set)         Indices of winner cells in `t-1`
    @param connections                     (Connections) Connectivity of layer

    @return (tuple) Contains:
                      `activeCells`      (set),
                      `winnerCells`      (set),
                      `learningSegments` (set)
    """
    activeCells = set()
    winnerCells = set()
    learningSegments = set()

    unpredictedColumns = activeColumns - predictedColumns

    for column in unpredictedColumns:
      cells = self.cellsForColumn(column)
      activeCells.update(cells)

      (bestCell,
       bestSegment) = self.bestMatchingCell(cells,
                                            prevActiveCells,
                                            connections)
      winnerCells.add(bestCell)

      if bestSegment is None and len(prevWinnerCells):
        bestSegment = connections.createSegment(bestCell)

      if bestSegment is not None:
        learningSegments.add(bestSegment)

    return activeCells, winnerCells, learningSegments


  def learnOnSegments(self,
                      prevActiveSegments,
                      learningSegments,
                      prevActiveCells,
                      winnerCells,
                      prevWinnerCells,
                      connections):
    """
    Phase 3: Perform learning by adapting segments.

    Pseudocode:

      - (learning) for each prev active or learning segment
        - if learning segment or from winner cell
          - strengthen active synapses
          - weaken inactive synapses
        - if learning segment
          - add some synapses to the segment
            - subsample from prev winner cells

    @param prevActiveSegments           (set)         Indices of active segments in `t-1`
    @param learningSegments             (set)         Indices of learning segments in `t`
    @param prevActiveCells              (set)         Indices of active cells in `t-1`
    @param winnerCells                  (set)         Indices of winner cells in `t`
    @param prevWinnerCells              (set)         Indices of winner cells in `t-1`
    @param connections                  (Connections) Connectivity of layer
    """
    for segment in prevActiveSegments | learningSegments:
      isLearningSegment = segment in learningSegments
      isFromWinnerCell = connections.cellForSegment(segment) in winnerCells

      activeSynapses = self.activeSynapsesForSegment(
        segment, prevActiveCells, connections)

      if isLearningSegment or isFromWinnerCell:
        self.adaptSegment(segment, activeSynapses, connections)

      if isLearningSegment:
        n = self.maxNewSynapseCount - len(activeSynapses)

        for presynapticCell in self.pickCellsToLearnOn(n,
                                                       segment,
                                                       prevWinnerCells,
                                                       connections):
          connections.createSynapse(segment,
                                    presynapticCell,
                                    self.initialPermanence)


  def computePredictiveCells(self, activeCells, connections):
    """
    Phase 4: Compute predictive cells due to lateral input
    on distal dendrites.

    Pseudocode:

      - for each distal dendrite segment with activity >= activationThreshold
        - mark the segment as active
        - mark the cell as predictive

    Forward propagates activity from active cells to the synapses that touch
    them, to determine which synapses are active.

    @param activeCells (set)         Indices of active cells in `t`
    @param connections (Connections) Connectivity of layer

    @return (tuple) Contains:
                      `activeSegments`  (set),
                      `predictiveCells` (set)
    """
    numActiveConnectedSynapsesForSegment = defaultdict(int)
    activeSegments = set()
    predictiveCells = set()

    for cell in activeCells:
      for synapseData in connections.synapsesForPresynapticCell(cell).values():
        segment = synapseData.segment
        permanence = synapseData.permanence

        if permanence >= self.connectedPermanence:
          numActiveConnectedSynapsesForSegment[segment] += 1

          if (numActiveConnectedSynapsesForSegment[segment] >=
              self.activationThreshold):
            activeSegments.add(segment)
            predictiveCells.add(connections.cellForSegment(segment))

    return activeSegments, predictiveCells


  # ==============================
  # Helper functions
  # ==============================

  def bestMatchingCell(self, cells, activeCells, connections):
    """
    Gets the cell with the best matching segment
    (see `TM.bestMatchingSegment`) that has the largest number of active
    synapses of all best matching segments.

    If none were found, pick the least used cell (see `TM.leastUsedCell`).

    @param cells                       (set)         Indices of cells
    @param activeCells                 (set)         Indices of active cells
    @param connections                 (Connections) Connectivity of layer

    @return (tuple) Contains:
                      `cell`        (int),
                      `bestSegment` (int)
    """
    maxSynapses = 0
    bestCell = None
    bestSegment = None

    for cell in cells:
      segment, numActiveSynapses = self.bestMatchingSegment(
        cell, activeCells, connections)

      if segment is not None and numActiveSynapses > maxSynapses:
        maxSynapses = numActiveSynapses
        bestCell = cell
        bestSegment = segment

    if bestCell is None:
      bestCell = self.leastUsedCell(cells, connections)

    return bestCell, bestSegment


  def bestMatchingSegment(self, cell, activeCells, connections):
    """
    Gets the segment on a cell with the largest number of active synapses,
    including all synapses with non-zero permanences.

    @param cell                        (int)         Cell index
    @param activeCells                 (set)         Indices of active cells
    @param connections                 (Connections) Connectivity of layer

    @return (tuple) Contains:
                      `segment`                 (int),
                      `connectedActiveSynapses` (set)
    """
    maxSynapses = self.minThreshold
    bestSegment = None
    bestNumActiveSynapses = None

    for segment in connections.segmentsForCell(cell):
      numActiveSynapses = 0

      for synapse in connections.synapsesForSegment(segment):
        synapseData = connections.dataForSynapse(synapse)
        if synapseData.presynapticCell in activeCells:
          numActiveSynapses += 1

      if numActiveSynapses >= maxSynapses:
        maxSynapses = numActiveSynapses
        bestSegment = segment
        bestNumActiveSynapses = numActiveSynapses

    return bestSegment, bestNumActiveSynapses


  def leastUsedCell(self, cells, connections):
    """
    Gets the cell with the smallest number of segments.
    Breaks ties randomly.

    @param cells       (set)         Indices of cells
    @param connections (Connections) Connectivity of layer

    @return (int) Cell index
    """
    leastUsedCells = set()
    minNumSegments = float("inf")

    for cell in cells:
      numSegments = len(connections.segmentsForCell(cell))

      if numSegments < minNumSegments:
        minNumSegments = numSegments
        leastUsedCells = set()

      if numSegments == minNumSegments:
        leastUsedCells.add(cell)

    i = self._random.getUInt32(len(leastUsedCells))
    return sorted(leastUsedCells)[i]


  @staticmethod
  def activeSynapsesForSegment(segment, activeCells, connections):
    """
    Returns the synapses on a segment that are active due to lateral input
    from active cells.

    @param segment     (int)         Segment index
    @param activeCells (set)         Indices of active cells
    @param connections (Connections) Connectivity of layer

    @return (set) Indices of active synapses on segment
    """
    synapses = set()

    for synapse in connections.synapsesForSegment(segment):
      synapseData = connections.dataForSynapse(synapse)

      if synapseData.presynapticCell in activeCells:
        synapses.add(synapse)

    return synapses


  def adaptSegment(self, segment, activeSynapses, connections):
    """
    Updates synapses on segment.
    Strengthens active synapses; weakens inactive synapses.

    @param segment        (int)         Segment index
    @param activeSynapses (set)         Indices of active synapses
    @param connections    (Connections) Connectivity of layer
    """
    for synapse in connections.synapsesForSegment(segment):
      synapseData = connections.dataForSynapse(synapse)
      permanence = synapseData.permanence

      if synapse in activeSynapses:
        permanence += self.permanenceIncrement
      else:
        permanence -= self.permanenceDecrement

      # Keep permanence within min/max bounds
      permanence = max(0.0, min(1.0, permanence))

      connections.updateSynapsePermanence(synapse, permanence)


  def pickCellsToLearnOn(self, n, segment, winnerCells, connections):
    """
    Pick cells to form distal connections to.

    TODO: Respect topology and learningRadius

    @param n           (int)         Number of cells to pick
    @param segment     (int)         Segment index
    @param winnerCells (set)         Indices of winner cells in `t`
    @param connections (Connections) Connectivity of layer

    @return (set) Indices of cells picked
    """
    candidates = set(winnerCells)

    # Remove cells that are already synapsed on by this segment
    for synapse in connections.synapsesForSegment(segment):
      synapseData = connections.dataForSynapse(synapse)
      presynapticCell = synapseData.presynapticCell

      if presynapticCell in candidates:
        candidates.remove(presynapticCell)

    n = min(n, len(candidates))
    candidates = sorted(candidates)
    cells = set()

    # Pick n cells randomly
    for _ in range(n):
      i = self._random.getUInt32(len(candidates))
      cells.add(candidates[i])
      del candidates[i]

    return cells


  def columnForCell(self, cell):
    """
    Returns the index of the column that a cell belongs to.

    @param cell (int) Cell index

    @return (int) Column index
    """
    self._validateCell(cell)

    return int(cell / self.cellsPerColumn)


  def cellsForColumn(self, column):
    """
    Returns the indices of cells that belong to a column.

    @param column (int) Column index

    @return (set) Cell indices
    """
    self._validateColumn(column)

    start = self.cellsPerColumn * column
    end = start + self.cellsPerColumn
    return set(range(start, end))


  def numberOfColumns(self):
    """
    Returns the number of columns in this layer.

    @return (int) Number of columns
    """
    return reduce(mul, self.columnDimensions, 1)


  def numberOfCells(self):
    """
    Returns the number of cells in this layer.

    @return (int) Number of cells
    """
    return self.numberOfColumns() * self.cellsPerColumn


  def mapCellsToColumns(self, cells):
    """
    Maps cells to the columns they belong to

    @param cells (set) Cells

    @return (dict) Mapping from columns to their cells in `cells`
    """
    cellsForColumns = defaultdict(set)

    for cell in cells:
      column = self.columnForCell(cell)
      cellsForColumns[column].add(cell)

    return cellsForColumns


  def _validateColumn(self, column):
    """
    Raises an error if column index is invalid.

    @param column (int) Column index
    """
    if column >= self.numberOfColumns() or column < 0:
      raise IndexError("Invalid column")


  def _validateCell(self, cell):
    """
    Raises an error if cell index is invalid.

    @param cell (int) Cell index
    """
    if cell >= self.numberOfCells() or cell < 0:
      raise IndexError("Invalid cell")


  @classmethod
  def getCellIndices(cls, cells):
    return [cls.getCellIndex(c) for c in cells]


  @staticmethod
  def getCellIndex(cell):
    return cell.idx
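
# ---------------------------------------------------------------------------
# A minimal, hypothetical sketch (not part of the original example) of the
# cell/column index arithmetic used by columnForCell() and cellsForColumn()
# above, and of the permanence clamping done in adaptSegment().
# cellsPerColumn = 32 matches the TemporalMemory default further down.
cellsPerColumn = 32

def columnForCell(cell):
  # Mirrors TemporalMemory.columnForCell(): integer division by cellsPerColumn
  return int(cell / cellsPerColumn)

def cellsForColumn(column):
  # Mirrors TemporalMemory.cellsForColumn(): the contiguous block of cell
  # indices owned by a column
  start = cellsPerColumn * column
  return set(range(start, start + cellsPerColumn))

assert columnForCell(70) == 2
assert cellsForColumn(2) == set(range(64, 96))

# adaptSegment() clamps each updated permanence into [0.0, 1.0]:
permanenceIncrement = 0.10
permanence = 0.95
permanence = max(0.0, min(1.0, permanence + permanenceIncrement))
assert permanence == 1.0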
Beispiel #52
class KNNClassifierRegion(PyRegion):
  """
  KNNClassifierRegion implements the k-nearest-neighbor classification
  algorithm. By default it performs vanilla 1-nearest-neighbor classification
  using the L2 (Euclidean) distance norm. There are options for using
  different norms as well as various ways of sparsifying the input.

  Note: categories are ints >= 0.
  """

  __VERSION__ = 1


  @classmethod
  def getSpec(cls):
    ns = dict(
        description=KNNClassifierRegion.__doc__,
        singleNodeOnly=True,
        inputs=dict(
          categoryIn=dict(
            description='Vector of categories of the input sample',
            dataType='Real32',
            count=0,
            required=True,
            regionLevel=True,
            isDefaultInput=False,
            requireSplitterMap=False),

          bottomUpIn=dict(
            description='Belief values over children\'s groups',
            dataType='Real32',
            count=0,
            required=True,
            regionLevel=False,
            isDefaultInput=True,
            requireSplitterMap=False),

          partitionIn=dict(
            description='Partition ID of the input sample',
            dataType='Real32',
            count=0,
            required=True,
            regionLevel=True,
            isDefaultInput=False,
            requireSplitterMap=False),

          auxDataIn=dict(
            description='Auxiliary data from the sensor',
            dataType='Real32',
            count=0,
            required=False,
            regionLevel=True,
            isDefaultInput=False,
            requireSplitterMap=False)
        ),


        outputs=dict(
          categoriesOut=dict(
          description='A vector representing, for each category '
                      'index, the likelihood that the input to the node belongs '
                      'to that category based on the number of neighbors of '
                      'that category that are among the nearest K.',
          dataType='Real32',
          count=0,
          regionLevel=True,
          isDefaultOutput=True),

          bestPrototypeIndices=dict(
          description='A vector that lists, in descending order of '
                      'the match, the positions of the prototypes '
                      'that best match the input pattern.',
          dataType='Real32',
          count=0,
          regionLevel=True,
          isDefaultOutput=False),

          categoryProbabilitiesOut=dict(
          description='A vector representing, for each category '
                      'index, the probability that the input to the node belongs '
                      'to that category based on the distance to the nearest '
                      'neighbor of each category.',
          dataType='Real32',
          count=0,
          regionLevel=True,
          isDefaultOutput=True),

        ),

        parameters=dict(
          learningMode=dict(
            description='Boolean (0/1) indicating whether or not a region '
                        'is in learning mode.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=1,
            accessMode='ReadWrite'),

          inferenceMode=dict(
            description='Boolean (0/1) indicating whether or not a region '
                        'is in inference mode.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=0,
            accessMode='ReadWrite'),

          acceptanceProbability=dict(
            description='During learning, inputs are learned with '
                        'probability equal to this parameter. '
                        'If set to 1.0, the default, '
                        'all inputs will be considered '
                        '(subject to other tests).',
            dataType='Real32',
            count=1,
            constraints='',
            defaultValue=1.0,
            #accessMode='Create'),
            accessMode='ReadWrite'), # and Create too

          confusion=dict(
            description='Confusion matrix accumulated during inference. '
                        'Reset with reset(). This is available to Python '
                        'client code only.',
            dataType='Handle',
            count=2,
            constraints='',
            defaultValue=None,
            accessMode='Read'),

          activeOutputCount=dict(
            description='The number of active elements in the '
                        '"categoriesOut" output.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=0,
            accessMode='Read'),

          categoryCount=dict(
            description='An integer indicating the number of '
                        'categories that have been learned',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=None,
            accessMode='Read'),

          patternCount=dict(
            description='Number of patterns learned by the classifier.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=None,
            accessMode='Read'),

          patternMatrix=dict(
            description='The actual patterns learned by the classifier, '
                        'returned as a matrix.',
            dataType='Handle',
            count=1,
            constraints='',
            defaultValue=None,
            accessMode='Read'),

          k=dict(
            description='The number of nearest neighbors to use '
                        'during inference.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=1,
            accessMode='Create'),

          maxCategoryCount=dict(
            description='The maximal number of categories the '
                        'classifier will distinguish between.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=2,
            accessMode='Create'),

          distanceNorm=dict(
            description='The norm to use for a distance metric (i.e., '
                        'the "p" in Lp-norm)',
            dataType='Real32',
            count=1,
            constraints='',
            defaultValue=2.0,
            accessMode='ReadWrite'),
            #accessMode='Create'),

          distanceMethod=dict(
            description='Method used to compute distances between inputs and '
              'prototypes. Possible options are norm, rawOverlap, '
              'pctOverlapOfLarger, pctOverlapOfProto, and pctOverlapOfInput.',
            dataType="Byte",
            count=0,
            constraints='enum: norm, rawOverlap, pctOverlapOfLarger, '
              'pctOverlapOfProto, pctOverlapOfInput',
            defaultValue='norm',
            accessMode='ReadWrite'),

          outputProbabilitiesByDist=dict(
            description='If True, categoryProbabilitiesOut is the probability of '
              'each category based on the distance to the nearest neighbor of '
              'each category. If False, categoryProbabilitiesOut is the '
              'percentage of neighbors among the top K that are of each category.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=0,
            accessMode='Create'),

          distThreshold=dict(
            description='Distance Threshold.  If a pattern that '
                        'is less than distThreshold apart from '
                        'the input pattern already exists in the '
                        'KNN memory, then the input pattern is '
                        'not added to KNN memory.',
            dataType='Real32',
            count=1,
            constraints='',
            defaultValue=0.0,
            accessMode='ReadWrite'),

          inputThresh=dict(
            description='Input binarization threshold, used if '
                        '"doBinarization" is True.',
            dataType='Real32',
            count=1,
            constraints='',
            defaultValue=0.5,
            accessMode='Create'),

          doBinarization=dict(
            description='Whether or not to binarize the input vectors.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=0,
            accessMode='Create'),

          useSparseMemory=dict(
            description='A boolean flag that determines whether or '
                        'not the KNNClassifier will use sparse memory.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=1,
            accessMode='Create'),

          sparseThreshold=dict(
            description='If sparse memory is used, input variables '
                        'whose absolute value is less than this '
                        'threshold will be stored as zero.',
            dataType='Real32',
            count=1,
            constraints='',
            defaultValue=0.0,
            accessMode='Create'),

          relativeThreshold=dict(
            description='Whether to multiply sparseThreshold by the max '
                        'value in the input.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=0,
            accessMode='Create'),

          winnerCount=dict(
            description='Only this many elements of the input are '
                       'stored. All elements are stored if 0.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=0,
            accessMode='Create'),

          doSphering=dict(
            description='A boolean indicating whether or not data should '
              'be "sphered" (i.e. each dimension should be normalized such '
              'that its mean and variance are zero and one, respectively). '
              'This sphering normalization would be performed after all '
              'training samples had been received but before inference was '
              'performed. The dimension-specific normalization constants '
              'would then be applied to all future incoming vectors prior '
              'to performing conventional NN inference.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=0,
            accessMode='Create'),

          SVDSampleCount=dict(
            description='If not 0, carries out SVD transformation after '
                          'that many samples have been seen.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=0,
            accessMode='Create'),

          SVDDimCount=dict(
            description='Number of dimensions to keep after SVD if greater '
                        'than 0. If set to -1 it is considered unspecified. '
                        'If set to 0 it is considered "adaptive" and the '
                        'number is chosen automatically.',
            dataType='Int32',
            count=1,
            constraints='',
            defaultValue=-1,
            accessMode='Create'),

          fractionOfMax=dict(
            description='The smallest singular value which is retained '
                        'as a fraction of the largest singular value. This is '
                        'used only if SVDDimCount==0 ("adaptive").',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=0,
            accessMode='Create'),

          useAuxiliary=dict(
            description='Whether or not the classifier should use auxiliary '
                        'input data.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=0,
            accessMode='Create'),

          justUseAuxiliary=dict(
            description='Whether or not the classifier should ONLY use the '
                        'auxiliary input data.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=0,
            accessMode='Create'),

          clVerbosity=dict(
            description='An integer that controls the verbosity level, '
                        '0 means no verbose output, increasing integers '
                        'provide more verbosity.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=0,
            accessMode='ReadWrite'),

          doSelfValidation=dict(
            description='A boolean flag that determines whether or '
                        'not the KNNClassifier should perform partitionID-based '
                        'self-validation during the finishLearning() step.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=None,
            accessMode='ReadWrite'),

          keepAllDistances=dict(
            description='Whether to store all the protoScores in an array, '
                        'rather than just the ones for the last inference. '
                        'When this parameter is changed from True to False, '
                        'all the scores are discarded except for the most '
                        'recent one.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=None,
            accessMode='ReadWrite'),

          replaceDuplicates=dict(
            description='A boolean flag that determines whether or '
                        'not the KNNClassifier should replace duplicates '
                        'during learning. This should be on for online '
                        'learning.',
            dataType='UInt32',
            count=1,
            constraints='bool',
            defaultValue=None,
            accessMode='ReadWrite'),

          cellsPerCol=dict(
            description='If >= 1, we assume the input is organized into columns, '
                        'in the same manner as the temporal pooler AND '
                        'whenever we store a new prototype, we only store the '
                        'start cell (first cell) of any column which is '
                        'bursting.',
            dataType='UInt32',
            count=1,
            constraints='',
            defaultValue=0,
            accessMode='Create'),

          maxStoredPatterns=dict(
            description='Limits the maximum number of training patterns '
                        'stored. When KNN learns in a fixed capacity mode, '
                        'the unused patterns are deleted once the number '
                        'of stored patterns is greater than '
                        'maxStoredPatterns. [-1 means no limit]',
            dataType='Int32',
            count=1,
            constraints='',
            defaultValue=-1,
            accessMode='Create'),
      ),
      commands=dict()
    )

    return ns


  def __init__(self,
               maxCategoryCount=0,
               bestPrototypeIndexCount=0,
               outputProbabilitiesByDist=False,
               k=1,
               distanceNorm=2.0,
               distanceMethod='norm',
               distThreshold=0.0,
               doBinarization=False,
               inputThresh=0.500,
               useSparseMemory=True,
               sparseThreshold=0.0,
               relativeThreshold=False,
               winnerCount=0,
               acceptanceProbability=1.0,
               seed=42,
               doSphering=False,
               SVDSampleCount=0,
               SVDDimCount=0,
               fractionOfMax=0,
               useAuxiliary=0,
               justUseAuxiliary=0,
               clVerbosity=0,
               doSelfValidation=False,
               replaceDuplicates=False,
               cellsPerCol=0,
               maxStoredPatterns=-1
               ):

    self.version = KNNClassifierRegion.__VERSION__

    # Convert various arguments to match the expectation
    # of the KNNClassifier
    if SVDSampleCount == 0:
      SVDSampleCount = None

    if SVDDimCount == -1:
      SVDDimCount = None
    elif SVDDimCount == 0:
      SVDDimCount = 'adaptive'

    if fractionOfMax == 0:
      fractionOfMax = None

    if useAuxiliary == 0:
      useAuxiliary = False

    if justUseAuxiliary == 0:
      justUseAuxiliary = False

    # KNN Parameters
    self.knnParams = dict(
        k=k,
        distanceNorm=distanceNorm,
        distanceMethod=distanceMethod,
        distThreshold=distThreshold,
        doBinarization=doBinarization,
        binarizationThreshold=inputThresh,
        useSparseMemory=useSparseMemory,
        sparseThreshold=sparseThreshold,
        relativeThreshold=relativeThreshold,
        numWinners=winnerCount,
        numSVDSamples=SVDSampleCount,
        numSVDDims=SVDDimCount,
        fractionOfMax=fractionOfMax,
        verbosity=clVerbosity,
        replaceDuplicates=replaceDuplicates,
        cellsPerCol=cellsPerCol,
        maxStoredPatterns=maxStoredPatterns
    )

    # Initialize internal structures
    self.outputProbabilitiesByDist = outputProbabilitiesByDist
    self.learningMode = True
    self.inferenceMode = False
    self._epoch = 0
    self.acceptanceProbability = acceptanceProbability
    self._rgen = Random(seed)
    self.confusion = numpy.zeros((1, 1))
    self.keepAllDistances = False
    self._protoScoreCount = 0
    self._useAuxiliary = useAuxiliary
    self._justUseAuxiliary = justUseAuxiliary

    # Sphering normalization
    self._doSphering = doSphering
    self._normOffset = None
    self._normScale  = None
    self._samples = None
    self._labels  = None

    # Debugging
    self.verbosity = clVerbosity

    # Boolean controlling whether or not the
    # region should perform partitionID-based
    # self-validation during the finishLearning()
    # step.
    self.doSelfValidation = doSelfValidation

    # Taps
    self._tapFileIn = None
    self._tapFileOut = None

    self._initEphemerals()

    self.maxStoredPatterns = maxStoredPatterns
    self.maxCategoryCount = maxCategoryCount
    self._bestPrototypeIndexCount = bestPrototypeIndexCount


  def _getEphemeralAttributes(self):
    """
    List of attributes to not save with serialized state.
    """
    return ['_firstComputeCall', '_accuracy', '_protoScores',
      '_categoryDistances']


  def _initEphemerals(self):
    """
    Initialize attributes that are not saved with the checkpoint.
    """
    self._firstComputeCall = True
    self._accuracy = None
    self._protoScores = None
    self._categoryDistances = None

    self._knn = KNNClassifier.KNNClassifier(**self.knnParams)

    for x in ('_partitions', '_useAuxiliary', '_doSphering',
              '_scanInfo', '_protoScores', 'doSelfValidation'):
      if not hasattr(self, x):
        setattr(self, x, None)


  def __setstate__(self, state):
    """Set state from serialized state."""

    if 'version' not in state:
      self.__dict__.update(state)
    elif state['version'] == 1:
      knnState = state['_knn_state']
      del state['_knn_state']

      self.__dict__.update(state)
      self._initEphemerals()
      self._knn.__setstate__(knnState)
    else:
      raise RuntimeError("Invalid KNNClassifierRegion version for __setstate__")

    # Set to current version
    self.version = KNNClassifierRegion.__VERSION__


  def __getstate__(self):
    """Get serializable state."""
    state = self.__dict__.copy()
    state['_knn_state'] = self._knn.__getstate__()
    del state['_knn']

    for field in self._getEphemeralAttributes():
      del state[field]
    return state


  def initialize(self, dims, splitterMaps):
    assert tuple(dims) == (1,) * len(dims)


  def _getActiveOutputCount(self):
    if self._knn._categoryList:
      return int(max(self._knn._categoryList)+1)
    else:
      return 0

  activeOutputCount = property(fget=_getActiveOutputCount)


  def _getSeenCategoryCount(self):
    return len(set(self._knn._categoryList))

  categoryCount = property(fget=_getSeenCategoryCount)


  def _getPatternMatrix(self):

    if self._knn._M is not None:
      return self._knn._M
    else:
      return self._knn._Memory


  def _getAccuracy(self):

    n = self.confusion.shape[0]
    assert n == self.confusion.shape[1], "Confusion matrix is non-square."
    return self.confusion[range(n), range(n)].sum(), self.confusion.sum()

  accuracy = property(fget=_getAccuracy)


  def clear(self):

    self._knn.clear()


  def getAlgorithmInstance(self):
    """Returns instance of the underlying KNNClassifier algorithm object."""
    return self._knn


  def getParameter(self, name, index=-1):
    """
    Get the value of the parameter.

    @param name -- the name of the parameter to retrieve, as defined
            by the Node Spec.
    """
    if name == "patternCount":
      return self._knn._numPatterns
    elif name == "patternMatrix":
      return self._getPatternMatrix()
    elif name == "k":
      return self._knn.k
    elif name == "distanceNorm":
      return self._knn.distanceNorm
    elif name == "distanceMethod":
      return self._knn.distanceMethod
    elif name == "distThreshold":
      return self._knn.distThreshold
    elif name == "inputThresh":
      return self._knn.binarizationThreshold
    elif name == "doBinarization":
      return self._knn.doBinarization
    elif name == "useSparseMemory":
      return self._knn.useSparseMemory
    elif name == "sparseThreshold":
      return self._knn.sparseThreshold
    elif name == "winnerCount":
      return self._knn.numWinners
    elif name == "relativeThreshold":
      return self._knn.relativeThreshold
    elif name == "SVDSampleCount":
      v = self._knn.numSVDSamples
      return v if v is not None else 0
    elif name == "SVDDimCount":
      v = self._knn.numSVDDims
      return v if v is not None else 0
    elif name == "fractionOfMax":
      v = self._knn.fractionOfMax
      return v if v is not None else 0
    elif name == "useAuxiliary":
      return self._useAuxiliary
    elif name == "justUseAuxiliary":
      return self._justUseAuxiliary
    elif name == "doSphering":
      return self._doSphering
    elif name == "cellsPerCol":
      return self._knn.cellsPerCol
    elif name == "maxStoredPatterns":
      return self.maxStoredPatterns
    elif name == 'categoryRecencyList':
      return self._knn._categoryRecencyList
    else:
      # If any spec parameter name is the same as an attribute, this call
      # will get it automatically, e.g. self.learningMode
      return PyRegion.getParameter(self, name, index)


  def setParameter(self, name, index, value):
    """
    Set the value of the parameter.

    @param name -- the name of the parameter to update, as defined
            by the Node Spec.
    @param value -- the value to which the parameter is to be set.
    """
    if name == "learningMode":
      if int(value) and not self.learningMode:
        self._restartLearning()
      self.learningMode = bool(int(value))
      self._epoch = 0
    elif name == "inferenceMode":
      self._epoch = 0
      if int(value) and not self.inferenceMode:
        self._finishLearning()
      self.inferenceMode = bool(int(value))
    elif name == "distanceNorm":
      self._knn.distanceNorm = value
    elif name == "distanceMethod":
      self._knn.distanceMethod = value
    elif name == "keepAllDistances":
      self.keepAllDistances = bool(value)
      if not self.keepAllDistances:
        # Discard all distances except the latest
        if self._protoScores is not None and self._protoScores.shape[0] > 1:
          self._protoScores = self._protoScores[-1,:]
        if self._protoScores is not None:
          self._protoScoreCount = 1
        else:
          self._protoScoreCount = 0
    elif name == "clVerbosity":
      self.verbosity = value
      self._knn.verbosity = value
    elif name == "doSelfValidation":
      self.doSelfValidation = value
    else:
      return PyRegion.setParameter(self, name, index, value)


  def reset(self):

    self.confusion = numpy.zeros((1, 1))


  def doInference(self, activeInput):
    """Explicitly run inference on a vector that is passed in and return the
    category id. Useful for debugging."""

    prediction, inference, allScores = self._knn.infer(activeInput)
    return inference


  def enableTap(self, tapPath):
    """
    Begin writing output tap files.

    @param tapPath -- base name of the output tap files to write.
    """

    self._tapFileIn = open(tapPath + '.in', 'w')
    self._tapFileOut = open(tapPath + '.out', 'w')


  def disableTap(self):
    """Disable writing of output tap files. """

    if self._tapFileIn is not None:
      self._tapFileIn.close()
      self._tapFileIn = None
    if self._tapFileOut is not None:
      self._tapFileOut.close()
      self._tapFileOut = None


  def handleLogInput(self, inputs):
    """Write inputs to output tap file."""

    if self._tapFileIn is not None:
      for input in inputs:
        for k in range(len(input)):
          print >> self._tapFileIn, input[k],
        print >> self._tapFileIn


  def handleLogOutput(self, output):
    """Write outputs to output tap file."""
    if self._tapFileOut is not None:
      for k in range(len(output)):
        print >> self._tapFileOut, output[k],
      print >> self._tapFileOut


  def _storeSample(self, inputVector, trueCatIndex, partition=0):
    """
    Store a training sample and associated category label
    """

    # If this is the first sample, then allocate a numpy array
    # of the appropriate size in which to store all samples.
    if self._samples is None:
      self._samples = numpy.zeros((0, len(inputVector)), dtype=RealNumpyDType)
      assert self._labels is None
      self._labels = []

    # Add the sample vector and category label
    self._samples = numpy.concatenate((self._samples, numpy.atleast_2d(inputVector)), axis=0)
    self._labels += [trueCatIndex]

    # Add the partition ID
    if self._partitions is None:
      self._partitions = []
    if partition is None:
      partition = 0
    self._partitions += [partition]


  def compute(self, inputs, outputs):
    """
    Process one input sample. This method is called by the runtime engine.

    NOTE: the number of input categories may vary, but the array size is fixed
    to the max number of categories allowed (by a lower region), so "unused"
    indices of the input category array are filled with -1s.

    TODO: confusion matrix does not support multi-label classification
    """

    # For backward compatibility
    if self._useAuxiliary is None:
      self._useAuxiliary = False

    # If this is the first time being called, print potential warning messages
    if self._firstComputeCall:
      self._firstComputeCall = False
      if self._useAuxiliary:
        if self._justUseAuxiliary:
          print "  Warning: You have chosen to ignore the image data and " \
                "instead just use the auxiliary data stream."


    # Format inputs
    inputVector = inputs['bottomUpIn']

    # Look for auxiliary input
    if self._useAuxiliary:
      auxVector = inputs['auxDataIn']
      if auxVector.dtype != numpy.float32:
        raise RuntimeError(
            "KNNClassifierRegion expects numpy.float32 for the auxiliary data vector")
      if self._justUseAuxiliary:
        inputVector = auxVector
      else:
        inputVector = numpy.concatenate([inputVector, auxVector])

    # Logging
    self.handleLogInput([inputVector])

    # Read the category.
    assert "categoryIn" in inputs, "No linked category input."
    categories = inputs['categoryIn']

    # Read the partition ID.
    if "partitionIn" in inputs:
      assert len(inputs["partitionIn"]) == 1, "Must have exactly one link to partition input."
      #partInput = inputs["partitionIn"][0].wvector()
      partInput = inputs['partitionIn']
      assert len(partInput) == 1, "Partition input element count must be exactly 1."
      partition = int(partInput[0])
    else:
      partition = None


    # ---------------------------------------------------------------------
    # Inference (can be done simultaneously with learning)
    if self.inferenceMode:
      categoriesOut = outputs['categoriesOut']
      probabilitiesOut = outputs['categoryProbabilitiesOut']

      # If we are sphering, then apply normalization
      if self._doSphering:
        inputVector = (inputVector + self._normOffset) * self._normScale

      nPrototypes = 0
      if "bestPrototypeIndices" in outputs:
        bestPrototypeIndicesOut = outputs["bestPrototypeIndices"]
        nPrototypes = len(bestPrototypeIndicesOut)

      winner, inference, protoScores, categoryDistances = \
                  self._knn.infer(inputVector, partitionId=partition)

      if not self.keepAllDistances:
        self._protoScores = protoScores
      else:
        # Keep all prototype scores in an array
        if self._protoScores is None:
          self._protoScores = numpy.zeros((1, protoScores.shape[0]),
                                          protoScores.dtype)
          self._protoScores[0,:] = protoScores
          self._protoScoreCount = 1
        else:
          if self._protoScoreCount == self._protoScores.shape[0]:
            # Double the size of the array
            newProtoScores = numpy.zeros((self._protoScores.shape[0] * 2,
                                          self._protoScores.shape[1]),
                                          self._protoScores.dtype)
            newProtoScores[:self._protoScores.shape[0],:] = self._protoScores
            self._protoScores = newProtoScores
          # Store the new prototype score
          self._protoScores[self._protoScoreCount,:] = protoScores
          self._protoScoreCount += 1
      self._categoryDistances = categoryDistances


      # --------------------------------------------------------------------
      # Compute the probability of each category
      if self.outputProbabilitiesByDist:
        scores = 1.0 - self._categoryDistances
      else:
        scores = inference

      # Probability is simply the scores/scores.sum()
      total = scores.sum()
      if total == 0:
        numScores = len(scores)
        probabilities = numpy.ones(numScores) / numScores
      else:
        probabilities = scores / total
      #print "probabilities:", probabilities
      #import pdb; pdb.set_trace()


      # -------------------------------------------------------------------
      # Fill the output vectors with our results
      nout = min(len(categoriesOut), len(inference))
      categoriesOut.fill(0)
      categoriesOut[0:nout] = inference[0:nout]

      probabilitiesOut.fill(0)
      probabilitiesOut[0:nout] = probabilities[0:nout]

      if self.verbosity >= 1:
        print "KNNRegion: categoriesOut: ", categoriesOut[0:nout]
        print "KNNRegion: probabilitiesOut: ", probabilitiesOut[0:nout]

      if self._scanInfo is not None:
        self._scanResults = [tuple(inference[:nout])]

      # Update the stored confusion matrix.
      for category in categories:
        if category >= 0:
          dims = max(category+1, len(inference))
          oldDims = len(self.confusion)
          if oldDims < dims:
            confusion = numpy.zeros((dims, dims))
            confusion[0:oldDims, 0:oldDims] = self.confusion
            self.confusion = confusion
          self.confusion[inference.argmax(), category] += 1

      # Calculate the best prototype indices
      if nPrototypes > 1:
        bestPrototypeIndicesOut.fill(0)
        if categoryDistances is not None:
          indices = categoryDistances.argsort()
          nout = min(len(indices), nPrototypes)
          bestPrototypeIndicesOut[0:nout] = indices[0:nout]
      elif nPrototypes == 1:
        if (categoryDistances is not None) and len(categoryDistances):
          bestPrototypeIndicesOut[0] = categoryDistances.argmin()
        else:
          bestPrototypeIndicesOut[0] = 0

      # Logging
      self.handleLogOutput(inference)

    # ---------------------------------------------------------------------
    # Learning mode
    if self.learningMode:
      # Randomly skip this sample with probability 1 - acceptanceProbability
      if (self.acceptanceProbability >= 1.0) or \
            (self._rgen.getReal64() <= self.acceptanceProbability):
        # Accept the input
        for category in categories:
          if category >= 0:
            # category values of -1 are to be skipped (they are non-categories)
            if self._doSphering:
              # If we are sphering, then we can't provide the data to the KNN
              # library until we have computed per-dimension normalization
              # constants. So instead, we'll just store each training sample.
              self._storeSample(inputVector, category, partition)
            else:
              # Pass the raw training sample directly to the KNN library.
              self._knn.learn(inputVector, category, partition)

    self._epoch += 1


  def getCategoryList(self):
    """
    Public API for returning the category list
    This is a required API of the NearestNeighbor inspector.

    It returns an array which has one entry per stored prototype. The value
    of the entry is the category # of that stored prototype.
    """

    return self._knn._categoryList


  def removeCategory(self, categoryToRemove):
    return self._knn.removeCategory(categoryToRemove)


  def getLatestDistances(self):
    """
    Public API for returning the full scores
    (distance to each prototype) from the last
    compute() inference call.
    This is a required API of the NearestNeighbor inspector.

    It returns an array which has one entry per stored prototype. The value
    of the entry is the distance of the most recently inferred input from the
    stored prototype.
    """
    if self._protoScores is not None:
      if self.keepAllDistances:
        return self._protoScores[self._protoScoreCount - 1,:]
      else:
        return self._protoScores
    else:
      return None


  def getAllDistances(self):
    """
    Return all the prototype distances from all computes available.

    Like getLatestDistances, but returns all the scores if more than one set is
    available. getLatestDistances will always just return one set of scores.
    """

    if self._protoScores is None:
      return None
    return self._protoScores[:self._protoScoreCount, :]


  def calculateProbabilities(self):
    # Get the scores, from 0 to 1
    scores = 1.0 - self._categoryDistances

    # Probability is simply the score/scores.sum()
    total = scores.sum()
    if total == 0:
      numScores = len(scores)
      return numpy.ones(numScores) / numScores

    return scores / total


  def _restartLearning(self):
    """
    Currently, we allow learning mode to be "re-started" after being
    ended, but only if PCA and sphering (if any) operations have
    already been completed (for the sake of simplicity.)
    """
    self._knn.restartLearning()


  def _finishLearning(self):
    """Does nothing. Kept here for API compatibility """
    if self._doSphering:
      self._finishSphering()

    self._knn.finishLearning()

    # Compute leave-one-out validation accuracy if
    # we actually received non-trivial partition info
    self._accuracy = None
    if self.doSelfValidation:
      if self._knn._partitionIdArray is not None:
        numSamples, numCorrect = self._knn.leaveOneOutTest()
        if numSamples:
          self._accuracy = float(numCorrect) / float(numSamples)
          print "Leave-one-out validation: %d of %d correct ==> %.3f%%" % \
                 (numCorrect, numSamples, self._accuracy * 100.0)


  def _finishSphering(self):
    """
    Compute normalization constants for each feature dimension
    based on the collected training samples.  Then normalize our
    training samples using these constants (so that each input
    dimension has mean and variance of zero and one, respectively.)
    Then feed these "sphered" training samples into the underlying
    KNN model.
    """

    # If we are sphering our data, we need to compute the
    # per-dimension normalization constants
    # First normalize the means (to zero)
    self._normOffset = self._samples.mean(axis=0) * -1.0
    self._samples += self._normOffset
    # Now normalize the variances (to one).  However, we need to be
    # careful because the variance could conceivably be zero for one
    # or more dimensions.
    variance = self._samples.var(axis=0)
    variance[numpy.where(variance == 0.0)] = 1.0
    self._normScale  = 1.0 / numpy.sqrt(variance)
    self._samples *= self._normScale

    # Now feed each "sphered" sample into the KNN library
    for sampleIndex in range(len(self._labels)):
      self._knn.learn(self._samples[sampleIndex],
                      self._labels[sampleIndex],
                      self._partitions[sampleIndex])


  def _arraysToLists(self, samplesArray, labelsArray):

    labelsList = list(labelsArray)
    samplesList = [[float(y) for y in x] for x in [list(x) for x in samplesArray]]
    return samplesList, labelsList


  def getOutputElementCount(self, name):
    """This method will be called only when the node is used in nuPIC 2"""
    if name == 'categoriesOut':
      return self.maxCategoryCount
    elif name == 'categoryProbabilitiesOut':
      return self.maxCategoryCount
    elif name == 'bestPrototypeIndices':
      return self._bestPrototypeIndexCount if self._bestPrototypeIndexCount else 0
    else:
      raise Exception('Unknown output: ' + name)
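
# -----------------------------------------------------------------------------
# A hedged usage sketch (not from the original source): driving
# KNNClassifierRegion directly with plain dicts, the way the runtime engine
# would. The input/output names and the learn-then-infer protocol follow the
# compute() code above; the toy vectors are made up, and the imports of the
# original module (numpy, KNNClassifier) are assumed to be in scope.
region = KNNClassifierRegion(maxCategoryCount=3, k=1)

# Learn two labelled vectors; category values of -1 would be skipped.
for vec, cat in (([1, 0, 0, 0], 0), ([0, 0, 1, 1], 1)):
  inputs = {'bottomUpIn': numpy.array(vec, dtype=numpy.float32),
            'categoryIn': numpy.array([cat], dtype=numpy.float32)}
  region.compute(inputs, {})

# Turning inference on triggers _finishLearning() as a side effect.
region.setParameter('inferenceMode', -1, 1)
region.setParameter('learningMode', -1, 0)

# Output arrays are sized to maxCategoryCount, as in getOutputElementCount().
outputs = {'categoriesOut': numpy.zeros(3, dtype=numpy.float32),
           'categoryProbabilitiesOut': numpy.zeros(3, dtype=numpy.float32)}
inputs = {'bottomUpIn': numpy.array([1, 0, 0, 0], dtype=numpy.float32),
          'categoryIn': numpy.array([-1], dtype=numpy.float32)}
region.compute(inputs, outputs)
print outputs['categoriesOut'].argmax()   # expected: category 0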
Beispiel #54
class SMSequences(object):
    """
    Generates sensorimotor sequences.
    """

    def __init__(
        self,
        sensoryInputElements,
        spatialConfig,
        sensoryInputElementsPool=list("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz0123456789"),
        minDisplacement=1,
        maxDisplacement=1,
        numActiveBitsSensoryInput=9,
        numActiveBitsMotorInput=9,
        seed=42,
        verbosity=False,
        useRandomEncoder=False,
    ):
        """
    @param sensoryInputElements       (list)
        Strings or numbers representing the sensory elements that exist in your
        world. Elements can be repeated if multiple of the same exist.

    @param spatialConfig              (numpy.array)
        Array of shape (len(sensoryInputElements), dimension). It has a
        coordinate for every element in sensoryInputElements.

    @param sensoryInputElementsPool   (list)
        List of strings representing a readable version of all possible sensory
        elements in this world. Elements don't need to be in any order and there
        should be no duplicates. By default this contains the set of
        alphanumeric characters.

    @param maxDisplacement            (int)
        Maximum `distance` for a motor command. Distance is defined by the
        largest difference along any coordinate dimension.

    @param minDisplacement            (int)
        Minimum `distance` for a motor command. Distance is defined by the
        largest difference along any coordinate dimension.

    @param numActiveBitsSensoryInput  (int)
        Number of active bits for each sensory input.

    @param numActiveBitsMotorInput    (int)
        Number of active bits for each dimension of the motor input.

    @param seed                       (int)
        Random seed for nupic.bindings.Random.

    @param verbosity                  (int)
        Verbosity level for debug printing.

    @param useRandomEncoder           (boolean)
        if True, use the random encoder SDRCategoryEncoder. If False,
        use CategoryEncoder. CategoryEncoder encodes categories using contiguous
        non-overlapping bits for each category, which makes it easier to debug.
    """

        # ---------------------------------------------------------------------------------
        # Store creation parameters
        self.sensoryInputElements = sensoryInputElements
        self.sensoryInputElementsPool = sensoryInputElementsPool
        self.spatialConfig = spatialConfig.astype(int)
        self.spatialLength = len(spatialConfig)
        self.maxDisplacement = maxDisplacement
        self.minDisplacement = minDisplacement
        self.numActiveBitsSensoryInput = numActiveBitsSensoryInput
        self.numActiveBitsMotorInput = numActiveBitsMotorInput
        self.verbosity = verbosity
        self.seed = seed

        self.initialize(useRandomEncoder)

    def initialize(self, useRandomEncoder):
        """
    Initialize the various data structures.
    """
        self.setRandomSeed(self.seed)

        self.dim = numpy.shape(self.spatialConfig)[-1]

        self.spatialMap = dict(zip(map(tuple, list(self.spatialConfig)), self.sensoryInputElements))

        self.lengthMotorInput1D = (2 * self.maxDisplacement + 1) * self.numActiveBitsMotorInput

        uniqueSensoryElements = list(set(self.sensoryInputElementsPool))

        if useRandomEncoder:
            self.sensoryEncoder = SDRCategoryEncoder(
                n=1024, w=self.numActiveBitsSensoryInput, categoryList=uniqueSensoryElements, forced=True
            )
            self.lengthSensoryInput = self.sensoryEncoder.getWidth()

        else:
            self.lengthSensoryInput = (len(self.sensoryInputElementsPool) + 1) * self.numActiveBitsSensoryInput

            self.sensoryEncoder = CategoryEncoder(
                w=self.numActiveBitsSensoryInput, categoryList=uniqueSensoryElements, forced=True
            )

        motorEncoder1D = ScalarEncoder(
            n=self.lengthMotorInput1D,
            w=self.numActiveBitsMotorInput,
            minval=-self.maxDisplacement,
            maxval=self.maxDisplacement,
            clipInput=True,
            forced=True,
        )

        self.motorEncoder = VectorEncoder(length=self.dim, encoder=motorEncoder1D)

    def generateSensorimotorSequence(self, sequenceLength):
        """
    Generate sensorimotor sequences of length sequenceLength.

    @param sequenceLength (int)
        Length of the sensorimotor sequence.

    @return (tuple) Contains:
            sensorySequence       (list)
                Encoded sensory input for whole sequence.

            motorSequence         (list)
                Encoded motor input for whole sequence.

            sensorimotorSequence  (list)
                Encoder sensorimotor input for whole sequence. This is useful
                when you want to give external input to temporal memory.
    """
        motorSequence = []
        sensorySequence = []
        sensorimotorSequence = []
        currentEyeLoc = self.nupicRandomChoice(self.spatialConfig)

        for i in xrange(sequenceLength):

            currentSensoryInput = self.spatialMap[tuple(currentEyeLoc)]

            nextEyeLoc, currentEyeV = self.getNextEyeLocation(currentEyeLoc)

            if self.verbosity:
                print "sensory input = ", currentSensoryInput, "eye location = ", currentEyeLoc, " motor command = ", currentEyeV

            sensoryInput = self.encodeSensoryInput(currentSensoryInput)
            motorInput = self.encodeMotorInput(list(currentEyeV))
            sensorimotorInput = numpy.concatenate((sensoryInput, motorInput))

            sensorySequence.append(sensoryInput)
            motorSequence.append(motorInput)
            sensorimotorSequence.append(sensorimotorInput)

            currentEyeLoc = nextEyeLoc

        return (sensorySequence, motorSequence, sensorimotorSequence)

    def encodeSensorimotorSequence(self, eyeLocs):
        """
    Encode a sensorimotor sequence given the eye movements. The sequence will
    have length len(eyeLocs) - 1 because only the differences between eye
    locations can be used to encode motor commands.

    @param eyeLocs  (list)
        Numpy coordinates describing where the eye is looking.

    @return (tuple) Contains:
            sensorySequence       (list)
                Encoded sensory input for whole sequence.

            motorSequence         (list)
                Encoded motor input for whole sequence.

            sensorimotorSequence  (list)
                Encoder sensorimotor input for whole sequence. This is useful
                when you want to give external input to temporal memory.
    """
        sequenceLength = len(eyeLocs) - 1

        motorSequence = []
        sensorySequence = []
        sensorimotorSequence = []

        for i in xrange(sequenceLength):
            currentEyeLoc = eyeLocs[i]
            nextEyeLoc = eyeLocs[i + 1]

            currentSensoryInput = self.spatialMap[currentEyeLoc]

            currentEyeV = nextEyeLoc - currentEyeLoc

            if self.verbosity:
                print "sensory input = ", currentSensoryInput, "eye location = ", currentEyeLoc, " motor command = ", currentEyeV

            sensoryInput = self.encodeSensoryInput(currentSensoryInput)
            motorInput = self.encodeMotorInput(list(currentEyeV))
            sensorimotorInput = numpy.concatenate((sensoryInput, motorInput))

            sensorySequence.append(sensoryInput)
            motorSequence.append(motorInput)
            sensorimotorSequence.append(sensorimotorInput)

        return (sensorySequence, motorSequence, sensorimotorSequence)

    def getNextEyeLocation(self, currentEyeLoc):
        """
    Generate next eye location based on current eye location.

    @param currentEyeLoc (numpy.array)
        Current coordinate describing the eye location in the world.

    @return (tuple) Contains:
            nextEyeLoc  (numpy.array)
                Coordinate of the next eye location.

            eyeDiff     (numpy.array)
                Vector describing change from currentEyeLoc to nextEyeLoc.
    """
        possibleEyeLocs = []
        for loc in self.spatialConfig:
            # Largest difference along any coordinate dimension
            shift = max(abs(loc - currentEyeLoc))
            if self.minDisplacement <= shift <= self.maxDisplacement:
                possibleEyeLocs.append(loc)

        nextEyeLoc = self.nupicRandomChoice(possibleEyeLocs)

        eyeDiff = nextEyeLoc - currentEyeLoc

        return nextEyeLoc, eyeDiff

    def setRandomSeed(self, seed):
        """
    Reset the nupic random generator. This is necessary to reset random seed to
    generate new sequences.

    @param seed       (int)
        Seed for nupic.bindings.Random.
    """
        self.seed = seed
        self._random = Random()
        self._random.setSeed(seed)

    def nupicRandomChoice(self, array):
        """
    Chooses a random element from an array using the nupic random number
    generator.

    @param array  (list or numpy.array)
        Array to choose random element from.

    @return       (element)
        Element chosen at random.
    """
        return array[self._random.getUInt32(len(array))]

    def encodeMotorInput(self, motorInput):
        """
    Encode motor command to bit vector.

    @param motorInput (1D numpy.array)
        Motor command to be encoded.

    @return           (1D numpy.array)
        Encoded motor command.
    """
        if not hasattr(motorInput, "__iter__"):
            motorInput = [motorInput]

        return self.motorEncoder.encode(motorInput)

    def decodeMotorInput(self, motorInputPattern):
        """
    Decode motor command from bit vector.

    @param motorInputPattern (1D numpy.array)
        Encoded motor command.

    @return                  (1D numpy.array)
        Decoded motor command.

    """
        fields = self.motorEncoder.decode(motorInputPattern)[0]
        key = fields.keys()[0]
        motorCommand = fields[key][1][0]
        return motorCommand

    def encodeSensoryInput(self, sensoryInputElement):
        """
    Encode sensory input to bit vector

    @param sensoryInputElement (str or number)
        Sensory element to be encoded.

    @return               (1D numpy.array)
        Encoded sensory element.
    """
        return self.sensoryEncoder.encode(sensoryInputElement)

    def decodeSensoryInput(self, sensoryInputPattern):
        """
    Decode sensory input from bit vector.

    @param sensoryInputPattern  (1D numpy.array)
        Encoded sensory element.

    @return                     (1D numpy.array)
        Decoded sensory element.
    """
        return self.sensoryEncoder.decode(sensoryInputPattern)[0]["category"][1]

    def printSensoryCodingScheme(self):
        """
    Print sensory inputs along with their encoded versions.
    """
        print "\nsensory coding scheme: "
        for loc in self.spatialConfig:
            sensoryElement = self.spatialMap[tuple(loc)]
            print sensoryElement, "%s : " % loc,
            printSequence(self.encodeSensoryInput(sensoryElement))

    def printMotorCodingScheme(self):
        """
    Print motor commands (displacement vector) along with their encoded
    versions.
    """
        print "\nmotor coding scheme: "
        self.build(self.dim, [])

    def build(self, n, vec):
        """
    Recursive function to help print motor coding scheme.
    """
        for i in range(-self.maxDisplacement, self.maxDisplacement + 1):
            nextVec = vec + [i]
            if n == 1:
                print "{:>5}\t".format(nextVec), " = ",
                printSequence(self.encodeMotorInput(nextVec))
            else:
                self.build(n - 1, nextVec)
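
# ---------------------------------------------------------------------------
# A minimal standalone sketch (not part of the original example) of the idea
# behind nupicRandomChoice above: Random.getUInt32(n) returns a uniform
# integer in [0, n), so indexing with it picks a uniformly random element,
# and re-seeding reproduces the same stream of picks. Assumes the usual
# `from nupic.bindings.math import Random` import.

from nupic.bindings.math import Random

def randomChoice(rng, array):
  # Pick a random element using the nupic random number generator.
  return array[rng.getUInt32(len(array))]

rng = Random(42)
picksA = [randomChoice(rng, ["a", "b", "c", "d"]) for _ in xrange(5)]

rng = Random(42)  # same seed => identical stream
picksB = [randomChoice(rng, ["a", "b", "c", "d"]) for _ in xrange(5)]

assert picksA == picksB

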
class TemporalMemory(object):
  """
  Class implementing the Temporal Memory algorithm.
  """

  def __init__(self,
               columnDimensions=(2048,),
               cellsPerColumn=32,
               activationThreshold=13,
               initialPermanence=0.21,
               connectedPermanence=0.50,
               minThreshold=10,
               maxNewSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               predictedSegmentDecrement=0.0,
               maxSegmentsPerCell=255,
               maxSynapsesPerSegment=255,
               seed=42,
               **kwargs):
    """
    @param columnDimensions          (list)  Dimensions of the column space
    @param cellsPerColumn            (int)   Number of cells per column
    @param activationThreshold       (int)   If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active.
    @param initialPermanence         (float) Initial permanence of a new synapse.
    @param connectedPermanence       (float) If the permanence value for a synapse is greater than this value, it is said to be connected.
    @param minThreshold              (int)   If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column.
    @param maxNewSynapseCount        (int)   The maximum number of synapses added to a segment during learning.
    @param permanenceIncrement       (float) Amount by which permanences of synapses are incremented during learning.
    @param permanenceDecrement       (float) Amount by which permanences of synapses are decremented during learning.
    @param predictedSegmentDecrement (float) Amount by which active permanences of synapses of previously predicted but inactive segments are decremented.
    @param maxSegmentsPerCell        (int)   The maximum number of segments per cell.
    @param maxSynapsesPerSegment     (int)   The maximum number of synapses per segment.
    @param seed                      (int)   Seed for the random number generator.

    Notes:
    predictedSegmentDecrement: A good value is just a bit larger than
    (the column-level sparsity * permanenceIncrement). So, if column-level
    sparsity is 2% and permanenceIncrement is 0.01, this parameter should be
    something like 4% * 0.01 = 0.0004.
    """
    # Error checking
    if not len(columnDimensions):
      raise ValueError("Number of column dimensions must be greater than 0")

    if not cellsPerColumn > 0:
      raise ValueError("Number of cells per column must be greater than 0")

    # TODO: Validate all parameters (and add validation tests)

    # Save member variables
    self.columnDimensions = columnDimensions
    self.cellsPerColumn = cellsPerColumn
    self.activationThreshold = activationThreshold
    self.initialPermanence = initialPermanence
    self.connectedPermanence = connectedPermanence
    self.minThreshold = minThreshold
    self.maxNewSynapseCount = maxNewSynapseCount
    self.permanenceIncrement = permanenceIncrement
    self.permanenceDecrement = permanenceDecrement
    self.predictedSegmentDecrement = predictedSegmentDecrement
    # Initialize member variables
    self.connections = Connections(self.numberOfCells(),
                                   maxSegmentsPerCell=maxSegmentsPerCell,
                                   maxSynapsesPerSegment=maxSynapsesPerSegment)
    self._random = Random(seed)

    self.activeCells = set()
    self.predictiveCells = set()
    self.activeSegments = set()
    self.winnerCells = set()
    self.matchingSegments = set()
    self.matchingCells = set()

  # ==============================
  # Main functions
  # ==============================

  def compute(self, activeColumns, learn=True):
    """
    Feeds input record through TM, performing inference and learning.
    @param activeColumns (set)  Indices of active columns
    @param learn         (bool) Whether or not learning is enabled
    Updates member variables:
      - `activeCells`     (set)
      - `winnerCells`     (set)
      - `activeSegments`  (set)
      - `predictiveCells` (set)
      - `matchingSegments`(set)
      - `matchingCells`   (set)
    """
    prevPredictiveCells = self.predictiveCells
    prevActiveSegments = self.activeSegments
    prevActiveCells = self.activeCells
    prevWinnerCells = self.winnerCells
    prevMatchingSegments = self.matchingSegments
    prevMatchingCells = self.matchingCells

    activeCells = set()
    winnerCells = set()

    (_activeCells,
     _winnerCells,
     predictedActiveColumns,
     predictedInactiveCells) = self.activateCorrectlyPredictiveCells(
       prevPredictiveCells,
       prevMatchingCells,
       activeColumns)

    activeCells.update(_activeCells)
    winnerCells.update(_winnerCells)

    (_activeCells,
     _winnerCells,
     learningSegments) = self.burstColumns(activeColumns,
                                           predictedActiveColumns,
                                           prevActiveCells,
                                           prevWinnerCells,
                                           self.connections)

    activeCells.update(_activeCells)
    winnerCells.update(_winnerCells)

    if learn:
      self.learnOnSegments(prevActiveSegments,
                           learningSegments,
                           prevActiveCells,
                           winnerCells,
                           prevWinnerCells,
                           self.connections,
                           predictedInactiveCells,
                           prevMatchingSegments)

    (activeSegments,
     predictiveCells,
     matchingSegments,
     matchingCells) = self.computePredictiveCells(activeCells, self.connections)

    self.activeCells = activeCells
    self.winnerCells = winnerCells
    self.activeSegments = activeSegments
    self.predictiveCells = predictiveCells
    self.matchingSegments = matchingSegments
    self.matchingCells = matchingCells


  def reset(self):
    """
    Indicates the start of a new sequence. Resets sequence state of the TM.
    """
    self.activeCells = set()
    self.predictiveCells = set()
    self.activeSegments = set()
    self.winnerCells = set()


  # ==============================
  # Phases
  # ==============================

  def activateCorrectlyPredictiveCells(self,
                                       prevPredictiveCells,
                                       prevMatchingCells,
                                       activeColumns):
    """
    Phase 1: Activate the correctly predictive cells.
    Pseudocode:
      - for each prev predictive cell
        - if in active column
          - mark it as active
          - mark it as winner cell
          - mark column as predicted => active
        - if not in active column
          - mark it as a predicted but inactive cell
    @param prevPredictiveCells (set) Indices of predictive cells in `t-1`
    @param prevMatchingCells   (set) Indices of matching cells in `t-1`
    @param activeColumns       (set) Indices of active columns in `t`
    @return (tuple) Contains:
                      `activeCells`               (set),
                      `winnerCells`               (set),
                      `predictedActiveColumns`    (set),
                      `predictedInactiveCells`    (set)
    """
    activeCells = set()
    winnerCells = set()
    predictedActiveColumns = set()
    predictedInactiveCells = set()

    for cell in prevPredictiveCells:
      column = self.columnForCell(cell)

      if column in activeColumns:
        activeCells.add(cell)
        winnerCells.add(cell)
        predictedActiveColumns.add(column)

    if self.predictedSegmentDecrement > 0:
      for cell in prevMatchingCells:
        column = self.columnForCell(cell)

        if column not in activeColumns:
          predictedInactiveCells.add(cell)

    return (activeCells,
            winnerCells,
            predictedActiveColumns,
            predictedInactiveCells)


  def burstColumns(self,
                   activeColumns,
                   predictedActiveColumns,
                   prevActiveCells,
                   prevWinnerCells,
                   connections):
    """
    Phase 2: Burst unpredicted columns.
    Pseudocode:
      - for each unpredicted active column
        - mark all cells as active
        - mark the best matching cell as winner cell
          - (learning)
            - if it has no matching segment
              - (optimization) if there are prev winner cells
                - add a segment to it
            - mark the segment as learning
    @param activeColumns                   (set)         Indices of active columns in `t`
    @param predictedActiveColumns          (set)         Indices of predicted => active columns in `t`
    @param prevActiveCells                 (set)         Indices of active cells in `t-1`
    @param prevWinnerCells                 (set)         Indices of winner cells in `t-1`
    @param connections                     (Connections) Connectivity of layer
    @return (tuple) Contains:
                      `activeCells`      (set),
                      `winnerCells`      (set),
                      `learningSegments` (set)
    """
    activeCells = set()
    winnerCells = set()
    learningSegments = set()

    unpredictedActiveColumns = activeColumns - predictedActiveColumns

    # Sort unpredictedActiveColumns before iterating for compatibility with C++
    for column in sorted(unpredictedActiveColumns):
      cells = self.cellsForColumn(column)
      activeCells.update(cells)

      (bestCell,
       bestSegment) = self.bestMatchingCell(cells,
                                            prevActiveCells,
                                            connections)
      winnerCells.add(bestCell)

      if bestSegment is None and len(prevWinnerCells):
        bestSegment = connections.createSegment(bestCell)

      if bestSegment is not None:
        learningSegments.add(bestSegment)

    return activeCells, winnerCells, learningSegments


  def learnOnSegments(self,
                      prevActiveSegments,
                      learningSegments,
                      prevActiveCells,
                      winnerCells,
                      prevWinnerCells,
                      connections,
                      predictedInactiveCells,
                      prevMatchingSegments):
    """
    Phase 3: Perform learning by adapting segments.
    Pseudocode:
      - (learning) for each prev active or learning segment
        - if learning segment or from winner cell
          - strengthen active synapses
          - weaken inactive synapses
        - if learning segment
          - add some synapses to the segment
            - subsample from prev winner cells
      - if predictedSegmentDecrement > 0
        - for each previously matching segment
          - if cell is a predicted inactive cell
            - weaken active synapses but don't touch inactive synapses
    @param prevActiveSegments           (set)         Indices of active segments in `t-1`
    @param learningSegments             (set)         Indices of learning segments in `t`
    @param prevActiveCells              (set)         Indices of active cells in `t-1`
    @param winnerCells                  (set)         Indices of winner cells in `t`
    @param prevWinnerCells              (set)         Indices of winner cells in `t-1`
    @param connections                  (Connections) Connectivity of layer
    @param predictedInactiveCells       (set)         Indices of predicted inactive cells
    @param prevMatchingSegments         (set)         Indices of matching segments in `t-1`
    """
    segments = prevActiveSegments | learningSegments

    # Sort segments before iterating for compatibility with C++
    # Sort with primary key = cell idx, secondary key = segment idx
    segments = sorted(
      segments,
      key=lambda segment: (connections.cellForSegment(segment), segment))

    for segment in segments:
      isLearningSegment = segment in learningSegments
      isFromWinnerCell = connections.cellForSegment(segment) in winnerCells

      activeSynapses = self.activeSynapsesForSegment(
        segment, prevActiveCells, connections)

      if isLearningSegment or isFromWinnerCell:
        self.adaptSegment(segment, activeSynapses, connections,
                          self.permanenceIncrement,
                          self.permanenceDecrement)

      if isLearningSegment:
        n = self.maxNewSynapseCount - len(activeSynapses)
        # Fix for NUP #3268 is commented out for now until test failures are
        # addressed.
        # n = min(self.maxNewSynapseCount,
        #         connections.maxSynapsesPerSegment
        #         - len(connections.synapsesForSegment(segment)))

        for presynapticCell in self.pickCellsToLearnOn(n,
                                                       segment,
                                                       prevWinnerCells,
                                                       connections):
          connections.createSynapse(segment,
                                    presynapticCell,
                                    self.initialPermanence)

    if self.predictedSegmentDecrement > 0:
      for segment in prevMatchingSegments:
        isPredictedInactiveCell = connections.cellForSegment(segment) in predictedInactiveCells
        activeSynapses = self.activeSynapsesForSegment(
          segment, prevActiveCells, connections)

        if isPredictedInactiveCell:
          self.adaptSegment(segment, activeSynapses, connections,
                            -self.predictedSegmentDecrement,
                            0.0)



  def computePredictiveCells(self, activeCells, connections):
    """
    Phase 4: Compute predictive cells due to lateral input
    on distal dendrites.
    Pseudocode:
      - for each distal dendrite segment with activity >= activationThreshold
        - mark the segment as active
        - mark the cell as predictive
      - if predictedSegmentDecrement > 0
        - for each distal dendrite segment with unconnected
          activity >=  minThreshold
          - mark the segment as matching
          - mark the cell as matching
    Forward propagates activity from active cells to the synapses that touch
    them, to determine which synapses are active.
    @param activeCells (set)         Indices of active cells in `t`
    @param connections (Connections) Connectivity of layer
    @return (tuple) Contains:
                      `activeSegments`  (set),
                      `predictiveCells` (set),
                      `matchingSegments` (set),
                      `matchingCells`    (set)
    """
    numActiveConnectedSynapsesForSegment = defaultdict(int)
    numActiveSynapsesForSegment = defaultdict(int)
    activeSegments = set()
    predictiveCells = set()

    matchingSegments = set()
    matchingCells = set()

    for cell in activeCells:
      for synapseData in connections.synapsesForPresynapticCell(cell).values():
        segment = synapseData.segment
        permanence = synapseData.permanence

        if permanence >= self.connectedPermanence:
          numActiveConnectedSynapsesForSegment[segment] += 1

          if (numActiveConnectedSynapsesForSegment[segment] >=
              self.activationThreshold):
            activeSegments.add(segment)
            predictiveCells.add(connections.cellForSegment(segment))

        if permanence > 0 and self.predictedSegmentDecrement > 0:
          numActiveSynapsesForSegment[segment] += 1

          if numActiveSynapsesForSegment[segment] >= self.minThreshold:
            matchingSegments.add(segment)
            matchingCells.add(connections.cellForSegment(segment))

    return activeSegments, predictiveCells, matchingSegments, matchingCells


  # ==============================
  # Helper functions
  # ==============================

  def bestMatchingCell(self, cells, activeCells, connections):
    """
    Gets the cell with the best matching segment
    (see `TM.bestMatchingSegment`) that has the largest number of active
    synapses of all best matching segments.
    If none were found, pick the least used cell (see `TM.leastUsedCell`).
    @param cells                       (set)         Indices of cells
    @param activeCells                 (set)         Indices of active cells
    @param connections                 (Connections) Connectivity of layer
    @return (tuple) Contains:
                      `cell`        (int),
                      `bestSegment` (int)
    """
    maxSynapses = 0
    bestCell = None
    bestSegment = None

    for cell in cells:
      segment, numActiveSynapses = self.bestMatchingSegment(
        cell, activeCells, connections)

      if segment is not None and numActiveSynapses > maxSynapses:
        maxSynapses = numActiveSynapses
        bestCell = cell
        bestSegment = segment

    if bestCell is None:
      bestCell = self.leastUsedCell(cells, connections)

    return bestCell, bestSegment


  def bestMatchingSegment(self, cell, activeCells, connections):
    """
    Gets the segment on a cell with the largest number of active synapses,
    considering all synapses with non-zero permanence.
    @param cell                        (int)         Cell index
    @param activeCells                 (set)         Indices of active cells
    @param connections                 (Connections) Connectivity of layer
    @return (tuple) Contains:
                      `segment`            (int),
                      `numActiveSynapses`  (int)
    """
    maxSynapses = self.minThreshold
    bestSegment = None
    bestNumActiveSynapses = None

    for segment in connections.segmentsForCell(cell):
      numActiveSynapses = 0

      for synapse in connections.synapsesForSegment(segment):
        synapseData = connections.dataForSynapse(synapse)
        if ( (synapseData.presynapticCell in activeCells) and
            synapseData.permanence > 0):
          numActiveSynapses += 1

      if numActiveSynapses >= maxSynapses:
        maxSynapses = numActiveSynapses
        bestSegment = segment
        bestNumActiveSynapses = numActiveSynapses

    return bestSegment, bestNumActiveSynapses


  def leastUsedCell(self, cells, connections):
    """
    Gets the cell with the smallest number of segments.
    Break ties randomly.
    @param cells       (set)         Indices of cells
    @param connections (Connections) Connectivity of layer
    @return (int) Cell index
    """
    leastUsedCells = set()
    minNumSegments = float("inf")

    for cell in cells:
      numSegments = len(connections.segmentsForCell(cell))

      if numSegments < minNumSegments:
        minNumSegments = numSegments
        leastUsedCells = set()

      if numSegments == minNumSegments:
        leastUsedCells.add(cell)

    i = self._random.getUInt32(len(leastUsedCells))
    return sorted(leastUsedCells)[i]


  @staticmethod
  def activeSynapsesForSegment(segment, activeCells, connections):
    """
    Returns the synapses on a segment that are active due to lateral input
    from active cells.
    @param segment     (int)         Segment index
    @param activeCells (set)         Indices of active cells
    @param connections (Connections) Connectivity of layer
    @return (set) Indices of active synapses on segment
    """
    synapses = set()

    for synapse in connections.synapsesForSegment(segment):
      synapseData = connections.dataForSynapse(synapse)

      if synapseData.presynapticCell in activeCells:
        synapses.add(synapse)

    return synapses


  def adaptSegment(self, segment, activeSynapses, connections,
                   permanenceIncrement, permanenceDecrement):
    """
    Updates synapses on segment.
    Strengthens active synapses; weakens inactive synapses.
    @param segment              (int)         Segment index
    @param activeSynapses       (set)         Indices of active synapses
    @param connections          (Connections) Connectivity of layer
    @param permanenceIncrement  (float)  Amount to increment active synapses
    @param permanenceDecrement  (float)  Amount to decrement inactive synapses
    """
    # Need to copy synapses for segment set below because it will be modified
    # during iteration by `destroySynapse`
    for synapse in set(connections.synapsesForSegment(segment)):
      synapseData = connections.dataForSynapse(synapse)
      permanence = synapseData.permanence

      if synapse in activeSynapses:
        permanence += permanenceIncrement
      else:
        permanence -= permanenceDecrement

      # Keep permanence within min/max bounds
      permanence = max(0.0, min(1.0, permanence))

      if permanence < EPSILON:
        connections.destroySynapse(synapse)
      else:
        connections.updateSynapsePermanence(synapse, permanence)


  def pickCellsToLearnOn(self, n, segment, winnerCells, connections):
    """
    Pick cells to form distal connections to.
    TODO: Respect topology and learningRadius
    @param n           (int)         Number of cells to pick
    @param segment     (int)         Segment index
    @param winnerCells (set)         Indices of winner cells in `t`
    @param connections (Connections) Connectivity of layer
    @return (set) Indices of cells picked
    """
    candidates = set(winnerCells)

    # Remove cells that are already synapsed on by this segment
    for synapse in connections.synapsesForSegment(segment):
      synapseData = connections.dataForSynapse(synapse)
      presynapticCell = synapseData.presynapticCell

      if presynapticCell in candidates:
        candidates.remove(presynapticCell)

    n = min(n, len(candidates))
    candidates = sorted(candidates)
    cells = set()

    # Pick n cells randomly
    for _ in range(n):
      i = self._random.getUInt32(len(candidates))
      cells.add(candidates[i])
      del candidates[i]

    return cells


  def columnForCell(self, cell):
    """
    Returns the index of the column that a cell belongs to.
    @param cell (int) Cell index
    @return (int) Column index
    """
    self._validateCell(cell)
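    # Cells are numbered contiguously per column: with cellsPerColumn=32,
    # cells 0-31 belong to column 0, so cell 70 maps to column 2.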

    return int(cell / self.cellsPerColumn)


  def cellsForColumn(self, column):
    """
    Returns the indices of cells that belong to a column.
    @param column (int) Column index
    @return (set) Cell indices
    """
    self._validateColumn(column)

    start = self.cellsPerColumn * self.getCellIndex(column)
    end = start + self.cellsPerColumn
    return set(xrange(start, end))


  def numberOfColumns(self):
    """
    Returns the number of columns in this layer.
    @return (int) Number of columns
    """
    return reduce(mul, self.columnDimensions, 1)


  def numberOfCells(self):
    """
    Returns the number of cells in this layer.
    @return (int) Number of cells
    """
    return self.numberOfColumns() * self.cellsPerColumn


  def getActiveCells(self):
    """
    Returns the indices of the active cells.
    @return (list) Indices of active cells.
    """
    return self.getCellIndices(self.activeCells)


  def getPredictiveCells(self):
    """
    Returns the indices of the predictive cells.
    @return (list) Indices of predictive cells.
    """
    return self.getCellIndices(self.predictiveCells)


  def getWinnerCells(self):
    """
    Returns the indices of the winner cells.
    @return (list) Indices of winner cells.
    """
    return self.getCellIndices(self.winnerCells)


  def getMatchingCells(self):
    """
    Returns the indices of the matching cells.
    @return (list) Indices of matching cells.
    """
    return self.getCellIndices(self.matchingCells)


  def getColumnDimensions(self):
    """
    Returns the dimensions of the columns in the region.
    @return (tuple) Column dimensions
    """
    return self.columnDimensions


  def getCellsPerColumn(self):
    """
    Returns the number of cells per column.
    @return (int) The number of cells per column.
    """
    return self.cellsPerColumn


  def getActivationThreshold(self):
    """
    Returns the activation threshold.
    @return (int) The activation threshold.
    """
    return self.activationThreshold


  def setActivationThreshold(self, activationThreshold):
    """
    Sets the activation threshold.
    @param activationThreshold (int) activation threshold.
    """
    self.activationThreshold = activationThreshold


  def getInitialPermanence(self):
    """
    Get the initial permanence.
    @return (float) The initial permanence.
    """
    return self.initialPermanence


  def setInitialPermanence(self, initialPermanence):
    """
    Sets the initial permanence.
    @param initialPermanence (float) The initial permanence.
    """
    self.initialPermanence = initialPermanence


  def getMinThreshold(self):
    """
    Returns the min threshold.
    @return (int) The min threshold.
    """
    return self.minThreshold


  def setMinThreshold(self, minThreshold):
    """
    Sets the min threshold.
    @param minThreshold (int) min threshold.
    """
    self.minThreshold = minThreshold


  def getMaxNewSynapseCount(self):
    """
    Returns the max new synapse count.
    @return (int) The max new synapse count.
    """
    return self.maxNewSynapseCount


  def setMaxNewSynapseCount(self, maxNewSynapseCount):
    """
    Sets the max new synapse count.
    @param maxNewSynapseCount (int) Max new synapse count.
    """
    self.maxNewSynapseCount = maxNewSynapseCount


  def getPermanenceIncrement(self):
    """
    Get the permanence increment.
    @return (float) The permanence increment.
    """
    return self.permanenceIncrement


  def setPermanenceIncrement(self, permanenceIncrement):
    """
    Sets the permanence increment.
    @param permanenceIncrement (float) The permanence increment.
    """
    self.permanenceIncrement = permanenceIncrement


  def getPermanenceDecrement(self):
    """
    Get the permanence decrement.
    @return (float) The permanence decrement.
    """
    return self.permanenceDecrement


  def setPermanenceDecrement(self, permanenceDecrement):
    """
    Sets the permanence decrement.
    @param permanenceDecrement (float) The permanence decrement.
    """
    self.permanenceDecrement = permanenceDecrement


  def getPredictedSegmentDecrement(self):
    """
    Get the predicted segment decrement.
    @return (float) The predicted segment decrement.
    """
    return self.predictedSegmentDecrement


  def setPredictedSegmentDecrement(self, predictedSegmentDecrement):
    """
    Sets the predicted segment decrement.
    @param predictedSegmentDecrement (float) The predicted segment decrement.
    """
    self.predictedSegmentDecrement = predictedSegmentDecrement


  def getConnectedPermanence(self):
    """
    Get the connected permanence.
    @return (float) The connected permanence.
    """
    return self.connectedPermanence


  def setConnectedPermanence(self, connectedPermanence):
    """
    Sets the connected permanence.
    @param connectedPermanence (float) The connected permanence.
    """
    self.connectedPermanence = connectedPermanence


  def mapCellsToColumns(self, cells):
    """
    Maps cells to the columns they belong to
    @param cells (set) Cells
    @return (dict) Mapping from columns to their cells in `cells`
    """
    cellsForColumns = defaultdict(set)

    for cell in cells:
      column = self.columnForCell(cell)
      cellsForColumns[column].add(cell)

    return cellsForColumns


  def write(self, proto):
    """
    Writes serialized data to proto object
    @param proto (DynamicStructBuilder) Proto object
    """
    proto.columnDimensions = self.columnDimensions
    proto.cellsPerColumn = self.cellsPerColumn
    proto.activationThreshold = self.activationThreshold
    proto.initialPermanence = self.initialPermanence
    proto.connectedPermanence = self.connectedPermanence
    proto.minThreshold = self.minThreshold
    proto.maxNewSynapseCount = self.maxNewSynapseCount
    proto.permanenceIncrement = self.permanenceIncrement
    proto.permanenceDecrement = self.permanenceDecrement
    proto.predictedSegmentDecrement = self.predictedSegmentDecrement

    self.connections.write(proto.connections)
    self._random.write(proto.random)

    proto.activeCells = list(self.activeCells)
    proto.predictiveCells = list(self.predictiveCells)
    proto.activeSegments = list(self.activeSegments)
    proto.winnerCells = list(self.winnerCells)
    proto.matchingSegments = list(self.matchingSegments)
    proto.matchingCells = list(self.matchingCells)


  @classmethod
  def read(cls, proto):
    """
    Reads deserialized data from proto object
    @param proto (DynamicStructBuilder) Proto object
    @return (TemporalMemory) TemporalMemory instance
    """
    tm = object.__new__(cls)

    tm.columnDimensions = list(proto.columnDimensions)
    tm.cellsPerColumn = int(proto.cellsPerColumn)
    tm.activationThreshold = int(proto.activationThreshold)
    tm.initialPermanence = proto.initialPermanence
    tm.connectedPermanence = proto.connectedPermanence
    tm.minThreshold = int(proto.minThreshold)
    tm.maxNewSynapseCount = int(proto.maxNewSynapseCount)
    tm.permanenceIncrement = proto.permanenceIncrement
    tm.permanenceDecrement = proto.permanenceDecrement
    tm.predictedSegmentDecrement = proto.predictedSegmentDecrement

    tm.connections = Connections.read(proto.connections)
    tm._random = Random()
    tm._random.read(proto.random)

    tm.activeCells = set([int(x) for x in proto.activeCells])
    tm.predictiveCells = set([int(x) for x in proto.predictiveCells])
    tm.activeSegments = set([int(x) for x in proto.activeSegments])
    tm.winnerCells = set([int(x) for x in proto.winnerCells])
    tm.matchingSegments = set([int(x) for x in proto.matchingSegments])
    tm.matchingCells = set([int(x) for x in proto.matchingCells])

    return tm


  def __eq__(self, other):
    """
    Equality operator for TemporalMemory instances.
    Checks if two instances are functionally identical
    (might have different internal state).
    @param other (TemporalMemory) TemporalMemory instance to compare to
    """
    if self.columnDimensions != other.columnDimensions: return False
    if self.cellsPerColumn != other.cellsPerColumn: return False
    if self.activationThreshold != other.activationThreshold: return False
    if abs(self.initialPermanence - other.initialPermanence) > EPSILON:
      return False
    if abs(self.connectedPermanence - other.connectedPermanence) > EPSILON:
      return False
    if self.minThreshold != other.minThreshold: return False
    if self.maxNewSynapseCount != other.maxNewSynapseCount: return False
    if abs(self.permanenceIncrement - other.permanenceIncrement) > EPSILON:
      return False
    if abs(self.permanenceDecrement - other.permanenceDecrement) > EPSILON:
      return False
    if abs(self.predictedSegmentDecrement - other.predictedSegmentDecrement) > EPSILON:
      return False

    if self.connections != other.connections: return False

    if self.activeCells != other.activeCells: return False
    if self.predictiveCells != other.predictiveCells: return False
    if self.winnerCells != other.winnerCells: return False
    if self.matchingSegments != other.matchingSegments: return False
    if self.matchingCells != other.matchingCells: return False

    return True


  def __ne__(self, other):
    """
    Non-equality operator for TemporalMemory instances.
    Checks if two instances are not functionally identical
    (might have different internal state).
    @param other (TemporalMemory) TemporalMemory instance to compare to
    """
    return not self.__eq__(other)


  def _validateColumn(self, column):
    """
    Raises an error if column index is invalid.
    @param column (int) Column index
    """
    if column >= self.numberOfColumns() or column < 0:
      raise IndexError("Invalid column")


  def _validateCell(self, cell):
    """
    Raises an error if cell index is invalid.
    @param cell (int) Cell index
    """
    if cell >= self.numberOfCells() or cell < 0:
      raise IndexError("Invalid cell")


  @classmethod
  def getCellIndices(cls, cells):
    return [cls.getCellIndex(c) for c in cells]


  @staticmethod
  def getCellIndex(cell):
    return cell
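
# ---------------------------------------------------------------------------
# A hedged usage sketch (not part of the original example): train the
# TemporalMemory above on a short repeating column sequence, then check for
# predictions. Thresholds and initialPermanence are lowered here so the tiny
# sequence can form connected segments quickly. Assumes the Connections
# class and this example's module-level imports are available.

tm = TemporalMemory(columnDimensions=(64,),
                    cellsPerColumn=8,
                    activationThreshold=3,
                    minThreshold=2,
                    maxNewSynapseCount=5,
                    initialPermanence=0.6,
                    seed=42)

sequence = [set([0, 1, 2, 3, 4]),
            set([5, 6, 7, 8, 9]),
            set([10, 11, 12, 13, 14])]

for _ in xrange(3):            # repeat the sequence so segments can form
  for activeColumns in sequence:
    tm.compute(activeColumns, learn=True)
  tm.reset()                   # mark the end of the sequence

# Stepping through the start of the learned sequence again should now yield
# predictive cells in the columns of the next element.
tm.compute(sequence[0], learn=False)
print "predictive cells:", tm.getPredictiveCells()
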
Example #56
class SequenceMachine(object):
  """
  Base sequence machine class.
  """

  def __init__(self,
               patternMachine,
               seed=42):
    """
    @param patternMachine (PatternMachine) Pattern machine instance
    """
    # Save member variables
    self.patternMachine = patternMachine

    # Initialize member variables
    self._random = Random(seed)


  def generateFromNumbers(self, numbers):
    """
    Generate a sequence from a list of numbers.

    Note: Any `None` in the list of numbers is considered a reset.

    @param numbers (list) List of numbers

    @return (list) Generated sequence
    """
    sequence = []

    for number in numbers:
      if number is None:
        sequence.append(number)
      else:
        pattern = self.patternMachine.get(number)
        sequence.append(pattern)

    return sequence


  def addSpatialNoise(self, sequence, amount):
    """
    Add spatial noise to each pattern in the sequence.

    @param sequence (list)  Sequence
    @param amount   (float) Amount of spatial noise

    @return (list) Sequence with spatial noise
    """
    newSequence = []

    for pattern in sequence:
      if pattern is not None:
        pattern = self.patternMachine.addNoise(pattern, amount)
      newSequence.append(pattern)

    return newSequence


  def prettyPrintSequence(self, sequence, verbosity=1):
    """
    Pretty print a sequence.

    @param sequence  (list) Sequence
    @param verbosity (int)  Verbosity level

    @return (string) Pretty-printed text
    """
    text = ""

    for i in xrange(len(sequence)):
      pattern = sequence[i]

      if pattern is None:
        text += "<reset>"
        if i < len(sequence) - 1:
          text += "\n"
      else:
        text += self.patternMachine.prettyPrintPattern(pattern,
                                                       verbosity=verbosity)

    return text


  def generateNumbers(self, numSequences, sequenceLength, sharedRange=None):
    """
    @param numSequences   (int)   Number of sequences to return,
                                  separated by None
    @param sequenceLength (int)   Length of each sequence
    @param sharedRange    (tuple) (start index, end index) indicating range of
                                  shared subsequence in each sequence
                                  (None if no shared subsequences)
    @return (list) Numbers representing sequences
    """
    numbers = []

    if sharedRange:
      sharedStart, sharedEnd = sharedRange
      sharedLength = sharedEnd - sharedStart
      sharedNumbers = range(numSequences * sequenceLength,
                            numSequences * sequenceLength + sharedLength)

    for i in xrange(numSequences):
      start = i * sequenceLength
      newNumbers = np.array(range(start, start + sequenceLength), np.uint32)
      self._random.shuffle(newNumbers)
      newNumbers = list(newNumbers)

      if sharedRange is not None:
        newNumbers[sharedStart:sharedEnd] = sharedNumbers

      numbers += newNumbers
      numbers.append(None)

    return numbers
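
# ---------------------------------------------------------------------------
# A hedged usage sketch (not part of the original example): generating a
# noisy sequence with the SequenceMachine above. It uses the PatternMachine
# class shown in the next example, and assumes numpy is imported as np, as
# this example's code requires.

patternMachine = PatternMachine(n=1024, w=20, num=100, seed=42)
sequenceMachine = SequenceMachine(patternMachine, seed=42)

# Two shuffled 5-element sequences, separated by None resets.
numbers = sequenceMachine.generateNumbers(numSequences=2, sequenceLength=5)
sequence = sequenceMachine.generateFromNumbers(numbers)

# Flip each on bit with 5% probability.
noisy = sequenceMachine.addSpatialNoise(sequence, amount=0.05)

print sequenceMachine.prettyPrintSequence(noisy, verbosity=1)
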
Example #57
class PatternMachine(object):
  """
  Base pattern machine class.
  """

  def __init__(self,
               n,
               w,
               num=100,
               seed=42):
    """
    @param n   (int)      Number of available bits in pattern
    @param w   (int/list) Number of on bits in pattern
                          If list, each pattern will have a `w` randomly
                          selected from the list.
    @param num (int)      Number of available patterns
    """
    # Save member variables
    self._n = n
    self._w = w
    self._num = num

    # Initialize member variables
    self._random = Random(seed)
    self._patterns = dict()

    self._generate()


  def get(self, number):
    """
    Return a pattern for a number.

    @param number (int) Number of pattern

    @return (set) Indices of on bits
    """
    if number not in self._patterns:
      raise IndexError("Invalid number")

    return self._patterns[number]


  def addNoise(self, bits, amount):
    """
    Add noise to pattern.

    @param bits   (set)   Indices of on bits
    @param amount (float) Probability of switching an on bit with a random bit

    @return (set) Indices of on bits in noisy pattern
    """
    newBits = set()

    for bit in bits:
      if self._random.getReal64() < amount:
        newBits.add(self._random.getUInt32(self._n))
      else:
        newBits.add(bit)

    return newBits


  def numbersForBit(self, bit):
    """
    Return the set of pattern numbers that match a bit.

    @param bit (int) Index of bit

    @return (set) Indices of numbers
    """
    if bit >= self._n:
      raise IndexError("Invalid bit")

    numbers = set()

    for index, pattern in self._patterns.iteritems():
      if bit in pattern:
        numbers.add(index)

    return numbers


  def numberMapForBits(self, bits):
    """
    Return a map from number to matching on bits,
    for all numbers that match a set of bits.

    @param bits (set) Indices of bits

    @return (dict) Mapping from number => on bits.
    """
    numberMap = dict()

    for bit in bits:
      numbers = self.numbersForBit(bit)

      for number in numbers:
        if number not in numberMap:
          numberMap[number] = set()

        numberMap[number].add(bit)

    return numberMap


  def prettyPrintPattern(self, bits, verbosity=1):
    """
    Pretty print a pattern.

    @param bits      (set) Indices of on bits
    @param verbosity (int) Verbosity level

    @return (string) Pretty-printed text
    """
    numberMap = self.numberMapForBits(bits)
    text = ""

    numberList = []
    numberItems = sorted(numberMap.iteritems(),
                         key=lambda (number, bits): len(bits),
                         reverse=True)

    for number, bits in numberItems:

      if verbosity > 2:
        strBits = [str(n) for n in bits]
        numberText = "{0} (bits: {1})".format(number, ",".join(strBits))
      elif verbosity > 1:
        numberText = "{0} ({1} bits)".format(number, len(bits))
      else:
        numberText = str(number)

      numberList.append(numberText)

    text += "[{0}]".format(", ".join(numberList))

    return text


  def _generate(self):
    """
    Generates set of random patterns.
    """
    candidates = np.array(range(self._n), np.uint32)
    for i in xrange(self._num):
      self._random.shuffle(candidates)
      pattern = candidates[0:self._getW()]
      self._patterns[i] = set(pattern)


  def _getW(self):
    """
    Gets a value of `w` for use in generating a pattern.
    """
    w = self._w

    if type(w) is list:
      return w[self._random.getUInt32(len(w))]
    else:
      return w
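
# ---------------------------------------------------------------------------
# A hedged usage sketch (not part of the original example): the
# PatternMachine above hands out a fixed random SDR per number; addNoise
# flips each on bit with the given probability, so the overlap with the
# original pattern degrades smoothly as the noise amount grows.

patternMachine = PatternMachine(n=1024, w=20, num=50, seed=42)

pattern = patternMachine.get(0)
noisy = patternMachine.addNoise(pattern, amount=0.2)

print "on bits in original:", len(pattern)
print "overlap after 20% noise:", len(pattern & noisy)
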
Example #58
class SDRCategoryEncoder(Encoder):
  """
  Encodes a list of discrete categories (described by strings) that aren't
  related to each other.

  Each encoding is an SDR in which w out of n bits are turned on.

  Unknown categories are encoded as a single value.

  Internally we use a :class:`.ScalarEncoder` with a radius of 1, but since we
  only encode integers, we never get mixture outputs.

  The :class:`.CategoryEncoder` uses a different method to encode categories.

  :param categoryList: list of discrete string categories, if ``None`` then
                       categories will automatically be added as they are
                       encountered
  :param forced: if True, skip checks for parameters' settings; see
                 :class:`.ScalarEncoder` for details. (default False)
  """


  def __init__(self, n, w, categoryList = None, name="category", verbosity=0,
               encoderSeed=1, forced=False):
    self.n = n
    self.w = w

    self._learningEnabled = True

    # initialize the random number generators
    self._seed(encoderSeed)

    if not forced:
      # -- this is just to catch bad parameter choices
      if (self.n/self.w) < 2: # w must be less than 50% of n
        raise ValueError("Number of ON bits in SDR (%d) must be much smaller than "
                           "the output width (%d)" % (self.w, self.n))

      # Another arbitrary cutoff to catch likely mistakes
      if self.w < 21:
        raise ValueError("Number of bits in the SDR (%d) must be greater "
                         "than 2, and should be >= 21; pass forced=True to "
                         "init() to override this check" % self.w)

    self._initOverlap()

    self.verbosity = verbosity

    self.description = [(name, 0)]
    self.name = name

    self.categoryToIndex = dict()
    self.ncategories = 0
    self.categories = list()
    self.sdrs = None

    # Always include an 'unknown' category for
    # edge cases

    self._addCategory("<UNKNOWN>")
    if categoryList is None:
      self._learningEnabled = True
    else:
      self._learningEnabled = False
      for category in categoryList:
        self._addCategory(category)
      assert self.ncategories == len(categoryList) + 1

    # Not used by this class. Used for decoding (scalarsToStr())
    self.encoders = None

    # This matrix is used for the topDownCompute. We build it the first time
    #  topDownCompute is called
    self._topDownMappingM = None
    self._topDownValues = None


  def _initOverlap(self):
    # Calculate average overlap of SDRs for decoding
    # Density is fraction of bits on, and it is also the
    # probability that any individual bit is on.
    density = float(self.w) / self.n
    self.averageOverlap = self.w * density
    # We can do a better job of calculating the threshold. For now, just
    # something quick and dirty, which is the midway point between average
    # and full overlap. averageOverlap is always < w,  so the threshold
    # is always < w.
    self.thresholdOverlap = int((self.averageOverlap + self.w)/2)
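    # Worked example: n=100, w=21 gives density 0.21 and averageOverlap 4.41,
    # so the midpoint is int((4.41 + 21) / 2) = 12; the check below then
    # raises the threshold to w - 3 = 18.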
    #  1.25 -- too sensitive for decode test, so make it less sensitive
    if self.thresholdOverlap < self.w - 3:
      self.thresholdOverlap = self.w - 3


  def __setstate__(self, state):
    self.__dict__.update(state)

    # Initialize self.random as an instance of NupicRandom derived from the
    # previous numpy random state
    randomState = state["random"]
    if isinstance(randomState, numpy.random.mtrand.RandomState):
      self.random = NupicRandom(randomState.randint(sys.maxint))


  def _seed(self, seed=-1):
    """
    Initialize the random seed
    """
    if seed != -1:
      self.random = NupicRandom(seed)
    else:
      self.random = NupicRandom()


  def getDecoderOutputFieldTypes(self):
    """ [Encoder class virtual method override]
    """
    # TODO: change back to string meta-type after the decoding logic is fixed
    #       to output strings instead of internal index values.
    return (FieldMetaType.string,)
    #return (FieldMetaType.integer,)


  def _addCategory(self, category):
    if category in self.categories:
      raise RuntimeError("Attempt to add encoder category '%s' "
                         "that already exists" % category)

    if self.sdrs is None:
      assert self.ncategories == 0
      assert len(self.categoryToIndex) == 0
      # Initial allocation -- 16 rows
      self.sdrs = numpy.zeros((16, self.n), dtype='uint8')
    elif self.ncategories > self.sdrs.shape[0] - 2:
      # Preallocated sdrs are used up. Double our size
      currentMax = self.sdrs.shape[0]
      newsdrs = numpy.zeros((currentMax * 2, self.n), dtype='uint8')
      newsdrs[0:currentMax] = self.sdrs[0:currentMax]
      self.sdrs = newsdrs

    newrep = self._newRep()
    self.sdrs[self.ncategories] = newrep
    self.categories.append(category)
    self.categoryToIndex[category] = self.ncategories
    self.ncategories += 1
    self._topDownMappingM = None


  def _newRep(self):
    """Generate a new and unique representation. Returns a numpy array
    of shape (n,). """
    maxAttempts = 1000

    for _ in xrange(maxAttempts):
      foundUnique = True
      population = numpy.arange(self.n, dtype=numpy.uint32)
      choices = numpy.arange(self.w, dtype=numpy.uint32)
      oneBits = sorted(self.random.sample(population, choices))
      sdr = numpy.zeros(self.n, dtype='uint8')
      sdr[oneBits] = 1
      for i in xrange(self.ncategories):
        if (sdr == self.sdrs[i]).all():
          foundUnique = False
          break
      if foundUnique:
        break
    if not foundUnique:
      raise RuntimeError("Error, could not find unique pattern %d after "
                         "%d attempts" % (self.ncategories, maxAttempts))
    return sdr


  def getWidth(self):
    return self.n


  def getDescription(self):
    return self.description


  def getScalars(self, input):
    """ See method description in base.py """
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return numpy.array([0])

    index = self.categoryToIndex.get(input, None)
    if index is None:
      if self._learningEnabled:
        self._addCategory(input)
        index = self.ncategories - 1
      else:
        # if not found, we encode category 0
        index = 0

    return numpy.array([index])


  def getBucketIndices(self, input):
    """ See method description in base.py """

    # For category encoder, the "scalar" we map to each category is the
    #  bucket index
    return self.getScalars(input)


  def encodeIntoArray(self, input, output):
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      output[0:self.n] = 0
      index = 0
    else:
      index = self.getBucketIndices(input)[0]
      output[0:self.n] = self.sdrs[index,:]

    if self.verbosity >= 2:
      print "input:", input, "index:", index, "output:", output
      print "decoded:", self.decodedToStr(self.decode(output))


  def decode(self, encoded, parentFieldName=''):
    """ See the function description in base.py
    """

    assert (encoded[0:self.n] <= 1.0).all()

    resultString =  ""
    resultRanges = []

    overlaps =  (self.sdrs * encoded[0:self.n]).sum(axis=1)

    if self.verbosity >= 2:
      print "Overlaps for decoding:"
      for i in xrange(0, self.ncategories):
        print "%d %s" % (overlaps[i], self.categories[i])

    matchingCategories =  (overlaps > self.thresholdOverlap).nonzero()[0]

    for index in matchingCategories:
      if resultString != "":
        resultString += " "
      resultString +=  str(self.categories[index])
      resultRanges.append([int(index),int(index)])

    if parentFieldName != '':
      fieldName = "%s.%s" % (parentFieldName, self.name)
    else:
      fieldName = self.name
    return ({fieldName: (resultRanges, resultString)}, [fieldName])


  def _getTopDownMapping(self):
    """ Return the interal _topDownMappingM matrix used for handling the
    bucketInfo() and topDownCompute() methods. This is a matrix, one row per
    category (bucket) where each row contains the encoded output for that
    category.
    """

    # -------------------------------------------------------------------------
    # Do we need to build up our reverse mapping table?
    if self._topDownMappingM is None:

      # Each row represents an encoded output pattern
      self._topDownMappingM = SM32(self.ncategories, self.n)

      outputSpace = numpy.zeros(self.n, dtype=GetNTAReal())
      for i in xrange(self.ncategories):
        self.encodeIntoArray(self.categories[i], outputSpace)
        self._topDownMappingM.setRowFromDense(i, outputSpace)

    return self._topDownMappingM


  def getBucketValues(self):
    """ See the function description in base.py """

    return self.categories


  def getBucketInfo(self, buckets):
    """ See the function description in base.py
    """

    if self.ncategories == 0:
      return 0

    topDownMappingM = self._getTopDownMapping()

    categoryIndex = buckets[0]
    category = self.categories[categoryIndex]
    encoding = topDownMappingM.getRow(categoryIndex)

    return [EncoderResult(value=category, scalar=categoryIndex,
                          encoding=encoding)]


  def topDownCompute(self, encoded):
    """ See the function description in base.py
    """

    if self.ncategories == 0:
      return 0

    topDownMappingM = self._getTopDownMapping()

    categoryIndex = topDownMappingM.rightVecProd(encoded).argmax()
    category = self.categories[categoryIndex]
    encoding = topDownMappingM.getRow(categoryIndex)

    return EncoderResult(value=category, scalar=categoryIndex, encoding=encoding)


  def closenessScores(self, expValues, actValues, fractional=True):
    """ See the function description in base.py

    The "fractional" keyword, when False, inverts the returned score.
    """

    expValue = expValues[0]
    actValue = actValues[0]

    if expValue == actValue:
      closeness = 1.0
    else:
      closeness = 0.0

    if not fractional:
      closeness = 1.0 - closeness

    return numpy.array([closeness])


  @classmethod
  def getSchema(cls):
    return SDRCategoryEncoderProto

  @classmethod
  def read(cls, proto):
    encoder = object.__new__(cls)

    encoder.n = proto.n
    encoder.w = proto.w
    encoder.random = NupicRandom()
    encoder.random.read(proto.random)
    encoder.verbosity = proto.verbosity
    encoder.name = proto.name
    encoder.description = [(proto.name, 0)]
    encoder.categories = list(proto.categories)
    encoder.sdrs = numpy.array(proto.sdrs, dtype=numpy.uint8)

    encoder.categoryToIndex = {category:index
                               for index, category
                               in enumerate(encoder.categories)}
    encoder.ncategories = len(encoder.categories)
    encoder._learningEnabled = proto.learningEnabled
    encoder._initOverlap()
    encoder._topDownMappingM = None
    encoder._topDownValues = None
    encoder.encoders = None

    return encoder


  def write(self, proto):
    proto.n = self.n
    proto.w = self.w
    self.random.write(proto.random)
    proto.verbosity = self.verbosity
    proto.name = self.name
    proto.categories = self.categories
    proto.sdrs = self.sdrs.tolist()
    proto.learningEnabled = self._learningEnabled
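
# ---------------------------------------------------------------------------
# A hedged usage sketch (not part of the original example): encoding a couple
# of categories with the SDRCategoryEncoder above and checking that distinct
# categories get nearly non-overlapping SDRs. encodeIntoArray is called
# directly, and this example's module-level imports (NupicRandom, the Encoder
# base class, ...) are assumed to be available.

import numpy

encoder = SDRCategoryEncoder(n=100, w=21, categoryList=["cat", "dog"])

catSDR = numpy.zeros(encoder.getWidth(), dtype="uint8")
dogSDR = numpy.zeros(encoder.getWidth(), dtype="uint8")
encoder.encodeIntoArray("cat", catSDR)
encoder.encodeIntoArray("dog", dogSDR)

print "on bits per SDR:", catSDR.sum(), dogSDR.sum()
print "overlap between categories:", (catSDR * dogSDR).sum()
print "decoded:", encoder.decode(catSDR)[0]

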
class RandomDistributedScalarEncoder(Encoder):
  """
  A scalar encoder encodes a numeric (floating point) value into an array
  of bits.

  This class maps a scalar value into a random distributed representation that
  is suitable as scalar input into the spatial pooler. The encoding scheme is
  designed to replace a simple ScalarEncoder. It preserves the important
  properties around overlapping representations. Unlike ScalarEncoder, the min
  and max range can be dynamically increased without any negative effects. The
  only required parameter is resolution, which determines the resolution of
  input values.

  Scalar values are mapped to a bucket. The class maintains a random distributed
  encoding for each bucket. The following properties are maintained by
  RandomDistributedScalarEncoder:

  1) Similar scalars should have high overlap. Overlap should decrease smoothly
  as scalars become less similar. Specifically, neighboring bucket indices must
  overlap by a linearly decreasing number of bits.

  2) Dissimilar scalars should have very low overlap so that the SP does not
  confuse representations. Specifically, buckets that are more than w indices
  apart should have at most maxOverlap bits of overlap. We arbitrarily (and
  safely) define "very low" to be 2 bits of overlap or lower.

  Properties 1 and 2 lead to the following overlap rules for buckets i and j:

      If abs(i-j) < w then:
        overlap(i,j) = w - abs(i-j)
      else:
        overlap(i,j) <= maxOverlap

  3) The representation for a scalar must not change during the lifetime of
  the object. Specifically, as new buckets are created and the min/max range
  is extended, the representation for previously in-range scalars and
  previously created buckets must not change.
  """


  def __init__(self, resolution, w=21, n=400, name=None, offset=None,
               seed=42, verbosity=0):
    """Constructor

    @param resolution A floating point positive number denoting the resolution
                    of the output representation. Numbers within
                    [offset-resolution/2, offset+resolution/2] will fall into
                    the same bucket and thus have an identical representation.
                    Adjacent buckets will differ in one bit. resolution is a
                    required parameter.

    @param w Number of bits to set in output. w must be odd to avoid centering
                    problems.  w must be large enough that spatial pooler
                    columns will have a sufficiently large overlap to avoid
                    false matches. A value of w=21 is typical.

    @param n Number of bits in the representation (must be > w). n must be
                    large enough such that there is enough room to select
                    new representations as the range grows. With w=21 a value
                    of n=400 is typical. The class enforces n > 6*w.

    @param name An optional string which will become part of the description.

    @param offset A floating point offset used to map scalar inputs to bucket
                    indices. The middle bucket will correspond to numbers in the
                    range [offset - resolution/2, offset + resolution/2). If set
                    to None, the very first input that is encoded will be used
                    to determine the offset.

    @param seed The seed used for numpy's random number generator. If set to -1
                    the generator will be initialized without a fixed seed.

    @param verbosity An integer controlling the level of debugging output. A
                    value of 0 implies no output. verbosity=1 may lead to
                    one-time printouts during construction, serialization or
                    deserialization. verbosity=2 may lead to some output per
                    encode operation. verbosity>2 may lead to significantly
                    more output.
    """
    # Validate inputs
    if (w <= 0) or (w%2 == 0):
      raise ValueError("w must be an odd positive integer")

    if resolution <= 0:
      raise ValueError("resolution must be a positive number")

    if (n <= 6*w) or (not isinstance(n, int)):
      raise ValueError("n must be an int strictly greater than 6*w. For "
                       "good results we recommend n be strictly greater "
                       "than 11*w")

    self.encoders = None
    self.verbosity = verbosity
    self.w = w
    self.n = n
    self.resolution = float(resolution)

    # The largest overlap we allow for non-adjacent encodings
    self._maxOverlap = 2

    # initialize the random number generators
    self._seed(seed)

    # Internal parameters for bucket mapping
    self.minIndex = None
    self.maxIndex = None
    self._offset = None
    self._initializeBucketMap(INITIAL_BUCKETS, offset)

    # A name used for debug printouts
    if name is not None:
      self.name = name
    else:
      self.name = "[%s]" % (self.resolution)

    if self.verbosity > 0:
      self.dump()


  def __setstate__(self, state):
    self.__dict__.update(state)

    # Initialize self.random as an instance of NupicRandom derived from the
    # previous numpy random state
    randomState = state["random"]
    if isinstance(randomState, numpy.random.mtrand.RandomState):
      self.random = NupicRandom(randomState.randint(sys.maxint))


  def _seed(self, seed=-1):
    """
    Initialize the random seed
    """
    if seed != -1:
      self.random = NupicRandom(seed)
    else:
      self.random = NupicRandom()


  def getDecoderOutputFieldTypes(self):
    """ See method description in base.py """
    return (FieldMetaType.float, )


  def getWidth(self):
    """ See method description in base.py """
    return self.n


  def getDescription(self):
    return [(self.name, 0)]


  def getBucketIndices(self, x):
    """ See method description in base.py """

    if ((isinstance(x, float) and math.isnan(x)) or
        x == SENTINEL_VALUE_FOR_MISSING_DATA):
      return [None]

    if self._offset is None:
      self._offset = x

    bucketIdx = (
        (self._maxBuckets/2) + int(round((x - self._offset) / self.resolution))
    )

    if bucketIdx < 0:
      bucketIdx = 0
    elif bucketIdx >= self._maxBuckets:
      bucketIdx = self._maxBuckets-1

    return [bucketIdx]
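
  # Worked example (illustrative, not in the original source): with
  # resolution=1.0 and _maxBuckets=1000 (assuming INITIAL_BUCKETS=1000), if
  # the first encoded value was x0=23.0 (becoming the offset), then x=27.4
  # maps to
  #   bucketIdx = 1000/2 + int(round((27.4 - 23.0) / 1.0)) = 500 + 4 = 504
  # before clipping to the valid range [0, _maxBuckets).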


  def mapBucketIndexToNonZeroBits(self, index):
    """
    Given a bucket index, return the list of non-zero bits. If the bucket
    index does not exist, it is created. If the index falls outside our range
    we clip it.

    @param index The bucket index to get non-zero bits for.
    @returns numpy array of indices of non-zero bits for specified index.
    """
    if index < 0:
      index = 0

    if index >= self._maxBuckets:
      index = self._maxBuckets-1

    if index not in self.bucketMap:
      if self.verbosity >= 2:
        print "Adding additional buckets to handle index=", index
      self._createBucket(index)
    return self.bucketMap[index]


  def encodeIntoArray(self, x, output):
    """ See method description in base.py """

    if x is not None and not isinstance(x, numbers.Number):
      raise TypeError(
          "Expected a scalar input but got input of type %s" % type(x))

    # Get the bucket index to use
    bucketIdx = self.getBucketIndices(x)[0]

    # None is returned for a missing value, in which case we output all 0's.
    output[0:self.n] = 0
    if bucketIdx is not None:
      output[self.mapBucketIndexToNonZeroBits(bucketIdx)] = 1


  def _createBucket(self, index):
    """
    Create the given bucket index. Recursively create as many in-between
    bucket indices as necessary.
    """
    if index < self.minIndex:
      if index == self.minIndex - 1:
        # Create a new representation that has exactly w-1 overlapping bits
        # as the min representation
        self.bucketMap[index] = self._newRepresentation(self.minIndex,
                                                        index)
        self.minIndex = index
      else:
        # Recursively create all the indices above and then this index
        self._createBucket(index+1)
        self._createBucket(index)
    else:
      if index == self.maxIndex + 1:
        # Create a new representation that has exactly w-1 overlapping bits
        # as the max representation
        self.bucketMap[index] = self._newRepresentation(self.maxIndex,
                                                        index)
        self.maxIndex = index
      else:
        # Recursively create all the indices below and then this index
        self._createBucket(index-1)
        self._createBucket(index)
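
  # Example (illustrative): if minIndex is 500 and _createBucket(497) is
  # called, the recursion first creates bucket 499 from 500, then 498 from
  # 499, and finally 497 from 498, so each new bucket overlaps its neighbor
  # in exactly w-1 bits.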


  def _newRepresentation(self, index, newIndex):
    """
    Return a new representation for newIndex that overlaps with the
    representation at index by exactly w-1 bits
    """
    newRepresentation = self.bucketMap[index].copy()

    # Choose the bit we will replace in this representation. We need to shift
    # this bit deterministically. If this is always chosen randomly then there
    # is a 1 in w chance of the same bit being replaced in neighboring
    # representations, which is fairly high
    ri = newIndex % self.w

    # Now we choose a bit such that the overlap rules are satisfied.
    newBit = self.random.getUInt32(self.n)
    newRepresentation[ri] = newBit
    while newBit in self.bucketMap[index] or \
          not self._newRepresentationOK(newRepresentation, newIndex):
      self.numTries += 1
      newBit = self.random.getUInt32(self.n)
      newRepresentation[ri] = newBit

    return newRepresentation
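
  # Example (illustrative): with w=21, creating bucket 505 replaces slot
  # 505 % 21 = 1 of the neighboring representation, while bucket 506 replaces
  # slot 506 % 21 = 2, so adjacent buckets never recycle the same slot.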


  def _newRepresentationOK(self, newRep, newIndex):
    """
    Return True if this new candidate representation satisfies all our overlap
    rules. Since we know that neighboring representations differ by at most
    one bit, we compute running overlaps.
    """
    if newRep.size != self.w:
      return False
    if (newIndex < self.minIndex-1) or (newIndex > self.maxIndex+1):
      raise ValueError("newIndex must be within one of existing indices")

    # A binary representation of newRep. We will use this to test containment
    newRepBinary = numpy.array([False]*self.n)
    newRepBinary[newRep] = True

    # Midpoint
    midIdx = self._maxBuckets/2

    # Start by checking the overlap at minIndex
    runningOverlap = self._countOverlap(self.bucketMap[self.minIndex], newRep)
    if not self._overlapOK(self.minIndex, newIndex, overlap=runningOverlap):
      return False

    # Compute running overlaps all the way to the midpoint
    for i in range(self.minIndex+1, midIdx+1):
      # This is the bit that is going to change
      newBit = (i-1)%self.w

      # Update our running overlap
      if newRepBinary[self.bucketMap[i-1][newBit]]:
        runningOverlap -= 1
      if newRepBinary[self.bucketMap[i][newBit]]:
        runningOverlap += 1

      # Verify our rules
      if not self._overlapOK(i, newIndex, overlap=runningOverlap):
        return False

    # At this point, runningOverlap contains the overlap for midIdx
    # Compute running overlaps all the way to maxIndex
    for i in range(midIdx+1, self.maxIndex+1):
      # This is the bit that is going to change
      newBit = i%self.w

      # Update our running overlap
      if newRepBinary[self.bucketMap[i-1][newBit]]:
        runningOverlap -= 1
      if newRepBinary[self.bucketMap[i][newBit]]:
        runningOverlap += 1

      # Verify our rules
      if not self._overlapOK(i, newIndex, overlap=runningOverlap):
        return False

    return True
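
  # Note (illustrative): the running-overlap update above relies on the fact
  # that, by construction in _newRepresentation, consecutive buckets differ
  # in exactly one bit position -- bit (i-1) % w between buckets i-1 and i
  # below the midpoint, and bit i % w above it -- so each step only has to
  # test whether the outgoing and incoming bits belong to the candidate.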


  def _countOverlapIndices(self, i, j):
    """
    Return the overlap between bucket indices i and j
    """
    if i in self.bucketMap and j in self.bucketMap:
      iRep = self.bucketMap[i]
      jRep = self.bucketMap[j]
      return self._countOverlap(iRep, jRep)
    else:
      raise ValueError("Either i or j doesn't exist")


  @staticmethod
  def _countOverlap(rep1, rep2):
    """
    Return the overlap between two representations. rep1 and rep2 are lists of
    non-zero indices.
    """
    overlap = 0
    for e in rep1:
      if e in rep2:
        overlap += 1
    return overlap
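
  # A vectorized equivalent (a sketch, assuming rep1 and rep2 contain unique
  # indices, which bucket representations here do by construction):
  #   overlap = numpy.intersect1d(rep1, rep2).size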


  def _overlapOK(self, i, j, overlap=None):
    """
    Return True if the given overlap between bucket indices i and j is
    acceptable. If overlap is not specified, calculate it from the bucketMap.
    """
    if overlap is None:
      overlap = self._countOverlapIndices(i, j)
    if abs(i-j) < self.w:
      # Nearby buckets must overlap in exactly w - |i-j| bits
      return overlap == (self.w - abs(i-j))
    # Distant buckets may share at most _maxOverlap bits
    return overlap <= self._maxOverlap
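
  # Concrete cases (illustrative): with w=21 and _maxOverlap=2, buckets 100
  # and 101 must overlap in exactly 20 bits, buckets 100 and 110 in exactly
  # 11 bits, and buckets 100 and 200 in at most 2 bits.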


  def _initializeBucketMap(self, maxBuckets, offset):
    """
    Initialize the bucket map assuming the given number of maxBuckets.
    """
    # The first bucket index will be _maxBuckets / 2 and bucket indices will be
    # allowed to grow lower or higher as long as they don't become negative.
    # _maxBuckets is required because the current CLA Classifier assumes bucket
    # indices must be non-negative. This normally does not need to be changed
    # but if altered, should be set to an even number.
    self._maxBuckets = maxBuckets
    self.minIndex = self._maxBuckets / 2
    self.maxIndex = self._maxBuckets / 2

    # The scalar offset used to map scalar values to bucket indices. The middle
    # bucket will correspond to numbers in the range
    # [offset-resolution/2, offset+resolution/2).
    # The bucket index for a number x will be:
    #     maxBuckets/2 + int( round( (x-offset)/resolution ) )
    self._offset = offset

    # This dictionary maps a bucket index into its bit representation
    # We initialize the class with a single bucket with index 0
    self.bucketMap = {}

    def _permutation(n):
      r = numpy.arange(n, dtype=numpy.uint32)
      self.random.shuffle(r)
      return r

    self.bucketMap[self.minIndex] = _permutation(self.n)[0:self.w]

    # How often we need to retry when generating valid encodings
    self.numTries = 0
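
  # Note (illustrative): the initial bucket above is simply the first w
  # entries of a random permutation of range(n), i.e. a uniformly drawn set
  # of w distinct bit indices out of n.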


  def dump(self):
    print "RandomDistributedScalarEncoder:"
    print "  minIndex:   %d" % self.minIndex
    print "  maxIndex:   %d" % self.maxIndex
    print "  w:          %d" % self.w
    print "  n:          %d" % self.getWidth()
    print "  resolution: %g" % self.resolution
    print "  offset:     %s" % str(self._offset)
    print "  numTries:   %d" % self.numTries
    print "  name:       %s" % self.name
    if self.verbosity > 2:
      print "  All buckets:     "
      pprint.pprint(self.bucketMap)


  @classmethod
  def read(cls, proto):
    encoder = object.__new__(cls)
    encoder.resolution = proto.resolution
    encoder.w = proto.w
    encoder.n = proto.n
    encoder.name = proto.name
    encoder._offset = proto.offset
    encoder.random = NupicRandom()
    encoder.random.read(proto.random)
    encoder.verbosity = proto.verbosity
    encoder.minIndex = proto.minIndex
    encoder.maxIndex = proto.maxIndex
    encoder.encoders = None
    encoder._maxBuckets = INITIAL_BUCKETS
    # _maxOverlap and numTries are set in __init__ but are not part of the
    # serialized state, so restore their defaults here; without them the
    # deserialized encoder cannot grow new buckets or dump() its state.
    encoder._maxOverlap = 2
    encoder.numTries = 0
    encoder.bucketMap = {x.key: numpy.array(x.value, dtype=numpy.uint32)
                         for x in proto.bucketMap}

    return encoder


  def write(self, proto):
    proto.resolution = self.resolution
    proto.w = self.w
    proto.n = self.n
    proto.name = self.name
    proto.offset = self._offset
    self.random.write(proto.random)
    proto.verbosity = self.verbosity
    proto.minIndex = self.minIndex
    proto.maxIndex = self.maxIndex
    proto.bucketMap = [{"key": key, "value": value.tolist()}
                       for key, value in self.bucketMap.items()]
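
  # Usage sketch (added for illustration; not part of the original source).
  # Assumes this class is available as RandomDistributedScalarEncoder -- the
  # name printed by dump() above -- and that numpy is imported:
  #
  #   enc = RandomDistributedScalarEncoder(resolution=1.0, w=21, n=400)
  #   out = numpy.zeros(enc.getWidth(), dtype=numpy.uint8)
  #   enc.encodeIntoArray(23.0, out)
  #   print "Active bits for 23.0:", out.nonzero()[0]
  #   enc.encodeIntoArray(23.4, out)   # same bucket -> identical encoding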