Example #1
  def testShuffleEmpty(self):
    r = Random(42)
    arr = numpy.zeros([0], dtype="uint32")

    r.shuffle(arr)

    self.assertEqual(arr.size, 0)
Example #2
  def testShuffle(self):
    r = Random(42)
    arr = numpy.array([1, 2, 3, 4], dtype="uint32")

    r.shuffle(arr)

    self.assertEqual(arr[0], 3)
    self.assertEqual(arr[1], 4)
    self.assertEqual(arr[2], 2)
    self.assertEqual(arr[3], 1)
Example #3
  def testShuffle(self):
    r = Random(42)
    arr = numpy.array([1, 2, 3, 4], dtype="uint32")

    r.shuffle(arr)

    self.assertEqual(arr[0], 2)
    self.assertEqual(arr[1], 1)
    self.assertEqual(arr[2], 4)
    self.assertEqual(arr[3], 3)
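
Both variants pin the exact permutation produced by seed 42, so the expected orderings are implementation-specific (the two tests above presumably come from different versions of the underlying generator). A minimal sketch of the property being exercised, using numpy.random.RandomState as a stand-in for the platform Random class (an assumption; the tests themselves use the NuPIC bindings' Random):

import numpy

def shuffled(seed):
  # RandomState stands in for the Random class used in the tests above.
  arr = numpy.array([1, 2, 3, 4], dtype="uint32")
  numpy.random.RandomState(seed).shuffle(arr)  # shuffles in place
  return arr

# The same seed always yields the same permutation:
assert (shuffled(42) == shuffled(42)).all()

# Shuffling an empty array is a valid no-op, as in testShuffleEmpty:
empty = numpy.zeros([0], dtype="uint32")
numpy.random.RandomState(42).shuffle(empty)
assert empty.size == 0
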
Example #4
class PatternMachine(object):
    """
  Base pattern machine class.
  """
    def __init__(self, n, w, num=100, seed=42):
        """
    @param n   (int)      Number of available bits in pattern
    @param w   (int/list) Number of on bits in pattern
                          If list, each pattern will have a `w` randomly
                          selected from the list.
    @param num (int)      Number of available patterns
    """
        # Save member variables
        self._n = n
        self._w = w
        self._num = num

        # Initialize member variables
        self._random = Random(seed)
        self._patterns = dict()

        self._generate()

    def get(self, number):
        """
    Return a pattern for a number.

    @param number (int) Number of pattern

    @return (set) Indices of on bits
    """
        if number not in self._patterns:
            raise IndexError("Invalid number")

        return self._patterns[number]

    def addNoise(self, bits, amount):
        """
    Add noise to pattern.

    @param bits   (set)   Indices of on bits
    @param amount (float) Probability of switching an on bit with a random bit

    @return (set) Indices of on bits in noisy pattern
    """
        newBits = set()

        for bit in bits:
            if self._random.getReal64() < amount:
                newBits.add(self._random.getUInt32(self._n))
            else:
                newBits.add(bit)

        return newBits

    def numbersForBit(self, bit):
        """
    Return the set of pattern numbers that match a bit.

    @param bit (int) Index of bit

    @return (set) Indices of numbers
    """
        if bit >= self._n:
            raise IndexError("Invalid bit")

        numbers = set()

        for index, pattern in self._patterns.items():
            if bit in pattern:
                numbers.add(index)

        return numbers

    def numberMapForBits(self, bits):
        """
    Return a map from number to matching on bits,
    for all numbers that match a set of bits.

    @param bits (set) Indices of bits

    @return (dict) Mapping from number => on bits.
    """
        numberMap = dict()

        for bit in bits:
            numbers = self.numbersForBit(bit)

            for number in numbers:
                if number not in numberMap:
                    numberMap[number] = set()

                numberMap[number].add(bit)

        return numberMap

    def prettyPrintPattern(self, bits, verbosity=1):
        """
    Pretty print a pattern.

    @param bits      (set) Indices of on bits
    @param verbosity (int) Verbosity level

    @return (string) Pretty-printed text
    """
        numberMap = self.numberMapForBits(bits)
        text = ""

        numberList = []
        numberItems = sorted(numberMap.items(),
                             key=lambda number_bits: len(number_bits[1]),
                             reverse=True)

        for number, bits in numberItems:

            if verbosity > 2:
                strBits = [str(n) for n in bits]
                numberText = "{0} (bits: {1})".format(number,
                                                      ",".join(strBits))
            elif verbosity > 1:
                numberText = "{0} ({1} bits)".format(number, len(bits))
            else:
                numberText = str(number)

            numberList.append(numberText)

        text += "[{0}]".format(", ".join(numberList))

        return text

    def _generate(self):
        """
    Generates set of random patterns.
    """
        candidates = np.arange(self._n, dtype=np.uint32)
        for i in range(self._num):
            self._random.shuffle(candidates)
            pattern = candidates[0:self._getW()]
            self._patterns[i] = set(pattern)

    def _getW(self):
        """
    Gets a value of `w` for use in generating a pattern.
    """
        w = self._w

        if isinstance(w, list):
            return w[self._random.getUInt32(len(w))]
        else:
            return w
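
A hypothetical usage sketch for PatternMachine (parameter values are illustrative, not taken from the source; assumes the class and its nupic dependencies are importable as above):

# Illustrative parameters; not from the source.
machine = PatternMachine(n=1024, w=20, num=100, seed=42)

pattern = machine.get(0)                # set of 20 on-bit indices in [0, 1024)
noisy = machine.addNoise(pattern, 0.1)  # each on bit is replaced with p = 0.1

# Most bits survive 10% noise, so the two patterns overlap heavily.
print(len(pattern & noisy))
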
Example #5
class RandomDistributedScalarEncoder(Encoder):
  """
  A scalar encoder encodes a numeric (floating point) value into an array
  of bits.

  This class maps a scalar value into a random distributed representation that
  is suitable as scalar input into the spatial pooler. The encoding scheme is
  designed to replace a simple ScalarEncoder. It preserves the important
  properties around overlapping representations. Unlike ScalarEncoder the min
  and max range can be dynamically increased without any negative effects. The
  only required parameter is resolution, which determines the resolution of
  input values.

  Scalar values are mapped to a bucket. The class maintains a random distributed
  encoding for each bucket. The following properties are maintained by
  RandomDistributedScalarEncoder:

  1) Similar scalars should have high overlap. Overlap should decrease smoothly
  as scalars become less similar. Specifically, neighboring bucket indices must
  overlap by a linearly decreasing number of bits.

  2) Dissimilar scalars should have very low overlap so that the SP does not
  confuse representations. Specifically, buckets that are more than w indices
  apart should have at most maxOverlap bits of overlap. We arbitrarily (and
  safely) define "very low" to be 2 bits of overlap or lower.

  Properties 1 and 2 lead to the following overlap rules for buckets i and j:

      If abs(i-j) < w then:
        overlap(i,j) = w - abs(i-j)
      else:
        overlap(i,j) <= maxOverlap

  3) The representation for a scalar must not change during the lifetime of
  the object. Specifically, as new buckets are created and the min/max range
  is extended, the representation for previously in-range scalars and
  previously created buckets must not change.
  """


  def __init__(self, resolution, w=21, n=400, name=None, offset=None,
               seed=42, verbosity=0):
    """Constructor

    @param resolution A floating point positive number denoting the resolution
                    of the output representation. Numbers within
                    [offset-resolution/2, offset+resolution/2] will fall into
                    the same bucket and thus have an identical representation.
                    Adjacent buckets will differ in one bit. resolution is a
                    required parameter.

    @param w Number of bits to set in output. w must be odd to avoid centering
                    problems.  w must be large enough that spatial pooler
                    columns will have a sufficiently large overlap to avoid
                    false matches. A value of w=21 is typical.

    @param n Number of bits in the representation (must be > w). n must be
                    large enough such that there is enough room to select
                    new representations as the range grows. With w=21 a value
                    of n=400 is typical. The class enforces n > 6*w.

    @param name An optional string which will become part of the description.

    @param offset A floating point offset used to map scalar inputs to bucket
                    indices. The middle bucket will correspond to numbers in the
                    range [offset - resolution/2, offset + resolution/2). If set
                    to None, the very first input that is encoded will be used
                    to determine the offset.

    @param seed The seed used for the random number generator. If set to -1
                    the generator will be initialized without a fixed seed.

    @param verbosity An integer controlling the level of debugging output. A
                    value of 0 implies no output. verbosity=1 may lead to
                    one-time printouts during construction, serialization or
                    deserialization. verbosity=2 may lead to some output per
                    encode operation. verbosity>2 may lead to significantly
                    more output.
    """
    # Validate inputs
    if (w <= 0) or (w%2 == 0):
      raise ValueError("w must be an odd positive integer")

    if resolution <= 0:
      raise ValueError("resolution must be a positive number")

    if (n <= 6*w) or (not isinstance(n, int)):
      raise ValueError("n must be an int strictly greater than 6*w. For "
                       "good results we recommend n be strictly greater "
                       "than 11*w")

    self.encoders = None
    self.verbosity = verbosity
    self.w = w
    self.n = n
    self.resolution = float(resolution)

    # The largest overlap we allow for non-adjacent encodings
    self._maxOverlap = 2

    # initialize the random number generators
    self._seed(seed)

    # Internal parameters for bucket mapping
    self.minIndex = None
    self.maxIndex = None
    self._offset = None
    self._initializeBucketMap(INITIAL_BUCKETS, offset)

    # A name used for debug printouts
    if name is not None:
      self.name = name
    else:
      self.name = "[%s]" % (self.resolution)

    if self.verbosity > 0:
      self.dump()


  def __setstate__(self, state):
    self.__dict__.update(state)

    # Initialize self.random as an instance of NupicRandom derived from the
    # previous numpy random state
    randomState = state["random"]
    if isinstance(randomState, numpy.random.mtrand.RandomState):
      self.random = NupicRandom(randomState.randint(sys.maxint))


  def _seed(self, seed=-1):
    """
    Initialize the random seed
    """
    if seed != -1:
      self.random = NupicRandom(seed)
    else:
      self.random = NupicRandom()


  def getDecoderOutputFieldTypes(self):
    """ See method description in base.py """
    return (FieldMetaType.float, )


  def getWidth(self):
    """ See method description in base.py """
    return self.n


  def getDescription(self):
    return [(self.name, 0)]


  def getBucketIndices(self, x):
    """ See method description in base.py """

    if ((isinstance(x, float) and math.isnan(x)) or
        x == SENTINEL_VALUE_FOR_MISSING_DATA):
      return [None]

    if self._offset is None:
      self._offset = x

    bucketIdx = (
        (self._maxBuckets/2) + int(round((x - self._offset) / self.resolution))
    )

    if bucketIdx < 0:
      bucketIdx = 0
    elif bucketIdx >= self._maxBuckets:
      bucketIdx = self._maxBuckets-1

    return [bucketIdx]


  def mapBucketIndexToNonZeroBits(self, index):
    """
    Given a bucket index, return the list of non-zero bits. If the bucket
    index does not exist, it is created. If the index falls outside our range
    we clip it.

    @param index The bucket index to get non-zero bits for.
    @returns numpy array of indices of non-zero bits for specified index.
    """
    if index < 0:
      index = 0

    if index >= self._maxBuckets:
      index = self._maxBuckets-1

    if index not in self.bucketMap:
      if self.verbosity >= 2:
        print "Adding additional buckets to handle index=", index
      self._createBucket(index)
    return self.bucketMap[index]


  def encodeIntoArray(self, x, output):
    """ See method description in base.py """

    if x is not None and not isinstance(x, numbers.Number):
      raise TypeError(
          "Expected a scalar input but got input of type %s" % type(x))

    # Get the bucket index to use
    bucketIdx = self.getBucketIndices(x)[0]

    # None is returned for missing value in which case we return all 0's.
    output[0:self.n] = 0
    if bucketIdx is not None:
      output[self.mapBucketIndexToNonZeroBits(bucketIdx)] = 1


  def _createBucket(self, index):
    """
    Create the given bucket index. Recursively create as many in-between
    bucket indices as necessary.
    """
    if index < self.minIndex:
      if index == self.minIndex - 1:
        # Create a new representation that has exactly w-1 overlapping bits
        # as the min representation
        self.bucketMap[index] = self._newRepresentation(self.minIndex,
                                                        index)
        self.minIndex = index
      else:
        # Recursively create all the indices above and then this index
        self._createBucket(index+1)
        self._createBucket(index)
    else:
      if index == self.maxIndex + 1:
        # Create a new representation that has exactly w-1 overlapping bits
        # as the max representation
        self.bucketMap[index] = self._newRepresentation(self.maxIndex,
                                                        index)
        self.maxIndex = index
      else:
        # Recursively create all the indices below and then this index
        self._createBucket(index-1)
        self._createBucket(index)


  def _newRepresentation(self, index, newIndex):
    """
    Return a new representation for newIndex that overlaps with the
    representation at index by exactly w-1 bits
    """
    newRepresentation = self.bucketMap[index].copy()

    # Choose the bit we will replace in this representation. We need to shift
    # this bit deterministically. If this is always chosen randomly then there
    # is a 1 in w chance of the same bit being replaced in neighboring
    # representations, which is fairly high
    ri = newIndex % self.w

    # Now we choose a bit such that the overlap rules are satisfied.
    newBit = self.random.getUInt32(self.n)
    newRepresentation[ri] = newBit
    while newBit in self.bucketMap[index] or \
          not self._newRepresentationOK(newRepresentation, newIndex):
      self.numTries += 1
      newBit = self.random.getUInt32(self.n)
      newRepresentation[ri] = newBit

    return newRepresentation


  def _newRepresentationOK(self, newRep, newIndex):
    """
    Return True if this new candidate representation satisfies all our overlap
    rules. Since we know that neighboring representations differ by at most
    one bit, we compute running overlaps.
    """
    if newRep.size != self.w:
      return False
    if (newIndex < self.minIndex-1) or (newIndex > self.maxIndex+1):
      raise ValueError("newIndex must be within one of existing indices")

    # A binary representation of newRep. We will use this to test containment
    newRepBinary = numpy.array([False]*self.n)
    newRepBinary[newRep] = True

    # Midpoint
    midIdx = self._maxBuckets/2

    # Start by checking the overlap at minIndex
    runningOverlap = self._countOverlap(self.bucketMap[self.minIndex], newRep)
    if not self._overlapOK(self.minIndex, newIndex, overlap=runningOverlap):
      return False

    # Compute running overlaps all the way to the midpoint
    for i in range(self.minIndex+1, midIdx+1):
      # This is the bit that is going to change
      newBit = (i-1)%self.w

      # Update our running overlap
      if newRepBinary[self.bucketMap[i-1][newBit]]:
        runningOverlap -= 1
      if newRepBinary[self.bucketMap[i][newBit]]:
        runningOverlap += 1

      # Verify our rules
      if not self._overlapOK(i, newIndex, overlap=runningOverlap):
        return False

    # At this point, runningOverlap contains the overlap for midIdx
    # Compute running overlaps all the way to maxIndex
    for i in range(midIdx+1, self.maxIndex+1):
      # This is the bit that is going to change
      newBit = i%self.w

      # Update our running overlap
      if newRepBinary[self.bucketMap[i-1][newBit]]:
        runningOverlap -= 1
      if newRepBinary[self.bucketMap[i][newBit]]:
        runningOverlap += 1

      # Verify our rules
      if not self._overlapOK(i, newIndex, overlap=runningOverlap):
        return False

    return True


  def _countOverlapIndices(self, i, j):
    """
    Return the overlap between bucket indices i and j
    """
    if i in self.bucketMap and j in self.bucketMap:
      iRep = self.bucketMap[i]
      jRep = self.bucketMap[j]
      return self._countOverlap(iRep, jRep)
    else:
      raise ValueError("Either i or j don't exist")


  @staticmethod
  def _countOverlap(rep1, rep2):
    """
    Return the overlap between two representations. rep1 and rep2 are lists of
    non-zero indices.
    """
    overlap = 0
    for e in rep1:
      if e in rep2:
        overlap += 1
    return overlap


  def _overlapOK(self, i, j, overlap=None):
    """
    Return True if the given overlap between bucket indices i and j is
    acceptable. If overlap is not specified, calculate it from the bucketMap.
    """
    if overlap is None:
      overlap = self._countOverlapIndices(i, j)
    if abs(i-j) < self.w:
      return overlap == (self.w - abs(i-j))
    else:
      return overlap <= self._maxOverlap


  def _initializeBucketMap(self, maxBuckets, offset):
    """
    Initialize the bucket map assuming the given number of maxBuckets.
    """
    # The first bucket index will be _maxBuckets / 2 and bucket indices will be
    # allowed to grow lower or higher as long as they don't become negative.
    # _maxBuckets is required because the current CLA Classifier assumes bucket
    # indices must be non-negative. This normally does not need to be changed
    # but if altered, should be set to an even number.
    self._maxBuckets = maxBuckets
    self.minIndex = self._maxBuckets / 2
    self.maxIndex = self._maxBuckets / 2

    # The scalar offset used to map scalar values to bucket indices. The middle
    # bucket will correspond to numbers in the range
    # [offset-resolution/2, offset+resolution/2).
    # The bucket index for a number x will be:
    #     maxBuckets/2 + int( round( (x-offset)/resolution ) )
    self._offset = offset

    # This dictionary maps a bucket index into its bit representation
    # We initialize the class with a single bucket with index 0
    self.bucketMap = {}

    def _permutation(n):
      r = numpy.arange(n, dtype=numpy.uint32)
      self.random.shuffle(r)
      return r

    self.bucketMap[self.minIndex] = _permutation(self.n)[0:self.w]

    # How often we need to retry when generating valid encodings
    self.numTries = 0


  def dump(self):
    print "RandomDistributedScalarEncoder:"
    print "  minIndex:   %d" % self.minIndex
    print "  maxIndex:   %d" % self.maxIndex
    print "  w:          %d" % self.w
    print "  n:          %d" % self.getWidth()
    print "  resolution: %g" % self.resolution
    print "  offset:     %s" % str(self._offset)
    print "  numTries:   %d" % self.numTries
    print "  name:       %s" % self.name
    if self.verbosity > 2:
      print "  All buckets:     "
      pprint.pprint(self.bucketMap)


  @classmethod
  def read(cls, proto):
    encoder = object.__new__(cls)
    encoder.resolution = proto.resolution
    encoder.w = proto.w
    encoder.n = proto.n
    encoder.name = proto.name
    encoder._offset = proto.offset
    encoder.random = NupicRandom()
    encoder.random.read(proto.random)
    encoder.verbosity = proto.verbosity
    encoder.minIndex = proto.minIndex
    encoder.maxIndex = proto.maxIndex
    encoder.encoders = None
    encoder._maxBuckets = INITIAL_BUCKETS
    encoder.bucketMap = {x.key: numpy.array(x.value, dtype=numpy.uint32)
                         for x in proto.bucketMap}

    return encoder


  def write(self, proto):
    proto.resolution = self.resolution
    proto.w = self.w
    proto.n = self.n
    proto.name = self.name
    proto.offset = self._offset
    self.random.write(proto.random)
    proto.verbosity = self.verbosity
    proto.minIndex = self.minIndex
    proto.maxIndex = self.maxIndex
    proto.bucketMap = [{"key": key, "value": value.tolist()}
                       for key, value in self.bucketMap.items()]
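
The adjacent-bucket rule in the class docstring (overlap(i,j) = w - abs(i-j)) can be checked end to end. A hedged sketch, assuming the encoder's nupic dependencies are importable; the encoded values are illustrative:

import numpy

# Parameters are the documented defaults; inputs below are illustrative.
enc = RandomDistributedScalarEncoder(resolution=1.0, w=21, n=400, seed=42)

out1 = numpy.zeros(enc.getWidth(), dtype=numpy.uint8)
out2 = numpy.zeros(enc.getWidth(), dtype=numpy.uint8)

enc.encodeIntoArray(10.0, out1)  # first value encoded fixes the offset
enc.encodeIntoArray(11.0, out2)  # one bucket away from the offset

# Adjacent buckets should share exactly w - 1 bits.
print((out1 & out2).sum())       # expected: 20
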
Example #6
class PatternMachine(object):
  """
  Base pattern machine class.
  """

  def __init__(self,
               n,
               w,
               num=100,
               seed=42):
    """
    @param n   (int)      Number of available bits in pattern
    @param w   (int/list) Number of on bits in pattern
                          If list, each pattern will have a `w` randomly
                          selected from the list.
    @param num (int)      Number of available patterns
    """
    # Save member variables
    self._n = n
    self._w = w
    self._num = num

    # Initialize member variables
    self._random = Random(seed)
    self._patterns = dict()

    self._generate()


  def get(self, number):
    """
    Return a pattern for a number.

    @param number (int) Number of pattern

    @return (set) Indices of on bits
    """
    if number not in self._patterns:
      raise IndexError("Invalid number")

    return self._patterns[number]


  def addNoise(self, bits, amount):
    """
    Add noise to pattern.

    @param bits   (set)   Indices of on bits
    @param amount (float) Probability of switching an on bit with a random bit

    @return (set) Indices of on bits in noisy pattern
    """
    newBits = set()

    for bit in bits:
      if self._random.getReal64() < amount:
        newBits.add(self._random.getUInt32(self._n))
      else:
        newBits.add(bit)

    return newBits


  def numbersForBit(self, bit):
    """
    Return the set of pattern numbers that match a bit.

    @param bit (int) Index of bit

    @return (set) Indices of numbers
    """
    if bit >= self._n:
      raise IndexError("Invalid bit")

    numbers = set()

    for index, pattern in self._patterns.iteritems():
      if bit in pattern:
        numbers.add(index)

    return numbers


  def numberMapForBits(self, bits):
    """
    Return a map from number to matching on bits,
    for all numbers that match a set of bits.

    @param bits (set) Indices of bits

    @return (dict) Mapping from number => on bits.
    """
    numberMap = dict()

    for bit in bits:
      numbers = self.numbersForBit(bit)

      for number in numbers:
        if number not in numberMap:
          numberMap[number] = set()

        numberMap[number].add(bit)

    return numberMap


  def prettyPrintPattern(self, bits, verbosity=1):
    """
    Pretty print a pattern.

    @param bits      (set) Indices of on bits
    @param verbosity (int) Verbosity level

    @return (string) Pretty-printed text
    """
    numberMap = self.numberMapForBits(bits)
    text = ""

    numberList = []
    numberItems = sorted(numberMap.iteritems(),
                         key=lambda (number, bits): len(bits),
                         reverse=True)

    for number, bits in numberItems:

      if verbosity > 2:
        strBits = [str(n) for n in bits]
        numberText = "{0} (bits: {1})".format(number, ",".join(strBits))
      elif verbosity > 1:
        numberText = "{0} ({1} bits)".format(number, len(bits))
      else:
        numberText = str(number)

      numberList.append(numberText)

    text += "[{0}]".format(", ".join(numberList))

    return text


  def _generate(self):
    """
    Generates a set of random patterns.
    """
    candidates = np.array(range(self._n), np.uint32)
    for i in xrange(self._num):
      self._random.shuffle(candidates)
      pattern = candidates[0:self._getW()]
      self._patterns[i] = set(pattern)


  def _getW(self):
    """
    Gets a value of `w` for use in generating a pattern.
    """
    w = self._w

    if isinstance(w, list):
      return w[self._random.getUInt32(len(w))]
    else:
      return w
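
A sketch of the `w`-as-list behavior implemented by _getW above: each generated pattern draws its on-bit count from the list (parameter values are hypothetical):

# Hypothetical parameters for illustration.
machine = PatternMachine(n=1024, w=[18, 20, 22], num=50, seed=42)

sizes = set(len(machine.get(i)) for i in range(50))
print(sizes)  # a subset of {18, 20, 22}
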
Example #7
class SequenceMachine(object):
  """
  Base sequence machine class.
  """

  def __init__(self,
               patternMachine,
               seed=42):
    """
    @param patternMachine (PatternMachine) Pattern machine instance
    """
    # Save member variables
    self.patternMachine = patternMachine

    # Initialize member variables
    self._random = Random(seed)


  def generateFromNumbers(self, numbers):
    """
    Generate a sequence from a list of numbers.

    Note: Any `None` in the list of numbers is considered a reset.

    @param numbers (list) List of numbers

    @return (list) Generated sequence
    """
    sequence = []

    for number in numbers:
      if number is None:
        sequence.append(number)
      else:
        pattern = self.patternMachine.get(number)
        sequence.append(pattern)

    return sequence


  def addSpatialNoise(self, sequence, amount):
    """
    Add spatial noise to each pattern in the sequence.

    @param sequence (list)  Sequence
    @param amount   (float) Amount of spatial noise

    @return (list) Sequence with spatial noise
    """
    newSequence = []

    for pattern in sequence:
      if pattern is not None:
        pattern = self.patternMachine.addNoise(pattern, amount)
      newSequence.append(pattern)

    return newSequence


  def prettyPrintSequence(self, sequence, verbosity=1):
    """
    Pretty print a sequence.

    @param sequence  (list) Sequence
    @param verbosity (int)  Verbosity level

    @return (string) Pretty-printed text
    """
    text = ""

    for i in xrange(len(sequence)):
      pattern = sequence[i]

      if pattern is None:
        text += "<reset>"
        if i < len(sequence) - 1:
          text += "\n"
      else:
        text += self.patternMachine.prettyPrintPattern(pattern,
                                                       verbosity=verbosity)

    return text


  def generateNumbers(self, numSequences, sequenceLength, sharedRange=None):
    """
    @param numSequences   (int)   Number of sequences to return,
                                  separated by None
    @param sequenceLength (int)   Length of each sequence
    @param sharedRange    (tuple) (start index, end index) indicating range of
                                  shared subsequence in each sequence
                                  (None if no shared subsequences)
    @return (list) Numbers representing sequences
    """
    numbers = []

    if sharedRange:
      sharedStart, sharedEnd = sharedRange
      sharedLength = sharedEnd - sharedStart
      sharedNumbers = range(numSequences * sequenceLength,
                            numSequences * sequenceLength + sharedLength)

    for i in xrange(numSequences):
      start = i * sequenceLength
      newNumbers = np.array(range(start, start + sequenceLength), np.uint32)
      self._random.shuffle(newNumbers)
      newNumbers = list(newNumbers)

      if sharedRange is not None:
        newNumbers[sharedStart:sharedEnd] = sharedNumbers

      numbers += newNumbers
      numbers.append(None)

    return numbers
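
A hypothetical sketch combining the two machines (assumes PatternMachine from the earlier example; all values are illustrative):

patternMachine = PatternMachine(n=1024, w=20, num=60)
sequenceMachine = SequenceMachine(patternMachine)

# Three sequences of length 10; positions 2..4 of each share numbers 30..32.
numbers = sequenceMachine.generateNumbers(3, 10, sharedRange=(2, 5))
sequence = sequenceMachine.generateFromNumbers(numbers)

assert numbers.count(None) == 3  # one None reset marker per sequence
print(sequenceMachine.prettyPrintSequence(sequence, verbosity=1))
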
Example #8
class RandomDistributedScalarEncoder(Encoder):
    """
  A scalar encoder encodes a numeric (floating point) value into an array
  of bits.

  This class maps a scalar value into a random distributed representation that
  is suitable as scalar input into the spatial pooler. The encoding scheme is
  designed to replace a simple ScalarEncoder. It preserves the important
  properties around overlapping representations. Unlike ScalarEncoder the min
  and max range can be dynamically increased without any negative effects. The
  only required parameter is resolution, which determines the resolution of
  input values.

  Scalar values are mapped to a bucket. The class maintains a random distributed
  encoding for each bucket. The following properties are maintained by
  RandomDistributedEncoder:

  1) Similar scalars should have high overlap. Overlap should decrease smoothly
  as scalars become less similar. Specifically, neighboring bucket indices must
  overlap by a linearly decreasing number of bits.

  2) Dissimilar scalars should have very low overlap so that the SP does not
  confuse representations. Specifically, buckets that are more than w indices
  apart should have at most maxOverlap bits of overlap. We arbitrarily (and
  safely) define "very low" to be 2 bits of overlap or lower.

  Properties 1 and 2 lead to the following overlap rules for buckets i and j:

      If abs(i-j) < w then:
        overlap(i,j) = w - abs(i-j)
      else:
        overlap(i,j) <= maxOverlap

  3) The representation for a scalar must not change during the lifetime of
  the object. Specifically, as new buckets are created and the min/max range
  is extended, the representation for previously in-range sscalars and
  previously created buckets must not change.
  """
    def __init__(self,
                 resolution,
                 w=21,
                 n=400,
                 name=None,
                 offset=None,
                 seed=42,
                 verbosity=0):
        """Constructor

    @param resolution A floating point positive number denoting the resolution
                    of the output representation. Numbers within
                    [offset-resolution/2, offset+resolution/2] will fall into
                    the same bucket and thus have an identical representation.
                    Adjacent buckets will differ in one bit. resolution is a
                    required parameter.

    @param w Number of bits to set in output. w must be odd to avoid centering
                    problems.  w must be large enough that spatial pooler
                    columns will have a sufficiently large overlap to avoid
                    false matches. A value of w=21 is typical.

    @param n Number of bits in the representation (must be > w). n must be
                    large enough such that there is enough room to select
                    new representations as the range grows. With w=21 a value
                    of n=400 is typical. The class enforces n > 6*w.

    @param name An optional string which will become part of the description.

    @param offset A floating point offset used to map scalar inputs to bucket
                    indices. The middle bucket will correspond to numbers in the
                    range [offset - resolution/2, offset + resolution/2). If set
                    to None, the very first input that is encoded will be used
                    to determine the offset.

    @param seed The seed used for numpy's random number generator. If set to -1
                    the generator will be initialized without a fixed seed.

    @param verbosity An integer controlling the level of debugging output. A
                    value of 0 implies no output. verbosity=1 may lead to
                    one-time printouts during construction, serialization or
                    deserialization. verbosity=2 may lead to some output per
                    encode operation. verbosity>2 may lead to significantly
                    more output.
    """
        # Validate inputs
        if (w <= 0) or (w % 2 == 0):
            raise ValueError("w must be an odd positive integer")

        if resolution <= 0:
            raise ValueError("resolution must be a positive number")

        if (n <= 6 * w) or (not isinstance(n, int)):
            raise ValueError("n must be an int strictly greater than 6*w. For "
                             "good results we recommend n be strictly greater "
                             "than 11*w")

        self.encoders = None
        self.verbosity = verbosity
        self.w = w
        self.n = n
        self.resolution = float(resolution)

        # The largest overlap we allow for non-adjacent encodings
        self._maxOverlap = 2

        # initialize the random number generators
        self._seed(seed)

        # Internal parameters for bucket mapping
        self.minIndex = None
        self.maxIndex = None
        self._offset = None
        self._initializeBucketMap(INITIAL_BUCKETS, offset)

        # A name used for debug printouts
        if name is not None:
            self.name = name
        else:
            self.name = "[%s]" % (self.resolution)

        if self.verbosity > 0:
            self.dump()

    def __setstate__(self, state):
        self.__dict__.update(state)

        # Initialize self.random as an instance of NupicRandom derived from the
        # previous numpy random state
        randomState = state["random"]
        if isinstance(randomState, numpy.random.mtrand.RandomState):
            self.random = NupicRandom(randomState.randint(sys.maxint))

    def _seed(self, seed=-1):
        """
    Initialize the random seed
    """
        if seed != -1:
            self.random = NupicRandom(seed)
        else:
            self.random = NupicRandom()

    def getDecoderOutputFieldTypes(self):
        """ See method description in base.py """
        return (FieldMetaType.float, )

    def getWidth(self):
        """ See method description in base.py """
        return self.n

    def getDescription(self):
        return [(self.name, 0)]

    def getBucketIndices(self, x):
        """ See method description in base.py """

        if ((isinstance(x, float) and math.isnan(x))
                or x == SENTINEL_VALUE_FOR_MISSING_DATA):
            return [None]

        if self._offset is None:
            self._offset = x

        bucketIdx = ((self._maxBuckets / 2) +
                     int(round((x - self._offset) / self.resolution)))

        if bucketIdx < 0:
            bucketIdx = 0
        elif bucketIdx >= self._maxBuckets:
            bucketIdx = self._maxBuckets - 1

        return [bucketIdx]

    def mapBucketIndexToNonZeroBits(self, index):
        """
    Given a bucket index, return the list of non-zero bits. If the bucket
    index does not exist, it is created. If the index falls outside our range
    we clip it.
    """
        if index < 0:
            index = 0

        if index >= self._maxBuckets:
            index = self._maxBuckets - 1

        if index not in self.bucketMap:
            if self.verbosity >= 2:
                print "Adding additional buckets to handle index=", index
            self._createBucket(index)
        return self.bucketMap[index]

    def encodeIntoArray(self, x, output):
        """ See method description in base.py """

        if x is not None and not isinstance(x, numbers.Number):
            raise TypeError(
                "Expected a scalar input but got input of type %s" % type(x))

        # Get the bucket index to use
        bucketIdx = self.getBucketIndices(x)[0]

        # None is returned for missing value in which case we return all 0's.
        output[0:self.n] = 0
        if bucketIdx is not None:
            output[self.mapBucketIndexToNonZeroBits(bucketIdx)] = 1

    def _createBucket(self, index):
        """
    Create the given bucket index. Recursively create as many in-between
    bucket indices as necessary.
    """
        if index < self.minIndex:
            if index == self.minIndex - 1:
                # Create a new representation that has exactly w-1 overlapping bits
                # as the min representation
                self.bucketMap[index] = self._newRepresentation(
                    self.minIndex, index)
                self.minIndex = index
            else:
                # Recursively create all the indices above and then this index
                self._createBucket(index + 1)
                self._createBucket(index)
        else:
            if index == self.maxIndex + 1:
                # Create a new representation that has exactly w-1 overlapping bits
                # as the max representation
                self.bucketMap[index] = self._newRepresentation(
                    self.maxIndex, index)
                self.maxIndex = index
            else:
                # Recursively create all the indices below and then this index
                self._createBucket(index - 1)
                self._createBucket(index)

    def _newRepresentation(self, index, newIndex):
        """
    Return a new representation for newIndex that overlaps with the
    representation at index by exactly w-1 bits
    """
        newRepresentation = self.bucketMap[index].copy()

        # Choose the bit we will replace in this representation. We need to shift
        # this bit deterministically. If this is always chosen randomly then there
        # is a 1 in w chance of the same bit being replaced in neighboring
        # representations, which is fairly high
        ri = newIndex % self.w

        # Now we choose a bit such that the overlap rules are satisfied.
        newBit = self.random.getUInt32(self.n)
        newRepresentation[ri] = newBit
        while newBit in self.bucketMap[index] or \
              not self._newRepresentationOK(newRepresentation, newIndex):
            self.numTries += 1
            newBit = self.random.getUInt32(self.n)
            newRepresentation[ri] = newBit

        return newRepresentation

    def _newRepresentationOK(self, newRep, newIndex):
        """
    Return True if this new candidate representation satisfies all our overlap
    rules. Since we know that neighboring representations differ by at most
    one bit, we compute running overlaps.
    """
        if newRep.size != self.w:
            return False
        if (newIndex < self.minIndex - 1) or (newIndex > self.maxIndex + 1):
            raise ValueError("newIndex must be within one of existing indices")

        # A binary representation of newRep. We will use this to test containment
        newRepBinary = numpy.array([False] * self.n)
        newRepBinary[newRep] = True

        # Midpoint
        midIdx = self._maxBuckets / 2

        # Start by checking the overlap at minIndex
        runningOverlap = self._countOverlap(self.bucketMap[self.minIndex],
                                            newRep)
        if not self._overlapOK(self.minIndex, newIndex,
                               overlap=runningOverlap):
            return False

        # Compute running overlaps all the way to the midpoint
        for i in range(self.minIndex + 1, midIdx + 1):
            # This is the bit that is going to change
            newBit = (i - 1) % self.w

            # Update our running overlap
            if newRepBinary[self.bucketMap[i - 1][newBit]]:
                runningOverlap -= 1
            if newRepBinary[self.bucketMap[i][newBit]]:
                runningOverlap += 1

            # Verify our rules
            if not self._overlapOK(i, newIndex, overlap=runningOverlap):
                return False

        # At this point, runningOverlap contains the overlap for midIdx
        # Compute running overlaps all the way to maxIndex
        for i in range(midIdx + 1, self.maxIndex + 1):
            # This is the bit that is going to change
            newBit = i % self.w

            # Update our running overlap
            if newRepBinary[self.bucketMap[i - 1][newBit]]:
                runningOverlap -= 1
            if newRepBinary[self.bucketMap[i][newBit]]:
                runningOverlap += 1

            # Verify our rules
            if not self._overlapOK(i, newIndex, overlap=runningOverlap):
                return False

        return True

    def _countOverlapIndices(self, i, j):
        """
    Return the overlap between bucket indices i and j
    """
        if i in self.bucketMap and j in self.bucketMap:
            iRep = self.bucketMap[i]
            jRep = self.bucketMap[j]
            return self._countOverlap(iRep, jRep)
        else:
            raise ValueError("Either i or j don't exist")

    @staticmethod
    def _countOverlap(rep1, rep2):
        """
    Return the overlap between two representations. rep1 and rep2 are lists of
    non-zero indices.
    """
        overlap = 0
        for e in rep1:
            if e in rep2:
                overlap += 1
        return overlap

    def _overlapOK(self, i, j, overlap=None):
        """
    Return True if the given overlap between bucket indices i and j are
    acceptable. If overlap is not specified, calculate it from the bucketMap
    """
        if overlap is None:
            overlap = self._countOverlapIndices(i, j)
        if abs(i - j) < self.w:
            return overlap == (self.w - abs(i - j))
        else:
            return overlap <= self._maxOverlap

    def _initializeBucketMap(self, maxBuckets, offset):
        """
    Initialize the bucket map assuming the given number of maxBuckets.
    """
        # The first bucket index will be _maxBuckets / 2 and bucket indices will be
        # allowed to grow lower or higher as long as they don't become negative.
        # _maxBuckets is required because the current CLA Classifier assumes bucket
        # indices must be non-negative. This normally does not need to be changed
        # but if altered, should be set to an even number.
        self._maxBuckets = maxBuckets
        self.minIndex = self._maxBuckets / 2
        self.maxIndex = self._maxBuckets / 2

        # The scalar offset used to map scalar values to bucket indices. The middle
        # bucket will correspond to numbers in the range
        # [offset-resolution/2, offset+resolution/2).
        # The bucket index for a number x will be:
        #     maxBuckets/2 + int( round( (x-offset)/resolution ) )
        self._offset = offset

        # This dictionary maps a bucket index into its bit representation
        # We initialize the class with a single bucket with index 0
        self.bucketMap = {}

        def _permutation(n):
            r = numpy.arange(n, dtype=numpy.uint32)
            self.random.shuffle(r)
            return r

        self.bucketMap[self.minIndex] = _permutation(self.n)[0:self.w]

        # How often we need to retry when generating valid encodings
        self.numTries = 0

    def dump(self):
        print "RandomDistributedScalarEncoder:"
        print "  minIndex:   %d" % self.minIndex
        print "  maxIndex:   %d" % self.maxIndex
        print "  w:          %d" % self.w
        print "  n:          %d" % self.getWidth()
        print "  resolution: %g" % self.resolution
        print "  offset:     %s" % str(self._offset)
        print "  numTries:   %d" % self.numTries
        print "  name:       %s" % self.name
        if self.verbosity > 2:
            print "  All buckets:     "
            pprint.pprint(self.bucketMap)

    @classmethod
    def read(cls, proto):
        encoder = object.__new__(cls)
        encoder.resolution = proto.resolution
        encoder.w = proto.w
        encoder.n = proto.n
        encoder.name = proto.name
        encoder._offset = proto.offset
        encoder.random = NupicRandom()
        encoder.random.read(proto.random)
        encoder.verbosity = proto.verbosity
        encoder.minIndex = proto.minIndex
        encoder.maxIndex = proto.maxIndex
        encoder.encoders = None
        encoder._maxBuckets = INITIAL_BUCKETS
        encoder.bucketMap = {
            x.key: numpy.array(x.value, dtype=numpy.uint32)
            for x in proto.bucketMap
        }

        return encoder

    def write(self, proto):
        proto.resolution = self.resolution
        proto.w = self.w
        proto.n = self.n
        proto.name = self.name
        proto.offset = self._offset
        self.random.write(proto.random)
        proto.verbosity = self.verbosity
        proto.minIndex = self.minIndex
        proto.maxIndex = self.maxIndex
        proto.bucketMap = [{
            "key": key,
            "value": value.tolist()
        } for key, value in self.bucketMap.items()]
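
A worked sketch of the bucket arithmetic in getBucketIndices above, with hypothetical values (in the class, the offset is fixed by the first encoded input):

resolution = 0.5
offset = 10.0      # hypothetical; taken from the first input in the class
maxBuckets = 1000  # stand-in for INITIAL_BUCKETS

def bucketIdx(x):
  return (maxBuckets // 2) + int(round((x - offset) / resolution))

print(bucketIdx(10.0))  # 500: the middle bucket
print(bucketIdx(10.2))  # 500: within resolution/2 of the offset
print(bucketIdx(11.0))  # 502: two resolution steps away
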
Example #9
class SequenceMachine(object):
  """
  Base sequence machine class.
  """

  def __init__(self,
               patternMachine,
               seed=42):
    """
    @param patternMachine (PatternMachine) Pattern machine instance
    """
    # Save member variables
    self.patternMachine = patternMachine

    # Initialize member variables
    self._random = Random(seed)


  def generateFromNumbers(self, numbers):
    """
    Generate a sequence from a list of numbers.

    Note: Any `None` in the list of numbers is considered a reset.

    @param numbers (list) List of numbers

    @return (list) Generated sequence
    """
    sequence = []

    for number in numbers:
      if number is None:
        sequence.append(number)
      else:
        pattern = self.patternMachine.get(number)
        sequence.append(pattern)

    return sequence


  def addSpatialNoise(self, sequence, amount):
    """
    Add spatial noise to each pattern in the sequence.

    @param sequence (list)  Sequence
    @param amount   (float) Amount of spatial noise

    @return (list) Sequence with spatial noise
    """
    newSequence = []

    for pattern in sequence:
      if pattern is not None:
        pattern = self.patternMachine.addNoise(pattern, amount)
      newSequence.append(pattern)

    return newSequence


  def prettyPrintSequence(self, sequence, verbosity=1):
    """
    Pretty print a sequence.

    @param sequence  (list) Sequence
    @param verbosity (int)  Verbosity level

    @return (string) Pretty-printed text
    """
    text = ""

    for i in range(len(sequence)):
      pattern = sequence[i]

      if pattern is None:
        text += "<reset>"
        if i < len(sequence) - 1:
          text += "\n"
      else:
        text += self.patternMachine.prettyPrintPattern(pattern,
                                                       verbosity=verbosity)

    return text


  def generateNumbers(self, numSequences, sequenceLength, sharedRange=None):
    """
    @param numSequences   (int)   Number of sequences to return,
                                  separated by None
    @param sequenceLength (int)   Length of each sequence
    @param sharedRange    (tuple) (start index, end index) indicating range of
                                  shared subsequence in each sequence
                                  (None if no shared subsequences)
    @return (list) Numbers representing sequences
    """
    numbers = []

    if sharedRange:
      sharedStart, sharedEnd = sharedRange
      sharedLength = sharedEnd - sharedStart
      sharedNumbers = list(range(numSequences * sequenceLength,
                            numSequences * sequenceLength + sharedLength))

    for i in range(numSequences):
      start = i * sequenceLength
      newNumbers = np.array(list(range(start, start + sequenceLength)), np.uint32)
      self._random.shuffle(newNumbers)
      newNumbers = list(newNumbers)

      if sharedRange is not None:
        newNumbers[sharedStart:sharedEnd] = sharedNumbers

      numbers += newNumbers
      numbers.append(None)

    return numbers
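
A sketch showing that addSpatialNoise perturbs each pattern independently and passes reset markers (None) through untouched (hypothetical usage; assumes PatternMachine from the earlier example):

# Hypothetical parameters for illustration.
machine = PatternMachine(n=1024, w=20, num=10)
seqMachine = SequenceMachine(machine)

sequence = seqMachine.generateFromNumbers([0, 1, None, 2])
noisy = seqMachine.addSpatialNoise(sequence, 0.05)

assert noisy[2] is None             # resets survive
print(len(sequence[0] & noisy[0]))  # close to 20 with 5% noise
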
Example #10
class ColumnPooler(object):
  """
  This class is a temporary implementation of a cross-column pooler. Its goal
  is to prove out basic properties before a cleaner implementation is written.
  """

  def __init__(self,
               inputWidth,
               numActiveColumnsPerInhArea=40,
               synPermProximalInc=0.1,
               synPermProximalDec=0.001,
               initialProximalPermanence=0.6,
               columnDimensions=(2048,),
               activationThreshold=13,
               minThreshold=10,
               initialPermanence=0.41,
               connectedPermanence=0.50,
               maxNewSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               predictedSegmentDecrement=0.0,
               maxSegmentsPerCell=255,
               maxSynapsesPerSegment=255,
               seed=42):
    """
    This class uses an ExtendedTemporalMemory internally to keep track of
    distal segments. Please see ExtendedTemporalMemory for descriptions of
    constructor parameters not defined below.

    Parameters:
    ----------------------------
    @param  inputWidth (int)
            The number of proximal inputs into this layer

    @param  numActiveColumnsPerInhArea (int)
            Target number of active cells

    @param  synPermProximalInc (float)
            Permanence increment for proximal synapses

    @param  synPermProximalDec (float)
            Permanence decrement for proximal synapses

    @param  initialProximalPermanence (float)
            Initial permanence value for proximal segments

    """

    self.inputWidth = inputWidth
    self.numActiveColumnsPerInhArea = numActiveColumnsPerInhArea
    self.synPermProximalInc = synPermProximalInc
    self.synPermProximalDec = synPermProximalDec
    self.initialProximalPermanence = initialProximalPermanence
    self.connectedPermanence = connectedPermanence
    self.maxNewSynapseCount = maxNewSynapseCount
    self.minThreshold = minThreshold
    self.activeCells = set()
    self._random = Random(seed)

    # Create our own instance of extended temporal memory to handle distal
    # segments.
    self.tm = createModel(
                      modelName="extendedCPP",
                      columnDimensions=columnDimensions,
                      cellsPerColumn=1,
                      activationThreshold=activationThreshold,
                      initialPermanence=initialPermanence,
                      connectedPermanence=connectedPermanence,
                      minThreshold=minThreshold,
                      maxNewSynapseCount=maxNewSynapseCount,
                      permanenceIncrement=permanenceIncrement,
                      permanenceDecrement=permanenceDecrement,
                      predictedSegmentDecrement=predictedSegmentDecrement,
                      maxSegmentsPerCell=maxSegmentsPerCell,
                      maxSynapsesPerSegment=maxSynapsesPerSegment,
                      seed=seed,
                      learnOnOneCell=False,
    )

    # These sparse matrices will hold the synapses for each proximal segment.
    #
    # proximalPermanences - SparseMatrix with permanence values
    # proximalConnections - SparseBinaryMatrix of connected synapses

    self.proximalPermanences = SparseMatrix(self.numberOfColumns(),
                                            inputWidth)
    self.proximalConnections = SparseBinaryMatrix(inputWidth)
    self.proximalConnections.resize(self.numberOfColumns(), inputWidth)



  def compute(self,
              feedforwardInput=None,
              activeExternalCells=None,
              learn=True):
    """
    Parameters:
    ----------------------------
    @param  feedforwardInput     (set)
            Indices of active input bits

    @param  activeExternalCells  (set)
            Indices of active cells that will form connections to distal
            segments.

    @param  learn                (bool)
            If True, we are learning a new object
    """
    if activeExternalCells is None:
      activeExternalCells = set()

    if learn:
      self._computeLearningMode(feedforwardInput=feedforwardInput,
                                lateralInput=activeExternalCells)

    else:
      self._computeInferenceMode(feedforwardInput=feedforwardInput,
                                 lateralInput=activeExternalCells)


  def _computeLearningMode(self, feedforwardInput, lateralInput):
    """
    Learning mode: we are learning a new object. If there is no prior
    activity, we randomly activate 2% of cells and create connections to
    incoming input. If there was prior activity, we maintain it.

    These cells will represent the object and learn distal connections to
    lateral cortical columns.

    Parameters:
    ----------------------------
    @param  feedforwardInput (set)
            Indices of active input bits

    @param  lateralInput (set)
            Indices of active cells from neighboring columns.
    """
    # If there are no previously active cells, select random subset of cells
    if len(self.activeCells) == 0:
      self.activeCells = set(self._random.shuffle(
            numpy.array(range(self.numberOfCells()),
                        dtype="uint32"))[0:self.numActiveColumnsPerInhArea])

    # else we maintain previous activity, nothing to do.

    # Those cells that remain active will learn on their proximal and distal
    # dendrites as long as there is some input.  If there are no
    # cells active, no learning happens.  This only happens in the very
    # beginning if there has been no bottom up activity at all.
    if len(self.activeCells) > 0:

      # Learn on proximal dendrite if appropriate
      if len(feedforwardInput) > 0:
        self._learnProximal(feedforwardInput, self.activeCells,
                            self.maxNewSynapseCount, self.proximalPermanences,
                            self.proximalConnections,
                            self.initialProximalPermanence,
                            self.synPermProximalInc, self.synPermProximalDec,
                            self.connectedPermanence)

      # Learn on distal dendrites if appropriate
      self.tm.compute(activeColumns=self.activeCells,
                      activeExternalCells=lateralInput,
                      formInternalConnections=False,
                      learn=True)


  def _computeInferenceMode(self, feedforwardInput, lateralInput):
    """
    Inference mode: if there is some feedforward activity, perform
    spatial pooling on it to recognize previously known objects. If there
    is no feedforward activity, maintain previous activity.

    Parameters:
    ----------------------------
    @param  feedforwardInput (set)
            Indices of active input bits

    @param  lateralInput (list of lists)
            A list of lists of active cells from neighboring columns.
            len(lateralInput) == number of connected neighboring cortical
            columns.

    """
    # Figure out which cells are active due to feedforward proximal inputs
    # In order to form unions, we keep all cells that are over threshold
    inputVector = numpy.zeros(self.numberOfInputs(), dtype=realDType)
    inputVector[list(feedforwardInput)] = 1
    overlaps = numpy.zeros(self.numberOfColumns(), dtype=realDType)
    self.proximalConnections.rightVecSumAtNZ_fast(inputVector.astype(realDType),
                                                  overlaps)
    overlaps[overlaps < self.minThreshold] = 0
    bottomUpActivity = set(overlaps.nonzero()[0])

    # If there is insufficient current bottom up activity, we incorporate all
    # previous activity. We set their overlaps so they are sure to win.
    if len(bottomUpActivity) < self.numActiveColumnsPerInhArea:
      bottomUpActivity = bottomUpActivity.union(self.activeCells)
      maxOverlap = overlaps.max()
      overlaps[self.getActiveCells()] = maxOverlap+1

    # Narrow down list of active cells based on lateral activity
    self.activeCells = self._winnersBasedOnLateralActivity(
      bottomUpActivity,
      self.getPredictiveCells(),
      overlaps,
      self.numActiveColumnsPerInhArea
    )

    # Update predictive cells for next time step
    self.tm.compute(activeColumns=self.activeCells,
                    activeExternalCells=lateralInput,
                    formInternalConnections=False,
                    learn=False)


  def numberOfInputs(self):
    """
    Returns the number of inputs into this layer
    """
    return self.inputWidth


  def numberOfColumns(self):
    """
    Returns the number of columns in this layer.
    @return (int) Number of columns
    """
    return self.tm.numberOfColumns()


  def numberOfCells(self):
    """
    Returns the number of cells in this layer.
    @return (int) Number of cells
    """
    return self.tm.numberOfCells()


  def getActiveCells(self):
    """
    Returns the indices of the active cells.
    @return (set) Indices of active cells.
    """
    return self.getCellIndices(self.activeCells)


  @classmethod
  def getCellIndices(cls, cells):
    return [cls.getCellIndex(c) for c in cells]


  @staticmethod
  def getCellIndex(cell):
    return cell


  def numberOfConnectedSynapses(self, cells=None):
    """
    Returns the number of proximal connected synapses on these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells. If None return count for all cells.
    """
    if cells is None:
      cells = xrange(self.numberOfCells())
    n = 0
    for cell in cells:
      n += self.proximalConnections.nNonZerosOnRow(cell)
    return n


  def numberOfSynapses(self, cells=None):
    """
    Returns the number of proximal synapses with permanence>0 on these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells. If None return count for all cells.
    """
    if cells is None:
      cells = xrange(self.numberOfCells())
    n = 0
    for cell in cells:
      n += self.proximalPermanences.nNonZerosOnRow(cell)
    return n


  def numberOfDistalSegments(self, cells):
    """
    Returns the total number of distal segments for these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells
    """
    n = 0
    for cell in cells:
      n += len(self.tm.connections.segmentsForCell(cell))
    return n


  def numberOfDistalSynapses(self, cells):
    """
    Returns the total number of distal synapses for these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells
    """
    n = 0
    for cell in cells:
      segments = self.tm.connections.segmentsForCell(cell)
      for segment in segments:
        n += len(self.tm.connections.synapsesForSegment(segment))
    return n


  def reset(self):
    """
    Reset internal states. When learning this signifies we are to learn a
    unique new object.
    """
    self.activeCells = set()
    self.tm.reset()


  def getPredictiveCells(self):
    """
    Get the cells that are currently predicted via distal (lateral) input.

    @return (list) Indices of the currently predicted cells.
    """
    return self.tm.getPredictiveCells()


  def getPredictedActiveCells(self):
    """
    Get the set of cells that were previously predicted and then became active.

    @return (set) A set containing indices.
    """
    return self.tm.predictedActiveCellsIndices()


  def getConnections(self):
    """
    Get the Connections structure associated with our TM. Beware of using
    this as it is implementation specific and may change.

    @return (object) A Connections object
    """
    return self.tm.connections


  def _learnProximal(self,
             activeInputs, activeCells, maxNewSynapseCount, proximalPermanences,
             proximalConnections, initialPermanence, synPermProximalInc,
             synPermProximalDec, connectedPermanence):
    """
    Learn on proximal dendrites of active cells.  Updates proximalPermanences
    """
    for cell in activeCells:
      cellPermanencesDense = proximalPermanences.getRow(cell)
      cellNonZeroIndices, _ = proximalPermanences.rowNonZeros(cell)
      cellNonZeroIndices = list(cellNonZeroIndices)

      # Get new and existing connections for this segment
      newInputs, existingInputs = self._pickProximalInputsToLearnOn(
        maxNewSynapseCount, activeInputs, cellNonZeroIndices
      )

      # Adjust existing connections appropriately
      # First we decrement all existing permanences
      if len(cellNonZeroIndices) > 0:
        cellPermanencesDense[cellNonZeroIndices] -= synPermProximalDec

      # Then we add inc + dec to existing active synapses
      if len(existingInputs) > 0:
        cellPermanencesDense[existingInputs] += (synPermProximalInc +
                                                 synPermProximalDec)

      # Add new connections
      if len(newInputs) > 0:
        cellPermanencesDense[newInputs] += initialPermanence

      # Update proximalPermanences and proximalConnections
      proximalPermanences.setRowFromDense(cell, cellPermanencesDense)
      newConnected = numpy.where(cellPermanencesDense >= connectedPermanence)[0]
      proximalConnections.replaceSparseRow(cell, newConnected)



  def _pickProximalInputsToLearnOn(self, newSynapseCount, activeInputs,
                                  cellNonZeros):
    """
    Pick inputs to form proximal connections to a particular cell. We randomly
    subsample from the active inputs that are not already connected to this
    cell.

    We return a list of up to newSynapseCount input indices from activeInputs
    that are valid new connections for this cell. We also return a list
    containing all inputs in activeInputs that are already connected to this
    cell.

    Parameters:
    ----------------------------
    @param newSynapseCount  (int)        Number of inputs to pick
    @param activeInputs     (set)        Indices of active inputs
    @param cellNonZeros     (list)       Indices of inputs to this cell with
                                         non-zero permanences.

    @return (list, list) Indices of new inputs to connect to, inputs already
                         connected
    """
    candidates = []
    alreadyConnected = []

    # Collect inputs that already have synapses and list of new candidate inputs
    for inputIdx in activeInputs:
      if inputIdx in cellNonZeros:
        alreadyConnected += [inputIdx]
      else:
        candidates += [inputIdx]

    # Select min(newSynapseCount, len(candidates)) new inputs to connect to
    if newSynapseCount >= len(candidates):
      return candidates, alreadyConnected

    else:
      # Pick newSynapseCount inputs randomly, without replacement
      # TODO: we could maybe implement this more efficiently with shuffle.
      inputs = []
      for _ in range(newSynapseCount):
        i = self._random.getUInt32(len(candidates))
        inputs.append(candidates.pop(i))

      return inputs, alreadyConnected


  def _winnersBasedOnLateralActivity(self,
                                     activeCells,
                                     predictiveCells,
                                     overlaps,
                                     targetActiveCells):
    """
    Given the set of cells active due to feedforward input, narrow down the
    list of active cells based on predictions due to previous lateralInput.

    Parameters:
    ----------------------------
    @param    activeCells           (set)
              Indices of cells activated by bottom-up input.

    @param    predictiveCells       (set)
              Indices of cells that are laterally predicted.

    @param    overlaps              (numpy array)
              Bottom up overlap scores for each proximal segment. This is used
              to select additional cells if the narrowed-down list contains
              fewer than targetActiveCells.

    @param    targetActiveCells     (int)
              The number of active cells we want to have active.

    @return (set) Indices of the new winner cells

    """
    # Keep the cells that are both laterally predicted and bottom-up active
    predictedActiveCells = activeCells.intersection(predictiveCells)

    # If predicted cells don't intersect at all with active cells, we go with
    # bottom up input. In these cases we can stick with existing active cells
    # and skip the overlap sorting
    if len(predictedActiveCells) == 0:
      predictedActiveCells = activeCells

    # We want to keep all cells that were predicted and currently active due to
    # feedforward input. This set could be larger than our target number of
    # active cells due to unions, which is ok. However if there are insufficient
    # cells active after this intersection, we fill in with those currently
    # active cells that have highest overlap.
    elif len(predictedActiveCells) < targetActiveCells:
      # Don't want to consider cells already chosen
      overlaps[list(predictedActiveCells)] = 0

      # Add in the desired number of cells with highest activity
      numActive = targetActiveCells - len(predictedActiveCells)
      winnerIndices = numpy.argsort(overlaps, kind='mergesort')
      sortedWinnerIndices = winnerIndices[-numActive:][::-1]
      predictedActiveCells = predictedActiveCells.union(set(sortedWinnerIndices))

    return predictedActiveCells
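The inference path in the class above reduces to a thresholded sparse
matrix-vector product over the proximal connections. Here is a minimal,
self-contained numpy sketch of that overlap computation; the sizes, the
threshold, and the dense stand-in for nupic's
SparseBinaryMatrix.rightVecSumAtNZ_fast are all hypothetical.

import numpy

numColumns, numInputs = 8, 16
minThreshold = 2

# Dense stand-in for proximalConnections: 1 marks a connected synapse
connections = numpy.zeros((numColumns, numInputs), dtype="float32")
connections[0, [1, 2, 3]] = 1
connections[5, [2, 3]] = 1

# Active input bits, as in feedforwardInput
inputVector = numpy.zeros(numInputs, dtype="float32")
inputVector[[1, 2, 3]] = 1

# Analogue of rightVecSumAtNZ_fast: overlaps[c] counts the connected
# synapses on cell c whose presynaptic input bit is active
overlaps = connections.dot(inputVector)
overlaps[overlaps < minThreshold] = 0
bottomUpActivity = set(overlaps.nonzero()[0])

print(bottomUpActivity)  # expected: {0, 5} (overlaps of 3 and 2)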
Example #12
0
class ColumnPooler(object):
  """
  This class constitutes a temporary implementation for a cross-column pooler.
  The implementation goal of this class is to prove basic properties before
  creating a cleaner implementation.
  """

  def __init__(self,
               inputWidth,
               lateralInputWidth,
               numActiveColumnsPerInhArea=40,
               synPermProximalInc=0.1,
               synPermProximalDec=0.001,
               initialProximalPermanence=0.6,
               columnDimensions=(2048,),
               minThresholdProximal=10,
               activationThresholdDistal=13,
               minThresholdDistal=10,
               initialPermanence=0.41,
               connectedPermanence=0.50,
               maxNewProximalSynapseCount=20,
               maxNewDistalSynapseCount=20,
               permanenceIncrement=0.10,
               permanenceDecrement=0.10,
               predictedSegmentDecrement=0.0,
               maxSegmentsPerCell=255,
               maxSynapsesPerProximalSegment=255,
               maxSynapsesPerDistalSegment=255,
               seed=42):
    """
    This class uses an ExtendedTemporalMemory internally to keep track of
    distal segments. Please see ExtendedTemporalMemory for descriptions of
    constructor parameters not defined below.

    Parameters:
    ----------------------------
    @param  inputWidth (int)
            The number of proximal inputs into this layer

    @param  lateralInputWidth (int)
            The number of lateral inputs into this layer

    @param  numActiveColumnsPerInhArea (int)
            Target number of active cells

    @param  synPermProximalInc (float)
            Permanence increment for proximal synapses

    @param  synPermProximalDec (float)
            Permanence decrement for proximal synapses

    @param  initialProximalPermanence (float)
            Initial permanence value for proximal segments

    """

    self.inputWidth = inputWidth
    self.lateralInputWidth = lateralInputWidth
    self.numActiveColumnsPerInhArea = numActiveColumnsPerInhArea
    self.synPermProximalInc = synPermProximalInc
    self.synPermProximalDec = synPermProximalDec
    self.initialProximalPermanence = initialProximalPermanence
    self.connectedPermanence = connectedPermanence
    self.maxNewProximalSynapseCount = maxNewProximalSynapseCount
    self.maxNewDistalSynapseCount = maxNewDistalSynapseCount
    self.minThresholdProximal = minThresholdProximal
    self.minThresholdDistal = minThresholdDistal
    self.maxSynapsesPerProximalSegment = maxSynapsesPerProximalSegment
    self.activeCells = set()
    self._random = Random(seed)

    # Create our own instance of extended temporal memory to handle distal
    # segments.
    self.tm = createModel(
                      modelName="etm_cpp",
                      columnDimensions=columnDimensions,
                      basalInputDimensions=(lateralInputWidth,),
                      apicalInputDimensions=(),
                      cellsPerColumn=1,
                      activationThreshold=activationThresholdDistal,
                      initialPermanence=initialPermanence,
                      connectedPermanence=connectedPermanence,
                      minThreshold=minThresholdDistal,
                      maxNewSynapseCount=maxNewDistalSynapseCount,
                      permanenceIncrement=permanenceIncrement,
                      permanenceDecrement=permanenceDecrement,
                      predictedSegmentDecrement=predictedSegmentDecrement,
                      formInternalBasalConnections=False,
                      learnOnOneCell=False,
                      maxSegmentsPerCell=maxSegmentsPerCell,
                      maxSynapsesPerSegment=maxSynapsesPerDistalSegment,
                      seed=seed,
    )

    # These sparse matrices will hold the synapses for each proximal segment.
    #
    # proximalPermanences - SparseMatrix with permanence values
    # proximalConnections - SparseBinaryMatrix of connected synapses

    self.proximalPermanences = SparseMatrix(self.numberOfColumns(),
                                               inputWidth)
    self.proximalConnections = SparseBinaryMatrix(inputWidth)
    self.proximalConnections.resize(self.numberOfColumns(), inputWidth)


  def depolarizeCells(self, activeExternalCells, learn=True):
    """
    Parameters:
    ----------------------------
    @param  activeExternalCells  (set)
            Indices of active cells that will form connections to distal
            segments.

    @param  learn (bool)
            If true, distal segment activations will be recorded. This
            information is used during segment cleanup.

    """
    self.tm.depolarizeCells(activeCellsExternalBasal=activeExternalCells,
                            learn=learn)


  def activateCells(self,
                    feedforwardInput=(),
                    reinforceCandidatesExternal=(),
                    growthCandidatesExternal=(),
                    learn=True):
    """

    @param  feedforwardInput (set)
            Indices of active input bits

    @param  reinforceCandidatesExternal (set)
            Indices of active cells that will reinforce synapses to distal
            segments.

    @param  growthCandidatesExternal  (set)
            Indices of active cells that will grow synapses to distal segments.

    @param learn                    (bool)
            If True, we are learning a new object
    """
    if learn:
      self._activateCellsLearningMode(feedforwardInput,
                                      reinforceCandidatesExternal,
                                      growthCandidatesExternal)
    else:
      self._activateCellsInferenceMode(feedforwardInput)


  def compute(self, feedforwardInput=(), lateralInput=(), learn=True):
    """
    Runs one time step of the column pooler algorithm.

    This method assumes:

     - Lateral input should trigger predictions for this time step, i.e. for
       this feedforward input.
     - During learning, all lateral input is eligible for growth and
       reinforcement.

    If these are bad assumptions, use depolarizeCells and activateCells
    directly.

    @param  feedforwardInput (set)
            Indices of active feedforward input bits

    @param  lateralInput  (set)
            Indices of active lateral input bits

    @param learn                    (bool)
            If True, we are learning a new object
    """
    self.depolarizeCells(lateralInput, learn)
    self.activateCells(feedforwardInput, lateralInput, lateralInput, learn)


  def _activateCellsLearningMode(self,
                                 feedforwardInput,
                                 reinforceCandidatesExternal,
                                 growthCandidatesExternal):
    """
    Learning mode: we are learning a new object. If there is no prior
    activity, we randomly activate numActiveColumnsPerInhArea cells (about 2%
    of cells with the default parameters) and create connections to incoming
    input. If there was prior activity, we maintain it.

    These cells will represent the object and learn distal connections to
    lateral cortical columns.

    Parameters:
    ----------------------------
    @param  feedforwardInput (set)
            Indices of active input bits

    @param  reinforceCandidatesExternal (set)
            Indices of active external cells whose synapses to distal
            segments may be reinforced.

    @param  growthCandidatesExternal (set)
            Indices of active external cells to which distal segments may
            grow new synapses.
    """
    # If there are no previously active cells, select random subset of cells
    if len(self.activeCells) == 0:
      cellIndices = numpy.arange(self.numberOfCells(), dtype="uint32")
      self._random.shuffle(cellIndices)  # Random.shuffle shuffles in place
      self.activeCells = set(
        cellIndices[0:self.numActiveColumnsPerInhArea])

    # else we maintain previous activity, nothing to do.

    # Those cells that remain active will learn on their proximal and distal
    # dendrites as long as there is some input.  If there are no
    # cells active, no learning happens.  This only happens in the very
    # beginning if there has been no bottom up activity at all.
    if len(self.activeCells) > 0:

      # Learn on proximal dendrite if appropriate
      if len(feedforwardInput) > 0:
        self._learnProximal(feedforwardInput, self.activeCells,
                            self.maxNewProximalSynapseCount,
                            self.proximalPermanences,
                            self.proximalConnections,
                            self.initialProximalPermanence,
                            self.synPermProximalInc, self.synPermProximalDec,
                            self.connectedPermanence)

      # Learn on distal dendrites if appropriate
      self.tm.activateCells(
        activeColumns=sorted(self.activeCells),
        reinforceCandidatesExternalBasal=sorted(reinforceCandidatesExternal),
        growthCandidatesExternalBasal=sorted(growthCandidatesExternal),
        learn=True)


  def _activateCellsInferenceMode(self, feedforwardInput):
    """
    Inference mode: if there is some feedforward activity, perform
    spatial pooling on it to recognize previously known objects. If there
    is no feedforward activity, maintain previous activity.

    Parameters:
    ----------------------------
    @param  feedforwardInput (set)
            Indices of active input bits
    """
    # Figure out which cells are active due to feedforward proximal inputs
    # In order to form unions, we keep all cells that are over threshold
    inputVector = numpy.zeros(self.numberOfInputs(), dtype=realDType)
    inputVector[list(feedforwardInput)] = 1
    overlaps = numpy.zeros(self.numberOfColumns(), dtype=realDType)
    self.proximalConnections.rightVecSumAtNZ_fast(
      inputVector.astype(realDType), overlaps)
    overlaps[overlaps < self.minThresholdProximal] = 0
    bottomUpActivity = set(overlaps.nonzero()[0])

    # If there is insufficient current bottom up activity, we incorporate all
    # previous activity. We set their overlaps so they are sure to win.
    if len(bottomUpActivity) < self.numActiveColumnsPerInhArea:
      bottomUpActivity = bottomUpActivity.union(self.activeCells)
      maxOverlap = overlaps.max()
      overlaps[self.getActiveCells()] = maxOverlap + 1

    # Narrow down list of active cells based on lateral activity
    self.activeCells = self._winnersBasedOnLateralActivity(
      bottomUpActivity,
      self.getPredictiveCells(),
      overlaps,
      self.numActiveColumnsPerInhArea
    )

    # Update the active cells in the TM. Without learning and without internal
    # basal connections, this has no effect on column pooler output.
    self.tm.activateCells(activeColumns=sorted(self.activeCells),
                          learn=False)


  def numberOfInputs(self):
    """
    Returns the number of inputs into this layer
    """
    return self.inputWidth


  def numberOfColumns(self):
    """
    Returns the number of columns in this layer.
    @return (int) Number of columns
    """
    return self.tm.numberOfColumns()


  def numberOfCells(self):
    """
    Returns the number of cells in this layer.
    @return (int) Number of cells
    """
    return self.tm.numberOfCells()


  def getActiveCells(self):
    """
    Returns the indices of the active cells.
    @return (list) Indices of active cells.
    """
    return list(self.activeCells)


  def numberOfConnectedSynapses(self, cells=None):
    """
    Returns the number of proximal connected synapses on these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells. If None return count for all cells.
    """
    if cells is None:
      cells = xrange(self.numberOfCells())
    n = 0
    for cell in cells:
      n += self.proximalConnections.nNonZerosOnRow(cell)
    return n


  def numberOfSynapses(self, cells=None):
    """
    Returns the number of proximal synapses with permanence>0 on these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells. If None return count for all cells.
    """
    if cells is None:
      cells = xrange(self.numberOfCells())
    n = 0
    for cell in cells:
      n += self.proximalPermanences.nNonZerosOnRow(cell)
    return n


  def numberOfDistalSegments(self, cells):
    """
    Returns the total number of distal segments for these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells
    """
    n = 0
    for cell in cells:
      n += self.tm.basalConnections.numSegments(cell)
    return n


  def numberOfDistalSynapses(self, cells):
    """
    Returns the total number of distal synapses for these cells.

    Parameters:
    ----------------------------
    @param  cells (set or list)
            Indices of the cells
    """
    n = 0
    for cell in cells:
      segments = self.tm.basalConnections.segmentsForCell(cell)
      for segment in segments:
        n += self.tm.basalConnections.numSynapses(segment)
    return n


  def reset(self):
    """
    Reset internal states. When learning this signifies we are to learn a
    unique new object.
    """
    self.activeCells = set()
    self.tm.reset()


  def getPredictiveCells(self):
    """
    Get the cells that are currently predicted via distal (lateral) input.

    @return (list) Indices of the currently predicted cells.
    """
    return self.tm.getPredictiveCells()


  def getPredictedActiveCells(self):
    """
    Get the set of cells that were previously predicted and then became active.

    @return (set) A set containing indices.
    """
    return self.tm.predictedActiveCellsIndices()


  def getConnections(self):
    """
    Get the Connections structure associated with our TM. Beware of using
    this as it is implementation specific and may change.

    @return (object) A Connections object
    """
    return self.tm.basalConnections


  def _learnProximal(self,
             activeInputs, activeCells, maxNewSynapseCount, proximalPermanences,
             proximalConnections, initialPermanence, synPermProximalInc,
             synPermProximalDec, connectedPermanence):
    """
    Learn on proximal dendrites of active cells.  Updates proximalPermanences
    """
    for cell in activeCells:
      cellPermanencesDense = proximalPermanences.getRow(cell)
      cellNonZeroIndices, _ = proximalPermanences.rowNonZeros(cell)
      cellNonZeroIndices = set(cellNonZeroIndices)

      # Find the synapses that should be reinforced, punished, and grown.
      reinforce = list(activeInputs & cellNonZeroIndices)
      punish = list(cellNonZeroIndices - activeInputs)
      growthCandidates = activeInputs - cellNonZeroIndices
      newSynapseCount = min(len(growthCandidates), maxNewSynapseCount)
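      # _sample is a module-level helper (defined outside this excerpt) that
      # randomly draws newSynapseCount items from growthCandidates using
      # self._random.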
      grow = _sample(growthCandidates, newSynapseCount, self._random)

      # Make the changes.
      cellPermanencesDense[punish] -= synPermProximalDec
      cellPermanencesDense[reinforce] += synPermProximalInc
      cellPermanencesDense[grow] = initialPermanence

      # Update proximalPermanences and proximalConnections.
      proximalPermanences.setRowFromDense(cell, cellPermanencesDense)
      newConnected = numpy.where(cellPermanencesDense >= connectedPermanence)[0]
      proximalConnections.replaceSparseRow(cell, newConnected)


  def _winnersBasedOnLateralActivity(self,
                                     activeCells,
                                     predictiveCells,
                                     overlaps,
                                     targetActiveCells):
    """
    Given the set of cells active due to feedforward input, narrow down the
    list of active cells based on predictions due to previous lateralInput.

    Parameters:
    ----------------------------
    @param    activeCells           (set)
              Indices of cells activated by bottom-up input.

    @param    predictiveCells       (set)
              Indices of cells that are laterally predicted.

    @param    overlaps              (numpy array)
              Bottom up overlap scores for each proximal segment. This is used
              to select additional cells if the narrowed-down list contains
              fewer than targetActiveCells.

    @param    targetActiveCells     (int)
              The number of active cells we want to have active.

    @return (set) Indices of the new winner cells
    """
    # Keep the cells that are both laterally predicted and bottom-up active
    predictedActiveCells = activeCells.intersection(predictiveCells)

    # If predicted cells don't intersect at all with active cells, we go with
    # bottom up input. In these cases we can stick with existing active cells
    # and skip the overlap sorting
    if len(predictedActiveCells) == 0:
      predictedActiveCells = activeCells

    # We want to keep all cells that were predicted and currently active due to
    # feedforward input. This set could be larger than our target number of
    # active cells due to unions, which is ok. However if there are insufficient
    # cells active after this intersection, we fill in with those currently
    # active cells that have highest overlap.
    elif len(predictedActiveCells) < targetActiveCells:
      # Don't want to consider cells already chosen
      overlaps[list(predictedActiveCells)] = 0

      # Add in the desired number of cells with highest activity
      numActive = targetActiveCells - len(predictedActiveCells)
      winnerIndices = numpy.argsort(overlaps, kind='mergesort')
      sortedWinnerIndices = winnerIndices[-numActive:][::-1]
      predictedActiveCells = predictedActiveCells.union(set(sortedWinnerIndices))

    return predictedActiveCells
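A minimal usage sketch for the ColumnPooler above. It assumes a
nupic/htmresearch environment in which createModel, SparseMatrix,
SparseBinaryMatrix, Random and realDType resolve; the widths and the number
of learning iterations are hypothetical.

# With the defaults, 40 of the 2048 cells become active per time step.
pooler = ColumnPooler(inputWidth=1024, lateralInputWidth=2048)

# Learn one object: present the same feedforward SDR for a few time steps.
feedforwardInput = set(range(40))
for _ in range(3):
  pooler.compute(feedforwardInput=feedforwardInput, lateralInput=(),
                 learn=True)
objectRepresentation = set(pooler.getActiveCells())

# Infer: after a reset, the same input should reactivate the same cells.
pooler.reset()
pooler.compute(feedforwardInput=feedforwardInput, lateralInput=(), learn=False)
print(set(pooler.getActiveCells()) == objectRepresentation)  # expected: True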