def testShuffleEmpty(self):
    """Shuffling a zero-length uint32 array must leave it empty."""
    rng = Random(42)
    empty = numpy.zeros([0], dtype="uint32")

    rng.shuffle(empty)

    self.assertEqual(empty.size, 0)
def testShuffle(self):
    """Shuffle [1, 2, 3, 4] with Random(42) and check the resulting order."""
    rng = Random(42)
    values = numpy.array([1, 2, 3, 4], dtype="uint32")

    rng.shuffle(values)

    # Positions are checked in order, so the first mismatch is reported first.
    for position, expected in enumerate((3, 4, 2, 1)):
        self.assertEqual(values[position], expected)
def testShuffle(self):
    """Shuffle [1, 2, 3, 4] with Random(42) and check the resulting order."""
    rng = Random(42)
    values = numpy.array([1, 2, 3, 4], dtype="uint32")

    rng.shuffle(values)

    # Positions are checked in order, so the first mismatch is reported first.
    for position, expected in enumerate((2, 1, 4, 3)):
        self.assertEqual(values[position], expected)
class PatternMachine(object):
    """
    Base pattern machine class.

    On construction, generates `num` patterns (each a set of on-bit indices
    taken from `n` available bits) and keeps them keyed by pattern number.
    """

    def __init__(self, n, w, num=100, seed=42):
        """
        @param n    (int)      Number of available bits in pattern
        @param w    (int/list) Number of on bits in pattern
                               If list, each pattern will have a `w` randomly
                               selected from the list.
        @param num  (int)      Number of available patterns
        @param seed            Value passed to `Random` to build the generator
        """
        # Save member variables
        self._n = n
        self._w = w
        self._num = num

        # Initialize member variables
        self._random = Random(seed)
        self._patterns = dict()

        self._generate()

    def get(self, number):
        """
        Return a pattern for a number.

        @param number (int) Number of pattern

        @return (set) Indices of on bits

        @raise IndexError if no pattern exists for `number`
        """
        if number not in self._patterns:
            raise IndexError("Invalid number")

        return self._patterns[number]

    def addNoise(self, bits, amount):
        """
        Add noise to pattern.

        @param bits   (set)   Indices of on bits
        @param amount (float) Probability of switching an on bit with a
                              random bit

        @return (set) Indices of on bits in noisy pattern
        """
        newBits = set()

        for bit in bits:
            # One getReal64() draw per bit; a replacement costs one more draw.
            if self._random.getReal64() < amount:
                newBits.add(self._random.getUInt32(self._n))
            else:
                newBits.add(bit)

        return newBits

    def numbersForBit(self, bit):
        """
        Return the set of pattern numbers that match a bit.

        @param bit (int) Index of bit

        @return (set) Indices of numbers

        @raise IndexError if `bit` is not smaller than `n`
        """
        if bit >= self._n:
            raise IndexError("Invalid bit")

        return set(index
                   for index, pattern in self._patterns.items()
                   if bit in pattern)

    def numberMapForBits(self, bits):
        """
        Return a map from number to matching on bits,
        for all numbers that match a set of bits.

        @param bits (set) Indices of bits

        @return (dict) Mapping from number => on bits.
        """
        numberMap = dict()

        for bit in bits:
            for number in self.numbersForBit(bit):
                numberMap.setdefault(number, set()).add(bit)

        return numberMap

    def prettyPrintPattern(self, bits, verbosity=1):
        """
        Pretty print a pattern.

        @param bits      (set) Indices of on bits
        @param verbosity (int) Verbosity level

        @return (string) Pretty-printed text
        """
        numberMap = self.numberMapForBits(bits)

        # Numbers with the most matching bits come first; sorted() is stable,
        # so ties keep the order in which numberMap produced them.
        numberItems = sorted(numberMap.items(),
                             key=lambda item: len(item[1]),
                             reverse=True)

        numberList = []
        for number, matchedBits in numberItems:
            if verbosity > 2:
                strBits = [str(b) for b in matchedBits]
                numberText = "{0} (bits: {1})".format(number,
                                                      ",".join(strBits))
            elif verbosity > 1:
                numberText = "{0} ({1} bits)".format(number, len(matchedBits))
            else:
                numberText = str(number)

            numberList.append(numberText)

        return "[{0}]".format(", ".join(numberList))

    def _generate(self):
        """
        Generates set of random patterns.
        """
        candidates = np.arange(self._n, dtype=np.uint32)

        for i in range(self._num):
            # Shuffle first, then draw `w`: this keeps the order in which the
            # random generator is consumed.
            self._random.shuffle(candidates)
            pattern = candidates[0:self._getW()]
            self._patterns[i] = set(pattern)

    def _getW(self):
        """
        Gets a value of `w` for use in generating a pattern.

        When `w` is a list (or tuple), one entry is picked with the instance's
        random generator; otherwise `w` is returned unchanged.
        """
        w = self._w

        if isinstance(w, (list, tuple)):
            return w[self._random.getUInt32(len(w))]

        return w
class RandomDistributedScalarEncoder(Encoder):
    """
    A scalar encoder encodes a numeric (floating point) value into an array
    of bits.

    This class maps a scalar value into a random distributed representation
    that is suitable as scalar input into the spatial pooler. The encoding
    scheme is designed to replace a simple ScalarEncoder. It preserves the
    important properties around overlapping representations. Unlike
    ScalarEncoder the min and max range can be dynamically increased without
    any negative effects. The only required parameter is resolution, which
    determines the resolution of input values.

    Scalar values are mapped to a bucket. The class maintains a random
    distributed encoding for each bucket. The following properties are
    maintained by RandomDistributedEncoder:

    1) Similar scalars should have high overlap. Overlap should decrease
    smoothly as scalars become less similar. Specifically, neighboring bucket
    indices must overlap by a linearly decreasing number of bits.

    2) Dissimilar scalars should have very low overlap so that the SP does not
    confuse representations. Specifically, buckets that are more than w
    indices apart should have at most maxOverlap bits of overlap. We
    arbitrarily (and safely) define "very low" to be 2 bits of overlap or
    lower.

    Properties 1 and 2 lead to the following overlap rules for buckets i and j:

        If abs(i-j) < w then:
          overlap(i,j) = w - abs(i-j)
        else:
          overlap(i,j) <= maxOverlap

    3) The representation for a scalar must not change during the lifetime of
    the object. Specifically, as new buckets are created and the min/max range
    is extended, the representation for previously in-range scalars and
    previously created buckets must not change.
    """

    # Largest overlap allowed for non-adjacent encodings. Used by __init__ and
    # by read(), because write() does not serialize the value.
    _DEFAULT_MAX_OVERLAP = 2

    def __init__(self, resolution, w=21, n=400, name=None, offset=None,
                 seed=42, verbosity=0):
        """Constructor

        @param resolution A floating point positive number denoting the
            resolution of the output representation. Numbers within
            [offset-resolution/2, offset+resolution/2] will fall into
            the same bucket and thus have an identical representation.
            Adjacent buckets will differ in one bit. resolution is a
            required parameter.

        @param w Number of bits to set in output. w must be odd to avoid
            centering problems. w must be large enough that spatial pooler
            columns will have a sufficiently large overlap to avoid
            false matches. A value of w=21 is typical.

        @param n Number of bits in the representation (must be > w). n must
            be large enough such that there is enough room to select new
            representations as the range grows. With w=21 a value of n=400
            is typical. The class enforces n > 6*w.

        @param name An optional string which will become part of the
            description.

        @param offset A floating point offset used to map scalar inputs to
            bucket indices. The middle bucket will correspond to numbers in
            the range [offset - resolution/2, offset + resolution/2). If set
            to None, the very first input that is encoded will be used to
            determine the offset.

        @param seed The seed used to build the NupicRandom generator. If set
            to -1 the generator will be initialized without a fixed seed.

        @param verbosity An integer controlling the level of debugging
            output. A value of 0 implies no output. verbosity=1 may lead to
            one-time printouts during construction, serialization or
            deserialization. verbosity=2 may lead to some output per encode
            operation. verbosity>2 may lead to significantly more output.

        @raise ValueError if w, resolution or n fail the checks below.
        """
        # Validate inputs
        if (w <= 0) or (w % 2 == 0):
            raise ValueError("w must be an odd positive integer")

        if resolution <= 0:
            raise ValueError("resolution must be a positive number")

        if (n <= 6 * w) or (not isinstance(n, int)):
            raise ValueError("n must be an int strictly greater than 6*w. For "
                             "good results we recommend n be strictly greater "
                             "than 11*w")

        self.encoders = None
        self.verbosity = verbosity
        self.w = w
        self.n = n
        self.resolution = float(resolution)

        # The largest overlap we allow for non-adjacent encodings
        self._maxOverlap = self._DEFAULT_MAX_OVERLAP

        # initialize the random number generators
        self._seed(seed)

        # Internal parameters for bucket mapping
        self.minIndex = None
        self.maxIndex = None
        self._offset = None
        self._initializeBucketMap(INITIAL_BUCKETS, offset)

        # A name used for debug printouts
        if name is not None:
            self.name = name
        else:
            self.name = "[%s]" % (self.resolution)

        if self.verbosity > 0:
            self.dump()

    def __setstate__(self, state):
        """
        Restore the instance from a pickled `state` dict.

        If the stored "random" entry is a numpy RandomState, it is replaced by
        a NupicRandom seeded from that state.
        """
        self.__dict__.update(state)

        # Initialize self.random as an instance of NupicRandom derived from the
        # previous numpy random state
        randomState = state["random"]
        if isinstance(randomState, numpy.random.mtrand.RandomState):
            # sys.maxint only exists on Python 2; fall back to sys.maxsize.
            upperBound = getattr(sys, "maxint", sys.maxsize)
            self.random = NupicRandom(randomState.randint(upperBound))

    def _seed(self, seed=-1):
        """
        Initialize the random seed
        """
        if seed != -1:
            self.random = NupicRandom(seed)
        else:
            self.random = NupicRandom()

    def getDecoderOutputFieldTypes(self):
        """ See method description in base.py """
        return (FieldMetaType.float, )

    def getWidth(self):
        """ See method description in base.py """
        return self.n

    def getDescription(self):
        """ Return a single-entry description list: [(name, 0)]. """
        return [(self.name, 0)]

    def getBucketIndices(self, x):
        """ See method description in base.py """
        if ((isinstance(x, float) and math.isnan(x)) or
                x == SENTINEL_VALUE_FOR_MISSING_DATA):
            return [None]

        # The first encoded value fixes the offset when none was configured.
        if self._offset is None:
            self._offset = x

        bucketIdx = ((self._maxBuckets // 2) +
                     int(round((x - self._offset) / self.resolution)))

        # Clip to the valid range [0, _maxBuckets - 1]
        if bucketIdx < 0:
            bucketIdx = 0
        elif bucketIdx >= self._maxBuckets:
            bucketIdx = self._maxBuckets - 1

        return [bucketIdx]

    def mapBucketIndexToNonZeroBits(self, index):
        """
        Given a bucket index, return the list of non-zero bits. If the bucket
        index does not exist, it is created. If the index falls outside our
        range we clip it.

        @param index The bucket index to get non-zero bits for.
        @returns numpy array of indices of non-zero bits for specified index.
        """
        if index < 0:
            index = 0

        if index >= self._maxBuckets:
            index = self._maxBuckets - 1

        if index not in self.bucketMap:
            if self.verbosity >= 2:
                print("Adding additional buckets to handle index= %s"
                      % (index,))
            self._createBucket(index)
        return self.bucketMap[index]

    def encodeIntoArray(self, x, output):
        """ See method description in base.py """
        if x is not None and not isinstance(x, numbers.Number):
            raise TypeError(
                "Expected a scalar input but got input of type %s" % type(x))

        # Get the bucket index to use
        bucketIdx = self.getBucketIndices(x)[0]

        # None is returned for missing value in which case we return all 0's.
        output[0:self.n] = 0
        if bucketIdx is not None:
            output[self.mapBucketIndexToNonZeroBits(bucketIdx)] = 1

    def _createBucket(self, index):
        """
        Create the given bucket index. Create as many in-between bucket
        indices as necessary, one at a time, starting from the existing
        bucket closest to `index`.

        An index already inside [minIndex, maxIndex] is left untouched.
        """
        if index < self.minIndex:
            while self.minIndex > index:
                # Create a new representation that has exactly w-1 overlapping
                # bits as the min representation
                newIndex = self.minIndex - 1
                self.bucketMap[newIndex] = self._newRepresentation(
                    self.minIndex, newIndex)
                self.minIndex = newIndex
        else:
            while self.maxIndex < index:
                # Create a new representation that has exactly w-1 overlapping
                # bits as the max representation
                newIndex = self.maxIndex + 1
                self.bucketMap[newIndex] = self._newRepresentation(
                    self.maxIndex, newIndex)
                self.maxIndex = newIndex

    def _newRepresentation(self, index, newIndex):
        """
        Return a new representation for newIndex that overlaps with the
        representation at index by exactly w-1 bits
        """
        newRepresentation = self.bucketMap[index].copy()

        # Choose the bit we will replace in this representation. We need to
        # shift this bit deterministically. If this is always chosen randomly
        # then there is a 1 in w chance of the same bit being replaced in
        # neighboring representations, which is fairly high
        ri = newIndex % self.w

        # Now we choose a bit such that the overlap rules are satisfied.
        newBit = self.random.getUInt32(self.n)
        newRepresentation[ri] = newBit
        while (newBit in self.bucketMap[index] or
               not self._newRepresentationOK(newRepresentation, newIndex)):
            self.numTries += 1
            newBit = self.random.getUInt32(self.n)
            newRepresentation[ri] = newBit

        return newRepresentation

    def _newRepresentationOK(self, newRep, newIndex):
        """
        Return True if this new candidate representation satisfies all our
        overlap rules. Since we know that neighboring representations differ
        by at most one bit, we compute running overlaps.

        @raise ValueError if newIndex is not adjacent to (or inside) the
            range of existing bucket indices.
        """
        if newRep.size != self.w:
            return False
        if (newIndex < self.minIndex - 1) or (newIndex > self.maxIndex + 1):
            raise ValueError("newIndex must be within one of existing indices")

        # A binary representation of newRep. We will use this to test
        # containment
        newRepBinary = numpy.array([False] * self.n)
        newRepBinary[newRep] = True

        # Midpoint
        midIdx = self._maxBuckets // 2

        # Start by checking the overlap at minIndex
        runningOverlap = self._countOverlap(self.bucketMap[self.minIndex],
                                            newRep)
        if not self._overlapOK(self.minIndex, newIndex,
                               overlap=runningOverlap):
            return False

        # Compute running overlaps all the way to the midpoint
        for i in range(self.minIndex + 1, midIdx + 1):
            # This is the bit that is going to change
            newBit = (i - 1) % self.w

            # Update our running overlap
            if newRepBinary[self.bucketMap[i - 1][newBit]]:
                runningOverlap -= 1
            if newRepBinary[self.bucketMap[i][newBit]]:
                runningOverlap += 1

            # Verify our rules
            if not self._overlapOK(i, newIndex, overlap=runningOverlap):
                return False

        # At this point, runningOverlap contains the overlap for midIdx
        # Compute running overlaps all the way to maxIndex
        for i in range(midIdx + 1, self.maxIndex + 1):
            # This is the bit that is going to change
            newBit = i % self.w

            # Update our running overlap
            if newRepBinary[self.bucketMap[i - 1][newBit]]:
                runningOverlap -= 1
            if newRepBinary[self.bucketMap[i][newBit]]:
                runningOverlap += 1

            # Verify our rules
            if not self._overlapOK(i, newIndex, overlap=runningOverlap):
                return False

        return True

    def _countOverlapIndices(self, i, j):
        """
        Return the overlap between bucket indices i and j

        @raise ValueError if either bucket does not exist.
        """
        if i in self.bucketMap and j in self.bucketMap:
            return self._countOverlap(self.bucketMap[i], self.bucketMap[j])
        raise ValueError("Either i or j don't exist")

    @staticmethod
    def _countOverlap(rep1, rep2):
        """
        Return the overlap between two representations. rep1 and rep2 are
        lists of non-zero indices.
        """
        return sum(1 for e in rep1 if e in rep2)

    def _overlapOK(self, i, j, overlap=None):
        """
        Return True if the given overlap between bucket indices i and j are
        acceptable. If overlap is not specified, calculate it from the
        bucketMap
        """
        if overlap is None:
            overlap = self._countOverlapIndices(i, j)

        distance = abs(i - j)
        if distance < self.w:
            # Nearby buckets must overlap by exactly w - distance bits.
            return overlap == (self.w - distance)
        # Distant buckets may share at most _maxOverlap bits.
        return overlap <= self._maxOverlap

    def _initializeBucketMap(self, maxBuckets, offset):
        """
        Initialize the bucket map assuming the given number of maxBuckets.
        """
        # The first bucket index will be _maxBuckets / 2 and bucket indices
        # will be allowed to grow lower or higher as long as they don't become
        # negative. _maxBuckets is required because the current CLA Classifier
        # assumes bucket indices must be non-negative. This normally does not
        # need to be changed but if altered, should be set to an even number.
        self._maxBuckets = maxBuckets
        self.minIndex = self._maxBuckets // 2
        self.maxIndex = self._maxBuckets // 2

        # The scalar offset used to map scalar values to bucket indices. The
        # middle bucket will correspond to numbers in the range
        # [offset-resolution/2, offset+resolution/2).
        # The bucket index for a number x will be:
        #     maxBuckets/2 + int( round( (x-offset)/resolution ) )
        self._offset = offset

        # This dictionary maps a bucket index into its bit representation.
        # We initialize the class with a single bucket at the middle index.
        self.bucketMap = {}

        def _permutation(n):
            r = numpy.arange(n, dtype=numpy.uint32)
            self.random.shuffle(r)
            return r

        self.bucketMap[self.minIndex] = _permutation(self.n)[0:self.w]

        # How often we need to retry when generating valid encodings
        self.numTries = 0

    def dump(self):
        """ Print the encoder's parameters (and all buckets if verbosity > 2). """
        print("RandomDistributedScalarEncoder:")
        print(" minIndex: %d" % self.minIndex)
        print(" maxIndex: %d" % self.maxIndex)
        print(" w: %d" % self.w)
        print(" n: %d" % self.getWidth())
        print(" resolution: %g" % self.resolution)
        print(" offset: %s" % str(self._offset))
        print(" numTries: %d" % self.numTries)
        print(" name: %s" % self.name)
        if self.verbosity > 2:
            print(" All buckets: ")
            pprint.pprint(self.bucketMap)

    @classmethod
    def read(cls, proto):
        """
        Build an encoder from the fields of `proto` without running __init__.

        `_maxOverlap` and `numTries` are not written by write(), so they are
        reset to the values __init__ would assign; otherwise creating a new
        bucket or calling dump() on the result raises AttributeError.
        """
        encoder = object.__new__(cls)
        encoder.resolution = proto.resolution
        encoder.w = proto.w
        encoder.n = proto.n
        encoder.name = proto.name
        encoder._offset = proto.offset
        encoder.random = NupicRandom()
        encoder.random.read(proto.random)
        encoder.verbosity = proto.verbosity
        encoder.minIndex = proto.minIndex
        encoder.maxIndex = proto.maxIndex
        encoder.encoders = None
        encoder._maxBuckets = INITIAL_BUCKETS
        encoder._maxOverlap = cls._DEFAULT_MAX_OVERLAP
        encoder.numTries = 0
        encoder.bucketMap = {
            x.key: numpy.array(x.value, dtype=numpy.uint32)
            for x in proto.bucketMap
        }

        return encoder

    def write(self, proto):
        """ Copy the encoder's serializable fields onto `proto`. """
        proto.resolution = self.resolution
        proto.w = self.w
        proto.n = self.n
        proto.name = self.name
        proto.offset = self._offset
        self.random.write(proto.random)
        proto.verbosity = self.verbosity
        proto.minIndex = self.minIndex
        proto.maxIndex = self.maxIndex
        proto.bucketMap = [{"key": key, "value": value.tolist()}
                           for key, value in self.bucketMap.items()]
class PatternMachine(object):
    """
    Base pattern machine class.

    On construction, generates `num` patterns (each a set of on-bit indices
    taken from `n` available bits) and keeps them keyed by pattern number.
    """

    def __init__(self, n, w, num=100, seed=42):
        """
        @param n    (int)      Number of available bits in pattern
        @param w    (int/list) Number of on bits in pattern
                               If list, each pattern will have a `w` randomly
                               selected from the list.
        @param num  (int)      Number of available patterns
        @param seed            Value passed to `Random` to build the generator
        """
        # Save member variables
        self._n = n
        self._w = w
        self._num = num

        # Initialize member variables
        self._random = Random(seed)
        self._patterns = dict()

        self._generate()

    def get(self, number):
        """
        Return a pattern for a number.

        @param number (int) Number of pattern

        @return (set) Indices of on bits

        @raise IndexError if no pattern exists for `number`
        """
        if number not in self._patterns:
            raise IndexError("Invalid number")

        return self._patterns[number]

    def addNoise(self, bits, amount):
        """
        Add noise to pattern.

        @param bits   (set)   Indices of on bits
        @param amount (float) Probability of switching an on bit with a
                              random bit

        @return (set) Indices of on bits in noisy pattern
        """
        newBits = set()

        for bit in bits:
            # One getReal64() draw per bit; a replacement costs one more draw.
            if self._random.getReal64() < amount:
                newBits.add(self._random.getUInt32(self._n))
            else:
                newBits.add(bit)

        return newBits

    def numbersForBit(self, bit):
        """
        Return the set of pattern numbers that match a bit.

        @param bit (int) Index of bit

        @return (set) Indices of numbers

        @raise IndexError if `bit` is not smaller than `n`
        """
        if bit >= self._n:
            raise IndexError("Invalid bit")

        # .items() (rather than the Python-2-only .iteritems()) yields the
        # same pairs on both Python 2 and Python 3.
        return set(index
                   for index, pattern in self._patterns.items()
                   if bit in pattern)

    def numberMapForBits(self, bits):
        """
        Return a map from number to matching on bits,
        for all numbers that match a set of bits.

        @param bits (set) Indices of bits

        @return (dict) Mapping from number => on bits.
        """
        numberMap = dict()

        for bit in bits:
            for number in self.numbersForBit(bit):
                numberMap.setdefault(number, set()).add(bit)

        return numberMap

    def prettyPrintPattern(self, bits, verbosity=1):
        """
        Pretty print a pattern.

        @param bits      (set) Indices of on bits
        @param verbosity (int) Verbosity level

        @return (string) Pretty-printed text
        """
        numberMap = self.numberMapForBits(bits)

        # Numbers with the most matching bits come first; sorted() is stable,
        # so ties keep the order in which numberMap produced them. The key
        # indexes the (number, bits) pair instead of using a tuple-parameter
        # lambda, which is a syntax error on Python 3.
        numberItems = sorted(numberMap.items(),
                             key=lambda item: len(item[1]),
                             reverse=True)

        numberList = []
        for number, matchedBits in numberItems:
            if verbosity > 2:
                strBits = [str(b) for b in matchedBits]
                numberText = "{0} (bits: {1})".format(number,
                                                      ",".join(strBits))
            elif verbosity > 1:
                numberText = "{0} ({1} bits)".format(number, len(matchedBits))
            else:
                numberText = str(number)

            numberList.append(numberText)

        return "[{0}]".format(", ".join(numberList))

    def _generate(self):
        """
        Generates set of random patterns.
        """
        candidates = np.arange(self._n, dtype=np.uint32)

        for i in range(self._num):
            # Shuffle first, then draw `w`: this keeps the order in which the
            # random generator is consumed.
            self._random.shuffle(candidates)
            pattern = candidates[0:self._getW()]
            self._patterns[i] = set(pattern)

    def _getW(self):
        """
        Gets a value of `w` for use in generating a pattern.

        When `w` is a list (or tuple), one entry is picked with the instance's
        random generator; otherwise `w` is returned unchanged.
        """
        w = self._w

        if isinstance(w, (list, tuple)):
            return w[self._random.getUInt32(len(w))]

        return w
class SequenceMachine(object):
    """
    Base sequence machine class.

    Builds sequences (lists of patterns, with `None` marking a reset) on top
    of a pattern machine.
    """

    def __init__(self, patternMachine, seed=42):
        """
        @param patternMachine (PatternMachine) Pattern machine instance
        @param seed                            Value passed to `Random` to
                                               build the generator
        """
        # Save member variables
        self.patternMachine = patternMachine

        # Initialize member variables
        self._random = Random(seed)

    def generateFromNumbers(self, numbers):
        """
        Generate a sequence from a list of numbers.

        Note: Any `None` in the list of numbers is considered a reset.

        @param numbers (list) List of numbers

        @return (list) Generated sequence
        """
        return [None if number is None else self.patternMachine.get(number)
                for number in numbers]

    def addSpatialNoise(self, sequence, amount):
        """
        Add spatial noise to each pattern in the sequence.

        @param sequence (list)  Sequence
        @param amount   (float) Amount of spatial noise

        @return (list) Sequence with spatial noise
        """
        newSequence = []

        for pattern in sequence:
            # Resets (None) are passed through untouched.
            if pattern is not None:
                pattern = self.patternMachine.addNoise(pattern, amount)
            newSequence.append(pattern)

        return newSequence

    def prettyPrintSequence(self, sequence, verbosity=1):
        """
        Pretty print a sequence.

        @param sequence  (list) Sequence
        @param verbosity (int)  Verbosity level

        @return (string) Pretty-printed text
        """
        lastIndex = len(sequence) - 1
        parts = []

        for i, pattern in enumerate(sequence):
            if pattern is None:
                parts.append("<reset>")
                # A newline follows every reset except a trailing one.
                if i < lastIndex:
                    parts.append("\n")
            else:
                parts.append(self.patternMachine.prettyPrintPattern(
                    pattern, verbosity=verbosity))

        return "".join(parts)

    def generateNumbers(self, numSequences, sequenceLength, sharedRange=None):
        """
        @param numSequences   (int)   Number of sequences to return,
                                      separated by None
        @param sequenceLength (int)   Length of each sequence
        @param sharedRange    (tuple) (start index, end index) indicating
                                      range of shared subsequence in each
                                      sequence (None if no shared
                                      subsequences)

        @return (list) Numbers representing sequences
        """
        numbers = []

        # Use the same `is not None` test here and inside the loop, so the
        # shared-range variables are always defined whenever they are used.
        hasSharedRange = sharedRange is not None

        if hasSharedRange:
            sharedStart, sharedEnd = sharedRange
            sharedLength = sharedEnd - sharedStart
            # Shared numbers come right after the numbers used by the
            # individual sequences.
            firstShared = numSequences * sequenceLength
            sharedNumbers = list(range(firstShared,
                                       firstShared + sharedLength))

        for i in range(numSequences):
            start = i * sequenceLength
            newNumbers = np.arange(start, start + sequenceLength,
                                   dtype=np.uint32)
            self._random.shuffle(newNumbers)
            newNumbers = list(newNumbers)

            if hasSharedRange:
                newNumbers[sharedStart:sharedEnd] = sharedNumbers

            numbers += newNumbers
            numbers.append(None)

        return numbers
class RandomDistributedScalarEncoder(Encoder):
    """
    A scalar encoder encodes a numeric (floating point) value into an array
    of bits.

    This class maps a scalar value into a random distributed representation
    that is suitable as scalar input into the spatial pooler. The encoding
    scheme is designed to replace a simple ScalarEncoder. It preserves the
    important properties around overlapping representations. Unlike
    ScalarEncoder the min and max range can be dynamically increased without
    any negative effects. The only required parameter is resolution, which
    determines the resolution of input values.

    Scalar values are mapped to a bucket. The class maintains a random
    distributed encoding for each bucket. The following properties are
    maintained by RandomDistributedEncoder:

    1) Similar scalars should have high overlap. Overlap should decrease
    smoothly as scalars become less similar. Specifically, neighboring bucket
    indices must overlap by a linearly decreasing number of bits.

    2) Dissimilar scalars should have very low overlap so that the SP does not
    confuse representations. Specifically, buckets that are more than w
    indices apart should have at most maxOverlap bits of overlap. We
    arbitrarily (and safely) define "very low" to be 2 bits of overlap or
    lower.

    Properties 1 and 2 lead to the following overlap rules for buckets i and j:

        If abs(i-j) < w then:
          overlap(i,j) = w - abs(i-j)
        else:
          overlap(i,j) <= maxOverlap

    3) The representation for a scalar must not change during the lifetime of
    the object. Specifically, as new buckets are created and the min/max range
    is extended, the representation for previously in-range scalars and
    previously created buckets must not change.
    """

    # Largest overlap allowed for non-adjacent encodings. Used by __init__ and
    # by read(), because write() does not serialize the value.
    _DEFAULT_MAX_OVERLAP = 2

    def __init__(self, resolution, w=21, n=400, name=None, offset=None,
                 seed=42, verbosity=0):
        """Constructor

        @param resolution A floating point positive number denoting the
            resolution of the output representation. Numbers within
            [offset-resolution/2, offset+resolution/2] will fall into
            the same bucket and thus have an identical representation.
            Adjacent buckets will differ in one bit. resolution is a
            required parameter.

        @param w Number of bits to set in output. w must be odd to avoid
            centering problems. w must be large enough that spatial pooler
            columns will have a sufficiently large overlap to avoid
            false matches. A value of w=21 is typical.

        @param n Number of bits in the representation (must be > w). n must
            be large enough such that there is enough room to select new
            representations as the range grows. With w=21 a value of n=400
            is typical. The class enforces n > 6*w.

        @param name An optional string which will become part of the
            description.

        @param offset A floating point offset used to map scalar inputs to
            bucket indices. The middle bucket will correspond to numbers in
            the range [offset - resolution/2, offset + resolution/2). If set
            to None, the very first input that is encoded will be used to
            determine the offset.

        @param seed The seed used to build the NupicRandom generator. If set
            to -1 the generator will be initialized without a fixed seed.

        @param verbosity An integer controlling the level of debugging
            output. A value of 0 implies no output. verbosity=1 may lead to
            one-time printouts during construction, serialization or
            deserialization. verbosity=2 may lead to some output per encode
            operation. verbosity>2 may lead to significantly more output.

        @raise ValueError if w, resolution or n fail the checks below.
        """
        # Validate inputs
        if (w <= 0) or (w % 2 == 0):
            raise ValueError("w must be an odd positive integer")

        if resolution <= 0:
            raise ValueError("resolution must be a positive number")

        if (n <= 6 * w) or (not isinstance(n, int)):
            raise ValueError("n must be an int strictly greater than 6*w. For "
                             "good results we recommend n be strictly greater "
                             "than 11*w")

        self.encoders = None
        self.verbosity = verbosity
        self.w = w
        self.n = n
        self.resolution = float(resolution)

        # The largest overlap we allow for non-adjacent encodings
        self._maxOverlap = self._DEFAULT_MAX_OVERLAP

        # initialize the random number generators
        self._seed(seed)

        # Internal parameters for bucket mapping
        self.minIndex = None
        self.maxIndex = None
        self._offset = None
        self._initializeBucketMap(INITIAL_BUCKETS, offset)

        # A name used for debug printouts
        if name is not None:
            self.name = name
        else:
            self.name = "[%s]" % (self.resolution)

        if self.verbosity > 0:
            self.dump()

    def __setstate__(self, state):
        """
        Restore the instance from a pickled `state` dict.

        If the stored "random" entry is a numpy RandomState, it is replaced by
        a NupicRandom seeded from that state.
        """
        self.__dict__.update(state)

        # Initialize self.random as an instance of NupicRandom derived from the
        # previous numpy random state
        randomState = state["random"]
        if isinstance(randomState, numpy.random.mtrand.RandomState):
            # sys.maxint only exists on Python 2; fall back to sys.maxsize.
            upperBound = getattr(sys, "maxint", sys.maxsize)
            self.random = NupicRandom(randomState.randint(upperBound))

    def _seed(self, seed=-1):
        """
        Initialize the random seed
        """
        if seed != -1:
            self.random = NupicRandom(seed)
        else:
            self.random = NupicRandom()

    def getDecoderOutputFieldTypes(self):
        """ See method description in base.py """
        return (FieldMetaType.float, )

    def getWidth(self):
        """ See method description in base.py """
        return self.n

    def getDescription(self):
        """ Return a single-entry description list: [(name, 0)]. """
        return [(self.name, 0)]

    def getBucketIndices(self, x):
        """ See method description in base.py """
        if ((isinstance(x, float) and math.isnan(x)) or
                x == SENTINEL_VALUE_FOR_MISSING_DATA):
            return [None]

        # The first encoded value fixes the offset when none was configured.
        if self._offset is None:
            self._offset = x

        bucketIdx = ((self._maxBuckets // 2) +
                     int(round((x - self._offset) / self.resolution)))

        # Clip to the valid range [0, _maxBuckets - 1]
        if bucketIdx < 0:
            bucketIdx = 0
        elif bucketIdx >= self._maxBuckets:
            bucketIdx = self._maxBuckets - 1

        return [bucketIdx]

    def mapBucketIndexToNonZeroBits(self, index):
        """
        Given a bucket index, return the list of non-zero bits. If the bucket
        index does not exist, it is created. If the index falls outside our
        range we clip it.

        @param index The bucket index to get non-zero bits for.
        @returns numpy array of indices of non-zero bits for specified index.
        """
        if index < 0:
            index = 0

        if index >= self._maxBuckets:
            index = self._maxBuckets - 1

        if index not in self.bucketMap:
            if self.verbosity >= 2:
                print("Adding additional buckets to handle index= %s"
                      % (index,))
            self._createBucket(index)
        return self.bucketMap[index]

    def encodeIntoArray(self, x, output):
        """ See method description in base.py """
        if x is not None and not isinstance(x, numbers.Number):
            raise TypeError(
                "Expected a scalar input but got input of type %s" % type(x))

        # Get the bucket index to use
        bucketIdx = self.getBucketIndices(x)[0]

        # None is returned for missing value in which case we return all 0's.
        output[0:self.n] = 0
        if bucketIdx is not None:
            output[self.mapBucketIndexToNonZeroBits(bucketIdx)] = 1

    def _createBucket(self, index):
        """
        Create the given bucket index. Create as many in-between bucket
        indices as necessary, one at a time, starting from the existing
        bucket closest to `index`.

        An index already inside [minIndex, maxIndex] is left untouched.
        """
        if index < self.minIndex:
            while self.minIndex > index:
                # Create a new representation that has exactly w-1 overlapping
                # bits as the min representation
                newIndex = self.minIndex - 1
                self.bucketMap[newIndex] = self._newRepresentation(
                    self.minIndex, newIndex)
                self.minIndex = newIndex
        else:
            while self.maxIndex < index:
                # Create a new representation that has exactly w-1 overlapping
                # bits as the max representation
                newIndex = self.maxIndex + 1
                self.bucketMap[newIndex] = self._newRepresentation(
                    self.maxIndex, newIndex)
                self.maxIndex = newIndex

    def _newRepresentation(self, index, newIndex):
        """
        Return a new representation for newIndex that overlaps with the
        representation at index by exactly w-1 bits
        """
        newRepresentation = self.bucketMap[index].copy()

        # Choose the bit we will replace in this representation. We need to
        # shift this bit deterministically. If this is always chosen randomly
        # then there is a 1 in w chance of the same bit being replaced in
        # neighboring representations, which is fairly high
        ri = newIndex % self.w

        # Now we choose a bit such that the overlap rules are satisfied.
        newBit = self.random.getUInt32(self.n)
        newRepresentation[ri] = newBit
        while (newBit in self.bucketMap[index] or
               not self._newRepresentationOK(newRepresentation, newIndex)):
            self.numTries += 1
            newBit = self.random.getUInt32(self.n)
            newRepresentation[ri] = newBit

        return newRepresentation

    def _newRepresentationOK(self, newRep, newIndex):
        """
        Return True if this new candidate representation satisfies all our
        overlap rules. Since we know that neighboring representations differ
        by at most one bit, we compute running overlaps.

        @raise ValueError if newIndex is not adjacent to (or inside) the
            range of existing bucket indices.
        """
        if newRep.size != self.w:
            return False
        if (newIndex < self.minIndex - 1) or (newIndex > self.maxIndex + 1):
            raise ValueError("newIndex must be within one of existing indices")

        # A binary representation of newRep. We will use this to test
        # containment
        newRepBinary = numpy.array([False] * self.n)
        newRepBinary[newRep] = True

        # Midpoint
        midIdx = self._maxBuckets // 2

        # Start by checking the overlap at minIndex
        runningOverlap = self._countOverlap(self.bucketMap[self.minIndex],
                                            newRep)
        if not self._overlapOK(self.minIndex, newIndex,
                               overlap=runningOverlap):
            return False

        # Compute running overlaps all the way to the midpoint
        for i in range(self.minIndex + 1, midIdx + 1):
            # This is the bit that is going to change
            newBit = (i - 1) % self.w

            # Update our running overlap
            if newRepBinary[self.bucketMap[i - 1][newBit]]:
                runningOverlap -= 1
            if newRepBinary[self.bucketMap[i][newBit]]:
                runningOverlap += 1

            # Verify our rules
            if not self._overlapOK(i, newIndex, overlap=runningOverlap):
                return False

        # At this point, runningOverlap contains the overlap for midIdx
        # Compute running overlaps all the way to maxIndex
        for i in range(midIdx + 1, self.maxIndex + 1):
            # This is the bit that is going to change
            newBit = i % self.w

            # Update our running overlap
            if newRepBinary[self.bucketMap[i - 1][newBit]]:
                runningOverlap -= 1
            if newRepBinary[self.bucketMap[i][newBit]]:
                runningOverlap += 1

            # Verify our rules
            if not self._overlapOK(i, newIndex, overlap=runningOverlap):
                return False

        return True

    def _countOverlapIndices(self, i, j):
        """
        Return the overlap between bucket indices i and j

        @raise ValueError if either bucket does not exist.
        """
        if i in self.bucketMap and j in self.bucketMap:
            return self._countOverlap(self.bucketMap[i], self.bucketMap[j])
        raise ValueError("Either i or j don't exist")

    @staticmethod
    def _countOverlap(rep1, rep2):
        """
        Return the overlap between two representations. rep1 and rep2 are
        lists of non-zero indices.
        """
        return sum(1 for e in rep1 if e in rep2)

    def _overlapOK(self, i, j, overlap=None):
        """
        Return True if the given overlap between bucket indices i and j are
        acceptable. If overlap is not specified, calculate it from the
        bucketMap
        """
        if overlap is None:
            overlap = self._countOverlapIndices(i, j)

        distance = abs(i - j)
        if distance < self.w:
            # Nearby buckets must overlap by exactly w - distance bits.
            return overlap == (self.w - distance)
        # Distant buckets may share at most _maxOverlap bits.
        return overlap <= self._maxOverlap

    def _initializeBucketMap(self, maxBuckets, offset):
        """
        Initialize the bucket map assuming the given number of maxBuckets.
        """
        # The first bucket index will be _maxBuckets / 2 and bucket indices
        # will be allowed to grow lower or higher as long as they don't become
        # negative. _maxBuckets is required because the current CLA Classifier
        # assumes bucket indices must be non-negative. This normally does not
        # need to be changed but if altered, should be set to an even number.
        self._maxBuckets = maxBuckets
        self.minIndex = self._maxBuckets // 2
        self.maxIndex = self._maxBuckets // 2

        # The scalar offset used to map scalar values to bucket indices. The
        # middle bucket will correspond to numbers in the range
        # [offset-resolution/2, offset+resolution/2).
        # The bucket index for a number x will be:
        #     maxBuckets/2 + int( round( (x-offset)/resolution ) )
        self._offset = offset

        # This dictionary maps a bucket index into its bit representation.
        # We initialize the class with a single bucket at the middle index.
        self.bucketMap = {}

        def _permutation(n):
            r = numpy.arange(n, dtype=numpy.uint32)
            self.random.shuffle(r)
            return r

        self.bucketMap[self.minIndex] = _permutation(self.n)[0:self.w]

        # How often we need to retry when generating valid encodings
        self.numTries = 0

    def dump(self):
        """ Print the encoder's parameters (and all buckets if verbosity > 2). """
        print("RandomDistributedScalarEncoder:")
        print(" minIndex: %d" % self.minIndex)
        print(" maxIndex: %d" % self.maxIndex)
        print(" w: %d" % self.w)
        print(" n: %d" % self.getWidth())
        print(" resolution: %g" % self.resolution)
        print(" offset: %s" % str(self._offset))
        print(" numTries: %d" % self.numTries)
        print(" name: %s" % self.name)
        if self.verbosity > 2:
            print(" All buckets: ")
            pprint.pprint(self.bucketMap)

    @classmethod
    def read(cls, proto):
        """
        Build an encoder from the fields of `proto` without running __init__.

        `_maxOverlap` and `numTries` are not written by write(), so they are
        reset to the values __init__ would assign; otherwise creating a new
        bucket or calling dump() on the result raises AttributeError.
        """
        encoder = object.__new__(cls)
        encoder.resolution = proto.resolution
        encoder.w = proto.w
        encoder.n = proto.n
        encoder.name = proto.name
        encoder._offset = proto.offset
        encoder.random = NupicRandom()
        encoder.random.read(proto.random)
        encoder.verbosity = proto.verbosity
        encoder.minIndex = proto.minIndex
        encoder.maxIndex = proto.maxIndex
        encoder.encoders = None
        encoder._maxBuckets = INITIAL_BUCKETS
        encoder._maxOverlap = cls._DEFAULT_MAX_OVERLAP
        encoder.numTries = 0
        encoder.bucketMap = {
            x.key: numpy.array(x.value, dtype=numpy.uint32)
            for x in proto.bucketMap
        }

        return encoder

    def write(self, proto):
        """ Copy the encoder's serializable fields onto `proto`. """
        proto.resolution = self.resolution
        proto.w = self.w
        proto.n = self.n
        proto.name = self.name
        proto.offset = self._offset
        self.random.write(proto.random)
        proto.verbosity = self.verbosity
        proto.minIndex = self.minIndex
        proto.maxIndex = self.maxIndex
        proto.bucketMap = [{"key": key, "value": value.tolist()}
                           for key, value in self.bucketMap.items()]
class SequenceMachine(object):
    """
    Base sequence machine class.

    Turns lists of numbers into lists of patterns by looking each number up in
    a pattern machine. A `None` entry in any sequence denotes a reset.
    """

    def __init__(self, patternMachine, seed=42):
        """
        @param patternMachine (PatternMachine) Pattern machine instance
        @param seed           (int)            Seed for the internal random
                                               number generator
        """
        # Save member variables
        self.patternMachine = patternMachine

        # Initialize member variables
        self._random = Random(seed)

    def generateFromNumbers(self, numbers):
        """
        Generate a sequence from a list of numbers.

        Note: Any `None` in the list of numbers is considered a reset.

        @param numbers (list) List of numbers

        @return (list) Generated sequence
        """
        sequence = []

        for number in numbers:
            # Identity test: `== None` is evaluated elementwise by numpy types.
            if number is None:
                sequence.append(None)
            else:
                sequence.append(self.patternMachine.get(number))

        return sequence

    def addSpatialNoise(self, sequence, amount):
        """
        Add spatial noise to each pattern in the sequence.

        Resets (`None` entries) are passed through untouched.

        @param sequence (list)  Sequence
        @param amount   (float) Amount of spatial noise

        @return (list) Sequence with spatial noise
        """
        newSequence = []

        for pattern in sequence:
            if pattern is not None:
                pattern = self.patternMachine.addNoise(pattern, amount)
            newSequence.append(pattern)

        return newSequence

    def prettyPrintSequence(self, sequence, verbosity=1):
        """
        Pretty print a sequence.

        Each reset is rendered as "<reset>" followed by a newline, except that
        no newline is emitted after a reset in the last position.

        @param sequence  (list) Sequence
        @param verbosity (int)  Verbosity level

        @return (string) Pretty-printed text
        """
        lastIndex = len(sequence) - 1
        parts = []

        for i, pattern in enumerate(sequence):
            # `is None` keeps this working when patterns are numpy arrays, for
            # which `pattern == None` is not a plain boolean.
            if pattern is None:
                parts.append("<reset>")
                if i < lastIndex:
                    parts.append("\n")
            else:
                parts.append(
                    self.patternMachine.prettyPrintPattern(pattern,
                                                           verbosity=verbosity))

        return "".join(parts)

    def generateNumbers(self, numSequences, sequenceLength, sharedRange=None):
        """
        Generate shuffled number sequences, each terminated by `None`.

        Sequence `i` is a shuffle of the numbers
        [i * sequenceLength, (i + 1) * sequenceLength). When a shared range is
        given, the slice [start:end] of every sequence is replaced by the same
        run of numbers starting at numSequences * sequenceLength.

        @param numSequences   (int)   Number of sequences to return,
                                      separated by None
        @param sequenceLength (int)   Length of each sequence
        @param sharedRange    (tuple) (start index, end index) indicating range
                                      of shared subsequence in each sequence
                                      (None or empty if no shared subsequences)

        @return (list) Numbers representing sequences
        """
        numbers = []

        # Decide once whether a shared subsequence is requested, so the setup
        # here and the splice in the loop can never disagree (an empty tuple
        # previously skipped the setup but still attempted the splice).
        hasSharedRange = bool(sharedRange)

        if hasSharedRange:
            sharedStart, sharedEnd = sharedRange
            sharedLength = sharedEnd - sharedStart
            firstShared = numSequences * sequenceLength
            sharedNumbers = list(range(firstShared, firstShared + sharedLength))

        for i in range(numSequences):
            start = i * sequenceLength
            newNumbers = np.array(list(range(start, start + sequenceLength)),
                                  np.uint32)
            # Shuffles the array in place.
            self._random.shuffle(newNumbers)
            newNumbers = list(newNumbers)

            if hasSharedRange:
                newNumbers[sharedStart:sharedEnd] = sharedNumbers

            numbers += newNumbers
            numbers.append(None)

        return numbers
class ColumnPooler(object):
    """
    This class constitutes a temporary implementation for a cross-column pooler.
    The implementation goal of this class is to prove basic properties before
    creating a cleaner implementation.
    """

    def __init__(self,
                 inputWidth,
                 numActiveColumnsPerInhArea=40,
                 synPermProximalInc=0.1,
                 synPermProximalDec=0.001,
                 initialProximalPermanence=0.6,
                 columnDimensions=(2048,),
                 activationThreshold=13,
                 minThreshold=10,
                 initialPermanence=0.41,
                 connectedPermanence=0.50,
                 maxNewSynapseCount=20,
                 permanenceIncrement=0.10,
                 permanenceDecrement=0.10,
                 predictedSegmentDecrement=0.0,
                 maxSegmentsPerCell=255,
                 maxSynapsesPerSegment=255,
                 seed=42):
        """
        This classes uses an ExtendedTemporalMemory internally to keep track of
        distal segments. Please see ExtendedTemporalMemory for descriptions of
        constructor parameters not defined below.

        Parameters:
        ----------------------------
        @param  inputWidth (int)
                The number of proximal inputs into this layer

        @param  numActiveColumnsPerInhArea (int)
                Target number of active cells

        @param  synPermProximalInc (float)
                Permanence increment for proximal synapses

        @param  synPermProximalDec (float)
                Permanence decrement for proximal synapses

        @param  initialProximalPermanence (float)
                Initial permanence value for proximal segments
        """
        self.inputWidth = inputWidth
        self.numActiveColumnsPerInhArea = numActiveColumnsPerInhArea
        self.synPermProximalInc = synPermProximalInc
        self.synPermProximalDec = synPermProximalDec
        self.initialProximalPermanence = initialProximalPermanence
        self.connectedPermanence = connectedPermanence
        self.maxNewSynapseCount = maxNewSynapseCount
        self.minThreshold = minThreshold
        self.activeCells = set()
        self._random = Random(seed)

        # Create our own instance of extended temporal memory to handle distal
        # segments.
        self.tm = createModel(
            modelName="extendedCPP",
            columnDimensions=columnDimensions,
            cellsPerColumn=1,
            activationThreshold=activationThreshold,
            initialPermanence=initialPermanence,
            connectedPermanence=connectedPermanence,
            minThreshold=minThreshold,
            maxNewSynapseCount=maxNewSynapseCount,
            permanenceIncrement=permanenceIncrement,
            permanenceDecrement=permanenceDecrement,
            predictedSegmentDecrement=predictedSegmentDecrement,
            maxSegmentsPerCell=maxSegmentsPerCell,
            maxSynapsesPerSegment=maxSynapsesPerSegment,
            seed=seed,
            learnOnOneCell=False,
        )

        # These sparse matrices will hold the synapses for each proximal segment.
        #
        # proximalPermanences - SparseMatrix with permanence values
        # proximalConnections - SparseBinaryMatrix of connected synapses
        self.proximalPermanences = SparseMatrix(self.numberOfColumns(),
                                                inputWidth)
        self.proximalConnections = SparseBinaryMatrix(inputWidth)
        self.proximalConnections.resize(self.numberOfColumns(), inputWidth)

    def compute(self, feedforwardInput=None, activeExternalCells=None,
                learn=True):
        """
        Run one time step, dispatching to learning or inference mode.

        Parameters:
        ----------------------------
        @param  feedforwardInput (set)
                Indices of active input bits. None is treated as "no
                feedforward input".

        @param  activeExternalCells (set)
                Indices of active cells that will form connections to distal
                segments. None is treated as an empty set.

        @param  learn (bool)
                If True, we are learning a new object
        """
        # Both modes take len()/list() of the feedforward input, so the None
        # default has to be normalised before dispatching.
        if feedforwardInput is None:
            feedforwardInput = set()

        if activeExternalCells is None:
            activeExternalCells = set()

        if learn:
            self._computeLearningMode(feedforwardInput=feedforwardInput,
                                      lateralInput=activeExternalCells)
        else:
            self._computeInferenceMode(feedforwardInput=feedforwardInput,
                                       lateralInput=activeExternalCells)

    def _computeLearningMode(self, feedforwardInput, lateralInput):
        """
        Learning mode: we are learning a new object. If there is no prior
        activity, we randomly activate numActiveColumnsPerInhArea cells and
        create connections to incoming input. If there was prior activity, we
        maintain it.

        These cells will represent the object and learn distal connections to
        lateral cortical columns.

        Parameters:
        ----------------------------
        @param  feedforwardInput (set)
                Indices of active input bits

        @param  lateralInput (set)
                Indices of active cells from neighboring columns.
        """
        # If there are no previously active cells, select random subset of cells
        if len(self.activeCells) == 0:
            # NOTE(review): relies on Random.shuffle returning the shuffled
            # array (not None) - confirm against the Random bindings.
            self.activeCells = set(self._random.shuffle(
                numpy.array(range(self.numberOfCells()),
                            dtype="uint32"))[0:self.numActiveColumnsPerInhArea])

        # else we maintain previous activity, nothing to do.

        # Those cells that remain active will learn on their proximal and distal
        # dendrites as long as there is some input. If there are no cells
        # active, no learning happens.
        if len(self.activeCells) > 0:

            # Learn on proximal dendrite if appropriate
            if len(feedforwardInput) > 0:
                self._learnProximal(feedforwardInput, self.activeCells,
                                    self.maxNewSynapseCount,
                                    self.proximalPermanences,
                                    self.proximalConnections,
                                    self.initialProximalPermanence,
                                    self.synPermProximalInc,
                                    self.synPermProximalDec,
                                    self.connectedPermanence)

            # Learn on distal dendrites if appropriate
            self.tm.compute(activeColumns=self.activeCells,
                            activeExternalCells=lateralInput,
                            formInternalConnections=False,
                            learn=True)

    def _computeInferenceMode(self, feedforwardInput, lateralInput):
        """
        Inference mode: if there is some feedforward activity, perform spatial
        pooling on it to recognize previously known objects. If there is no
        feedforward activity, maintain previous activity.

        Parameters:
        ----------------------------
        @param  feedforwardInput (set)
                Indices of active input bits

        @param  lateralInput (set)
                Indices of active cells from neighboring columns; forwarded
                unchanged to the TM as activeExternalCells.
        """
        # Figure out which cells are active due to feedforward proximal inputs
        # In order to form unions, we keep all cells that are over threshold
        inputVector = numpy.zeros(self.numberOfInputs(), dtype=realDType)
        inputVector[list(feedforwardInput)] = 1
        overlaps = numpy.zeros(self.numberOfColumns(), dtype=realDType)
        self.proximalConnections.rightVecSumAtNZ_fast(
            inputVector.astype(realDType), overlaps)
        overlaps[overlaps < self.minThreshold] = 0
        bottomUpActivity = set(overlaps.nonzero()[0])

        # If there is insufficient current bottom up activity, we incorporate
        # all previous activity. We set their overlaps so they are sure to win.
        if len(bottomUpActivity) < self.numActiveColumnsPerInhArea:
            bottomUpActivity = bottomUpActivity.union(self.activeCells)
            maxOverlap = overlaps.max()
            overlaps[self.getActiveCells()] = maxOverlap + 1

        # Narrow down list of active cells based on lateral activity
        self.activeCells = self._winnersBasedOnLateralActivity(
            bottomUpActivity,
            self.getPredictiveCells(),
            overlaps,
            self.numActiveColumnsPerInhArea
        )

        # Update predictive cells for next time step
        self.tm.compute(activeColumns=self.activeCells,
                        activeExternalCells=lateralInput,
                        formInternalConnections=False,
                        learn=False)

    def numberOfInputs(self):
        """
        Returns the number of inputs into this layer
        """
        return self.inputWidth

    def numberOfColumns(self):
        """
        Returns the number of columns in this layer.
        @return (int) Number of columns
        """
        return self.tm.numberOfColumns()

    def numberOfCells(self):
        """
        Returns the number of cells in this layer.
        @return (int) Number of cells
        """
        return self.tm.numberOfCells()

    def getActiveCells(self):
        """
        Returns the indices of the active cells.
        @return (list) Indices of active cells.
        """
        return self.getCellIndices(self.activeCells)

    @classmethod
    def getCellIndices(cls, cells):
        """
        Map each cell to its index via getCellIndex.
        @return (list) One index per cell, in iteration order of `cells`.
        """
        return [cls.getCellIndex(c) for c in cells]

    @staticmethod
    def getCellIndex(cell):
        """
        Return the index of a cell. Cells are already indices here, so this is
        the identity.
        """
        return cell

    def numberOfConnectedSynapses(self, cells=None):
        """
        Returns the number of proximal connected synapses on these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells. If None return count for all cells.
        """
        if cells is None:
            cells = range(self.numberOfCells())
        n = 0
        for cell in cells:
            n += self.proximalConnections.nNonZerosOnRow(cell)
        return n

    def numberOfSynapses(self, cells=None):
        """
        Returns the number of proximal synapses with a non-zero permanence on
        these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells. If None return count for all cells.
        """
        if cells is None:
            cells = range(self.numberOfCells())
        n = 0
        for cell in cells:
            n += self.proximalPermanences.nNonZerosOnRow(cell)
        return n

    def numberOfDistalSegments(self, cells):
        """
        Returns the total number of distal segments for these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells
        """
        n = 0
        for cell in cells:
            n += len(self.tm.connections.segmentsForCell(cell))
        return n

    def numberOfDistalSynapses(self, cells):
        """
        Returns the total number of distal synapses for these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells
        """
        n = 0
        for cell in cells:
            segments = self.tm.connections.segmentsForCell(cell)
            for segment in segments:
                n += len(self.tm.connections.synapsesForSegment(segment))
        return n

    def reset(self):
        """
        Reset internal states. When learning this signifies we are to learn a
        unique new object.
        """
        self.activeCells = set()
        self.tm.reset()

    def getPredictiveCells(self):
        """
        Get the distally predictive cells, exactly as reported by the TM's
        getPredictiveCells().

        @return Indices of the current distally predicted cells (container
                type is whatever the TM returns).
        """
        return self.tm.getPredictiveCells()

    def getPredictedActiveCells(self):
        """
        Get the cells that were predicted previously then became active, as
        reported by the TM's predictedActiveCellsIndices().

        @return Indices of predicted active cells.
        """
        return self.tm.predictedActiveCellsIndices()

    def getConnections(self):
        """
        Get the Connections structure associated with our TM. Beware of using
        this as it is implementation specific and may change.

        @return (object) A Connections object
        """
        return self.tm.connections

    def _learnProximal(self,
                       activeInputs, activeCells, maxNewSynapseCount,
                       proximalPermanences, proximalConnections,
                       initialPermanence, synPermProximalInc,
                       synPermProximalDec, connectedPermanence):
        """
        Learn on proximal dendrites of active cells. Updates
        proximalPermanences and proximalConnections in place.

        For every active cell: all existing synapses are decremented, those
        whose input is active are then incremented, and up to
        maxNewSynapseCount new synapses are grown to active inputs that had no
        synapse yet.
        """
        for cell in activeCells:
            cellPermanencesDense = proximalPermanences.getRow(cell)
            cellNonZeroIndices, _ = proximalPermanences.rowNonZeros(cell)
            cellNonZeroIndices = list(cellNonZeroIndices)

            # Get new and existing connections for this segment
            newInputs, existingInputs = self._pickProximalInputsToLearnOn(
                maxNewSynapseCount, activeInputs, cellNonZeroIndices
            )

            # Adjust existing connections appropriately
            # First we decrement all existing permanences
            if len(cellNonZeroIndices) > 0:
                cellPermanencesDense[cellNonZeroIndices] -= synPermProximalDec

            # Then we add inc + dec to existing active synapses, which undoes
            # the blanket decrement above and applies the increment.
            if len(existingInputs) > 0:
                cellPermanencesDense[existingInputs] += (synPermProximalInc +
                                                         synPermProximalDec)

            # Add new connections
            if len(newInputs) > 0:
                cellPermanencesDense[newInputs] += initialPermanence

            # Update proximalPermanences and proximalConnections
            proximalPermanences.setRowFromDense(cell, cellPermanencesDense)
            newConnected = numpy.where(
                cellPermanencesDense >= connectedPermanence)[0]
            proximalConnections.replaceSparseRow(cell, newConnected)

    def _pickProximalInputsToLearnOn(self, newSynapseCount, activeInputs,
                                     cellNonZeros):
        """
        Pick inputs to form proximal connections to a particular cell. We
        randomly subsample from the active inputs that do not yet have a
        synapse on this cell.

        We return a list of up to newSynapseCount input indices from
        activeInputs that are valid new connections for this cell. We also
        return a list containing all inputs in activeInputs that are already
        connected to this cell.

        Parameters:
        ----------------------------
        @param newSynapseCount  (int)   Number of inputs to pick
        @param activeInputs     (set)   Indices of active inputs
        @param cellNonZeros     (list)  Indices of inputs into this cell with
                                        non-zero permanences.

        @return (list, list) Indices of new inputs to connect to, inputs
                             already connected
        """
        # Set lookup keeps the partition linear in the number of inputs.
        existing = set(cellNonZeros)
        candidates = []
        alreadyConnected = []

        # Collect inputs that already have synapses and list of new candidate
        # inputs
        for inputIdx in activeInputs:
            if inputIdx in existing:
                alreadyConnected.append(inputIdx)
            else:
                candidates.append(inputIdx)

        # Select min(newSynapseCount, len(candidates)) new inputs to connect to
        if newSynapseCount >= len(candidates):
            return candidates, alreadyConnected

        # Pick newSynapseCount inputs randomly, without replacement.
        inputs = []
        for _ in range(newSynapseCount):
            i = self._random.getUInt32(len(candidates))
            inputs.append(candidates.pop(i))

        return inputs, alreadyConnected

    def _winnersBasedOnLateralActivity(self,
                                       activeCells,
                                       predictiveCells,
                                       overlaps,
                                       targetActiveCells):
        """
        Given the set of cells active due to feedforward input, narrow down the
        list of active cells based on predictions due to previous lateralInput.

        Note: `overlaps` is modified in place (entries of already-chosen cells
        are zeroed) when winners have to be topped up.

        Parameters:
        ----------------------------
        @param    activeCells (set)
                  Indices of cells activated by bottom-up input.

        @param    predictiveCells (set)
                  Indices of cells that are laterally predicted.

        @param    overlaps (numpy array)
                  Bottom up overlap scores for each proximal segment. This is
                  used to select additional cells if the narrowed down list
                  contains less than targetActiveCells.

        @param    targetActiveCells (int)
                  The number of active cells we want to have active.

        @return (set) Set of new winner cell indices
        """
        predictedActiveCells = activeCells.intersection(predictiveCells)

        # If predicted cells don't intersect at all with active cells, we go
        # with bottom up input. In these cases we can stick with existing active
        # cells and skip the overlap sorting
        if len(predictedActiveCells) == 0:
            predictedActiveCells = activeCells

        # We want to keep all cells that were predicted and currently active due
        # to feedforward input. This set could be larger than our target number
        # of active cells due to unions, which is ok. However if there are
        # insufficient cells active after this intersection, we fill in with the
        # cells that have highest overlap.
        elif len(predictedActiveCells) < targetActiveCells:
            # Don't want to consider cells already chosen
            overlaps[list(predictedActiveCells)] = 0

            # Add in the desired number of cells with highest activity
            numActive = targetActiveCells - len(predictedActiveCells)
            winnerIndices = numpy.argsort(overlaps, kind='mergesort')
            sortedWinnerIndices = winnerIndices[-numActive:][::-1]
            predictedActiveCells = predictedActiveCells.union(
                set(sortedWinnerIndices))

        return predictedActiveCells
class ColumnPooler(object):
    """
    This class constitutes a temporary implementation for a cross-column pooler.
    The implementation goal of this class is to prove basic properties before
    creating a cleaner implementation.
    """

    def __init__(self,
                 inputWidth,
                 lateralInputWidth,
                 numActiveColumnsPerInhArea=40,
                 synPermProximalInc=0.1,
                 synPermProximalDec=0.001,
                 initialProximalPermanence=0.6,
                 columnDimensions=(2048,),
                 minThresholdProximal=10,
                 activationThresholdDistal=13,
                 minThresholdDistal=10,
                 initialPermanence=0.41,
                 connectedPermanence=0.50,
                 maxNewProximalSynapseCount=20,
                 maxNewDistalSynapseCount=20,
                 permanenceIncrement=0.10,
                 permanenceDecrement=0.10,
                 predictedSegmentDecrement=0.0,
                 maxSegmentsPerCell=255,
                 maxSynapsesPerProximalSegment=255,
                 maxSynapsesPerDistalSegment=255,
                 seed=42):
        """
        This classes uses an ExtendedTemporalMemory internally to keep track of
        distal segments. Please see ExtendedTemporalMemory for descriptions of
        constructor parameters not defined below.

        Parameters:
        ----------------------------
        @param  inputWidth (int)
                The number of proximal inputs into this layer

        @param  lateralInputWidth (int)
                The number of lateral inputs into this layer

        @param  numActiveColumnsPerInhArea (int)
                Target number of active cells

        @param  synPermProximalInc (float)
                Permanence increment for proximal synapses

        @param  synPermProximalDec (float)
                Permanence decrement for proximal synapses

        @param  initialProximalPermanence (float)
                Initial permanence value for proximal segments
        """
        self.inputWidth = inputWidth
        self.lateralInputWidth = lateralInputWidth
        self.numActiveColumnsPerInhArea = numActiveColumnsPerInhArea
        self.synPermProximalInc = synPermProximalInc
        self.synPermProximalDec = synPermProximalDec
        self.initialProximalPermanence = initialProximalPermanence
        self.connectedPermanence = connectedPermanence
        self.maxNewProximalSynapseCount = maxNewProximalSynapseCount
        self.maxNewDistalSynapseCount = maxNewDistalSynapseCount
        self.minThresholdProximal = minThresholdProximal
        self.minThresholdDistal = minThresholdDistal
        self.maxSynapsesPerProximalSegment = maxSynapsesPerProximalSegment
        self.activeCells = set()
        self._random = Random(seed)

        # Create our own instance of extended temporal memory to handle distal
        # segments.
        self.tm = createModel(
            modelName="etm_cpp",
            columnDimensions=columnDimensions,
            basalInputDimensions=(lateralInputWidth,),
            apicalInputDimensions=(),
            cellsPerColumn=1,
            activationThreshold=activationThresholdDistal,
            initialPermanence=initialPermanence,
            connectedPermanence=connectedPermanence,
            minThreshold=minThresholdDistal,
            maxNewSynapseCount=maxNewDistalSynapseCount,
            permanenceIncrement=permanenceIncrement,
            permanenceDecrement=permanenceDecrement,
            predictedSegmentDecrement=predictedSegmentDecrement,
            formInternalBasalConnections=False,
            learnOnOneCell=False,
            maxSegmentsPerCell=maxSegmentsPerCell,
            maxSynapsesPerSegment=maxSynapsesPerDistalSegment,
            seed=seed,
        )

        # These sparse matrices will hold the synapses for each proximal segment.
        #
        # proximalPermanences - SparseMatrix with permanence values
        # proximalConnections - SparseBinaryMatrix of connected synapses
        self.proximalPermanences = SparseMatrix(self.numberOfColumns(),
                                                inputWidth)
        self.proximalConnections = SparseBinaryMatrix(inputWidth)
        self.proximalConnections.resize(self.numberOfColumns(), inputWidth)

    def depolarizeCells(self, activeExternalCells, learn=True):
        """
        Forward the lateral input to the TM's depolarizeCells.

        Parameters:
        ----------------------------
        @param  activeExternalCells (set)
                Indices of active cells that will form connections to distal
                segments.

        @param learn (bool)
                If true, distal segment activations will be recorded. This
                information is used during segment cleanup.
        """
        self.tm.depolarizeCells(activeCellsExternalBasal=activeExternalCells,
                                learn=learn)

    def activateCells(self,
                      feedforwardInput=(),
                      reinforceCandidatesExternal=(),
                      growthCandidatesExternal=(),
                      learn=True):
        """
        Activate cells in learning or inference mode. The external candidates
        are only used in learning mode.

        @param  feedforwardInput (set)
                Indices of active input bits

        @param  reinforceCandidatesExternal (set)
                Indices of active cells that will reinforce synapses to distal
                segments.

        @param  growthCandidatesExternal (set)
                Indices of active cells that will grow synapses to distal
                segments.

        @param  learn (bool)
                If True, we are learning a new object
        """
        if learn:
            self._activateCellsLearningMode(feedforwardInput,
                                            reinforceCandidatesExternal,
                                            growthCandidatesExternal)
        else:
            self._activateCellsInferenceMode(feedforwardInput)

    def compute(self, feedforwardInput=(), lateralInput=(), learn=True):
        """
        Runs one time step of the column pooler algorithm.

        This method assumes:

         - Lateral input should trigger predictions for this time step, i.e.
           for this feedforward input.
         - During learning, all lateral input is eligible for growth and
           reinforcement.

        If these are bad assumptions, use depolarizeCells and activateCells
        directly.

        @param  feedforwardInput (set)
                Indices of active feedforward input bits

        @param  lateralInput (set)
                Indices of active lateral input bits

        @param  learn (bool)
                If True, we are learning a new object
        """
        self.depolarizeCells(lateralInput, learn)
        self.activateCells(feedforwardInput, lateralInput, lateralInput, learn)

    def _activateCellsLearningMode(self,
                                   feedforwardInput,
                                   reinforceCandidatesExternal,
                                   growthCandidatesExternal):
        """
        Learning mode: we are learning a new object. If there is no prior
        activity, we randomly activate numActiveColumnsPerInhArea cells and
        create connections to incoming input. If there was prior activity, we
        maintain it.

        These cells will represent the object and learn distal connections to
        lateral cortical columns.

        Parameters:
        ----------------------------
        @param  feedforwardInput (set)
                Indices of active input bits

        @param  reinforceCandidatesExternal (set)
                Indices of active cells that will reinforce synapses to distal
                segments.

        @param  growthCandidatesExternal (set)
                Indices of active cells that will grow synapses to distal
                segments.
        """
        # If there are no previously active cells, select random subset of cells
        if len(self.activeCells) == 0:
            # NOTE(review): relies on Random.shuffle returning the shuffled
            # array (not None) - confirm against the Random bindings.
            self.activeCells = set(self._random.shuffle(
                numpy.array(range(self.numberOfCells()),
                            dtype="uint32"))[0:self.numActiveColumnsPerInhArea])

        # else we maintain previous activity, nothing to do.

        # Those cells that remain active will learn on their proximal and distal
        # dendrites as long as there is some input. If there are no cells
        # active, no learning happens.
        if len(self.activeCells) > 0:

            # Learn on proximal dendrite if appropriate
            if len(feedforwardInput) > 0:
                self._learnProximal(feedforwardInput, self.activeCells,
                                    self.maxNewProximalSynapseCount,
                                    self.proximalPermanences,
                                    self.proximalConnections,
                                    self.initialProximalPermanence,
                                    self.synPermProximalInc,
                                    self.synPermProximalDec,
                                    self.connectedPermanence)

            # Learn on distal dendrites if appropriate
            self.tm.activateCells(
                activeColumns=sorted(self.activeCells),
                reinforceCandidatesExternalBasal=sorted(
                    reinforceCandidatesExternal),
                growthCandidatesExternalBasal=sorted(growthCandidatesExternal),
                learn=True)

    def _activateCellsInferenceMode(self, feedforwardInput):
        """
        Inference mode: if there is some feedforward activity, perform spatial
        pooling on it to recognize previously known objects. If there is no
        feedforward activity, maintain previous activity.

        Parameters:
        ----------------------------
        @param  feedforwardInput (set)
                Indices of active input bits
        """
        # Figure out which cells are active due to feedforward proximal inputs
        # In order to form unions, we keep all cells that are over threshold
        inputVector = numpy.zeros(self.numberOfInputs(), dtype=realDType)
        inputVector[list(feedforwardInput)] = 1
        overlaps = numpy.zeros(self.numberOfColumns(), dtype=realDType)
        self.proximalConnections.rightVecSumAtNZ_fast(
            inputVector.astype(realDType), overlaps)
        overlaps[overlaps < self.minThresholdProximal] = 0
        bottomUpActivity = set(overlaps.nonzero()[0])

        # If there is insufficient current bottom up activity, we incorporate
        # all previous activity. We set their overlaps so they are sure to win.
        if len(bottomUpActivity) < self.numActiveColumnsPerInhArea:
            bottomUpActivity = bottomUpActivity.union(self.activeCells)
            maxOverlap = overlaps.max()
            overlaps[self.getActiveCells()] = maxOverlap + 1

        # Narrow down list of active cells based on lateral activity
        self.activeCells = self._winnersBasedOnLateralActivity(
            bottomUpActivity,
            self.getPredictiveCells(),
            overlaps,
            self.numActiveColumnsPerInhArea
        )

        # Update the active cells in the TM. Without learning and without
        # internal basal connections, this has no effect on column pooler
        # output.
        self.tm.activateCells(activeColumns=sorted(self.activeCells),
                              learn=False)

    def numberOfInputs(self):
        """
        Returns the number of inputs into this layer
        """
        return self.inputWidth

    def numberOfColumns(self):
        """
        Returns the number of columns in this layer.
        @return (int) Number of columns
        """
        return self.tm.numberOfColumns()

    def numberOfCells(self):
        """
        Returns the number of cells in this layer.
        @return (int) Number of cells
        """
        return self.tm.numberOfCells()

    def getActiveCells(self):
        """
        Returns the indices of the active cells.
        @return (list) Indices of active cells.
        """
        return list(self.activeCells)

    def numberOfConnectedSynapses(self, cells=None):
        """
        Returns the number of proximal connected synapses on these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells. If None return count for all cells.
        """
        if cells is None:
            cells = range(self.numberOfCells())
        n = 0
        for cell in cells:
            n += self.proximalConnections.nNonZerosOnRow(cell)
        return n

    def numberOfSynapses(self, cells=None):
        """
        Returns the number of proximal synapses with a non-zero permanence on
        these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells. If None return count for all cells.
        """
        if cells is None:
            cells = range(self.numberOfCells())
        n = 0
        for cell in cells:
            n += self.proximalPermanences.nNonZerosOnRow(cell)
        return n

    def numberOfDistalSegments(self, cells):
        """
        Returns the total number of distal segments for these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells
        """
        n = 0
        for cell in cells:
            n += self.tm.basalConnections.numSegments(cell)
        return n

    def numberOfDistalSynapses(self, cells):
        """
        Returns the total number of distal synapses for these cells.

        Parameters:
        ----------------------------
        @param  cells (set or list)
                Indices of the cells
        """
        n = 0
        for cell in cells:
            segments = self.tm.basalConnections.segmentsForCell(cell)
            for segment in segments:
                n += self.tm.basalConnections.numSynapses(segment)
        return n

    def reset(self):
        """
        Reset internal states. When learning this signifies we are to learn a
        unique new object.
        """
        self.activeCells = set()
        self.tm.reset()

    def getPredictiveCells(self):
        """
        Get the distally predictive cells, exactly as reported by the TM's
        getPredictiveCells().

        @return Indices of the current distally predicted cells (container
                type is whatever the TM returns).
        """
        return self.tm.getPredictiveCells()

    def getPredictedActiveCells(self):
        """
        Get the cells that were predicted previously then became active, as
        reported by the TM's predictedActiveCellsIndices().

        @return Indices of predicted active cells.
        """
        return self.tm.predictedActiveCellsIndices()

    def getConnections(self):
        """
        Get the basal Connections structure associated with our TM. Beware of
        using this as it is implementation specific and may change.

        @return (object) A Connections object
        """
        return self.tm.basalConnections

    def _learnProximal(self,
                       activeInputs, activeCells, maxNewSynapseCount,
                       proximalPermanences, proximalConnections,
                       initialPermanence, synPermProximalInc,
                       synPermProximalDec, connectedPermanence):
        """
        Learn on proximal dendrites of active cells. Updates
        proximalPermanences and proximalConnections in place.

        For every active cell: synapses to active inputs are incremented,
        synapses to inactive inputs are decremented, and up to
        maxNewSynapseCount new synapses are created at initialPermanence on
        active inputs that had no synapse yet.

        @param activeInputs (iterable) Indices of active inputs. Any iterable
                                       is accepted; it is converted to a set.
        """
        # The public entry points default their inputs to tuples, so accept any
        # iterable here instead of requiring a real set for the set algebra.
        activeInputs = set(activeInputs)

        for cell in activeCells:
            cellPermanencesDense = proximalPermanences.getRow(cell)
            cellNonZeroIndices, _ = proximalPermanences.rowNonZeros(cell)
            cellNonZeroIndices = set(cellNonZeroIndices)

            # Find the synapses that should be reinforced, punished, and grown.
            reinforce = list(activeInputs & cellNonZeroIndices)
            punish = list(cellNonZeroIndices - activeInputs)
            growthCandidates = activeInputs - cellNonZeroIndices
            newSynapseCount = min(len(growthCandidates), maxNewSynapseCount)
            grow = _sample(growthCandidates, newSynapseCount, self._random)

            # Make the changes.
            cellPermanencesDense[punish] -= synPermProximalDec
            cellPermanencesDense[reinforce] += synPermProximalInc
            cellPermanencesDense[grow] = initialPermanence

            # Update proximalPermanences and proximalConnections.
            proximalPermanences.setRowFromDense(cell, cellPermanencesDense)
            newConnected = numpy.where(
                cellPermanencesDense >= connectedPermanence)[0]
            proximalConnections.replaceSparseRow(cell, newConnected)

    def _winnersBasedOnLateralActivity(self,
                                       activeCells,
                                       predictiveCells,
                                       overlaps,
                                       targetActiveCells):
        """
        Given the set of cells active due to feedforward input, narrow down the
        list of active cells based on predictions due to previous lateralInput.

        Note: `overlaps` is modified in place (entries of already-chosen cells
        are zeroed) when winners have to be topped up.

        Parameters:
        ----------------------------
        @param    activeCells (set)
                  Indices of cells activated by bottom-up input.

        @param    predictiveCells (set)
                  Indices of cells that are laterally predicted.

        @param    overlaps (numpy array)
                  Bottom up overlap scores for each proximal segment. This is
                  used to select additional cells if the narrowed down list
                  contains less than targetActiveCells.

        @param    targetActiveCells (int)
                  The number of active cells we want to have active.

        @return (set) Set of new winner cell indices
        """
        predictedActiveCells = activeCells.intersection(predictiveCells)

        # If predicted cells don't intersect at all with active cells, we go
        # with bottom up input. In these cases we can stick with existing active
        # cells and skip the overlap sorting
        if len(predictedActiveCells) == 0:
            predictedActiveCells = activeCells

        # We want to keep all cells that were predicted and currently active due
        # to feedforward input. This set could be larger than our target number
        # of active cells due to unions, which is ok. However if there are
        # insufficient cells active after this intersection, we fill in with the
        # cells that have highest overlap.
        elif len(predictedActiveCells) < targetActiveCells:
            # Don't want to consider cells already chosen
            overlaps[list(predictedActiveCells)] = 0

            # Add in the desired number of cells with highest activity
            numActive = targetActiveCells - len(predictedActiveCells)
            winnerIndices = numpy.argsort(overlaps, kind='mergesort')
            sortedWinnerIndices = winnerIndices[-numActive:][::-1]
            predictedActiveCells = predictedActiveCells.union(
                set(sortedWinnerIndices))

        return predictedActiveCells