def run_tm_noise_experiment(dim=2048,
                            cellsPerColumn=1,
                            num_active=40,
                            activationThreshold=16,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=16,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_samples=1,
                            num_trials=1000,
                            sequence_length=20,
                            training_iters=1,
                            automatic_threshold=False,
                            noise_range=range(0, 100, 5)):
    """
    Run an experiment tracking the performance of the temporal memory given
    noise.  The number of active cells and the dimensions of the TM are
    fixed.  We track performance by comparing the cells predicted to be
    active with the cells actually active in the sequence without noise at
    every timestep, and averaging across timesteps.  Three metrics are used:
    correlation (Pearson's r, by numpy.corrcoef), set similarity (Jaccard
    index) and cosine similarity (using scipy.spatial.distance.cosine).  The
    Jaccard set similarity is the canonical metric used in the paper, but all
    three metrics tend to produce very similar results.

    Typically, this experiment is run to test the influence of activation
    threshold on noise tolerance, with multiple different thresholds tested.
    However, this experiment could also be used to examine the influence of
    factors such as sparsity and sequence length.

    For every noise level, ``num_trials`` fresh TMs are built; each is
    trained on ``num_samples`` generated sequences of ``sequence_length``
    steps and then evaluated on noisy copies of those sequences.  The
    metrics are averaged over every evaluated sequence of every trial.

    Output is appended to tm_noise_{threshold}.txt, one line per noise level:
    ``noise, correlation, similarity, csim, num_trials``.  We used three
    different activation threshold settings, 8, 12 and 16, mirroring the
    parameters used in the Poirazi neuron model experiment.

    :param automatic_threshold: when true, activationThreshold and
        minThreshold are both replaced by
        ``min(num_active // 2, maxNewSynapseCount // 2)``.
    :param seed: accepted for interface compatibility but NOT forwarded to
        the TM (the argument is commented out in the constructor call), so
        each trial uses whatever default seeding the TM applies.
    :param noise_range: iterable of noise levels handed to ``apply_noise``.

    The remaining parameters are forwarded unchanged to the TM constructor
    or to ``generate_evenly_distributed_data_sparse``.
    """
    if automatic_threshold:
        # Floor division keeps the thresholds integral on Python 2 and 3.
        activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
        minThreshold = min(num_active // 2, maxNewSynapseCount // 2)

    # Size of the dense vectors used for the correlation / cosine metrics.
    # Derived from the TM shape instead of a hard-coded 2048 * 32 so that it
    # always matches the TM actually being tested.
    num_cells = dim * cellsPerColumn

    for noise in noise_range:
        print(noise)

        # Accumulated over *all* trials of this noise level, so the means
        # written below really cover the reported num_trials.
        correlations = []
        similarities = []
        csims = []

        for _trial in range(num_trials):
            tm = TM(columnDimensions=(dim,),
                    cellsPerColumn=cellsPerColumn,
                    activationThreshold=activationThreshold,
                    initialPermanence=initialPermanence,
                    connectedPermanence=connectedPermanence,
                    minThreshold=minThreshold,
                    maxNewSynapseCount=maxNewSynapseCount,
                    permanenceIncrement=permanenceIncrement,
                    permanenceDecrement=permanenceDecrement,
                    predictedSegmentDecrement=predictedSegmentDecrement,
                    maxSegmentsPerCell=maxSegmentsPerCell,
                    maxSynapsesPerSegment=maxSynapsesPerSegment,
                    )  # seed=seed)

            datapoints = []
            canonical_active_cells = []

            for _sample in range(num_samples):
                data = generate_evenly_distributed_data_sparse(
                    dim=dim,
                    num_active=num_active,
                    num_samples=sequence_length)
                datapoints.append(data)

                # Training passes over the clean sequence.
                for _iteration in range(training_iters):
                    for j in range(data.nRows()):
                        activeColumns = set(data.rowNonZeros(j)[0])
                        tm.compute(activeColumns, learn=True)
                    tm.reset()

                # One more (learning) pass to record the active cells at
                # each step of the noise-free sequence.
                current_active_cells = []
                for j in range(data.nRows()):
                    activeColumns = set(data.rowNonZeros(j)[0])
                    tm.compute(activeColumns, learn=True)
                    current_active_cells.append(tm.getActiveCells())
                canonical_active_cells.append(current_active_cells)
                tm.reset()

            # Now that the TM has been trained, check its performance on
            # each sequence with noise added.
            for datapoint, active_cells in zip(datapoints,
                                               canonical_active_cells):
                # Work on a copy so the stored clean sequence is untouched.
                data = copy.deepcopy(datapoint)
                apply_noise(data, noise)

                predicted_cells = []
                for j in range(data.nRows()):
                    activeColumns = set(data.rowNonZeros(j)[0])
                    tm.compute(activeColumns, learn=False)
                    predicted_cells.append(tm.getPredictiveCells())
                # Do not let this sequence's state leak into the next one.
                tm.reset()

                # The prediction made at step t is scored against the clean
                # activity at step t + 1.
                step_pairs = list(zip(predicted_cells[:-1], active_cells[1:]))
                similarity = [
                    float(len(set(predicted) & set(active)))
                    / len(set(predicted) | set(active))
                    for predicted, active in step_pairs
                ]

                dense_predicted_cells = convert_cell_lists_to_dense(
                    num_cells, predicted_cells[:-1])
                dense_active_cells = convert_cell_lists_to_dense(
                    num_cells, active_cells[1:])
                correlation = [
                    numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                    for predicted, active in zip(dense_predicted_cells,
                                                 dense_active_cells)
                ]
                csim = [
                    1 - cosine(predicted, active)
                    for predicted, active in zip(dense_predicted_cells,
                                                 dense_active_cells)
                ]

                # Pearson / cosine are NaN when a vector is all zeros (no
                # cells predicted); count those steps as 0.
                correlation = numpy.nan_to_num(correlation)
                csim = numpy.nan_to_num(csim)

                correlations.append(numpy.mean(correlation))
                similarities.append(numpy.mean(similarity))
                csims.append(numpy.mean(csim))

        correlation = numpy.mean(correlations)
        similarity = numpy.mean(similarities)
        csim = numpy.mean(csims)

        with open("tm_noise_{}.txt".format(activationThreshold), "a") as f:
            fields = (noise, correlation, similarity, csim, num_trials)
            f.write(", ".join(str(field) for field in fields) + "\n")
class BaseNetwork(object):
    """
    Scalar encoder -> Spatial Pooler -> Temporal Memory pipeline that
    produces a raw anomaly score for every record fed to handleRecord().

    Call initialize() once after construction to build the encoder, SP and
    TM; the constructor only stores configuration and placeholders.
    """

    def __init__(self, inputMin=None, inputMax=None, runSanity=False):
        """
        :param inputMin: smallest value expected in the stream, or None.
        :param inputMax: largest value expected in the stream, or None.
        :param runSanity: when true, initialize() attaches a
            sanity.SPTMInstance and handleRecord() reports every timestep
            to it.
        """
        self.inputMin = inputMin
        self.inputMax = inputMax
        self.runSanity = runSanity

        self.encoder = None
        self.encoderOutput = None
        self.sp = None
        self.spOutput = None
        self.spOutputNZ = None
        self.tm = None
        self.anomalyScore = None
        # Always defined (previously only when runSanity was true) so that
        # reading the attribute can never raise AttributeError.
        self.sanity = None

        self.defaultEncoderResolution = 0.0001
        self.numColumns = 2048
        self.cellsPerColumn = 32

        self.predictedActiveCells = None
        self.previouslyPredictiveCells = None

    def initialize(self):
        """Build the encoder, Spatial Pooler, Temporal Memory and, if
        requested, the Sanity instance."""
        # Scalar Encoder
        resolution = self.getEncoderResolution()
        self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
        self.encoderOutput = np.zeros(self.encoder.getWidth(),
                                      dtype=np.uint32)

        # Spatial Pooler
        spInputWidth = self.encoder.getWidth()
        self.spParams = {
            "globalInhibition": True,
            "columnDimensions": [self.numColumns],
            "inputDimensions": [spInputWidth],
            "potentialRadius": spInputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 5.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        }
        self.sp = SpatialPooler(**self.spParams)
        self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

        # Temporal Memory
        self.tmParams = {
            "activationThreshold": 20,
            "cellsPerColumn": self.cellsPerColumn,
            "columnDimensions": (self.numColumns,),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1960,
        }
        self.tm = TemporalMemory(**self.tmParams)

        # Sanity
        if self.runSanity:
            self.sanity = sanity.SPTMInstance(self.sp, self.tm)

    def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                     learningMode=True):
        """
        Process one record.

        :param scalarValue: value to encode (ignored for encoding when
            skipEncoding is true, but still reported to Sanity).
        :param label: optional label, only forwarded to Sanity.
        :param skipEncoding: when true, reuse the current contents of
            self.encoderOutput instead of encoding scalarValue.
        :param learningMode: whether the Spatial Pooler and the Temporal
            Memory learn from this record.

        Updates spOutputNZ, previouslyPredictiveCells, predictedActiveCells
        and anomalyScore; returns nothing.
        """
        if self.runSanity:
            self.sanity.waitForUserContinue()

        # Encode the input data record if it hasn't already been encoded.
        if not skipEncoding:
            self.encodeValue(scalarValue)

        # Run the encoded data through the spatial pooler
        self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
        self.spOutputNZ = self.spOutput.nonzero()[0]

        # WARNING: this needs to happen here, before the TM runs.
        self.previouslyPredictiveCells = self.tm.getPredictiveCells()

        # Run SP output through temporal memory.  learningMode is forwarded
        # so that learningMode=False freezes the TM as well as the SP.
        self.tm.compute(self.spOutputNZ, learn=learningMode)
        self.predictedActiveCells = _computePredictedActiveCells(
            self.tm.getActiveCells(), self.previouslyPredictiveCells)

        # Anomaly score
        self.anomalyScore = _computeAnomalyScore(
            self.spOutputNZ,
            self.previouslyPredictiveCells,
            self.cellsPerColumn)

        # Run Sanity
        if self.runSanity:
            self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                       self.getSpOutputNZ(),
                                       self.previouslyPredictiveCells,
                                       {
                                           'value': scalarValue,
                                           'label': label
                                       })

    def encodeValue(self, scalarValue):
        """Encode scalarValue into self.encoderOutput (in place)."""
        self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)

    def getEncoderResolution(self):
        """
        Compute the Random Distributed Scalar Encoder (RDSE) resolution.
        It's calculated from the data min and max, specific to the data
        stream.

        Without both bounds the default resolution is returned.  Otherwise
        the range is padded by 20% of its width on each side (or widened to
        [min, min + 1] when min == max) and divided into 130 buckets; the
        result is never smaller than the default resolution.
        """
        if self.inputMin is None or self.inputMax is None:
            return self.defaultEncoderResolution

        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        if self.inputMin != self.inputMax:
            maxVal = self.inputMax + rangePadding
        else:
            # Degenerate stream: avoid a zero-width range.
            maxVal = self.inputMin + 1
        numBuckets = 130.0
        return max(self.defaultEncoderResolution,
                   (maxVal - minVal) / numBuckets)

    def getEncoderOutputNZ(self):
        """Return the indices of the non-zero encoder output bits."""
        return self.encoderOutput.nonzero()[0]

    def getSpOutputNZ(self):
        """Return the non-zero SP output indices of the last record."""
        return self.spOutputNZ

    def getTmPredictiveCellsNZ(self):
        """Return the TM's current predictive cells."""
        return self.tm.getPredictiveCells()

    def getTmActiveCellsNZ(self):
        """Return the TM's current active cells."""
        return self.tm.getActiveCells()

    def getTmPredictedActiveCellsNZ(self):
        """Return the predicted-active cells computed for the last record."""
        return self.predictedActiveCells

    def getRawAnomalyScore(self):
        """Return the anomaly score computed for the last record."""
        return self.anomalyScore
def run_tm_dim_experiment(test_dims=range(300, 3100, 100),
                          cellsPerColumn=1,
                          num_active=256,
                          activationThreshold=10,
                          initialPermanence=0.8,
                          connectedPermanence=0.50,
                          minThreshold=10,
                          maxNewSynapseCount=20,
                          permanenceIncrement=0.05,
                          permanenceDecrement=0.00,
                          predictedSegmentDecrement=0.000,
                          maxSegmentsPerCell=4000,
                          maxSynapsesPerSegment=255,
                          seed=42,
                          num_samples=1000,
                          sequence_length=20,
                          training_iters=1,
                          automatic_threshold=False,
                          save_results=True):
    """
    Run an experiment tracking the performance of the temporal memory given
    different input dimensions.  The number of active cells is kept fixed,
    so we are in effect varying the sparsity of the input.  We track
    performance by comparing the cells predicted to be active with the cells
    actually active in the sequence at every timestep, and averaging across
    timesteps.  Three metrics are used: correlation (Pearson's r, by
    numpy.corrcoef), set similarity (Jaccard index) and cosine similarity
    (using scipy.spatial.distance.cosine).  The Jaccard set similarity is
    the canonical metric used in the paper, but all three tend to produce
    very similar results.

    For every dimension in ``test_dims`` one TM is built, trained on
    ``num_samples`` generated sequences of ``sequence_length`` steps, and
    then evaluated (learning off) on those same sequences.

    Output is appended to tm_dim_{num_active}.txt when ``save_results`` is
    true, one line per dimension:
    ``dim, correlation, similarity, csim, num_samples``.  The same four
    metrics are also printed.

    In our experiments, we used the set similarity metric (third column in
    output) along with three different values for num_active, 64, 128 and
    256.  We used dimensions from 300 to 2900 in each case, testing every
    100.  1000 sequences of length 20 were passed to the TM in each trial.

    :param automatic_threshold: when true, activationThreshold and
        minThreshold are both replaced by
        ``min(num_active // 2, maxNewSynapseCount // 2)``.

    The remaining parameters are forwarded unchanged to the TM constructor
    or to ``generate_evenly_distributed_data_sparse``.
    """
    if automatic_threshold:
        # Floor division keeps the thresholds integral on Python 2 and 3.
        activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
        minThreshold = min(num_active // 2, maxNewSynapseCount // 2)
        print("Using activation threshold {}".format(activationThreshold))

    for dim in test_dims:
        tm = TM(columnDimensions=(dim,),
                cellsPerColumn=cellsPerColumn,
                activationThreshold=activationThreshold,
                initialPermanence=initialPermanence,
                connectedPermanence=connectedPermanence,
                minThreshold=minThreshold,
                maxNewSynapseCount=maxNewSynapseCount,
                permanenceIncrement=permanenceIncrement,
                permanenceDecrement=permanenceDecrement,
                predictedSegmentDecrement=predictedSegmentDecrement,
                maxSegmentsPerCell=maxSegmentsPerCell,
                maxSynapsesPerSegment=maxSynapsesPerSegment,
                seed=seed)

        # NOTE(review): this overrides the minThreshold passed to the
        # constructor above; presumably meant to stop existing segments
        # from ever counting as "matching" -- confirm the intent.
        tm.setMinThreshold(1000)

        num_cells = dim * cellsPerColumn
        datapoints = []
        canonical_active_cells = []

        for sample in range(num_samples):
            # Progress indicator every tenth sequence.
            if (sample + 1) % 10 == 0:
                print(sample + 1)

            data = generate_evenly_distributed_data_sparse(
                dim=dim,
                num_active=num_active,
                num_samples=sequence_length)
            datapoints.append(data)

            # Training passes over the sequence.
            for _iteration in range(training_iters):
                for j in range(data.nRows()):
                    activeColumns = set(data.rowNonZeros(j)[0])
                    tm.compute(activeColumns, learn=True)
                tm.reset()

            # One more (learning) pass to record the active cells at each
            # step of the sequence.
            current_active_cells = []
            for j in range(data.nRows()):
                activeColumns = set(data.rowNonZeros(j)[0])
                tm.compute(activeColumns, learn=True)
                current_active_cells.append(tm.getActiveCells())
            canonical_active_cells.append(current_active_cells)
            tm.reset()

        # Now that the TM has been trained, check its predictions on each
        # training sequence (no noise is applied in this experiment).
        correlations = []
        similarities = []
        csims = []
        for datapoint, active_cells in zip(datapoints,
                                           canonical_active_cells):
            data = copy.deepcopy(datapoint)

            predicted_cells = []
            for j in range(data.nRows()):
                activeColumns = set(data.rowNonZeros(j)[0])
                tm.compute(activeColumns, learn=False)
                predicted_cells.append(tm.getPredictiveCells())
            tm.reset()

            # The prediction made at step t is scored against the activity
            # recorded at step t + 1.
            step_pairs = list(zip(predicted_cells[:-1], active_cells[1:]))
            similarity = [
                float(len(set(predicted) & set(active)))
                / len(set(predicted) | set(active))
                for predicted, active in step_pairs
            ]

            dense_predicted_cells = convert_cell_lists_to_dense(
                num_cells, predicted_cells[:-1])
            dense_active_cells = convert_cell_lists_to_dense(
                num_cells, active_cells[1:])
            correlation = [
                numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                for predicted, active in zip(dense_predicted_cells,
                                             dense_active_cells)
            ]
            csim = [
                1 - cosine(predicted, active)
                for predicted, active in zip(dense_predicted_cells,
                                             dense_active_cells)
            ]

            # Pearson / cosine are NaN when a vector is all zeros (no cells
            # predicted); count those steps as 0.
            correlation = numpy.nan_to_num(correlation)
            csim = numpy.nan_to_num(csim)

            correlations.append(numpy.mean(correlation))
            similarities.append(numpy.mean(similarity))
            csims.append(numpy.mean(csim))

        correlation = numpy.mean(correlations)
        similarity = numpy.mean(similarities)
        csim = numpy.mean(csims)

        # Same text the Python 2 statement "print a, b, c, d" produced.
        print(" ".join(str(value)
                       for value in (dim, correlation, similarity, csim)))

        if save_results:
            with open("tm_dim_{}.txt".format(num_active), "a") as f:
                fields = (dim, correlation, similarity, csim, num_samples)
                f.write(", ".join(str(field) for field in fields) + "\n")
class BaseNetwork(object):
    """
    Scalar encoder -> Spatial Pooler -> Temporal Memory pipeline that
    produces a raw anomaly score for every record fed to handleRecord().

    Call initialize() once after construction to build the encoder, SP and
    TM; the constructor only stores configuration and placeholders.
    """

    def __init__(self, inputMin=None, inputMax=None, runSanity=False):
        """
        :param inputMin: smallest value expected in the stream, or None.
        :param inputMax: largest value expected in the stream, or None.
        :param runSanity: when true, initialize() attaches a
            sanity.SPTMInstance and handleRecord() reports every timestep
            to it.
        """
        self.inputMin = inputMin
        self.inputMax = inputMax
        self.runSanity = runSanity

        self.encoder = None
        self.encoderOutput = None
        self.sp = None
        self.spOutput = None
        self.spOutputNZ = None
        self.tm = None
        self.anomalyScore = None
        # Always defined (previously only when runSanity was true) so that
        # reading the attribute can never raise AttributeError.
        self.sanity = None

        self.defaultEncoderResolution = 0.0001
        self.numColumns = 2048
        self.cellsPerColumn = 32

        self.predictedActiveCells = None
        self.previouslyPredictiveCells = None

    def initialize(self):
        """Build the encoder, Spatial Pooler, Temporal Memory and, if
        requested, the Sanity instance."""
        # Scalar Encoder
        resolution = self.getEncoderResolution()
        self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
        self.encoderOutput = np.zeros(self.encoder.getWidth(),
                                      dtype=np.uint32)

        # Spatial Pooler (boosting is switched off in this variant)
        spInputWidth = self.encoder.getWidth()
        self.spParams = {
            "globalInhibition": True,
            "columnDimensions": [self.numColumns],
            "inputDimensions": [spInputWidth],
            "potentialRadius": spInputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        }
        self.sp = SpatialPooler(**self.spParams)
        self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

        # Temporal Memory
        self.tmParams = {
            "activationThreshold": 20,
            "cellsPerColumn": self.cellsPerColumn,
            "columnDimensions": (self.numColumns,),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1960,
        }
        self.tm = TemporalMemory(**self.tmParams)

        # Sanity
        if self.runSanity:
            self.sanity = sanity.SPTMInstance(self.sp, self.tm)

    def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                     learningMode=True):
        """
        Process one record.

        :param scalarValue: value to encode (ignored for encoding when
            skipEncoding is true, but still reported to Sanity).
        :param label: optional label, only forwarded to Sanity.
        :param skipEncoding: when true, reuse the current contents of
            self.encoderOutput instead of encoding scalarValue.
        :param learningMode: whether the Spatial Pooler and the Temporal
            Memory learn from this record.

        Updates spOutputNZ, previouslyPredictiveCells, predictedActiveCells
        and anomalyScore; returns nothing.
        """
        if self.runSanity:
            self.sanity.waitForUserContinue()

        # Encode the input data record if it hasn't already been encoded.
        if not skipEncoding:
            self.encodeValue(scalarValue)

        # Run the encoded data through the spatial pooler
        self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
        self.spOutputNZ = self.spOutput.nonzero()[0]

        # WARNING: this needs to happen here, before the TM runs.
        self.previouslyPredictiveCells = self.tm.getPredictiveCells()

        # Run SP output through temporal memory.  learningMode is forwarded
        # so that learningMode=False freezes the TM as well as the SP.
        self.tm.compute(self.spOutputNZ, learn=learningMode)
        self.predictedActiveCells = _computePredictedActiveCells(
            self.tm.getActiveCells(), self.previouslyPredictiveCells)

        # Anomaly score
        self.anomalyScore = _computeAnomalyScore(
            self.spOutputNZ,
            self.previouslyPredictiveCells,
            self.cellsPerColumn)

        # Run Sanity
        if self.runSanity:
            self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                       self.getSpOutputNZ(),
                                       self.previouslyPredictiveCells,
                                       {
                                           'value': scalarValue,
                                           'label': label
                                       })

    def encodeValue(self, scalarValue):
        """Encode scalarValue into self.encoderOutput (in place)."""
        self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)

    def getEncoderResolution(self):
        """
        Compute the Random Distributed Scalar Encoder (RDSE) resolution.
        It's calculated from the data min and max, specific to the data
        stream.

        Without both bounds the default resolution is returned.  Otherwise
        the range is padded by 20% of its width on each side (or widened to
        [min, min + 1] when min == max) and divided into 130 buckets; the
        result is never smaller than the default resolution.
        """
        if self.inputMin is None or self.inputMax is None:
            return self.defaultEncoderResolution

        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        if self.inputMin != self.inputMax:
            maxVal = self.inputMax + rangePadding
        else:
            # Degenerate stream: avoid a zero-width range.
            maxVal = self.inputMin + 1
        numBuckets = 130.0
        return max(self.defaultEncoderResolution,
                   (maxVal - minVal) / numBuckets)

    def getEncoderOutputNZ(self):
        """Return the indices of the non-zero encoder output bits."""
        return self.encoderOutput.nonzero()[0]

    def getSpOutputNZ(self):
        """Return the non-zero SP output indices of the last record."""
        return self.spOutputNZ

    def getTmPredictiveCellsNZ(self):
        """Return the TM's current predictive cells."""
        return self.tm.getPredictiveCells()

    def getTmActiveCellsNZ(self):
        """Return the TM's current active cells."""
        return self.tm.getActiveCells()

    def getTmPredictedActiveCellsNZ(self):
        """Return the predicted-active cells computed for the last record."""
        return self.predictedActiveCells

    def getRawAnomalyScore(self):
        """Return the anomaly score computed for the last record."""
        return self.anomalyScore
def run_tm_noise_experiment(dim=2048,
                            cellsPerColumn=1,
                            num_active=40,
                            activationThreshold=16,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=16,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_samples=1,
                            num_trials=1000,
                            sequence_length=20,
                            training_iters=1,
                            automatic_threshold=False,
                            noise_range=range(0, 100, 5)):
    """
    Run an experiment tracking the performance of the temporal memory given
    noise.  The number of active cells and the dimensions of the TM are
    fixed.  We track performance by comparing the cells predicted to be
    active with the cells actually active in the sequence without noise at
    every timestep, and averaging across timesteps.  Three metrics are used:
    correlation (Pearson's r, by numpy.corrcoef), set similarity (Jaccard
    index) and cosine similarity (using scipy.spatial.distance.cosine).  The
    Jaccard set similarity is the canonical metric used in the paper, but all
    three metrics tend to produce very similar results.

    Typically, this experiment is run to test the influence of activation
    threshold on noise tolerance, with multiple different thresholds tested.
    However, this experiment could also be used to examine the influence of
    factors such as sparsity and sequence length.

    For every noise level, ``num_trials`` fresh TMs are built; each is
    trained on ``num_samples`` generated sequences of ``sequence_length``
    steps and then evaluated on noisy copies of those sequences.  The
    metrics are averaged over every evaluated sequence of every trial.

    Output is appended to tm_noise_{threshold}.txt, one line per noise level:
    ``noise, correlation, similarity, csim, num_trials``.  We used three
    different activation threshold settings, 8, 12 and 16, mirroring the
    parameters used in the Poirazi neuron model experiment.

    :param automatic_threshold: when true, activationThreshold and
        minThreshold are both replaced by
        ``min(num_active // 2, maxNewSynapseCount // 2)``.
    :param seed: accepted for interface compatibility but NOT forwarded to
        the TM (the argument is commented out in the constructor call), so
        each trial uses whatever default seeding the TM applies.
    :param noise_range: iterable of noise levels handed to ``apply_noise``.

    The remaining parameters are forwarded unchanged to the TM constructor
    or to ``generate_evenly_distributed_data_sparse``.
    """
    if automatic_threshold:
        # Floor division keeps the thresholds integral on Python 2 and 3.
        activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
        minThreshold = min(num_active // 2, maxNewSynapseCount // 2)

    # Size of the dense vectors used for the correlation / cosine metrics.
    # Derived from the TM shape instead of a hard-coded 2048 * 32 so that it
    # always matches the TM actually being tested.
    num_cells = dim * cellsPerColumn

    for noise in noise_range:
        print(noise)

        # Accumulated over *all* trials of this noise level, so the means
        # written below really cover the reported num_trials.
        correlations = []
        similarities = []
        csims = []

        for _trial in range(num_trials):
            tm = TM(
                columnDimensions=(dim, ),
                cellsPerColumn=cellsPerColumn,
                activationThreshold=activationThreshold,
                initialPermanence=initialPermanence,
                connectedPermanence=connectedPermanence,
                minThreshold=minThreshold,
                maxNewSynapseCount=maxNewSynapseCount,
                permanenceIncrement=permanenceIncrement,
                permanenceDecrement=permanenceDecrement,
                predictedSegmentDecrement=predictedSegmentDecrement,
                maxSegmentsPerCell=maxSegmentsPerCell,
                maxSynapsesPerSegment=maxSynapsesPerSegment,
            )  # seed=seed)

            datapoints = []
            canonical_active_cells = []

            for _sample in range(num_samples):
                data = generate_evenly_distributed_data_sparse(
                    dim=dim,
                    num_active=num_active,
                    num_samples=sequence_length)
                datapoints.append(data)

                # Training passes over the clean sequence.
                for _iteration in range(training_iters):
                    for j in range(data.nRows()):
                        activeColumns = set(data.rowNonZeros(j)[0])
                        tm.compute(activeColumns, learn=True)
                    tm.reset()

                # One more (learning) pass to record the active cells at
                # each step of the noise-free sequence.
                current_active_cells = []
                for j in range(data.nRows()):
                    activeColumns = set(data.rowNonZeros(j)[0])
                    tm.compute(activeColumns, learn=True)
                    current_active_cells.append(tm.getActiveCells())
                canonical_active_cells.append(current_active_cells)
                tm.reset()

            # Now that the TM has been trained, check its performance on
            # each sequence with noise added.
            for datapoint, active_cells in zip(datapoints,
                                               canonical_active_cells):
                # Work on a copy so the stored clean sequence is untouched.
                data = copy.deepcopy(datapoint)
                apply_noise(data, noise)

                predicted_cells = []
                for j in range(data.nRows()):
                    activeColumns = set(data.rowNonZeros(j)[0])
                    tm.compute(activeColumns, learn=False)
                    predicted_cells.append(tm.getPredictiveCells())
                # Do not let this sequence's state leak into the next one.
                tm.reset()

                # The prediction made at step t is scored against the clean
                # activity at step t + 1.
                step_pairs = list(zip(predicted_cells[:-1], active_cells[1:]))
                similarity = [
                    float(len(set(predicted) & set(active)))
                    / len(set(predicted) | set(active))
                    for predicted, active in step_pairs
                ]

                dense_predicted_cells = convert_cell_lists_to_dense(
                    num_cells, predicted_cells[:-1])
                dense_active_cells = convert_cell_lists_to_dense(
                    num_cells, active_cells[1:])
                correlation = [
                    numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                    for predicted, active in zip(dense_predicted_cells,
                                                 dense_active_cells)
                ]
                csim = [
                    1 - cosine(predicted, active)
                    for predicted, active in zip(dense_predicted_cells,
                                                 dense_active_cells)
                ]

                # Pearson / cosine are NaN when a vector is all zeros (no
                # cells predicted); count those steps as 0.
                correlation = numpy.nan_to_num(correlation)
                csim = numpy.nan_to_num(csim)

                correlations.append(numpy.mean(correlation))
                similarities.append(numpy.mean(similarity))
                csims.append(numpy.mean(csim))

        correlation = numpy.mean(correlations)
        similarity = numpy.mean(similarities)
        csim = numpy.mean(csims)

        with open("tm_noise_{}.txt".format(activationThreshold), "a") as f:
            fields = (noise, correlation, similarity, csim, num_trials)
            f.write(", ".join(str(field) for field in fields) + "\n")
def runHotgym(numRecords):
    """
    Feed up to ``numRecords`` rows of the hot-gym CSV through an RDSE
    encoder, a Spatial Pooler and a Temporal Memory, with learning enabled
    throughout.

    Model parameters are read from the YAML file at _PARAMS_PATH and the
    input rows from _INPUT_FILE_PATH (the first three rows are skipped as
    headers).  Only the consumption column is encoded; the time-of-day and
    weekend date encoders are currently disabled.

    :param numRecords: maximum number of data rows to process.
    :returns: list with the TM's active cells after each processed row.
        The number of active cells is also printed per row.
    """
    with open(_PARAMS_PATH, "r") as paramsFile:
        modelParams = yaml.safe_load(paramsFile)["modelParams"]

    # Looked up even though the date encoders that used it are disabled;
    # the lookup still validates that the section exists.
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

    # Scalar encoder for the consumption value.
    rdseParams = RDSE_Parameters()
    rdseParams.size = 100
    rdseParams.sparsity = .10
    rdseParams.radius = 10
    scalarEncoder = RDSE(rdseParams)

    # With the date encoders disabled the SP input is just the RDSE output.
    encodingWidth = scalarEncoder.size
    columnCount = spParams["columnCount"]

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(columnCount, ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        # The TM's connected permanence is taken from the SP parameters.
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    results = []
    with open(_INPUT_FILE_PATH, "r") as inputFile:
        rows = csv.reader(inputFile)

        # Discard the three header rows.
        for _ in range(3):
            next(rows)

        for rowIndex, row in enumerate(rows):
            if rowIndex >= numRecords:
                break

            # The timestamp is parsed (so a malformed one still raises) but
            # is not encoded while the date encoders are disabled.
            datetime.datetime.strptime(row[0], "%m/%d/%y %H:%M")
            consumption = float(row[1])

            # Encode the consumption value into a fresh SDR.
            encoding = SDR(scalarEncoder.size)
            scalarEncoder.encode(consumption, encoding)

            # Dense uint32 buffers for the Spatial Pooler: the encoded input
            # and an output buffer with one entry per column.
            spInput = numpy.uint32(encoding.dense)
            spOutput = numpy.uint32(SDR(columnCount).dense)

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(spInput, True, spOutput)

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(numpy.nonzero(spOutput)[0], learn=True)

            cells = tm.getActiveCells()
            print(len(cells))
            results.append(cells)

    return results
def run_tm_dim_experiment(test_dims=range(300, 3100, 100),
                          cellsPerColumn=1,
                          num_active=256,
                          activationThreshold=10,
                          initialPermanence=0.8,
                          connectedPermanence=0.50,
                          minThreshold=10,
                          maxNewSynapseCount=20,
                          permanenceIncrement=0.05,
                          permanenceDecrement=0.00,
                          predictedSegmentDecrement=0.000,
                          maxSegmentsPerCell=4000,
                          maxSynapsesPerSegment=255,
                          seed=42,
                          num_samples=1000,
                          sequence_length=20,
                          training_iters=1,
                          automatic_threshold=False,
                          save_results=True):
    """
    Run an experiment tracking the performance of the temporal memory given
    different input dimensions.  The number of active cells is kept fixed,
    so we are in effect varying the sparsity of the input.  We track
    performance by comparing the cells predicted to be active with the cells
    actually active in the sequence at every timestep, and averaging across
    timesteps.  Three metrics are used: correlation (Pearson's r, by
    numpy.corrcoef), set similarity (Jaccard index) and cosine similarity
    (using scipy.spatial.distance.cosine).  The Jaccard set similarity is
    the canonical metric used in the paper, but all three tend to produce
    very similar results.

    For every dimension in ``test_dims`` one TM is built, trained on
    ``num_samples`` generated sequences of ``sequence_length`` steps, and
    then evaluated (learning off) on those same sequences.

    Output is appended to tm_dim_{num_active}.txt when ``save_results`` is
    true, one line per dimension:
    ``dim, correlation, similarity, csim, num_samples``.  The same four
    metrics are also printed.

    In our experiments, we used the set similarity metric (third column in
    output) along with three different values for num_active, 64, 128 and
    256.  We used dimensions from 300 to 2900 in each case, testing every
    100.  1000 sequences of length 20 were passed to the TM in each trial.

    :param automatic_threshold: when true, activationThreshold and
        minThreshold are both replaced by
        ``min(num_active // 2, maxNewSynapseCount // 2)``.

    The remaining parameters are forwarded unchanged to the TM constructor
    or to ``generate_evenly_distributed_data_sparse``.
    """
    if automatic_threshold:
        # Floor division keeps the thresholds integral on Python 2 and 3.
        activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
        minThreshold = min(num_active // 2, maxNewSynapseCount // 2)
        print("Using activation threshold {}".format(activationThreshold))

    for dim in test_dims:
        tm = TM(columnDimensions=(dim,),
                cellsPerColumn=cellsPerColumn,
                activationThreshold=activationThreshold,
                initialPermanence=initialPermanence,
                connectedPermanence=connectedPermanence,
                minThreshold=minThreshold,
                maxNewSynapseCount=maxNewSynapseCount,
                permanenceIncrement=permanenceIncrement,
                permanenceDecrement=permanenceDecrement,
                predictedSegmentDecrement=predictedSegmentDecrement,
                maxSegmentsPerCell=maxSegmentsPerCell,
                maxSynapsesPerSegment=maxSynapsesPerSegment,
                seed=seed)

        # NOTE(review): this overrides the minThreshold passed to the
        # constructor above; presumably meant to stop existing segments
        # from ever counting as "matching" -- confirm the intent.
        tm.setMinThreshold(1000)

        num_cells = dim * cellsPerColumn
        datapoints = []
        canonical_active_cells = []

        for sample in range(num_samples):
            # Progress indicator every tenth sequence.
            if (sample + 1) % 10 == 0:
                print(sample + 1)

            data = generate_evenly_distributed_data_sparse(
                dim=dim,
                num_active=num_active,
                num_samples=sequence_length)
            datapoints.append(data)

            # Training passes over the sequence.
            for _iteration in range(training_iters):
                for j in range(data.nRows()):
                    activeColumns = set(data.rowNonZeros(j)[0])
                    tm.compute(activeColumns, learn=True)
                tm.reset()

            # One more (learning) pass to record the active cells at each
            # step of the sequence.
            current_active_cells = []
            for j in range(data.nRows()):
                activeColumns = set(data.rowNonZeros(j)[0])
                tm.compute(activeColumns, learn=True)
                current_active_cells.append(tm.getActiveCells())
            canonical_active_cells.append(current_active_cells)
            tm.reset()

        # Now that the TM has been trained, check its predictions on each
        # training sequence (no noise is applied in this experiment).
        correlations = []
        similarities = []
        csims = []
        for datapoint, active_cells in zip(datapoints,
                                           canonical_active_cells):
            data = copy.deepcopy(datapoint)

            predicted_cells = []
            for j in range(data.nRows()):
                activeColumns = set(data.rowNonZeros(j)[0])
                tm.compute(activeColumns, learn=False)
                predicted_cells.append(tm.getPredictiveCells())
            tm.reset()

            # The prediction made at step t is scored against the activity
            # recorded at step t + 1.
            step_pairs = list(zip(predicted_cells[:-1], active_cells[1:]))
            similarity = [
                float(len(set(predicted) & set(active)))
                / len(set(predicted) | set(active))
                for predicted, active in step_pairs
            ]

            dense_predicted_cells = convert_cell_lists_to_dense(
                num_cells, predicted_cells[:-1])
            dense_active_cells = convert_cell_lists_to_dense(
                num_cells, active_cells[1:])
            correlation = [
                numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                for predicted, active in zip(dense_predicted_cells,
                                             dense_active_cells)
            ]
            csim = [
                1 - cosine(predicted, active)
                for predicted, active in zip(dense_predicted_cells,
                                             dense_active_cells)
            ]

            # Pearson / cosine are NaN when a vector is all zeros (no cells
            # predicted); count those steps as 0.
            correlation = numpy.nan_to_num(correlation)
            csim = numpy.nan_to_num(csim)

            correlations.append(numpy.mean(correlation))
            similarities.append(numpy.mean(similarity))
            csims.append(numpy.mean(csim))

        correlation = numpy.mean(correlations)
        similarity = numpy.mean(similarities)
        csim = numpy.mean(csims)

        # Same text the Python 2 statement "print a, b, c, d" produced.
        print(" ".join(str(value)
                       for value in (dim, correlation, similarity, csim)))

        if save_results:
            with open("tm_dim_{}.txt".format(num_active), "a") as f:
                fields = (dim, correlation, similarity, csim, num_samples)
                f.write(", ".join(str(field) for field in fields) + "\n")