def testIssue807():
  # The following should silently pass.  Previous versions segfaulted.
  # See https://github.com/numenta/nupic.core/issues/807 for context
  from nupic.bindings.algorithms import TemporalMemory

  tm = TemporalMemory()
  tm.compute(set(), True)

  cellsPerColumn=8,
  initialPermanence=0.21,
  connectedPermanence=0.3,
  minThreshold=15,
  maxNewSynapseCount=40,
  permanenceIncrement=0.1,
  permanenceDecrement=0.1,
  activationThreshold=15,
  predictedSegmentDecrement=0.01,
)

for t in range(75):
  rnd = random.randrange(2)
  for k in range(4):
    if rnd == 0:
      tm.compute(set(seq1[k][:].nonzero()[0].tolist()), learn=True)
    else:
      tm.compute(set(seq2[k][:].nonzero()[0].tolist()), learn=True)

print("")
print("-" * 50)
print("We now have a look at the output of the TM when presented with the individual")
print("characters A, B, C, D, X, and Y. We might observe simultaneous predictions when")
print("presented with character D (predicting A and X), character Y (predicting A and X),")
print("and when presented with character C (predicting D and Y).")

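# The seq1/seq2 inputs above are binary one-row-per-symbol arrays; only their
# nonzero indices are handed to tm.compute().  A minimal sketch of how such
# sequences might be constructed.  The column count, block size and symbol
# layout below are assumptions for illustration, not this script's actual
# encoding:
import numpy as np

numColumns = 2048
blockSize = 40
symbolIndex = {s: i for i, s in enumerate("ABCDXY")}

def encodeSymbol(symbol):
  # One distinct block of active columns per symbol.
  row = np.zeros(numColumns, dtype=np.uint32)
  start = symbolIndex[symbol] * blockSize
  row[start:start + blockSize] = 1
  return row

seq1 = np.array([encodeSymbol(s) for s in "ABCD"])  # A -> B -> C -> D
seq2 = np.array([encodeSymbol(s) for s in "XBCY"])  # X -> B -> C -> Y
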
class NumentaTMLowLevelDetector(AnomalyDetector):
  """The 'numentaTM' detector, but not using the CLAModel or network API."""

  def __init__(self, *args, **kwargs):
    super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.sp = None
    self.spOutput = None
    self.tm = None
    self.anomalyLikelihood = None

    # Set this to False if you want to get results based on raw scores
    # without using AnomalyLikelihood.  This will give worse results, but is
    # useful for checking the efficacy of AnomalyLikelihood.  You will need
    # to re-optimize the thresholds when running with this setting.
    self.useLikelihood = True

  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]

  def initialize(self):
    # Initialize the RDSE with a resolution; calculated from the data min
    # and max, the resolution is specific to the data stream.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
      "activationThreshold": 20,
      "cellsPerColumn": 32,
      "columnDimensions": (2048,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    })

    if self.useLikelihood:
      learningPeriod = math.floor(self.probationaryPeriod / 2.0)
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100)

  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore)."""

    # Encode the input data record
    self.valueEncoder.encodeIntoArray(inputData["value"], self.encodedValue)
    self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                          self.encodedTimestamp)

    # Run the encoded data through the spatial pooler
    self.sp.compute(np.concatenate((self.encodedTimestamp,
                                    self.encodedValue,)),
                    True, self.spOutput)

    # At the current state, the set of the region's active columns and the
    # set of columns that have previously-predicted cells are used to
    # calculate the raw anomaly score.
    activeColumns = set(self.spOutput.nonzero()[0].tolist())
    prevPredictedColumns = set(self.tm.columnForCell(cell)
                               for cell in self.tm.getPredictiveCells())
    rawScore = (len(activeColumns - prevPredictedColumns) /
                float(len(activeColumns)))

    self.tm.compute(activeColumns)

    if self.useLikelihood:
      # Compute the log-likelihood score
      anomalyScore = self.anomalyLikelihood.anomalyProbability(
        inputData["value"], rawScore, inputData["timestamp"])
      logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
      return (logScore, rawScore)

    return (rawScore, rawScore)

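# The raw score above is the fraction of currently active columns that no
# previously-predictive cell anticipated.  A self-contained toy illustration
# (the column sets are made up):
activeColumns = {3, 7, 12, 20}        # columns active at this timestep
prevPredictedColumns = {7, 12, 31}    # columns predicted on the last timestep

rawScore = (len(activeColumns - prevPredictedColumns) /
            float(len(activeColumns)))
print(rawScore)  # 2 unpredicted columns out of 4 active -> 0.5
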
                    activationThreshold=15,
                    maxNewSynapseCount=20)

print("")
print("Training TM on sequences ... ")
numRepeatsBatch = 1
numRptsPerSequence = 1
np.random.seed(10)
for rpt in range(numRepeatsBatch):
  # randomize the order of training sequences
  randomIdx = np.random.permutation(range(numTrain))
  for i in range(numTrain):
    for _ in range(numRptsPerSequence):
      for t in range(sequenceLength):
        tm.compute(activeColumnsTrain[randomIdx[i]][t], learn=True)
      tm.reset()
    print("Rpt: {}, {} out of {} done ".format(rpt, i, trainData.shape[0]))

# run TM over training data
unionLength = 20
print("Running TM on Training Data with union window {}".format(unionLength))
(activeColTrain,
 activeCellsTrain,
 activeFreqTrain,
 predActiveFreqTrain) = runTMOverDatasetFast(tm, activeColumnsTrain,
                                             unionLength)

# construct distance matrices using training data
distMatColumnTrain = calculateDistanceMat(activeColTrain, activeColTrain)
distMatCellTrain = calculateDistanceMat(activeCellsTrain, activeCellsTrain)
distMatActiveFreqTrain = calculateDistanceMat(activeFreqTrain, activeFreqTrain)

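# calculateDistanceMat is not shown in this excerpt.  A hypothetical minimal
# sketch, assuming each representation is a dense binary vector and distance
# is one minus normalized overlap; the real helper may differ:
import numpy as np

def calculateDistanceMat(reprsA, reprsB):
  distMat = np.zeros((len(reprsA), len(reprsB)))
  for i, a in enumerate(reprsA):
    for j, b in enumerate(reprsB):
      overlap = np.sum(np.logical_and(a, b))
      norm = max(np.sum(a), np.sum(b), 1)
      distMat[i, j] = 1.0 - float(overlap) / norm
  return distMat
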
def go():
  valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
  timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))

  inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

  sp = SpatialPooler(**{
    "globalInhibition": True,
    "columnDimensions": [2048],
    "inputDimensions": [inputWidth],
    "potentialRadius": inputWidth,
    "numActiveColumnsPerInhArea": 40,
    "seed": 1956,
    "potentialPct": 0.8,
    "boostStrength": 0.0,
    "synPermActiveInc": 0.003,
    "synPermConnected": 0.2,
    "synPermInactiveDec": 0.0005,
  })

  tm = TemporalMemory(**{
    "activationThreshold": 20,
    "cellsPerColumn": 32,
    "columnDimensions": (2048,),
    "initialPermanence": 0.24,
    "maxSegmentsPerCell": 128,
    "maxSynapsesPerSegment": 128,
    "minThreshold": 13,
    "maxNewSynapseCount": 31,
    "permanenceDecrement": 0.008,
    "permanenceIncrement": 0.04,
    "seed": 1961,
  })

  inputPath = os.path.join(os.path.dirname(__file__),
                           "data/rec-center-hourly.csv")
  inputFile = open(inputPath, "r")
  csvReader = csv.reader(inputFile)
  # Skip the three header rows.
  next(csvReader)
  next(csvReader)
  next(csvReader)

  encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
  encodedTimestamp = np.zeros(timestampEncoder.getWidth(), dtype=np.uint32)
  spOutput = np.zeros(2048, dtype=np.float32)

  sanityInstance = sanity.SPTMInstance(sp, tm)

  for timestampStr, consumptionStr in csvReader:
    sanityInstance.waitForUserContinue()

    timestamp = datetime.datetime.strptime(timestampStr, "%m/%d/%y %H:%M")
    consumption = float(consumptionStr)

    timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
    valueEncoder.encodeIntoArray(consumption, encodedValue)

    sensoryInput = np.concatenate((encodedTimestamp, encodedValue,))
    sp.compute(sensoryInput, True, spOutput)

    activeColumns = np.flatnonzero(spOutput)
    # Read the predictions made on the previous timestep before compute()
    # advances the TM.
    predictedCells = tm.getPredictiveCells()
    tm.compute(activeColumns)

    activeInputBits = np.flatnonzero(sensoryInput)
    displayText = {"timestamp": timestampStr, "consumption": consumptionStr}
    sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                  predictedCells, displayText)

def run_tm_noise_experiment(dim=2048,
                            cellsPerColumn=1,
                            num_active=40,
                            activationThreshold=16,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=16,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_samples=1,
                            num_trials=1000,
                            sequence_length=20,
                            training_iters=1,
                            automatic_threshold=False,
                            noise_range=range(0, 100, 5)):
  """
  Run an experiment tracking the performance of the temporal memory given
  noise.  The number of active cells and the dimensions of the TM are fixed.
  We track performance by comparing the cells predicted to be active with
  the cells actually active in the sequence without noise at every timestep,
  and averaging across timesteps.  Three metrics are used: correlation
  (Pearson's r, by numpy.corrcoef), set similarity (Jaccard index) and
  cosine similarity (using scipy.spatial.distance.cosine).  The Jaccard set
  similarity is the canonical metric used in the paper, but all three
  metrics tend to produce very similar results.

  Typically, this experiment is run to test the influence of activation
  threshold on noise tolerance, with multiple different thresholds tested.
  However, this experiment could also be used to examine the influence of
  factors such as sparsity and sequence length.

  Output is written to tm_noise_{threshold}.txt, including sample size.

  We used three different activation threshold settings, 8, 12 and 16,
  mirroring the parameters used in the Poirazi neuron model experiment.
  """
  if automatic_threshold:
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)

  for noise in noise_range:
    print(noise)
    for trial in range(num_trials):
      tm = TM(columnDimensions=(dim,),
              cellsPerColumn=cellsPerColumn,
              activationThreshold=activationThreshold,
              initialPermanence=initialPermanence,
              connectedPermanence=connectedPermanence,
              minThreshold=minThreshold,
              maxNewSynapseCount=maxNewSynapseCount,
              permanenceIncrement=permanenceIncrement,
              permanenceDecrement=permanenceDecrement,
              predictedSegmentDecrement=predictedSegmentDecrement,
              maxSegmentsPerCell=maxSegmentsPerCell,
              maxSynapsesPerSegment=maxSynapsesPerSegment)
              # seed=seed

      datapoints = []
      canonical_active_cells = []

      for sample in range(num_samples):
        data = generate_evenly_distributed_data_sparse(
            dim=dim, num_active=num_active, num_samples=sequence_length)
        datapoints.append(data)

        for i in range(training_iters):
          for j in range(data.nRows()):
            activeColumns = set(data.rowNonZeros(j)[0])
            tm.compute(activeColumns, learn=True)
          tm.reset()

        current_active_cells = []
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn=True)
          current_active_cells.append(tm.getActiveCells())
        canonical_active_cells.append(current_active_cells)
        tm.reset()

      # Now that the TM has been trained, check its performance on each
      # sequence with noise added.
      correlations = []
      similarities = []
      csims = []
      for datapoint, active_cells in zip(datapoints, canonical_active_cells):
        data = copy.deepcopy(datapoint)
        apply_noise(data, noise)

        predicted_cells = []
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn=False)
          predicted_cells.append(tm.getPredictiveCells())

        similarity = [(0. + len(set(predicted) & set(active))) /
                      len(set(predicted) | set(active))
                      for predicted, active in
                      zip(predicted_cells[:-1], active_cells[1:])]

        dense_predicted_cells = convert_cell_lists_to_dense(
            2048 * 32, predicted_cells[:-1])
        dense_active_cells = convert_cell_lists_to_dense(
            2048 * 32, active_cells[1:])
        correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                       for predicted, active in
                       zip(dense_predicted_cells, dense_active_cells)]
        csim = [1 - cosine(predicted, active)
                for predicted, active in
                zip(dense_predicted_cells, dense_active_cells)]

        correlation = numpy.nan_to_num(correlation)
        csim = numpy.nan_to_num(csim)
        correlations.append(numpy.mean(correlation))
        similarities.append(numpy.mean(similarity))
        csims.append(numpy.mean(csim))

      correlation = numpy.mean(correlations)
      similarity = numpy.mean(similarities)
      csim = numpy.mean(csims)
      with open("tm_noise_{}.txt".format(activationThreshold), "a") as f:
        f.write(str(noise) + ", " + str(correlation) + ", " +
                str(similarity) + ", " + str(csim) + ", " +
                str(num_trials) + "\n")

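# For reference, a self-contained toy version of the three metrics used
# above: Jaccard similarity on cell-index sets, plus Pearson correlation and
# cosine similarity on dense binary vectors.  The cell indices are made up.
import numpy
from scipy.spatial.distance import cosine

predicted = {1, 4, 9}    # cells predicted at t-1
active = {4, 9, 17}      # cells actually active at t

# Jaccard set similarity: |intersection| / |union| = 2/4 = 0.5
jaccard = len(predicted & active) / float(len(predicted | active))

# Dense binary versions for the vector metrics
dense_p = numpy.zeros(20)
dense_p[list(predicted)] = 1.
dense_a = numpy.zeros(20)
dense_a[list(active)] = 1.

pearson = numpy.corrcoef(numpy.asarray([dense_p, dense_a]))[0, 1]
csim = 1 - cosine(dense_p, dense_a)  # scipy's cosine() is a distance
print(jaccard, pearson, csim)
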
def run_tm_union_experiment(dim=4000,
                            cellsPerColumn=1,
                            num_active=40,
                            activationThreshold=5,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=5,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_branches_range=range(50, 51, 1),
                            onset_length=5,
                            training_iters=10,
                            num_trials=10000,
                            automatic_threshold=True,
                            save_results=True):
  """
  Run an experiment tracking the performance of the temporal memory when a
  single shared onset sequence is followed by many different learned
  branches.  The TM is trained on the onset followed by each of num_branches
  random continuations.  After training, the onset is replayed and we
  measure how the resulting union of predicted cells relates to a fresh
  random input: the overlap (the fraction of the input's bits that were
  predicted), the "surprise" (the number of its bits that were not
  predicted), and the cosine similarity between the dense predicted and
  input vectors (using scipy.spatial.distance.cosine).

  Output is written to tm_union_n{dim}_a{num_active}_c{cellsPerColumn}.txt,
  including sample size.
  """
  if automatic_threshold:
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)

  for num_branches in num_branches_range:
    overlaps = []
    surprises = []
    csims = []
    for trial in range(num_trials):
      if (trial + 1) % 100 == 0:
        print(trial + 1)
      tm = TM(columnDimensions=(dim,),
              cellsPerColumn=cellsPerColumn,
              activationThreshold=activationThreshold,
              initialPermanence=initialPermanence,
              connectedPermanence=connectedPermanence,
              minThreshold=minThreshold,
              maxNewSynapseCount=maxNewSynapseCount,
              permanenceIncrement=permanenceIncrement,
              permanenceDecrement=permanenceDecrement,
              predictedSegmentDecrement=predictedSegmentDecrement,
              maxSegmentsPerCell=maxSegmentsPerCell,
              maxSynapsesPerSegment=maxSynapsesPerSegment,
              seed=seed)

      datapoints = []
      canonical_active_cells = []

      onset = generate_evenly_distributed_data_sparse(
          dim=dim, num_active=num_active, num_samples=onset_length)

      for branch in range(num_branches):
        datapoint = numpy.random.choice(dim, num_active, replace=False)
        datapoints.append(datapoint)
        for i in range(training_iters):
          for j in range(onset.nRows()):
            activeColumns = set(onset.rowNonZeros(j)[0])
            tm.compute(activeColumns, learn=True)
          tm.compute(datapoint, learn=True)
          tm.reset()

      for j in range(onset.nRows()):
        activeColumns = set(onset.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn=False)
      predicted_cells = tm.getPredictiveCells()

      datapoint = numpy.random.choice(dim, num_active, replace=False)
      overlap = (1. * len(set(predicted_cells) & set(datapoint))) / len(datapoint)
      surprise = len(datapoint) - len(set(predicted_cells) & set(datapoint))

      dense_predicted_cells = numpy.zeros((dim * cellsPerColumn,))
      for cell in predicted_cells:
        dense_predicted_cells[cell] = 1.
      dense_active_cells = numpy.zeros((dim * cellsPerColumn,))
      for cell in datapoint:
        dense_active_cells[cell] = 1.
      csim = 1 - cosine(dense_predicted_cells, dense_active_cells)
      csim = numpy.nan_to_num(csim)

      overlaps.append(overlap)
      surprises.append(surprise)
      csims.append(csim)

    overlap = numpy.mean(overlaps)
    surprise = numpy.mean(surprises)
    csim = numpy.mean(csims)
    print(dim, overlap, surprise, csim)
    if save_results:
      with open("tm_union_n{}_a{}_c{}.txt".format(dim, num_active,
                                                  cellsPerColumn), "a") as f:
        f.write(str(num_branches) + ", " + str(overlap) + ", " +
                str(surprise) + ", " + str(csim) + ", " +
                str(num_trials) + "\n")

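# A toy illustration of the two set-based measures used above: overlap is
# the fraction of the new input's bits covered by the predicted cells, and
# surprise is the count of its bits that were not predicted.  The sets are
# made up.
predicted_cells = {0, 2, 5, 9, 11}
datapoint = [2, 5, 7, 13]

covered = set(predicted_cells) & set(datapoint)
overlap = float(len(covered)) / len(datapoint)  # 2/4 = 0.5
surprise = len(datapoint) - len(covered)        # 2 unpredicted bits
print(overlap, surprise)
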
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  # timeOfDayEncoder = DateEncoder(
  #   timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  # weekendEncoder = DateEncoder(
  #   weekend=enParams["timestamp_weekend"]["weekend"])
  # scalarEncoder = RandomDistributedScalarEncoder(
  #   enParams["consumption"]["resolution"])
  rdseParams = RDSE_Parameters()
  rdseParams.size = 100
  rdseParams.sparsity = 0.10
  rdseParams.radius = 10
  scalarEncoder = RDSE(rdseParams)

  # encodingWidth = (timeOfDayEncoder.getWidth()
  #                  + weekendEncoder.getWidth()
  #                  + scalarEncoder.getWidth())
  encodingWidth = scalarEncoder.size

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True
  )

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = next(reader)
    next(reader)
    next(reader)

    for count, record in enumerate(reader):
      if count >= numRecords:
        break

      # Convert the date string into a Python datetime object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert the data value string into a float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the
      # encoders to populate.
      # timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      # weekendBits = numpy.zeros(weekendEncoder.getWidth())
      # consumptionBits = numpy.zeros(scalarEncoder.size)
      consumptionBits = SDR(scalarEncoder.size)

      # Now we call the encoders to create bit representations for each
      # value.
      # timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      # weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encode(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      # encoding = numpy.concatenate(
      #   [timeOfDayBits, weekendBits, consumptionBits]
      # )
      encoding = consumptionBits

      # Create an array to represent active columns, all initially zero.
      # This will be populated by the compute method below.  It must have
      # the same dimensions as the Spatial Pooler.
      # activeColumns = numpy.zeros(spParams["columnCount"])
      activeColumns = SDR(spParams["columnCount"])

      encodingIn = numpy.uint32(encoding.dense)
      minicolumnsOut = numpy.uint32(activeColumns.dense)

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encodingIn, True, minicolumnsOut)
      activeColumnIndices = numpy.nonzero(minicolumnsOut)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)
      activeCells = tm.getActiveCells()
      print(len(activeCells))
      results.append(activeCells)

  return results

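# The htm.core SDR objects used above expose both dense and sparse views of
# the same bits, which is why the code can hand numpy.uint32(encoding.dense)
# to the SpatialPooler and later take numpy.nonzero() of the dense output.
# A minimal sketch of that duality (assuming the htm.core package layout):
from htm.bindings.sdr import SDR

sdr = SDR(10)
sdr.dense = [0, 1, 0, 0, 1, 0, 0, 0, 1, 0]
print(sdr.sparse)  # -> [1 4 8], the indices of the active bits
print(sdr.dense)   # the full binary vector
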
  cellsPerColumn=8,
  initialPermanence=0.21,
  connectedPermanence=0.3,
  minThreshold=15,
  maxNewSynapseCount=40,
  permanenceIncrement=0.1,
  permanenceDecrement=0.1,
  activationThreshold=15,
  predictedSegmentDecrement=0.01,
)

for t in range(75):
  rnd = random.randrange(2)
  for k in range(4):
    if rnd == 0:
      tm.compute(set(seq1[k][:].nonzero()[0].tolist()), learn=True)
    else:
      tm.compute(set(seq2[k][:].nonzero()[0].tolist()), learn=True)

print("")
print("-" * 50)
print("We now have a look at the output of the TM when presented with the individual")
print("characters A, B, C, D, X, and Y. We might observe simultaneous predictions when")
print("presented with character D (predicting A and X), character Y (predicting A and X),")
print("and when presented with character C (predicting D and Y).")
print("N.B. Due to the stochasticity of this script, we might not observe simultaneous")
print("predictions in *all* the aforementioned characters.")
print("-" * 50)
print("")

showPredictions()

def run_tm_dim_experiment(test_dims=range(300, 3100, 100),
                          cellsPerColumn=1,
                          num_active=256,
                          activationThreshold=10,
                          initialPermanence=0.8,
                          connectedPermanence=0.50,
                          minThreshold=10,
                          maxNewSynapseCount=20,
                          permanenceIncrement=0.05,
                          permanenceDecrement=0.00,
                          predictedSegmentDecrement=0.000,
                          maxSegmentsPerCell=4000,
                          maxSynapsesPerSegment=255,
                          seed=42,
                          num_samples=1000,
                          sequence_length=20,
                          training_iters=1,
                          automatic_threshold=False,
                          save_results=True):
  """
  Run an experiment tracking the performance of the temporal memory given
  different input dimensions.  The number of active cells is kept fixed, so
  we are in effect varying the sparsity of the input.  We track performance
  by comparing the cells predicted to be active with the cells actually
  active in the sequence without noise at every timestep, and averaging
  across timesteps.  Three metrics are used: correlation (Pearson's r, by
  numpy.corrcoef), set similarity (Jaccard index) and cosine similarity
  (using scipy.spatial.distance.cosine).  The Jaccard set similarity is the
  canonical metric used in the paper, but all three tend to produce very
  similar results.

  Output is written to tm_dim_{num_active}.txt, including sample size.

  In our experiments, we used the set similarity metric (third column in the
  output) along with three different values for num_active: 64, 128 and 256.
  We used dimensions from 300 to 2900 in each case, testing every 100.
  1000 sequences of length 20 were passed to the TM in each trial.
  """
  if automatic_threshold:
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)
  print("Using activation threshold {}".format(activationThreshold))

  for dim in test_dims:
    tm = TM(columnDimensions=(dim,),
            cellsPerColumn=cellsPerColumn,
            activationThreshold=activationThreshold,
            initialPermanence=initialPermanence,
            connectedPermanence=connectedPermanence,
            minThreshold=minThreshold,
            maxNewSynapseCount=maxNewSynapseCount,
            permanenceIncrement=permanenceIncrement,
            permanenceDecrement=permanenceDecrement,
            predictedSegmentDecrement=predictedSegmentDecrement,
            maxSegmentsPerCell=maxSegmentsPerCell,
            maxSynapsesPerSegment=maxSynapsesPerSegment,
            seed=seed)
    # Note: this overrides the minThreshold passed to the constructor, so no
    # segment ever qualifies as "matching" and learning always grows new
    # segments instead of reinforcing partial matches.
    tm.setMinThreshold(1000)

    datapoints = []
    canonical_active_cells = []

    for sample in range(num_samples):
      if (sample + 1) % 10 == 0:
        print(sample + 1)
      data = generate_evenly_distributed_data_sparse(
          dim=dim, num_active=num_active, num_samples=sequence_length)
      datapoints.append(data)

      for i in range(training_iters):
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn=True)
        tm.reset()

      current_active_cells = []
      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn=True)
        current_active_cells.append(tm.getActiveCells())
      canonical_active_cells.append(current_active_cells)
      tm.reset()

    # Now that the TM has been trained, check its performance on each
    # sequence.
    correlations = []
    similarities = []
    csims = []
    for datapoint, active_cells in zip(datapoints, canonical_active_cells):
      data = copy.deepcopy(datapoint)

      predicted_cells = []
      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn=False)
        predicted_cells.append(tm.getPredictiveCells())
      tm.reset()

      similarity = [(0. + len(set(predicted) & set(active))) /
                    len(set(predicted) | set(active))
                    for predicted, active in
                    zip(predicted_cells[:-1], active_cells[1:])]

      dense_predicted_cells = convert_cell_lists_to_dense(
          dim * cellsPerColumn, predicted_cells[:-1])
      dense_active_cells = convert_cell_lists_to_dense(
          dim * cellsPerColumn, active_cells[1:])
      correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                     for predicted, active in
                     zip(dense_predicted_cells, dense_active_cells)]
      csim = [1 - cosine(predicted, active)
              for predicted, active in
              zip(dense_predicted_cells, dense_active_cells)]

      correlation = numpy.nan_to_num(correlation)
      csim = numpy.nan_to_num(csim)
      correlations.append(numpy.mean(correlation))
      similarities.append(numpy.mean(similarity))
      csims.append(numpy.mean(csim))

    correlation = numpy.mean(correlations)
    similarity = numpy.mean(similarities)
    csim = numpy.mean(csims)
    print(dim, correlation, similarity, csim)
    if save_results:
      with open("tm_dim_{}.txt".format(num_active), "a") as f:
        f.write(str(dim) + ", " + str(correlation) + ", " +
                str(similarity) + ", " + str(csim) + ", " +
                str(num_samples) + "\n")

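# convert_cell_lists_to_dense is defined elsewhere in the repository.  A
# minimal sketch consistent with how it is called above (the real helper may
# differ in detail): one dense binary vector of length dim per timestep.
import numpy

def convert_cell_lists_to_dense(dim, cell_lists):
  dense = numpy.zeros((len(cell_lists), dim))
  for i, cells in enumerate(cell_lists):
    dense[i, list(cells)] = 1.
  return dense
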
for i, s_id in enumerate(seqs_train):
  s = uniqueSequences[s_id]
  # s = s[0:-1]

  # SP_SDR_train = numpy.zeros((seqLength, 200))
  SP_SDR_train = numpy.zeros((seqLength, 6))
  SP_SDR_train = SP_SDR_train.astype(numpy.uint32)
  for j, symbol in enumerate(s):
    SP_SDR_train[j] = SDR_activity_codes[symbol]
  SP_SDR_seqs_train.append(SP_SDR_train)

  # now train the temporal memory
  for j, SDR in enumerate(SP_SDR_train):
    tp.compute(SDR, learn=True)
  # reset the TP between sequences so it does not learn transitions across
  # sequence boundaries
  tp.reset()


# Now, check the predictions of the tp
def printTemporalPredictions(SP_activeCol_seq, symbol_seq):
  for i, s in enumerate(SP_activeCol_seq):
    tp.compute(s, learn=False)
    print("TP Winner cells", tp.getWinnerCells())
    print("TP Predictive cells", tp.getPredictiveCells())
  tp.reset()
  print("\n")

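# Winner and predictive cells are reported as flat cell indices.  A small
# sketch of mapping them back to their columns; the cellsPerColumn value
# here is an assumption for illustration (use the TM's actual setting).
cellsPerColumn = 32
predictiveCells = [70, 71, 1055]
predictedColumns = sorted(set(cell // cellsPerColumn
                              for cell in predictiveCells))
print(predictedColumns)  # [2, 32]
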
class BaseNetwork(object):

  def __init__(self, inputMin=None, inputMax=None, runSanity=False):
    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None

  def initialize(self):
    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)

  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue, 'label': label})

  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)

  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution.  It's
    calculated from the data min and max, specific to the data stream.
""" if self.inputMin is None or self.inputMax is None: return self.defaultEncoderResolution else: rangePadding = abs(self.inputMax - self.inputMin) * 0.2 minVal = self.inputMin - rangePadding maxVal = (self.inputMax + rangePadding if self.inputMin != self.inputMax else self.inputMin + 1) numBuckets = 130.0 return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets) def getEncoderOutputNZ(self): return self.encoderOutput.nonzero()[0] def getSpOutputNZ(self): return self.spOutputNZ def getTmPredictiveCellsNZ(self): return self.tm.getPredictiveCells() def getTmActiveCellsNZ(self): return self.tm.getActiveCells() def getTmPredictedActiveCellsNZ(self): return self.predictedActiveCells def getRawAnomalyScore(self): return self.anomalyScore
class BaseNetwork(object):

  def __init__(self, inputMin=None, inputMax=None, runSanity=False):
    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None

  def initialize(self):
    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 5.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)

  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue, 'label': label})

  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)

  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution.  It's
    calculated from the data min and max, specific to the data stream.
""" if self.inputMin is None or self.inputMax is None: return self.defaultEncoderResolution else: rangePadding = abs(self.inputMax - self.inputMin) * 0.2 minVal = self.inputMin - rangePadding maxVal = (self.inputMax + rangePadding if self.inputMin != self.inputMax else self.inputMin + 1) numBuckets = 130.0 return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets) def getEncoderOutputNZ(self): return self.encoderOutput.nonzero()[0] def getSpOutputNZ(self): return self.spOutputNZ def getTmPredictiveCellsNZ(self): return self.tm.getPredictiveCells() def getTmActiveCellsNZ(self): return self.tm.getActiveCells() def getTmPredictedActiveCellsNZ(self): return self.predictedActiveCells def getRawAnomalyScore(self): return self.anomalyScore