def testIssue807():
    # The following should silently pass.  Previous versions segfaulted.
    # See https://github.com/numenta/nupic.core/issues/807 for context
    from nupic.bindings.algorithms import TemporalMemory

    tm = TemporalMemory()
    tm.compute(set(), True)
Example #3
# NOTE: this example is truncated upstream. The import and constructor opening
# below are a reconstruction (columnDimensions assumed) so the snippet parses;
# seq1 and seq2 are binary sequence matrices defined earlier in the source.
import random

tm = TemporalMemory(
    columnDimensions=(2048,),
    cellsPerColumn=8,
    initialPermanence=0.21,
    connectedPermanence=0.3,
    minThreshold=15,
    maxNewSynapseCount=40,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    activationThreshold=15,
    predictedSegmentDecrement=0.01,
)

for t in range(75):
    rnd = random.randrange(2)
    for k in range(4):
        if rnd == 0:
            tm.compute(set(seq1[k][:].nonzero()[0].tolist()), learn=True)
        else:
            tm.compute(set(seq2[k][:].nonzero()[0].tolist()), learn=True)

print("")
print("-" * 50)
print(
    "We now have a look at the output of the TM when presented with the individual"
)
print(
    "characters A, B, C, D, X, and Y. We might observe simultaneous predictions when"
)
print(
    "presented with character D (predicting A and X), character Y (predicting A and X),"
)
print("and when presented with character C (predicting D and Y).")
Example #4
class NumentaTMLowLevelDetector(AnomalyDetector):
  """The 'numentaTM' detector, but not using the CLAModel or network API """
  def __init__(self, *args, **kwargs):
    super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.sp = None
    self.spOutput = None
    self.tm = None
    self.anomalyLikelihood = None

    # Set this to False if you want results based on raw scores without using
    # AnomalyLikelihood. This gives worse results but is useful for checking
    # the efficacy of AnomalyLikelihood. You will need to re-optimize the
    # thresholds when running with this setting.
    self.useLikelihood = True


  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]


  def initialize(self):

    # Initialize the RDSE with a resolution calculated from the data min and
    # max; the resolution is specific to the data stream.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49, ))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
      "activationThreshold": 20,
      "cellsPerColumn": 32,
      "columnDimensions": (2048,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    })

    if self.useLikelihood:
      learningPeriod = math.floor(self.probationaryPeriod / 2.0)
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100
      )


  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore)."""

    # Encode the input data record
    self.valueEncoder.encodeIntoArray(
        inputData["value"], self.encodedValue)
    self.timestampEncoder.encodeIntoArray(
        inputData["timestamp"], self.encodedTimestamp)

    # Run the encoded data through the spatial pooler
    self.sp.compute(np.concatenate((self.encodedTimestamp,
                                    self.encodedValue,)),
                    True, self.spOutput)

    # The raw anomaly score is the fraction of the region's currently-active
    # columns that were not among the columns with previously-predicted cells.
    activeColumns = set(self.spOutput.nonzero()[0].tolist())
    prevPredictedColumns = set(self.tm.columnForCell(cell)
                               for cell in self.tm.getPredictiveCells())
    rawScore = (len(activeColumns - prevPredictedColumns) /
                float(len(activeColumns)))

    self.tm.compute(activeColumns)

    if self.useLikelihood:
      # Compute the log-likelihood score
      anomalyScore = self.anomalyLikelihood.anomalyProbability(
        inputData["value"], rawScore, inputData["timestamp"])
      logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
      return (logScore, rawScore)

    return (rawScore, rawScore)
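
The raw score in handleRecord is plain set arithmetic: the fraction of
currently-active columns that no previously-predictive cell anticipated. A toy
sketch with illustrative values:

activeColumns = {0, 1, 2, 3}
prevPredictedColumns = {2, 3, 4}
# Columns 0 and 1 were active but unpredicted: 2 of 4 -> rawScore = 0.5
rawScore = len(activeColumns - prevPredictedColumns) / float(len(activeColumns))
print(rawScore)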
Example #5

    # NOTE: this example is truncated upstream; the constructor opening below
    # is a reconstruction (columnDimensions and cellsPerColumn are assumed)
    # so the snippet parses.
    tm = TemporalMemory(columnDimensions=(2048,),
                        cellsPerColumn=32,
                        activationThreshold=15,
                        maxNewSynapseCount=20)

    print()
    print("Training TM on sequences ... ")
    numRepeatsBatch = 1
    numRptsPerSequence = 1

    np.random.seed(10)
    for rpt in range(numRepeatsBatch):
      # randomize the order of training sequences
      randomIdx = np.random.permutation(range(numTrain))
      for i in range(numTrain):
        for _ in range(numRptsPerSequence):
          for t in range(sequenceLength):
            tm.compute(activeColumnsTrain[randomIdx[i]][t], learn=True)
          tm.reset()
        print("Rpt: {}, {} out of {} done ".format(rpt, i, trainData.shape[0]))

    # run TM over training data
    unionLength = 20
    print "Running TM on Training Data with union window {}".format(unionLength)
    (activeColTrain,
     activeCellsTrain,
     activeFreqTrain,
     predActiveFreqTrain) = runTMOverDatasetFast(tm, activeColumnsTrain, unionLength)

    # construct three distance matrices using training data
    distMatColumnTrain = calculateDistanceMat(activeColTrain, activeColTrain)
    distMatCellTrain = calculateDistanceMat(activeCellsTrain, activeCellsTrain)
    distMatActiveFreqTrain = calculateDistanceMat(activeFreqTrain, activeFreqTrain)
Beispiel #6
0
def go():
    valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
    timestampEncoder = DateEncoder(timeOfDay=(
        21,
        9.49,
    ))

    inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

    sp = SpatialPooler(
        **{
            "globalInhibition": True,
            "columnDimensions": [2048],
            "inputDimensions": [inputWidth],
            "potentialRadius": inputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        })

    tm = TemporalMemory(
        **{
            "activationThreshold": 20,
            "cellsPerColumn": 32,
            "columnDimensions": (2048, ),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1961,
        })

    inputPath = os.path.join(os.path.dirname(__file__),
                             "data/rec-center-hourly.csv")
    inputFile = open(inputPath, "r")
    csvReader = csv.reader(inputFile)
    # Skip the three header rows.
    next(csvReader)
    next(csvReader)
    next(csvReader)

    encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
    encodedTimestamp = np.zeros(timestampEncoder.getWidth(), dtype=np.uint32)
    spOutput = np.zeros(2048, dtype=np.float32)

    sanityInstance = sanity.SPTMInstance(sp, tm)

    for timestampStr, consumptionStr in csvReader:

        sanityInstance.waitForUserContinue()

        timestamp = datetime.datetime.strptime(timestampStr, "%m/%d/%y %H:%M")
        consumption = float(consumptionStr)

        timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
        valueEncoder.encodeIntoArray(consumption, encodedValue)

        sensoryInput = np.concatenate((
            encodedTimestamp,
            encodedValue,
        ))
        sp.compute(sensoryInput, True, spOutput)

        activeColumns = np.flatnonzero(spOutput)
        predictedCells = tm.getPredictiveCells()
        tm.compute(activeColumns)

        activeInputBits = np.flatnonzero(sensoryInput)
        displayText = {
            "timestamp": timestampStr,
            "consumption": consumptionStr
        }

        sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                      predictedCells, displayText)

Example #7

def run_tm_noise_experiment(dim=2048,
                            cellsPerColumn=1,
                            num_active=40,
                            activationThreshold=16,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=16,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_samples=1,
                            num_trials=1000,
                            sequence_length=20,
                            training_iters=1,
                            automatic_threshold=False,
                            noise_range=range(0, 100, 5)):

  """
  Run an experiment tracking the performance of the temporal memory given
  noise.  The number of active cells and the dimensions of the TM are
  fixed. We track performance by comparing the cells predicted to be
  active with the cells actually active in the sequence without noise at
  every timestep, and averaging across timesteps. Three metrics are used,
  correlation (Pearson's r, by numpy.corrcoef), set similarity (Jaccard
  index) and cosine similarity (using scipy.spatial.distance.cosine). The
  Jaccard set similarity is the canonical metric used in the paper, but
  all three metrics tend to produce very similar results.

  Typically, this experiment is run to test the influence of activation
  threshold on noise tolerance, with multiple different thresholds tested.
  However, this experiment could also be used to examine the influence of
  factors such as sparsity and sequence length.

  Output is written to tm_noise_{activationThreshold}.txt, including sample size.

  We used three different activation threshold settings, 8, 12 and 16, mirroring
  the parameters used in the Poirazi neuron model experiment.
  """
  if automatic_threshold:
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)

  for noise in noise_range:
    print(noise)
    for trial in range(num_trials):
      tm = TM(columnDimensions=(dim,),
          cellsPerColumn=cellsPerColumn,
          activationThreshold=activationThreshold,
          initialPermanence=initialPermanence,
          connectedPermanence=connectedPermanence,
          minThreshold=minThreshold,
          maxNewSynapseCount=maxNewSynapseCount,
          permanenceIncrement=permanenceIncrement,
          permanenceDecrement=permanenceDecrement,
          predictedSegmentDecrement=predictedSegmentDecrement,
          maxSegmentsPerCell=maxSegmentsPerCell,
          maxSynapsesPerSegment=maxSynapsesPerSegment,
          )  # seed=seed

      datapoints = []
      canonical_active_cells = []

      for sample in range(num_samples):
        data = generate_evenly_distributed_data_sparse(dim = dim, num_active = num_active, num_samples = sequence_length)
        datapoints.append(data)
        for i in range(training_iters):
          for j in range(data.nRows()):
            activeColumns = set(data.rowNonZeros(j)[0])
            tm.compute(activeColumns, learn = True)
          tm.reset()

        current_active_cells = []
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn = True)
          current_active_cells.append(tm.getActiveCells())
        canonical_active_cells.append(current_active_cells)
        tm.reset()

      # Now that the TM has been trained, check its performance on each sequence with noise added.
      correlations = []
      similarities = []
      csims = []
      for datapoint, active_cells in zip(datapoints, canonical_active_cells):
        data = copy.deepcopy(datapoint)
        apply_noise(data, noise)

        predicted_cells = []

        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn = False)
          predicted_cells.append(tm.getPredictiveCells())

        similarity = [len(set(predicted) & set(active)) /
                      float(len(set(predicted) | set(active)))
                      for predicted, active in
                      zip(predicted_cells[:-1], active_cells[1:])]
        # NOTE: the dense cell space is hard-coded to 2048 columns x 32 cells,
        # independent of the dim and cellsPerColumn arguments.
        dense_predicted_cells = convert_cell_lists_to_dense(
            2048 * 32, predicted_cells[:-1])
        dense_active_cells = convert_cell_lists_to_dense(
            2048 * 32, active_cells[1:])
        correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                       for predicted, active in
                       zip(dense_predicted_cells, dense_active_cells)]
        csim = [1 - cosine(predicted, active)
                for predicted, active in
                zip(dense_predicted_cells, dense_active_cells)]

        correlation = numpy.nan_to_num(correlation)
        csim = numpy.nan_to_num(csim)
        correlations.append(numpy.mean(correlation))
        similarities.append(numpy.mean(similarity))
        csims.append(numpy.mean(csim))

    correlation = numpy.mean(correlations)
    similarity = numpy.mean(similarities)
    csim = numpy.mean(csims)
    with open("tm_noise_{}.txt".format(activationThreshold), "a") as f:
      f.write(str(noise)+", " + str(correlation) + ", " + str(similarity) + ", " + str(csim) + ", " + str(num_trials) + "\n")
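
A minimal sketch of the three metrics named in the docstring (Jaccard set
similarity, Pearson's r via numpy.corrcoef, cosine similarity via scipy),
computed for a single toy timestep; the cell sets and cell-space size are
illustrative:

import numpy
from scipy.spatial.distance import cosine

predicted = {1, 3, 5, 7}
active = {3, 5, 7, 9}

# Jaccard: |intersection| / |union| = 3 / 5 = 0.6
jaccard = len(predicted & active) / float(len(predicted | active))

# Dense binary vectors over a toy cell space of 12 cells
dense_predicted = numpy.zeros(12)
dense_predicted[list(predicted)] = 1.0
dense_active = numpy.zeros(12)
dense_active[list(active)] = 1.0

r = numpy.corrcoef(dense_predicted, dense_active)[0, 1]  # Pearson's r
csim = 1 - cosine(dense_predicted, dense_active)         # 3 / 4 = 0.75
print(jaccard, r, csim)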
Example #8
def run_tm_union_experiment(dim=4000,
                            cellsPerColumn=1,
                            num_active=40,
                            activationThreshold=5,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=5,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_branches_range=range(50, 51, 1),
                            onset_length=5,
                            training_iters=10,
                            num_trials=10000,
                            automatic_threshold=True,
                            save_results=True):
  """
  Run an experiment tracking the performance of the temporal memory given
  different input dimensions.  The number of active cells is kept fixed, so we
  are in effect varying the sparsity of the input.   We track performance by
  comparing the cells predicted to be active with the cells actually active in
  the sequence without noise at every timestep, and averaging across timesteps.
  Three metrics are used, correlation (Pearson's r, by numpy.corrcoef),
  set similarity (Jaccard index) and cosine similarity (using
  scipy.spatial.distance.cosine).  The Jaccard set similarity is the
  canonical metric used in the paper, but all three tend to produce very similar
  results.

  Output is written to tm_dim_{num_active}.txt, including sample size.

  We tested two different dimension settings, 2000 and 4000.
  """
  if automatic_threshold:
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)

  for num_branches in num_branches_range:
    overlaps = []
    surprises = []
    csims = []
    for trial in range(num_trials):
      if (trial + 1) % 100 == 0:
        print(trial + 1)
      tm = TM(columnDimensions=(dim,),
              cellsPerColumn=cellsPerColumn,
              activationThreshold=activationThreshold,
              initialPermanence=initialPermanence,
              connectedPermanence=connectedPermanence,
              minThreshold=minThreshold,
              maxNewSynapseCount=maxNewSynapseCount,
              permanenceIncrement=permanenceIncrement,
              permanenceDecrement=permanenceDecrement,
              predictedSegmentDecrement=predictedSegmentDecrement,
              maxSegmentsPerCell=maxSegmentsPerCell,
              maxSynapsesPerSegment=maxSynapsesPerSegment,
              seed=seed)

      datapoints = []
      canonical_active_cells = []
      onset = generate_evenly_distributed_data_sparse(
          dim=dim, num_active=num_active, num_samples=onset_length)

      for branch in range(num_branches):
        datapoint = numpy.random.choice(dim, num_active, replace = False)
        datapoints.append(datapoint)
        for i in range(training_iters):
          for j in range(onset.nRows()):
            activeColumns = set(onset.rowNonZeros(j)[0])
            tm.compute(activeColumns, learn = True)
          tm.compute(datapoint, learn=True)
          tm.reset()

      for j in range(onset.nRows()):
        activeColumns = set(onset.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn = False)
      predicted_cells = tm.getPredictiveCells()

      datapoint = numpy.random.choice(dim, num_active, replace = False)
      overlap = (1. * len(set(predicted_cells) & set(datapoint)))/len(datapoint)
      surprise = len(datapoint) - len(set(predicted_cells) & set(datapoint))
      dense_predicted_cells = numpy.zeros((dim*cellsPerColumn,))
      for cell in predicted_cells:
        dense_predicted_cells[cell] = 1.
      dense_active_cells = numpy.zeros((dim*cellsPerColumn,))
      for cell in datapoint:
        dense_active_cells[cell] = 1.
      csim = 1 - cosine(dense_predicted_cells, dense_active_cells)
      csim = numpy.nan_to_num(csim)
      overlaps.append(overlap)
      surprises.append(surprise)
      csims.append(csim)

    overlap = numpy.mean(overlaps)
    surprise = numpy.mean(surprises)
    csim = numpy.mean(csims)
    print(dim, overlap, surprise, csim)
    if save_results:
      with open("tm_union_n{}_a{}_c{}.txt".format(dim, num_active, cellsPerColumn), "a") as f:
        f.write(str(num_branches)+", " + str(overlap) + ", " + str(surprise) + ", " + str(csim) + ", " + str(num_trials) + "\n")
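
The overlap and surprise metrics above reduce to simple set counts; a toy
illustration (hypothetical predicted cells and datapoint, with cellsPerColumn=1
so cell and column indices coincide):

predicted_cells = {2, 5, 7, 11}
datapoint = [5, 7, 8, 11]
hits = set(predicted_cells) & set(datapoint)    # {5, 7, 11}
overlap = len(hits) / float(len(datapoint))     # 3 / 4 = 0.75
surprise = len(datapoint) - len(hits)           # 1 unpredicted index
print(overlap, surprise)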
Example #9
def runHotgym(numRecords):
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    # timeOfDayEncoder = DateEncoder(
    #   timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    # weekendEncoder = DateEncoder(
    #   weekend=enParams["timestamp_weekend"]["weekend"])
    # scalarEncoder = RandomDistributedScalarEncoder(
    #   enParams["consumption"]["resolution"])

    rdseParams = RDSE_Parameters()
    rdseParams.size = 100
    rdseParams.sparsity = .10
    rdseParams.radius = 10
    scalarEncoder = RDSE(rdseParams)

    # encodingWidth = (timeOfDayEncoder.getWidth()
    #                  + weekendEncoder.getWidth()
    #                  + scalarEncoder.getWidth())

    encodingWidth = scalarEncoder.size

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)
        next(reader)

        for count, record in enumerate(reader):

            if count >= numRecords: break

            # Convert the timestamp string into a Python datetime object.
            dateString = datetime.datetime.strptime(record[0],
                                                    "%m/%d/%y %H:%M")
            # Convert the consumption value string into a float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for the encoders
            # to populate.
            # timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            # weekendBits = numpy.zeros(weekendEncoder.getWidth())
            # consumptionBits = numpy.zeros(scalarEncoder.size)
            consumptionBits = SDR(scalarEncoder.size)

            # Now we call the encoders to create bit representations for each value.
            # timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
            # weekendEncoder.encodeIntoArray(dateString, weekendBits)
            scalarEncoder.encode(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for Spatial
            # Pooling.
            # encoding = numpy.concatenate(
            #   [timeOfDayBits, weekendBits, consumptionBits]
            # )
            encoding = consumptionBits

            # Create an array to represent active columns, all initially zero. This
            # will be populated by the compute method below. It must have the same
            # dimensions as the Spatial Pooler.
            # activeColumns = numpy.zeros(spParams["columnCount"])
            activeColumns = SDR(spParams["columnCount"])

            encodingIn = numpy.uint32(encoding.dense)
            minicolumnsOut = numpy.uint32(activeColumns.dense)
            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encodingIn, True, minicolumnsOut)
            activeColumnIndices = numpy.nonzero(minicolumnsOut)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()
            print(len(activeCells))
            results.append(activeCells)

        return results
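
A minimal sketch of the htm.core encoder calls used above, with the same RDSE
parameters; the input value is illustrative:

from htm.bindings.encoders import RDSE, RDSE_Parameters
from htm.bindings.sdr import SDR

params = RDSE_Parameters()
params.size = 100
params.sparsity = .10
params.radius = 10
encoder = RDSE(params)

bits = SDR(encoder.size)
encoder.encode(21.5, bits)  # populates the SDR in place
print(bits.sparse)          # indices of the active bits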
Example #10
# NOTE: this example is truncated upstream. The import and constructor opening
# below are a reconstruction (columnDimensions assumed) so the snippet parses;
# seq1 and seq2 are binary sequence matrices defined earlier in the source.
import random

tm = TemporalMemory(
  columnDimensions=(2048,),
  cellsPerColumn=8,
  initialPermanence=0.21,
  connectedPermanence=0.3,
  minThreshold=15,
  maxNewSynapseCount=40,
  permanenceIncrement=0.1,
  permanenceDecrement=0.1,
  activationThreshold=15,
  predictedSegmentDecrement=0.01,
)

for t in range(75):
  rnd = random.randrange(2)
  for k in range(4):
    if rnd == 0:
      tm.compute(set(seq1[k][:].nonzero()[0].tolist()), learn=True)
    else:
      tm.compute(set(seq2[k][:].nonzero()[0].tolist()), learn=True)

print ""
print "-"*50
print "We now have a look at the output of the TM when presented with the individual"
print "characters A, B, C, D, X, and Y. We might observe simultaneous predictions when"
print "presented with character D (predicting A and X), character Y (predicting A and X),"
print "and when presented with character C (predicting D and Y)."
print "N.B. Due to the stochasticity of this script, we might not observe simultaneous"
print "predictions in *all* the aforementioned characters."
print "-"*50
print ""

showPredictions()
Example #11
def run_tm_dim_experiment(test_dims=range(300, 3100, 100),
                          cellsPerColumn=1,
                          num_active=256,
                          activationThreshold=10,
                          initialPermanence=0.8,
                          connectedPermanence=0.50,
                          minThreshold=10,
                          maxNewSynapseCount=20,
                          permanenceIncrement=0.05,
                          permanenceDecrement=0.00,
                          predictedSegmentDecrement=0.000,
                          maxSegmentsPerCell=4000,
                          maxSynapsesPerSegment=255,
                          seed=42,
                          num_samples=1000,
                          sequence_length=20,
                          training_iters=1,
                          automatic_threshold=False,
                          save_results=True):
  """
  Run an experiment tracking the performance of the temporal memory given
  different input dimensions.  The number of active cells is kept fixed, so we
  are in effect varying the sparsity of the input.   We track performance by
  comparing the cells predicted to be active with the cells actually active in
  the sequence without noise at every timestep, and averaging across timesteps.
  Three metrics are used, correlation (Pearson's r, by numpy.corrcoef),
  set similarity (Jaccard index) and cosine similarity (using
  scipy.spatial.distance.cosine).  The Jaccard set similarity is the
  canonical metric used in the paper, but all three tend to produce very similar
  results.

  Output is written to tm_dim_{num_active}.txt, including sample size.

  In our experiments, we used the set similarity metric (third column in output)
  along with three different values for num_active, 64, 128 and 256.  We used
  dimensions from 300 to 2900 in each case, testing every 100.  1000 sequences
  of length 20 were passed to the TM in each trial.
  """
  if automatic_threshold:
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    print("Using activation threshold {}".format(activationThreshold))

  for dim in test_dims:
    tm = TM(columnDimensions=(dim,),
            cellsPerColumn=cellsPerColumn,
            activationThreshold=activationThreshold,
            initialPermanence=initialPermanence,
            connectedPermanence=connectedPermanence,
            minThreshold=minThreshold,
            maxNewSynapseCount=maxNewSynapseCount,
            permanenceIncrement=permanenceIncrement,
            permanenceDecrement=permanenceDecrement,
            predictedSegmentDecrement=predictedSegmentDecrement,
            maxSegmentsPerCell=maxSegmentsPerCell,
            maxSynapsesPerSegment=maxSynapsesPerSegment,
            seed=seed)

    # NOTE: overrides the minThreshold passed to the constructor above.
    tm.setMinThreshold(1000)

    datapoints = []
    canonical_active_cells = []

    for sample in range(num_samples):
      if (sample + 1) % 10 == 0:
        print(sample + 1)
      data = generate_evenly_distributed_data_sparse(
          dim=dim, num_active=num_active, num_samples=sequence_length)
      datapoints.append(data)
      for i in range(training_iters):
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn = True)
        tm.reset()

      current_active_cells = []
      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn = True)
        current_active_cells.append(tm.getActiveCells())
      canonical_active_cells.append(current_active_cells)
      tm.reset()

    # Now that the TM has been trained, check its performance on each sequence with noise added.
    correlations = []
    similarities = []
    csims = []
    for datapoint, active_cells in zip(datapoints, canonical_active_cells):
      data = copy.deepcopy(datapoint)
      predicted_cells = []

      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn = False)
        predicted_cells.append(tm.getPredictiveCells())
      tm.reset()

      similarity = [len(set(predicted) & set(active)) /
                    float(len(set(predicted) | set(active)))
                    for predicted, active in
                    zip(predicted_cells[:-1], active_cells[1:])]
      dense_predicted_cells = convert_cell_lists_to_dense(
          dim * cellsPerColumn, predicted_cells[:-1])
      dense_active_cells = convert_cell_lists_to_dense(
          dim * cellsPerColumn, active_cells[1:])

      correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                     for predicted, active in
                     zip(dense_predicted_cells, dense_active_cells)]

      csim = [1 - cosine(predicted, active)
              for predicted, active in
              zip(dense_predicted_cells, dense_active_cells)]

      correlation = numpy.nan_to_num(correlation)
      csim = numpy.nan_to_num(csim)
      correlations.append(numpy.mean(correlation))
      similarities.append(numpy.mean(similarity))
      csims.append(numpy.mean(csim))



    correlation = numpy.mean(correlations)
    similarity = numpy.mean(similarities)
    csim = numpy.mean(csims)
    print(dim, correlation, similarity, csim)
    if save_results:
      with open("tm_dim_{}.txt".format(num_active), "a") as f:
        f.write(str(dim) + ", " + str(correlation) + ", " + str(similarity) +
                ", " + str(csim) + ", " + str(num_samples) + "\n")
Example #13
for i, s_id in enumerate(seqs_train):
    s = uniqueSequences[s_id]
    #s = s[0:-1]
    #SP_SDR_train = numpy.zeros((seqLength,200))
    SP_SDR_train = numpy.zeros((seqLength, 6))
    SP_SDR_train = SP_SDR_train.astype(numpy.uint32)
    for j, symbol in enumerate(s):
        #print "symbol=", symbol
        #print "j=", j
        SP_SDR_train[j] = SDR_activity_codes[symbol]
    SP_SDR_seqs_train.append(SP_SDR_train)

    # now train temporal memory
    for j, SDR in enumerate(SP_SDR_train):
        tp.compute(SDR, learn=True)

    # reset the TM between sequences so each is learned independently
    tp.reset()


# Now, check predictions of the TM
def printTemporalPredictions(SP_activeCol_seq, symbol_seq):
    for i, s in enumerate(SP_activeCol_seq):
        tp.compute(s, learn=False)
        print("TP Winner cells", tp.getWinnerCells())
        print("TP Predictive cells", tp.getPredictiveCells())
    tp.reset()
    print("\n")

Example #14
class BaseNetwork(object):
  def __init__(self, inputMin=None, inputMax=None, runSanity=False):

    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None


  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)


  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue, 'label': label})


  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)


  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's 
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets)


  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]


  def getSpOutputNZ(self):
    return self.spOutputNZ


  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()


  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()


  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells


  def getRawAnomalyScore(self):
    return self.anomalyScore
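
Tracing getEncoderResolution with assumed inputs makes the arithmetic concrete
(inputMin=10 and inputMax=50 are illustrative):

inputMin, inputMax = 10.0, 50.0
rangePadding = abs(inputMax - inputMin) * 0.2          # 8.0
minVal = inputMin - rangePadding                       # 2.0
maxVal = inputMax + rangePadding                       # 58.0
resolution = max(0.0001, (maxVal - minVal) / 130.0)    # 56 / 130 ~= 0.431
print(resolution)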
Example #16
class BaseNetwork(object):
  def __init__(self, inputMin=None, inputMax=None, runSanity=False):

    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None


  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 5.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)


  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue, 'label': label})


  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)


  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's 
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets)


  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]


  def getSpOutputNZ(self):
    return self.spOutputNZ


  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()


  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()


  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells


  def getRawAnomalyScore(self):
    return self.anomalyScore