def run_tm_noise_experiment(dim=2048,
                            cellsPerColumn=1,
                            num_active=40,
                            activationThreshold=16,
                            initialPermanence=0.8,
                            connectedPermanence=0.50,
                            minThreshold=16,
                            maxNewSynapseCount=20,
                            permanenceIncrement=0.05,
                            permanenceDecrement=0.00,
                            predictedSegmentDecrement=0.000,
                            maxSegmentsPerCell=255,
                            maxSynapsesPerSegment=255,
                            seed=42,
                            num_samples=1,
                            num_trials=1000,
                            sequence_length=20,
                            training_iters=1,
                            automatic_threshold=False,
                            noise_range=range(0, 100, 5)):

  """
  Run an experiment tracking the performance of the temporal memory given
  noise.  The number of active cells and the dimensions of the TM are
  fixed. We track performance by comparing the cells predicted to be
  active with the cells actually active in the sequence without noise at
  every timestep, and averaging across timesteps. Three metrics are used,
  correlation (Pearson's r, by numpy.corrcoef), set similarity (Jaccard
  index) and cosine similarity (using scipy.spatial.distance.cosine). The
  Jaccard set similarity is the canonical metric used in the paper, but
  all three metrics tend to produce very similar results.

  Typically, this experiment is run to test the influence of activation
  threshold on noise tolerance, with multiple different thresholds tested.
  However, this experiment could also be used to examine the influence of
  factors such as sparsity and sequence length.

  Output is written to tm_noise_{threshold}}.txt, including sample size.

  We used three different activation threshold settings, 8, 12 and 16, mirroring
  the parameters used in the Poirazi neuron model experiment.
  """
  if automatic_threshold:
    # Integer division keeps the thresholds integral; with the defaults this
    # gives min(40 // 2, 20 // 2) == 10.
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)

  for noise in noise_range:
    print(noise)
    # Accumulate metrics across all trials at this noise level, so the means
    # written below reflect every trial rather than only the last one.
    correlations = []
    similarities = []
    csims = []
    for trial in range(num_trials):
      tm = TM(columnDimensions=(dim,),
              cellsPerColumn=cellsPerColumn,
              activationThreshold=activationThreshold,
              initialPermanence=initialPermanence,
              connectedPermanence=connectedPermanence,
              minThreshold=minThreshold,
              maxNewSynapseCount=maxNewSynapseCount,
              permanenceIncrement=permanenceIncrement,
              permanenceDecrement=permanenceDecrement,
              predictedSegmentDecrement=predictedSegmentDecrement,
              maxSegmentsPerCell=maxSegmentsPerCell,
              maxSynapsesPerSegment=maxSynapsesPerSegment)
      # seed is omitted above so that each trial builds a differently
      # initialized TM.

      datapoints = []
      canonical_active_cells = []

      for sample in range(num_samples):
        data = generate_evenly_distributed_data_sparse(dim=dim,
                                                       num_active=num_active,
                                                       num_samples=sequence_length)
        datapoints.append(data)
        for i in range(training_iters):
          for j in range(data.nRows()):
            activeColumns = set(data.rowNonZeros(j)[0])
            tm.compute(activeColumns, learn=True)
          tm.reset()

        current_active_cells = []
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn=True)
          current_active_cells.append(tm.getActiveCells())
        canonical_active_cells.append(current_active_cells)
        tm.reset()

      # Now that the TM has been trained, check its performance on each
      # sequence with noise added.  The metric lists initialized above
      # accumulate across trials.
      for datapoint, active_cells in zip(datapoints, canonical_active_cells):
        data = copy.deepcopy(datapoint)
        apply_noise(data, noise)

        predicted_cells = []

        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn=False)
          predicted_cells.append(tm.getPredictiveCells())

        similarity = [len(set(predicted) & set(active)) /
                      float(len(set(predicted) | set(active)))
                      for predicted, active in
                      zip(predicted_cells[:-1], active_cells[1:])]
        # The dense vectors must cover every cell in this TM, i.e.
        # dim * cellsPerColumn entries, rather than a hardcoded 2048 * 32.
        dense_predicted_cells = convert_cell_lists_to_dense(dim * cellsPerColumn,
                                                            predicted_cells[:-1])
        dense_active_cells = convert_cell_lists_to_dense(dim * cellsPerColumn,
                                                         active_cells[1:])
        correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                       for predicted, active in
                       zip(dense_predicted_cells, dense_active_cells)]
        csim = [1 - cosine(predicted, active)
                for predicted, active in
                zip(dense_predicted_cells, dense_active_cells)]

        correlation = numpy.nan_to_num(correlation)
        csim = numpy.nan_to_num(csim)
        correlations.append(numpy.mean(correlation))
        similarities.append(numpy.mean(similarity))
        csims.append(numpy.mean(csim))

    correlation = numpy.mean(correlations)
    similarity = numpy.mean(similarities)
    csim = numpy.mean(csims)
    with open("tm_noise_{}.txt".format(activationThreshold), "a") as f:
      f.write("{}, {}, {}, {}, {}\n".format(
          noise, correlation, similarity, csim, num_trials))
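

# A self-contained sketch of the three metrics named in the docstring above
# (Jaccard set similarity, Pearson correlation via numpy.corrcoef, and cosine
# similarity via scipy.spatial.distance.cosine), applied to two toy cell
# sets.  The helper name below is hypothetical; the experiment applies the
# same calls to the full dense cell vectors.
def _demo_prediction_metrics():
  import numpy
  from scipy.spatial.distance import cosine

  predicted = {1, 4, 7, 9}
  active = {1, 4, 8, 9}

  # Jaccard: |intersection| / |union| = 3 / 5 = 0.6
  jaccard = float(len(predicted & active)) / len(predicted | active)

  # Densify for the vector-based metrics.
  dense_predicted = numpy.zeros(12)
  dense_active = numpy.zeros(12)
  dense_predicted[list(predicted)] = 1.0
  dense_active[list(active)] = 1.0
  pearson = numpy.corrcoef(numpy.asarray([dense_predicted, dense_active]))[0, 1]
  csim = 1.0 - cosine(dense_predicted, dense_active)  # 3 / 4 = 0.75

  print(jaccard, pearson, csim)

# As the docstring notes, the experiment is typically swept over activation
# thresholds, e.g. (hypothetical driver, not in the original source):
#   for threshold in (8, 12, 16):
#     run_tm_noise_experiment(activationThreshold=threshold,
#                             minThreshold=threshold)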
Example #2
class BaseNetwork(object):
  def __init__(self, inputMin=None, inputMax=None, runSanity=False):

    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None


  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 5.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)


  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this must be captured here, before the TM runs, so that the
    # current timestep's activity is compared against the previous timestep's
    # predictions.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run the SP output through the temporal memory, honoring learningMode.
    self.tm.compute(self.spOutputNZ, learn=learningMode)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {
                                   'value': scalarValue,
                                   'label': label
                                 })


  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)


  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's 
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets)
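
  # Worked example with illustrative numbers (not from the original source):
  # inputMin=0 and inputMax=100 give rangePadding = 20, minVal = -20, and
  # maxVal = 120, so the resolution is max(0.0001, 140 / 130.0) ~= 1.077.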


  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]


  def getSpOutputNZ(self):
    return self.spOutputNZ


  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()


  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()


  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells


  def getRawAnomalyScore(self):
    return self.anomalyScore
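

# The two module-level helpers used by handleRecord() above
# (_computePredictedActiveCells and _computeAnomalyScore) are not shown in
# this example.  The sketches below are reconstructions inferred only from how
# they are called: the predicted-active cells are the intersection of the
# current active cells with the previous timestep's predictive cells, and the
# raw anomaly score is the fraction of active columns containing no previously
# predictive cell.  Treat them as illustrative, not the original
# implementations.
def _computePredictedActiveCells(activeCells, prevPredictiveCells):
  predictive = set(prevPredictiveCells)
  return [cell for cell in activeCells if cell in predictive]


def _computeAnomalyScore(activeColumnsNZ, prevPredictiveCells, cellsPerColumn):
  if len(activeColumnsNZ) == 0:
    return 0.0
  predictedColumns = set(cell // cellsPerColumn for cell in prevPredictiveCells)
  unpredicted = [col for col in activeColumnsNZ if col not in predictedColumns]
  return float(len(unpredicted)) / len(activeColumnsNZ)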
Example #3
def run_tm_dim_experiment(test_dims=range(300, 3100, 100),
                          cellsPerColumn=1,
                          num_active=256,
                          activationThreshold=10,
                          initialPermanence=0.8,
                          connectedPermanence=0.50,
                          minThreshold=10,
                          maxNewSynapseCount=20,
                          permanenceIncrement=0.05,
                          permanenceDecrement=0.00,
                          predictedSegmentDecrement=0.000,
                          maxSegmentsPerCell=4000,
                          maxSynapsesPerSegment=255,
                          seed=42,
                          num_samples=1000,
                          sequence_length=20,
                          training_iters=1,
                          automatic_threshold=False,
                          save_results=True):
  """
  Run an experiment tracking the performance of the temporal memory given
  different input dimensions.  The number of active cells is kept fixed, so we
  are in effect varying the sparsity of the input.   We track performance by
  comparing the cells predicted to be active with the cells actually active in
  the sequence without noise at every timestep, and averaging across timesteps.
  Three metrics are used, correlation (Pearson's r, by numpy.corrcoef),
  set similarity (Jaccard index) and cosine similarity (using
  scipy.spatial.distance.cosine).  The Jaccard set similarity is the
  canonical metric used in the paper, but all three tend to produce very similar
  results.

  Output is written to tm_dim_{num_active}.txt, including sample size.

  In our experiments, we used the set similarity metric (third column in output)
  along with three different values for num_active, 64, 128 and 256.  We used
  dimensions from 300 to 2900 in each case, testing every 100.  1000 sequences
  of length 20 were passed to the TM in each trial.
  """
  if automatic_threshold:
    activationThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    minThreshold = min(num_active // 2, maxNewSynapseCount // 2)
    print("Using activation threshold {}".format(activationThreshold))

  for dim in test_dims:
    tm = TM(columnDimensions=(dim,),
            cellsPerColumn=cellsPerColumn,
            activationThreshold=activationThreshold,
            initialPermanence=initialPermanence,
            connectedPermanence=connectedPermanence,
            minThreshold=minThreshold,
            maxNewSynapseCount=maxNewSynapseCount,
            permanenceIncrement=permanenceIncrement,
            permanenceDecrement=permanenceDecrement,
            predictedSegmentDecrement=predictedSegmentDecrement,
            maxSegmentsPerCell=maxSegmentsPerCell,
            maxSynapsesPerSegment=maxSynapsesPerSegment,
            seed=seed)

    # Raising minThreshold beyond any possible overlap means no segment is
    # ever "matching", so learning always grows a new segment per transition.
    tm.setMinThreshold(1000)

    datapoints = []
    canonical_active_cells = []

    for sample in range(num_samples):
      if (sample + 1) % 10 == 0:
        print(sample + 1)
      data = generate_evenly_distributed_data_sparse(dim=dim,
                                                     num_active=num_active,
                                                     num_samples=sequence_length)
      datapoints.append(data)
      for i in range(training_iters):
        for j in range(data.nRows()):
          activeColumns = set(data.rowNonZeros(j)[0])
          tm.compute(activeColumns, learn=True)
        tm.reset()

      current_active_cells = []
      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn=True)
        current_active_cells.append(tm.getActiveCells())
      canonical_active_cells.append(current_active_cells)
      tm.reset()

    # Now that the TM has been trained, check its performance on each sequence
    # (no noise is added in this experiment).
    correlations = []
    similarities = []
    csims = []
    for datapoint, active_cells in zip(datapoints, canonical_active_cells):
      data = copy.deepcopy(datapoint)
      predicted_cells = []

      for j in range(data.nRows()):
        activeColumns = set(data.rowNonZeros(j)[0])
        tm.compute(activeColumns, learn=False)
        predicted_cells.append(tm.getPredictiveCells())
      tm.reset()

      similarity = [len(set(predicted) & set(active)) /
                    float(len(set(predicted) | set(active)))
                    for predicted, active in
                    zip(predicted_cells[:-1], active_cells[1:])]
      dense_predicted_cells = convert_cell_lists_to_dense(dim * cellsPerColumn,
                                                          predicted_cells[:-1])
      dense_active_cells = convert_cell_lists_to_dense(dim * cellsPerColumn,
                                                       active_cells[1:])
      correlation = [numpy.corrcoef(numpy.asarray([predicted, active]))[0, 1]
                     for predicted, active in
                     zip(dense_predicted_cells, dense_active_cells)]
      csim = [1 - cosine(predicted, active)
              for predicted, active in
              zip(dense_predicted_cells, dense_active_cells)]

      correlation = numpy.nan_to_num(correlation)
      csim = numpy.nan_to_num(csim)
      correlations.append(numpy.mean(correlation))
      similarities.append(numpy.mean(similarity))
      csims.append(numpy.mean(csim))

    correlation = numpy.mean(correlations)
    similarity = numpy.mean(similarities)
    csim = numpy.mean(csims)
    print(dim, correlation, similarity, csim)
    if save_results:
      with open("tm_dim_{}.txt".format(num_active), "a") as f:
        f.write("{}, {}, {}, {}, {}\n".format(
            dim, correlation, similarity, csim, num_samples))
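

# generate_evenly_distributed_data_sparse() and convert_cell_lists_to_dense()
# are project helpers that are not shown in this example.  Based only on how
# the latter is called above, a minimal sketch might look like the following
# (an illustrative reconstruction, not the original implementation):
def convert_cell_lists_to_dense(length, cell_lists):
  """Turn each list of active cell indices into a dense binary row vector."""
  import numpy
  dense = numpy.zeros((len(cell_lists), length))
  for i, cells in enumerate(cell_lists):
    dense[i, list(cells)] = 1.0
  return dense

# As the docstring notes, the experiment was run at several sparsities, e.g.
# (hypothetical driver, not in the original source):
#   for n in (64, 128, 256):
#     run_tm_dim_experiment(num_active=n)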
Example #4
def runHotgym(numRecords):
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    # timeOfDayEncoder = DateEncoder(
    #   timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    # weekendEncoder = DateEncoder(
    #   weekend=enParams["timestamp_weekend"]["weekend"])
    # scalarEncoder = RandomDistributedScalarEncoder(
    #   enParams["consumption"]["resolution"])

    rdseParams = RDSE_Parameters()
    rdseParams.size = 100
    rdseParams.sparsity = .10
    rdseParams.radius = 10
    scalarEncoder = RDSE(rdseParams)

    # encodingWidth = (timeOfDayEncoder.getWidth()
    #                  + weekendEncoder.getWidth()
    #                  + scalarEncoder.getWidth())

    encodingWidth = scalarEncoder.size

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)  # skip the field-types header row
        next(reader)  # skip the field-flags header row

        for count, record in enumerate(reader):

            if count >= numRecords: break

            # Convert the date string into a Python datetime object.  (Unused
            # below: the date encoders are commented out.)
            dateValue = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
            # Convert the consumption value string into a float.
            consumption = float(record[1])

            # To encode, we need to provide zero-filled numpy arrays for the encoders
            # to populate.
            # timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            # weekendBits = numpy.zeros(weekendEncoder.getWidth())
            # consumptionBits = numpy.zeros(scalarEncoder.size)
            consumptionBits = SDR(scalarEncoder.size)

            # Now we call the encoders to create bit representations for each value.
            # timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
            # weekendEncoder.encodeIntoArray(dateString, weekendBits)
            scalarEncoder.encode(consumption, consumptionBits)

            # Concatenate all these encodings into one large encoding for Spatial
            # Pooling.
            # encoding = numpy.concatenate(
            #   [timeOfDayBits, weekendBits, consumptionBits]
            # )
            encoding = consumptionBits

            # Create an array to represent active columns, all initially zero. This
            # will be populated by the compute method below. It must have the same
            # dimensions as the Spatial Pooler.
            # activeColumns = numpy.zeros(spParams["columnCount"])
            activeColumns = SDR(spParams["columnCount"])

            encodingIn = numpy.uint32(encoding.dense)
            minicolumnsOut = numpy.uint32(activeColumns.dense)
            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encodingIn, True, minicolumnsOut)
            activeColumnIndices = numpy.nonzero(minicolumnsOut)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()
            print(len(activeCells))
            results.append(activeCells)

        return results
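

# runHotgym() relies on module-level imports and on the _PARAMS_PATH and
# _INPUT_FILE_PATH constants, none of which are shown in this example.  Below
# is a minimal sketch of that setup, assuming the htm.core package layout;
# the two path values are placeholders, not the originals:
#
#   import csv
#   import datetime
#   import numpy
#   import yaml
#
#   from htm.bindings.sdr import SDR
#   from htm.bindings.encoders import RDSE, RDSE_Parameters
#   from htm.bindings.algorithms import SpatialPooler, TemporalMemory
#
#   _PARAMS_PATH = "model_params.yaml"  # placeholder
#   _INPUT_FILE_PATH = "gymdata.csv"    # placeholder
#
#   results = runHotgym(numRecords=100)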