 def testVerbosity(self):
   """
   Test that nothing is printed out when verbosity=0
   """
   _stdout = sys.stdout
   sys.stdout = _stringio = StringIO()
   encoder = RandomDistributedScalarEncoder(name="mv", resolution=1.0,
                                            verbosity=0)
   output = numpy.zeros(encoder.getWidth(), dtype=defaultDtype)
   encoder.encodeIntoArray(23.0, output)
   encoder.getBucketIndices(23.0)
   sys.stdout = _stdout
   self.assertEqual(len(_stringio.getvalue()), 0,
                    "zero verbosity doesn't lead to zero output")
Example #3
    def runSimpleSequence(self, resets, repetitions=1):
        scalarEncoder = RandomDistributedScalarEncoder(0.88, n=2048, w=41)

        instances = self._createInstances(cellsPerColumn=32)
        times = [0.0] * len(self.contestants)

        duration = 10000 * repetitions
        increment = 4
        sequenceLength = 25
        sequence = (i % (sequenceLength * 4)
                    for i in xrange(0, duration * increment, increment))
        t = 0

        encodedValue = numpy.zeros(2048, dtype=numpy.int32)

        for value in sequence:
            scalarEncoder.encodeIntoArray(value, output=encodedValue)
            activeBits = encodedValue.nonzero()[0]

            for i in xrange(len(self.contestants)):
                tmInstance = instances[i]
                computeFn = self.contestants[i][2]

                if resets:
                    if value == 0:
                        tmInstance.reset()

                start = time.clock()
                computeFn(tmInstance, encodedValue, activeBits)
                times[i] += time.clock() - start

            printProgressBar(t, duration, 50)
            t += 1

        clearProgressBar(50)

        results = []
        for i in xrange(len(self.contestants)):
            name = self.contestants[i][3]
            results.append((
                name,
                times[i],
            ))

        return results
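The generator above steps by `increment` and wraps at `sequenceLength * 4`, so the benchmark feeds a repeating ramp of 25 distinct values. A standalone check of that pattern:

increment, sequenceLength = 4, 25
ramp = [i % (sequenceLength * 4) for i in range(0, 30 * increment, increment)]
print(ramp)  # 0, 4, 8, ..., 96, then wraps back to 0, 4, ...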
Example #4
    def runHotgym(self, cellsPerColumn, repetitions=1):
        scalarEncoder = RandomDistributedScalarEncoder(0.88, n=2048, w=41)

        instances = self._createInstances(cellsPerColumn=cellsPerColumn)
        times = [0.0] * len(self.contestants)

        t = 0
        duration = HOTGYM_LENGTH * repetitions

        for _ in xrange(repetitions):
            with open(HOTGYM_PATH) as fin:
                reader = csv.reader(fin)
                reader.next()
                reader.next()
                reader.next()

                encodedValue = numpy.zeros(2048, dtype=numpy.int32)

                for timeStr, valueStr in reader:
                    value = float(valueStr)
                    scalarEncoder.encodeIntoArray(value, output=encodedValue)
                    activeBits = encodedValue.nonzero()[0]

                    for i in xrange(len(self.contestants)):
                        tmInstance = instances[i]
                        computeFn = self.contestants[i][2]

                        start = time.clock()
                        computeFn(tmInstance, encodedValue, activeBits)
                        times[i] += time.clock() - start

                    printProgressBar(t, duration, 50)
                    t += 1

        clearProgressBar(50)

        results = []
        for i in xrange(len(self.contestants)):
            name = self.contestants[i][3]
            results.append((
                name,
                times[i],
            ))

        return results
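Both benchmarks accumulate per-contestant time with `time.clock()`, which measures CPU time on POSIX but wall-clock time on Windows, and which was removed in Python 3.8. A port would swap in a monotonic timer, e.g.:

import timeit

start = timeit.default_timer()  # wall-clock, monotonic on Python 3
elapsed = timeit.default_timer() - start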
class DendriteDetector(AnomalyDetector):
    def initialize(self):
        # Keep track of value range for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None

        # Time of day encoder
        self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49),
                                            name='time_enc')
        # RDSE encoder for the time series value.
        minResolution = 0.001
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = self.inputMax + rangePadding
        numBuckets = 130
        resolution = max(minResolution, (maxVal - minVal) / numBuckets)
        self.value_enc = RandomDistributedScalarEncoder(resolution=resolution,
                                                        name='value_rdse')

        # Spatial Pooler.
        encodingWidth = (self.timeOfDayEncoder.getWidth() +
                         self.value_enc.getWidth())
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(2048, ),
            potentialPct=0.8,
            potentialRadius=encodingWidth,
            globalInhibition=1,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956,
            wrapAround=True,
        )

        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=20,
            initialPermanence=.5,  # Increased to connectedPermanence.
            connectedPermanence=.5,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            predictedSegmentDecrement=0.001,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=128,  # Changed meaning. Also see connections.topology[2]
            seed=1993,
        )

        # Initialize the anomaly likelihood object
        numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numentaLearningPeriod,
            estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
            reestimationPeriod=100,
        )

        self.age = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def handleRecord(self, inputData):
        """
    Argument inputData is {"value": instantaneous_value, "timestamp": pandas.Timestamp}
    Returns a tuple (anomalyScore, rawScore).

    Internally to NuPIC "anomalyScore" corresponds to "likelihood_score"
    and "rawScore" corresponds to "anomaly_score". Sorry about that.
    """

        # Check for spatial anomalies and update min/max values.
        value = inputData["value"]
        spatialAnomaly = False
        if self.minVal != self.maxVal:
            tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
            maxExpected = self.maxVal + tolerance
            minExpected = self.minVal - tolerance
            if value > maxExpected or value < minExpected:
                spatialAnomaly = True
        if self.maxVal is None or value > self.maxVal:
            self.maxVal = value
        if self.minVal is None or value < self.minVal:
            self.minVal = value

        # Run the HTM stack.  First Encoders.
        timestamp = inputData["timestamp"]
        timeOfDayBits = np.zeros(self.timeOfDayEncoder.getWidth())
        self.timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
        valueBits = np.zeros(self.value_enc.getWidth())
        self.value_enc.encodeIntoArray(value, valueBits)
        encoding = np.concatenate([timeOfDayBits, valueBits])
        # Spatial Pooler.
        activeColumns = np.zeros(self.sp.getNumColumns())
        self.sp.compute(encoding, True, activeColumns)
        activeColumnIndices = np.nonzero(activeColumns)[0]
        # Temporal Memory and Anomaly.
        predictions = self.tm.getPredictiveCells()
        predictedColumns = list(self.tm.mapCellsToColumns(predictions).keys())
        self.tm.compute(activeColumnIndices, learn=True)
        activeCells = self.tm.getActiveCells()
        rawScore = anomaly.computeRawAnomalyScore(activeColumnIndices,
                                                  predictedColumns)

        # Compute log(anomaly likelihood)
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            inputData["value"], rawScore, inputData["timestamp"])
        finalScore = logScore = self.anomalyLikelihood.computeLogLikelihood(
            anomalyScore)

        if spatialAnomaly:
            finalScore = 1.0

        if False:
            # Plot correlation of excitement versus compartmentalization.
            if self.age == 0:
                print("Correlation Plots ENABLED.")
            if False:
                start_age = 1000
                end_age = 1800
            else:
                start_age = 4000
                end_age = 7260
            if self.age == start_age:
                import correlation
                import random
                self.cor_samplers = []
                sampled_cells = []
                while len(self.cor_samplers) < 20:
                    n = random.choice(xrange(self.tm.numberOfCells()))
                    if n in sampled_cells:
                        continue
                    else:
                        sampled_cells.append(n)
                    neuron = self.tm.connections.dataForCell(n)
                    if neuron._roots:
                        c = correlation.CorrelationSampler(neuron._roots[0])
                        c.random_sample_points(100)
                        self.cor_samplers.append(c)
                print("Created %d Correlation Samplers" %
                      len(self.cor_samplers))
            if self.age >= start_age:
                for smplr in self.cor_samplers:
                    smplr.sample()
            if self.age == end_age:
                import matplotlib.pyplot as plt
                for idx, smplr in enumerate(self.cor_samplers):
                    if smplr.num_samples == 0:
                        print("No samples, plot not shown.")
                        continue
                    plt.figure("Sample %d" % idx)
                    smplr.plot(period=64)  # Different value!
                plt.show()

        if False:
            # Plot excitement of a typical detection on a dendrite.
            if self.age == 7265:
                #if self.age == 1800:
                import matplotlib.pyplot as plt
                import random
                from connections import SYN_CONNECTED_ACTIVE
                sampled_cells = set()
                for figure_num in xrange(40):
                    plt.figure("(%d)" % figure_num)
                    # Find an active cell to view.
                    cell = None
                    for attempt in range(100):
                        event = random.choice(self.tm.activeEvents)
                        cell = event.cell  # This is an integer.
                        if cell is not None and cell not in sampled_cells:
                            break
                    else:
                        break
                    sampled_cells.add(cell)
                    cell = self.tm.connections.dataForCell(cell)
                    # Organize the data.
                    EPSPs = []
                    excitement = []
                    distance_to_root = 0
                    segment_offsets = {}
                    branch = cell._roots[0]
                    while True:
                        segment_offsets[branch] = distance_to_root
                        distance_to_root += len(branch._synapses)
                        excitement.extend(branch.excitement)
                        for syn in branch._synapses:
                            if syn is None:
                                EPSPs.append(0)
                            else:
                                EPSPs.append(syn.state == SYN_CONNECTED_ACTIVE)
                        if branch.children:
                            branch = random.choice(branch.children)
                        else:
                            break
                    plt.plot(
                        np.arange(distance_to_root),
                        EPSPs,
                        'r',
                        np.arange(distance_to_root),
                        excitement,
                        'b',
                    )
                    plt.title(
                        "Dendrite Activation\n Horizontal line is activation threshold, Vertical lines are segment bifurcations"
                    )
                    plt.xlabel("Distance along Dendrite", )
                    plt.ylabel("EPSPs are Red, Excitement is Blue")
                    # Show lines where the excitement crosses thresholds.
                    plt.axhline(20, color='k')  # Hard coded parameter value.
                    for offset in segment_offsets.values():
                        if offset != 0:
                            plt.axvline(offset, color='k')
                print("\nShowing %d excitement plots." % len(sampled_cells))
                plt.show()

        self.age += 1

        return (finalScore, rawScore)
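The `computeRawAnomalyScore` call above reduces to the fraction of currently active columns that the previous timestep failed to predict. A numpy-only sketch of that formula, including the no-active-columns convention nupic uses:

import numpy as np

def rawAnomaly(activeColumns, prevPredictedColumns):
    # Fraction of active columns that no previously-predictive cell covered.
    active = set(np.asarray(activeColumns).tolist())
    predicted = set(np.asarray(prevPredictedColumns).tolist())
    if not active:
        return 0.0
    return len(active - predicted) / float(len(active))

print(rawAnomaly([1, 2, 3, 4], [3, 4, 5]))  # 0.5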
def runHotgym():

  timeOfDayEncoder = DateEncoder(timeOfDay=(21,1))
  weekendEncoder = DateEncoder(weekend=21)
  scalarEncoder = RandomDistributedScalarEncoder(0.88)

  encodingWidth = timeOfDayEncoder.getWidth() \
    + weekendEncoder.getWidth() \
    + scalarEncoder.getWidth()

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(2048,),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=0.85,
    # This means that the input space has no topology.
    globalInhibition=True,
    localAreaDensity=-1.0,
    # Roughly 2%, given that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195).
    numActiveColumnsPerInhArea=40.0,
    # How quickly synapses grow and degrade.
    synPermInactiveDec=0.005,
    synPermActiveInc=0.04,
    synPermConnected=0.1,
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=3.0,
    # Random number generator seed.
    seed=1956,
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(2048, ),
    # How many cells in each mini-column.
    cellsPerColumn=32,
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=16,
    initialPermanence=0.21,
    connectedPermanence=0.5,
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=12,
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=20,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=128,
    maxSynapsesPerSegment=32,
    seed=1960
  )

  classifier = SDRClassifierFactory.create()

  with open(_INPUT_FILE_PATH) as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):
      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(2048)

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      probability, value = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(value, probability * 100))
Example #9
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(spParams["columnCount"],),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=spParams["potentialPct"],
    # This means that the input space has no topology.
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    # Roughly 2%, given that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195).
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    # How quickly synapses grow and degrade.
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=spParams["boostStrength"],
    # Random number generator seed.
    seed=spParams["seed"],
    # TODO: is this useful?
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(tmParams["columnCount"],),
    # How many cells in each mini-column.
    cellsPerColumn=tmParams["cellsPerColumn"],
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # TODO: This comes from the SP params, is this normal?
    connectedPermanence=spParams["synPermConnected"],
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=tmParams["minThreshold"],
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):

      if count >= numRecords: break

      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
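This variant reads every algorithm parameter from a YAML file instead of hard-coding them. A sketch of the structure `_PARAMS_PATH` must provide, limited to the keys the function actually reads; the placeholder values mirror the hard-coded example above, not necessarily the tuned hotgym parameters:

modelParams = {
    "sensorParams": {"encoders": {
        "timestamp_timeOfDay": {"timeOfDay": [21, 1]},
        "timestamp_weekend": {"weekend": 21},
        "consumption": {"resolution": 0.88},
    }},
    "spParams": {"columnCount": 2048, "potentialPct": 0.85,
                 "globalInhibition": 1, "localAreaDensity": -1.0,
                 "numActiveColumnsPerInhArea": 40, "synPermInactiveDec": 0.005,
                 "synPermActiveInc": 0.04, "synPermConnected": 0.1,
                 "boostStrength": 3.0, "seed": 1956},
    "tmParams": {"columnCount": 2048, "cellsPerColumn": 32,
                 "activationThreshold": 16, "initialPerm": 0.21,
                 "minThreshold": 12, "newSynapseCount": 20,
                 "permanenceInc": 0.1, "permanenceDec": 0.1,
                 "maxSegmentsPerCell": 128, "maxSynapsesPerSegment": 32,
                 "seed": 1960},
}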
Example #10
class NumentaTMLowLevelDetector(AnomalyDetector):
  """The 'numentaTM' detector, but not using the CLAModel or network API """
  def __init__(self, *args, **kwargs):
    super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.sp = None
    self.spOutput = None
    self.tm = None
    self.anomalyLikelihood = None

    # Set this to False if you want to get results based on raw scores
    # without using AnomalyLikelihood. This will give worse results, but
    # useful for checking the efficacy of AnomalyLikelihood. You will need
    # to re-optimize the thresholds when running with this setting.
    self.useLikelihood = True


  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]


  def initialize(self):

    # Initialize the RDSE with a resolution; calculated from the data min and
    # max, the resolution is specific to the data stream.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
      "activationThreshold": 20,
      "cellsPerColumn": 32,
      "columnDimensions": (2048,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    })

    if self.useLikelihood:
      learningPeriod = math.floor(self.probationaryPeriod / 2.0)
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100
      )


  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore)."""

    # Encode the input data record
    self.valueEncoder.encodeIntoArray(
        inputData["value"], self.encodedValue)
    self.timestampEncoder.encodeIntoArray(
        inputData["timestamp"], self.encodedTimestamp)

    # Run the encoded data through the spatial pooler
    self.sp.compute(np.concatenate((self.encodedTimestamp,
                                    self.encodedValue,)),
                    True, self.spOutput)

    # At the current state, the set of the region's active columns and the set
    # of columns that have previously-predicted cells are used to calculate the
    # raw anomaly score.
    activeColumns = set(self.spOutput.nonzero()[0].tolist())
    prevPredictedColumns = set(self.tm.columnForCell(cell)
                               for cell in self.tm.getPredictiveCells())
    rawScore = (len(activeColumns - prevPredictedColumns) /
                float(len(activeColumns)))

    self.tm.compute(activeColumns)

    if self.useLikelihood:
      # Compute the log-likelihood score
      anomalyScore = self.anomalyLikelihood.anomalyProbability(
        inputData["value"], rawScore, inputData["timestamp"])
      logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
      return (logScore, rawScore)

    return (rawScore, rawScore)
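The resolution rule in `initialize` pads the observed range by 20% and divides it into roughly 130 buckets, with a floor of 0.001. Worked through with hypothetical stream bounds:

inputMin, inputMax = 10.0, 90.0                    # hypothetical data bounds
rangePadding = abs(inputMax - inputMin) * 0.2      # 16.0
minVal, maxVal = inputMin - rangePadding, inputMax + rangePadding
resolution = max(0.001, (maxVal - minVal) / 130.0)
print(resolution)                                  # ~0.8615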
Example #11
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
    """The 'numenta' detector, with the following changes:

  - Use pure Temporal Memory, not the classic TP that uses backtracking.
  - Don't spatial pool the timestamp. Pass it in as distal input.
  - 1 cell per column.
  - Use w=41 in the scalar encoding, rather than w=21, to make up for the
    lost timestamp input to the spatial pooler.
  """
    def __init__(self, *args, **kwargs):
        super(DistalTimestamps1CellPerColumnDetector,
              self).__init__(*args, **kwargs)

        self.valueEncoder = None
        self.encodedValue = None
        self.timestampEncoder = None
        self.encodedTimestamp = None
        self.activeExternalCells = []
        self.prevActiveExternalCells = []
        self.sp = None
        self.spOutput = None
        self.etm = None
        self.anomalyLikelihood = None

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = (self.inputMax + rangePadding
                  if self.inputMin != self.inputMax else self.inputMin + 1)
        numBuckets = 130.0
        resolution = max(0.001, (maxVal - minVal) / numBuckets)
        self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                           w=41,
                                                           seed=42)
        self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                     dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                         dtype=np.uint32)

        inputWidth = self.valueEncoder.getWidth()

        self.sp = SpatialPooler(
            **{
                "globalInhibition": True,
                "columnDimensions": [2048],
                "inputDimensions": [inputWidth],
                "potentialRadius": inputWidth,
                "numActiveColumnsPerInhArea": 40,
                "seed": 1956,
                "potentialPct": 0.8,
                "boostStrength": 0.0,
                "synPermActiveInc": 0.003,
                "synPermConnected": 0.2,
                "synPermInactiveDec": 0.0005,
            })
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(
            **{
                "activationThreshold": 13,
                "cellsPerColumn": 1,
                "columnDimensions": (2048, ),
                "basalInputDimensions": (self.timestampEncoder.getWidth(), ),
                "initialPermanence": 0.21,
                "maxSegmentsPerCell": 128,
                "maxSynapsesPerSegment": 32,
                "minThreshold": 10,
                "maxNewSynapseCount": 20,
                "permanenceDecrement": 0.1,
                "permanenceIncrement": 0.1,
                "seed": 1960,
                "checkInputs": False,
            })

        learningPeriod = math.floor(self.probationaryPeriod / 2.0)
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=self.probationaryPeriod - learningPeriod,
            reestimationPeriod=100)

    def handleRecord(self, inputData):
        """Returns a tuple (anomalyScore, rawScore)."""

        self.valueEncoder.encodeIntoArray(inputData["value"],
                                          self.encodedValue)

        self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                              self.encodedTimestamp)
        self.prevActiveExternalCells = self.activeExternalCells
        self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

        self.sp.compute(self.encodedValue, True, self.spOutput)

        activeColumns = self.spOutput.nonzero()[0]
        activeColumnsSet = set(activeColumns.tolist())
        prevPredictedColumns = set(
            self.etm.columnForCell(cell)
            for cell in self.etm.getPredictiveCells())

        rawScore = (len(activeColumnsSet - prevPredictedColumns) /
                    float(len(activeColumns)))
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            inputData["value"], rawScore, inputData["timestamp"])
        logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

        self.etm.compute(
            activeColumns,
            activeCellsExternalBasal=self.activeExternalCells,
            reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
            growthCandidatesExternalBasal=self.prevActiveExternalCells)

        return (logScore, rawScore)
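The swap of `prevActiveExternalCells` and `activeExternalCells` before `etm.compute` is what lets distal segments grow from the previous timestamp toward the current columns. A plain-list sketch of that bookkeeping, with the ETM call left as a comment:

prevActive, active = [], []
for step, timestampSDR in enumerate([[1, 5], [2, 6], [3, 7]]):
    prevActive = active    # t-1's timestamp bits become the growth candidates
    active = timestampSDR  # t's bits drive the current basal activity
    # etm.compute(activeColumns, activeCellsExternalBasal=active,
    #             reinforceCandidatesExternalBasal=prevActive,
    #             growthCandidatesExternalBasal=prevActive)
    print("step %d active=%s candidates=%s" % (step, active, prevActive))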
Example #13
class BaseNetwork(object):
  def __init__(self, inputMin=None, inputMax=None, runSanity=False):

    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None


  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)


  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue,
                                  'label': label})


  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)


  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's 
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets)


  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]


  def getSpOutputNZ(self):
    return self.spOutputNZ


  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()


  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()


  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells


  def getRawAnomalyScore(self):
    return self.anomalyScore
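`_computePredictedActiveCells` and `_computeAnomalyScore` are module-level helpers that aren't shown in this listing. A hypothetical sketch of the anomaly helper, consistent with its call signature here and with the raw-score convention the other detectors use:

def _computeAnomalyScore(spOutputNZ, prevPredictiveCells, cellsPerColumn):
    # Hypothetical reimplementation: fraction of active columns whose column
    # had no predictive cell on the previous step.
    activeColumns = set(int(c) for c in spOutputNZ)
    predictedColumns = set(int(cell) // cellsPerColumn
                           for cell in prevPredictiveCells)
    if not activeColumns:
        return 0.0
    return len(activeColumns - predictedColumns) / float(len(activeColumns))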
Example #14
anom_score = np.zeros((N_DATA + 1, ))
anom_logscore = np.zeros((N_DATA + 1, ))

anomaly_score = Anomaly(slidingWindowSize=25)

anomaly_likelihood = AnomalyLikelihood(learningPeriod=500,
                                       historicWindowSize=213)

dd = 0

for i, linha in enumerate(teste):

    #####################################################

    scalar_encoder.encodeIntoArray(linha[1], bits_scalar)
    time_encoder.encodeIntoArray(linha[0], bits_time)

    encoder_output = np.concatenate((bits_time, bits_scalar))

    ####################################################

    sdr_output = np.zeros(N_COLUMNS)
    sp.compute(encoder_output, True, sdr_output)
    active_columns = np.nonzero(sdr_output)[0]

    ####################################################

    tm.compute(active_columns, learn=True)

    ####################################################
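The fragment declares `anomaly_score` and `anomaly_likelihood` up front but breaks off before the scoring step. A sketch of how those objects are typically applied inside the loop, assuming `prev_predicted_columns` is carried over from the previous iteration (derived from `tm.getPredictiveCells()` before `tm.compute` runs):

    raw = anomaly_score.compute(active_columns, prev_predicted_columns)
    anom_score[i] = anomaly_likelihood.anomalyProbability(linha[1], raw, linha[0])
    anom_logscore[i] = anomaly_likelihood.computeLogLikelihood(anom_score[i])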
def runHotgym(numRecords):
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    CtEncoder = RandomDistributedScalarEncoder(enParams["Ct"]["resolution"])
    ZIP_10467Encoder = RandomDistributedScalarEncoder(
        enParams["ZIP_10467"]["resolution"])
    #  ZIP_10462Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10462"]["resolution"])
    #  ZIP_10475Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10475"]["resolution"])
    #  ZIP_10466Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10466"]["resolution"])
    #  ZIP_10469Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10469"]["resolution"])
    #  DEPT_11Encoder = RandomDistributedScalarEncoder(enParams["DEPT_11"]["resolution"])
    #  DEPT_24Encoder = RandomDistributedScalarEncoder(enParams["DEPT_24"]["resolution"])
    #  DEPT_41Encoder = RandomDistributedScalarEncoder(enParams["DEPT_41"]["resolution"])
    #  DEPT_34Encoder = RandomDistributedScalarEncoder(enParams["DEPT_34"]["resolution"])
    #  DEPT_31Encoder = RandomDistributedScalarEncoder(enParams["DEPT_31"]["resolution"])
    #  DEPT_60Encoder = RandomDistributedScalarEncoder(enParams["DEPT_60"]["resolution"])
    #  AGE_0_9Encoder = RandomDistributedScalarEncoder(enParams["AGE_0_9"]["resolution"])
    #  AGE_10_19Encoder = RandomDistributedScalarEncoder(enParams["AGE_10_19"]["resolution"])
    #  AGE_20_29Encoder = RandomDistributedScalarEncoder(enParams["AGE_20_29"]["resolution"])
    #  AGE_30_39Encoder = RandomDistributedScalarEncoder(enParams["AGE_30_39"]["resolution"])
    #  AGE_40_49Encoder = RandomDistributedScalarEncoder(enParams["AGE_40_49"]["resolution"])
    #  AGE_50_59Encoder = RandomDistributedScalarEncoder(enParams["AGE_50_59"]["resolution"])
    #  AGE_60_69Encoder = RandomDistributedScalarEncoder(enParams["AGE_60_69"]["resolution"])
    #  AGE_70_79Encoder = RandomDistributedScalarEncoder(enParams["AGE_70_79"]["resolution"])
    #  AGE_80_89Encoder = RandomDistributedScalarEncoder(enParams["AGE_80_89"]["resolution"])
    #  AGE_90_99Encoder = RandomDistributedScalarEncoder(enParams["AGE_90_99"]["resolution"])
    #  DIST_1_7Encoder = RandomDistributedScalarEncoder(enParams["DIST_1_7"]["resolution"])
    #  DIST_8_14Encoder = RandomDistributedScalarEncoder(enParams["DIST_8_14"]["resolution"])
    #  DIST_15_21Encoder = RandomDistributedScalarEncoder(enParams["DIST_15_21"]["resolution"])
    #  DIST_22_28Encoder = RandomDistributedScalarEncoder(enParams["DIST_22_28"]["resolution"])
    #  DIST_29_35Encoder = RandomDistributedScalarEncoder(enParams["DIST_29_35"]["resolution"])
    #  DIST_36_42Encoder = RandomDistributedScalarEncoder(enParams["DIST_36_42"]["resolution"])
    #  DIST_43_49Encoder = RandomDistributedScalarEncoder(enParams["DIST_43_49"]["resolution"])
    #  DIST_50_56Encoder = RandomDistributedScalarEncoder(enParams["DIST_50_56"]["resolution"])
    #  DIST_57_63Encoder = RandomDistributedScalarEncoder(enParams["DIST_57_63"]["resolution"])
    #  DIST_64_70Encoder = RandomDistributedScalarEncoder(enParams["DIST_64_70"]["resolution"])

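    # CtEncoder and ZIP_10467Encoder are built with the same default RDSE width, hence the "* 2".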
    encodingWidth = (timeOfDayEncoder.getWidth() + weekendEncoder.getWidth() +
                     CtEncoder.getWidth() * 2)

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = reader.next()
        reader.next()
        reader.next()

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)

        for count, record in enumerate(reader):

            if count >= numRecords: break

            # Convert data string into Python date object.
            dateString = datetime.datetime.strptime(record[0],
                                                    "%Y-%m-%d %H:%M:%S")
            # Convert data value string into float.
            Ct = float(record[1])
            ZIP_10467 = float(record[2])
            #      ZIP_10462 = float(record[3])
            #      ZIP_10475 = float(record[4])
            #      ZIP_10466 = float(record[5])
            #      ZIP_10469 = float(record[6])
            #      DEPT_11 = float(record[7])
            #      DEPT_24 = float(record[8])
            #      DEPT_41 = float(record[9])
            #      DEPT_34 = float(record[10])
            #      DEPT_31 = float(record[11])
            #      DEPT_60 = float(record[12])
            #      AGE_0_9 = float(record[13])
            #      AGE_10_19 = float(record[14])
            #      AGE_20_29 = float(record[15])
            #      AGE_30_39 = float(record[16])
            #      AGE_40_49 = float(record[17])
            #      AGE_50_59 = float(record[18])
            #      AGE_60_69 = float(record[19])
            #      AGE_70_79 = float(record[20])
            #      AGE_80_89 = float(record[21])
            #      AGE_90_99 = float(record[22])
            #      DIST_1_7 = float(record[23])
            #      DIST_8_14 = float(record[24])
            #      DIST_15_21 = float(record[25])
            #      DIST_22_28 = float(record[26])
            #      DIST_29_35 = float(record[27])
            #      DIST_36_42 = float(record[28])
            #      DIST_43_49 = float(record[29])
            #      DIST_50_56 = float(record[30])
            #      DIST_57_63 = float(record[31])
            #      DIST_64_70 = float(record[31])

            # To encode, we need to provide zero-filled numpy arrays for the encoders
            # to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            CtBits = numpy.zeros(CtEncoder.getWidth())
            ZIP_10467Bits = numpy.zeros(ZIP_10467Encoder.getWidth())
            #      ZIP_10462Bits = numpy.zeros(ZIP_10462Encoder.getWidth())
            #      ZIP_10475Bits = numpy.zeros(ZIP_10475Encoder.getWidth())
            #      ZIP_10466Bits = numpy.zeros(ZIP_10466Encoder.getWidth())
            #      ZIP_10469Bits = numpy.zeros(ZIP_10469Encoder.getWidth())
            #      DEPT_11Bits = numpy.zeros(DEPT_11Encoder.getWidth())
            #      DEPT_24Bits = numpy.zeros(DEPT_24Encoder.getWidth())
            #      DEPT_41Bits = numpy.zeros(DEPT_41Encoder.getWidth())
            #      DEPT_34Bits = numpy.zeros(DEPT_34Encoder.getWidth())
            #      DEPT_31Bits = numpy.zeros(DEPT_31Encoder.getWidth())
            #      DEPT_60Bits = numpy.zeros(DEPT_60Encoder.getWidth())
            #      AGE_0_9Bits = numpy.zeros(AGE_0_9Encoder.getWidth())
            #      AGE_10_19Bits = numpy.zeros(AGE_10_19Encoder.getWidth())
            #      AGE_20_29Bits = numpy.zeros(AGE_20_29Encoder.getWidth())
            #      AGE_30_39Bits = numpy.zeros(AGE_30_39Encoder.getWidth())
            #      AGE_40_49Bits = numpy.zeros(AGE_40_49Encoder.getWidth())
            #      AGE_50_59Bits = numpy.zeros(AGE_50_59Encoder.getWidth())
            #      AGE_60_69Bits = numpy.zeros(AGE_60_69Encoder.getWidth())
            #      AGE_70_79Bits = numpy.zeros(AGE_70_79Encoder.getWidth())
            #      AGE_80_89Bits = numpy.zeros(AGE_80_89Encoder.getWidth())
            #      AGE_90_99Bits = numpy.zeros(AGE_90_99Encoder.getWidth())
            #      DIST_1_7Bits = numpy.zeros(DIST_1_7Encoder.getWidth())
            #      DIST_8_14Bits = numpy.zeros(DIST_8_14Encoder.getWidth())
            #      DIST_15_21Bits = numpy.zeros(DIST_15_21Encoder.getWidth())
            #      DIST_22_28Bits = numpy.zeros(DIST_22_28Encoder.getWidth())
            #      DIST_29_35Bits = numpy.zeros(DIST_29_35Encoder.getWidth())
            #      DIST_36_42Bits = numpy.zeros(DIST_36_42Encoder.getWidth())
            #      DIST_43_49Bits = numpy.zeros(DIST_43_49Encoder.getWidth())
            #      DIST_50_56Bits = numpy.zeros(DIST_50_56Encoder.getWidth())
            #      DIST_57_63Bits = numpy.zeros(DIST_57_63Encoder.getWidth())
            #      DIST_64_70Bits = numpy.zeros(DIST_64_70Encoder.getWidth())

            # Now we call the encoders to create bit representations for each value.
            timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
            weekendEncoder.encodeIntoArray(dateString, weekendBits)
            CtEncoder.encodeIntoArray(Ct, CtBits)
            ZIP_10467Encoder.encodeIntoArray(ZIP_10467, ZIP_10467Bits)
            #      ZIP_10462Encoder.encodeIntoArray(ZIP_10462, ZIP_10462Bits)
            #      ZIP_10475Encoder.encodeIntoArray(ZIP_10475, ZIP_10475Bits)
            #      ZIP_10466Encoder.encodeIntoArray(ZIP_10466, ZIP_10466Bits)
            #      ZIP_10469Encoder.encodeIntoArray(ZIP_10469, ZIP_10469Bits)
            #      DEPT_11Encoder.encodeIntoArray(DEPT_11, DEPT_11Bits)
            #      DEPT_24Encoder.encodeIntoArray(DEPT_24, DEPT_24Bits)
            #      DEPT_41Encoder.encodeIntoArray(DEPT_41, DEPT_41Bits)
            #      DEPT_34Encoder.encodeIntoArray(DEPT_34, DEPT_34Bits)
            #      DEPT_31Encoder.encodeIntoArray(DEPT_31, DEPT_31Bits)
            #      DEPT_60Encoder.encodeIntoArray(DEPT_60, DEPT_60Bits)
            #      AGE_0_9Encoder.encodeIntoArray(AGE_0_9, AGE_0_9Bits)
            #      AGE_10_19Encoder.encodeIntoArray(AGE_10_19, AGE_10_19Bits)
            #      AGE_20_29Encoder.encodeIntoArray(AGE_20_29, AGE_20_29Bits)
            #      AGE_30_39Encoder.encodeIntoArray(AGE_30_39, AGE_30_39Bits)
            #      AGE_40_49Encoder.encodeIntoArray(AGE_40_49, AGE_40_49Bits)
            #      AGE_50_59Encoder.encodeIntoArray(AGE_50_59, AGE_50_59Bits)
            #      AGE_60_69Encoder.encodeIntoArray(AGE_60_69, AGE_60_69Bits)
            #      AGE_70_79Encoder.encodeIntoArray(AGE_70_79, AGE_70_79Bits)
            #      AGE_80_89Encoder.encodeIntoArray(AGE_80_89, AGE_80_89Bits)
            #      AGE_90_99Encoder.encodeIntoArray(AGE_90_99, AGE_90_99Bits)
            #      DIST_1_7Encoder.encodeIntoArray(DIST_1_7, DIST_1_7Bits)
            #      DIST_8_14Encoder.encodeIntoArray(DIST_8_14, DIST_8_14Bits)
            #      DIST_15_21Encoder.encodeIntoArray(DIST_15_21, DIST_15_21Bits)
            #      DIST_22_28Encoder.encodeIntoArray(DIST_22_28, DIST_22_28Bits)
            #      DIST_29_35Encoder.encodeIntoArray(DIST_29_35, DIST_29_35Bits)
            #      DIST_36_42Encoder.encodeIntoArray(DIST_36_42, DIST_36_42Bits)
            #      DIST_43_49Encoder.encodeIntoArray(DIST_43_49, DIST_43_49Bits)
            #      DIST_50_56Encoder.encodeIntoArray(DIST_50_56, DIST_50_56Bits)
            #      DIST_57_63Encoder.encodeIntoArray(DIST_57_63, DIST_57_63Bits)
            #      DIST_64_70Encoder.encodeIntoArray(DIST_64_70, DIST_64_70Bits)
            # Concatenate all these encodings into one large encoding for Spatial
            # Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, CtBits, ZIP_10467Bits])
            #      encoding = numpy.concatenate(
            #        [timeOfDayBits, weekendBits, CtBits,
            #         ZIP_10467Bits, ZIP_10462Bits, ZIP_10475Bits, ZIP_10466Bits, ZIP_10469Bits,
            #         DEPT_11Bits, DEPT_24Bits, DEPT_41Bits, DEPT_34Bits, DEPT_31Bits,
            #         DEPT_60Bits, AGE_0_9Bits, AGE_10_19Bits, AGE_20_29Bits, AGE_30_39Bits,
            #         AGE_40_49Bits, AGE_50_59Bits, AGE_60_69Bits, AGE_70_79Bits, AGE_80_89Bits,
            #         AGE_90_99Bits, DIST_1_7Bits, DIST_8_14Bits, DIST_15_21Bits, DIST_22_28Bits,
            #         DIST_29_35Bits, DIST_36_42Bits, DIST_43_49Bits, DIST_50_56Bits, DIST_57_63Bits,
            #         DIST_64_70Bits])

            # Create an array to represent active columns, all initially zero. This
            # will be populated by the compute method below. It must have the same
            # dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = CtEncoder.getBucketIndices(Ct)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(recordNum=count,
                                                  patternNZ=activeCells,
                                                  classification={
                                                      "bucketIdx": bucketIdx,
                                                      "actValue": Ct
                                                  },
                                                  learn=True,
                                                  infer=True)

            # Print the best prediction for 1 step out.
            oneStepConfidence, oneStep = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True)[0]
            # print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
            #      results.append([oneStep, oneStepConfidence * 100, None, None])
            results.append([record[0], Ct, oneStep, oneStepConfidence * 100])
            output.write(record[0], Ct, oneStep, oneStepConfidence * 100)

        output.close()
        return results
Example #16
class BaseNetwork(object):
  def __init__(self, inputMin=None, inputMax=None, runSanity=False):

    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None


  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 5.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)


  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {'value': scalarValue,
                                  'label': label})


  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)


  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's 
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets)
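
  # Worked example (illustrative, not from the original source): with
  # inputMin=0 and inputMax=100, rangePadding = 100 * 0.2 = 20, so
  # minVal = -20 and maxVal = 120, giving a resolution of
  # max(0.0001, (120 - (-20)) / 130.0) ~= 1.077.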


  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]


  def getSpOutputNZ(self):
    return self.spOutputNZ


  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()


  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()


  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells


  def getRawAnomalyScore(self):
    return self.anomalyScore
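

# Note: _computePredictedActiveCells and _computeAnomalyScore are not defined
# in this excerpt. A minimal sketch of what they plausibly do, inferred from
# their call sites above (names and exact behavior are assumptions, not
# verified library code):

def _computePredictedActiveCells(activeCells, previouslyPredictiveCells):
  # Cells that are active now and were predicted on the previous timestep.
  return sorted(set(activeCells) & set(previouslyPredictiveCells))


def _computeAnomalyScore(activeColumns, previouslyPredictiveCells,
                         cellsPerColumn):
  # Raw anomaly score: the fraction of currently active columns that were not
  # predicted. 0.0 means fully predicted input; 1.0 means fully unexpected.
  if len(activeColumns) == 0:
    return 0.0
  predictedColumns = set(cell // cellsPerColumn
                         for cell in previouslyPredictiveCells)
  unpredicted = [col for col in activeColumns if col not in predictedColumns]
  return float(len(unpredicted)) / len(activeColumns)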
Example #17
def go():
    valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
    timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))

    inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

    sp = SpatialPooler(
        **{
            "globalInhibition": True,
            "columnDimensions": [2048],
            "inputDimensions": [inputWidth],
            "potentialRadius": inputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        })

    tm = TemporalMemory(
        **{
            "activationThreshold": 20,
            "cellsPerColumn": 32,
            "columnDimensions": (2048, ),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1961,
        })

    inputPath = os.path.join(os.path.dirname(__file__),
                             "data/rec-center-hourly.csv")
    inputFile = open(inputPath, "rb")
    csvReader = csv.reader(inputFile)
    csvReader.next()
    csvReader.next()
    csvReader.next()

    encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
    encodedTimestamp = np.zeros(timestampEncoder.getWidth(), dtype=np.uint32)
    spOutput = np.zeros(2048, dtype=np.float32)

    sanityInstance = sanity.SPTMInstance(sp, tm)

    for timestampStr, consumptionStr in csvReader:

        sanityInstance.waitForUserContinue()

        timestamp = datetime.datetime.strptime(timestampStr, "%m/%d/%y %H:%M")
        consumption = float(consumptionStr)

        timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
        valueEncoder.encodeIntoArray(consumption, encodedValue)

        sensoryInput = np.concatenate((encodedTimestamp, encodedValue))
        sp.compute(sensoryInput, True, spOutput)

        activeColumns = np.flatnonzero(spOutput)
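        # Read the TM's predictions before tm.compute() advances the
        # timestep; these are the cells the TM expected to become active
        # for the current input.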
        predictedCells = tm.getPredictiveCells()
        tm.compute(activeColumns)

        activeInputBits = np.flatnonzero(sensoryInput)
        displayText = {
            "timestamp": timestampStr,
            "consumption": consumptionStr
        }

        sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                      predictedCells, displayText)
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True
  )

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):

      if count >= numRecords: break

      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
def runHotgym(numRecords):
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])
    scalarEncoder2 = RandomDistributedScalarEncoder(
        enParams["consumption2"]["resolution"])

    encodingWidth = (scalarEncoder.getWidth() + scalarEncoder2.getWidth())

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = reader.next()
        reader.next()
        reader.next()

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)

        for count, record in enumerate(reader):

            if count >= numRecords: break

            # Convert data string into Python date object.
            #      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
            # Convert data value string into float.
            prediction = float(record[1])
            prediction2 = float(record[2])

            # To encode, we need to provide zero-filled numpy arrays for the encoders
            # to populate.
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())
            consumptionBits2 = numpy.zeros(scalarEncoder2.getWidth())

            # Now we call the encoders to create bit representations for each value.
            scalarEncoder.encodeIntoArray(prediction, consumptionBits)
            scalarEncoder2.encodeIntoArray(prediction2, consumptionBits2)

            # Concatenate all these encodings into one large encoding for Spatial
            # Pooling.
            encoding = numpy.concatenate([consumptionBits, consumptionBits2])

            # Create an array to represent active columns, all initially zero. This
            # will be populated by the compute method below. It must have the same
            # dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(prediction)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(recordNum=count,
                                                  patternNZ=activeCells,
                                                  classification={
                                                      "bucketIdx": bucketIdx,
                                                      "actValue": prediction
                                                  },
                                                  learn=True,
                                                  infer=True)

            # Print the best prediction for 1 step out.
            oneStepConfidence, oneStep = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True)[0]
            # print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
            #      results.append([oneStep, oneStepConfidence * 100, None, None])
            results.append(
                [record[0], prediction, oneStep, oneStepConfidence * 100])
            output.write(record[0], prediction, oneStep,
                         oneStepConfidence * 100)

        output.close()
        return results
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(spParams["columnCount"]),
    # What percent of the columns"s receptive field is available for potential
    # synapses?
    potentialPct=spParams["potentialPct"],
    # This means that the input space has no topology.
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    # Roughly 2%, given that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195).
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    # How quickly synapses grow and degrade.
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=spParams["boostStrength"],
    # Random number generator seed.
    seed=spParams["seed"],
    # TODO: is this useful?
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(tmParams["columnCount"],),
    # How many cells in each mini-column.
    cellsPerColumn=tmParams["cellsPerColumn"],
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # TODO: This comes from the SP params; is this normal?
    connectedPermanence=spParams["synPermConnected"],
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=tmParams["minThreshold"],
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):

      if count >= numRecords: break

      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
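
# Example invocation (illustrative; _PARAMS_PATH and _INPUT_FILE_PATH are
# assumed to point at the hotgym model-params YAML and the input CSV this
# script expects):
if __name__ == "__main__":
  runHotgym(100)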