Example #1
    def testResolution(self):
        """
    Test that numbers within the same resolution return the same encoding.
    Numbers outside the resolution should return different encodings.
    """
        encoder = RandomDistributedScalarEncoder(name="encoder",
                                                 resolution=1.0)

        # Since 23.0 is the first encoded number, it will be the offset.
        # Since resolution is 1, 22.9 and 23.1 should have the same bucket index
        # and encoding.
        e23 = encoder.encode(23.0)
        e23p1 = encoder.encode(23.1)
        e22p9 = encoder.encode(22.9)
        e24 = encoder.encode(24.0)
        self.assertEqual(e23.sum(), encoder.w)
        self.assertEqual(
            (e23 == e23p1).sum(), encoder.getWidth(),
            "Numbers within resolution don't have the same encoding")
        self.assertEqual(
            (e23 == e22p9).sum(), encoder.getWidth(),
            "Numbers within resolution don't have the same encoding")
        self.assertNotEqual(
            (e23 == e24).sum(), encoder.getWidth(),
            "Numbers outside resolution have the same encoding")

        e22p5 = encoder.encode(22.5)
        self.assertNotEqual(
            (e23 == e22p5).sum(), encoder.getWidth(),
            "Numbers outside resolution have the same encoding")
Example #2
class RDSEEncoder():
    def __init__(self, resolution=.5):
        """Create the encoder instance for our test and return it."""
        self.resolution = resolution
        self.series_encoder = RandomDistributedScalarEncoder(
            self.resolution, name="RDSE-(res={})".format(self.resolution))
        self.encoder = MultiEncoder()
        self.encoder.addEncoder("series", self.series_encoder)
        self.last_m_encode = numpy.zeros(1)

    def get_encoder(self):
        return self.encoder

    def get_resolution(self):
        return self.resolution

    def m_encode(self, inputData):
        self.last_m_encode = self.encoder.encode(inputData)
        return self.last_m_encode

    def m_overlap(self, inputData):
        temp = self.last_m_encode
        self.last_m_encode = self.encoder.encode(inputData)
        return numpy.sum(numpy.multiply(self.last_m_encode, temp))

    def r_encode(self, inputData):
        return self.series_encoder.encode(inputData)

    def r_overlap(self, inputA, inputB):
        return numpy.sum(
            numpy.multiply(self.series_encoder.encode(inputA),
                           self.series_encoder.encode(inputB)))
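
A short usage sketch (assumed; not part of the original listing) showing how the overlap helpers behave. It presumes the same nupic imports as the class above and the default w of 21:

# Hypothetical demo: with resolution 0.5, inputs closer than the resolution
# fall into the same bucket, so their encodings overlap on all w ON bits.
r = RDSEEncoder(resolution=0.5)
print(r.r_overlap(10.0, 10.1))   # full overlap: equals w
print(r.r_overlap(10.0, 500.0))  # distant buckets: near-zero overlap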
Example #3
    def initialize(self):
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = (self.inputMax + rangePadding
                  if self.inputMin != self.inputMax else self.inputMin + 1)
        numBuckets = 130.0
        resolution = max(0.001, (maxVal - minVal) / numBuckets)
        self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                           w=41,
                                                           seed=42)
        self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                     dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                         dtype=np.uint32)

        inputWidth = self.valueEncoder.getWidth()

        self.sp = SpatialPooler(
            **{
                "globalInhibition": True,
                "columnDimensions": [2048],
                "inputDimensions": [inputWidth],
                "potentialRadius": inputWidth,
                "numActiveColumnsPerInhArea": 40,
                "seed": 1956,
                "potentialPct": 0.8,
                "boostStrength": 0.0,
                "synPermActiveInc": 0.003,
                "synPermConnected": 0.2,
                "synPermInactiveDec": 0.0005,
            })
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(
            **{
                "activationThreshold": 13,
                "cellsPerColumn": 1,
                "columnDimensions": (2048, ),
                "basalInputDimensions": (self.timestampEncoder.getWidth(), ),
                "initialPermanence": 0.21,
                "maxSegmentsPerCell": 128,
                "maxSynapsesPerSegment": 32,
                "minThreshold": 10,
                "maxNewSynapseCount": 20,
                "permanenceDecrement": 0.1,
                "permanenceIncrement": 0.1,
                "seed": 1960,
                "checkInputs": False,
            })

        learningPeriod = math.floor(self.probationaryPeriod / 2.0)
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=self.probationaryPeriod - learningPeriod,
            reestimationPeriod=100)
Example #4
 def __init__(self, resolution=.5):
     """Create the encoder instance for our test and return it."""
     self.resolution = resolution
     self.series_encoder = RandomDistributedScalarEncoder(
         self.resolution, name="RDSE-(res={})".format(self.resolution))
     self.encoder = MultiEncoder()
     self.encoder.addEncoder("series", self.series_encoder)
     self.last_m_encode = np.zeros(1)
  def testMissingValues(self):
    """
    Test that missing values and NaN return all zero's.
    """
    encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0)
    empty = encoder.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    self.assertEqual(empty.sum(), 0)

    empty = encoder.encode(float("nan"))
    self.assertEqual(empty.sum(), 0)
    def testResolution(self):
        """
    Test that numbers within the same resolution return the same encoding.
    Numbers outside the resolution should return different encodings.
    """
        encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0)

        # Since 23.0 is the first encoded number, it will be the offset.
        # Since resolution is 1, 22.9 and 23.1 should have the same bucket index
        # and encoding.
        e23 = encoder.encode(23.0)
        e23p1 = encoder.encode(23.1)
        e22p9 = encoder.encode(22.9)
        e24 = encoder.encode(24.0)
        self.assertEqual(e23.sum(), encoder.w)
        self.assertEqual(
            (e23 == e23p1).sum(), encoder.getWidth(), "Numbers within resolution don't have the same encoding"
        )
        self.assertEqual(
            (e23 == e22p9).sum(), encoder.getWidth(), "Numbers within resolution don't have the same encoding"
        )
        self.assertNotEqual((e23 == e24).sum(), encoder.getWidth(), "Numbers outside resolution have the same encoding")

        e22p5 = encoder.encode(22.5)
        self.assertNotEqual(
            (e23 == e22p5).sum(), encoder.getWidth(), "Numbers outside resolution have the same encoding"
        )
Example #7
def definir_encoders():
    
    """ 
    retorna o SIZE_ENCODER_, scalar_2_encoder, scalar_1_encoder, scalar_3_encoder, bits_scalar_1, bits_scalar_2, bits_scalar_3
    """  
    ###  A RESOLUCAO DOS 3 TINHA QUE SER 2.30 # TROCAR DEPOIS
    
    scalar_1_encoder = RandomDistributedScalarEncoder(resolution=15.384615384615385,
                                                      seed=42)

    # Two inputs separated by less than the 'resolution' will have the same
    # encoder output.
    scalar_2_encoder = RandomDistributedScalarEncoder(resolution=15.384615384615385,
                                                      seed=53)

    scalar_3_encoder = RandomDistributedScalarEncoder(resolution=15.384615384615385,
                                                      seed=21)

    # 7 = how many bits represent one input
    # 0.25 = radius = if one input differs from another by more than the
    # radius, their encodings won't overlap

    bits_scalar_1 = np.zeros(scalar_1_encoder.getWidth())
    bits_scalar_2 = np.zeros(scalar_2_encoder.getWidth())
    bits_scalar_3 = np.zeros(scalar_3_encoder.getWidth())



    SIZE_ENCODER_ = np.size(bits_scalar_1) + np.size(bits_scalar_2) + np.size(bits_scalar_3)

    return SIZE_ENCODER_, scalar_2_encoder, scalar_1_encoder, scalar_3_encoder, bits_scalar_1, bits_scalar_2, bits_scalar_3
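
A hedged usage sketch (not from the original repo) wiring the return values together; note the return order puts scalar_2_encoder before scalar_1_encoder. It assumes numpy is imported as np, as in the function above:

size, enc2, enc1, enc3, bits1, bits2, bits3 = definir_encoders()
enc1.encodeIntoArray(100.0, bits1)  # writes the w ON bits for 100.0 into bits1
enc2.encodeIntoArray(42.0, bits2)
enc3.encodeIntoArray(7.0, bits3)
full_input = np.concatenate([bits1, bits2, bits3])
assert full_input.size == size  # SIZE_ENCODER_ is the concatenated width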
  def testWriteRead(self):
    original = RandomDistributedScalarEncoder(
        name="encoder", resolution=1.0, w=23, n=500, offset=0.0)

    originalValue = original.encode(1)

    proto1 = RandomDistributedScalarEncoderProto.new_message()
    original.write(proto1)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = RandomDistributedScalarEncoderProto.read(f)

    encoder = RandomDistributedScalarEncoder.read(proto2)

    self.assertIsInstance(encoder, RandomDistributedScalarEncoder)
    self.assertEqual(encoder.resolution, original.resolution)
    self.assertEqual(encoder.w, original.w)
    self.assertEqual(encoder.n, original.n)
    self.assertEqual(encoder.name, original.name)
    self.assertEqual(encoder.verbosity, original.verbosity)
    self.assertEqual(encoder.minIndex, original.minIndex)
    self.assertEqual(encoder.maxIndex, original.maxIndex)
    encodedFromOriginal = original.encode(1)
    encodedFromNew = encoder.encode(1)
    self.assertTrue(numpy.array_equal(encodedFromNew, originalValue))
    self.assertEqual(original.decode(encodedFromNew),
                     encoder.decode(encodedFromOriginal))
    self.assertEqual(original.random.getSeed(), encoder.random.getSeed())

    for key, value in original.bucketMap.items():
      self.assertTrue(numpy.array_equal(value, encoder.bucketMap[key]))
Example #9
    def setUp(self):
        self.tmPy = TemporalMemoryPy(columnDimensions=[2048],
                                     cellsPerColumn=32,
                                     initialPermanence=0.5,
                                     connectedPermanence=0.8,
                                     minThreshold=10,
                                     maxNewSynapseCount=12,
                                     permanenceIncrement=0.1,
                                     permanenceDecrement=0.05,
                                     activationThreshold=15)

        self.tmCPP = TemporalMemoryCPP(columnDimensions=[2048],
                                       cellsPerColumn=32,
                                       initialPermanence=0.5,
                                       connectedPermanence=0.8,
                                       minThreshold=10,
                                       maxNewSynapseCount=12,
                                       permanenceIncrement=0.1,
                                       permanenceDecrement=0.05,
                                       activationThreshold=15)

        self.tp = TP(numberOfCols=2048,
                     cellsPerColumn=32,
                     initialPerm=0.5,
                     connectedPerm=0.8,
                     minThreshold=10,
                     newSynapseCount=12,
                     permanenceInc=0.1,
                     permanenceDec=0.05,
                     activationThreshold=15,
                     globalDecay=0,
                     burnIn=1,
                     checkSynapseConsistency=False,
                     pamLength=1)

        self.tp10x2 = TP10X2(numberOfCols=2048,
                             cellsPerColumn=32,
                             initialPerm=0.5,
                             connectedPerm=0.8,
                             minThreshold=10,
                             newSynapseCount=12,
                             permanenceInc=0.1,
                             permanenceDec=0.05,
                             activationThreshold=15,
                             globalDecay=0,
                             burnIn=1,
                             checkSynapseConsistency=False,
                             pamLength=1)

        self.scalarEncoder = RandomDistributedScalarEncoder(0.88)
Example #10
    def testCountOverlap(self):
        """
    Test that the internal method _countOverlap works as expected.
    """
        enc = RandomDistributedScalarEncoder(name='enc', resolution=1.0, n=500)

        r1 = numpy.array([1, 2, 3, 4, 5, 6])
        r2 = numpy.array([1, 2, 3, 4, 5, 6])
        self.assertEqual(enc._countOverlap(r1, r2), 6,
                         "_countOverlap result is incorrect")

        r1 = numpy.array([1, 2, 3, 4, 5, 6])
        r2 = numpy.array([1, 2, 3, 4, 5, 7])
        self.assertEqual(enc._countOverlap(r1, r2), 5,
                         "_countOverlap result is incorrect")

        r1 = numpy.array([1, 2, 3, 4, 5, 6])
        r2 = numpy.array([6, 5, 4, 3, 2, 1])
        self.assertEqual(enc._countOverlap(r1, r2), 6,
                         "_countOverlap result is incorrect")

        r1 = numpy.array([1, 2, 8, 4, 5, 6])
        r2 = numpy.array([1, 2, 3, 4, 9, 6])
        self.assertEqual(enc._countOverlap(r1, r2), 4,
                         "_countOverlap result is incorrect")

        r1 = numpy.array([1, 2, 3, 4, 5, 6])
        r2 = numpy.array([1, 2, 3])
        self.assertEqual(enc._countOverlap(r1, r2), 3,
                         "_countOverlap result is incorrect")

        r1 = numpy.array([7, 8, 9, 10, 11, 12])
        r2 = numpy.array([1, 2, 3, 4, 5, 6])
        self.assertEqual(enc._countOverlap(r1, r2), 0,
                         "_countOverlap result is incorrect")
 def _generateSequence():
   scalarEncoder = RandomDistributedScalarEncoder(0.88)
   sequence = []
   with open(_INPUT_FILE_PATH) as fin:
     reader = csv.reader(fin)
     reader.next()
     reader.next()
     reader.next()
     for _ in xrange(NUM_PATTERNS):
       record = reader.next()
       value = float(record[1])
       encodedValue = scalarEncoder.encode(value)
       activeBits = set(encodedValue.nonzero()[0])
       sequence.append(activeBits)
   return sequence
Example #12
def profileEnc(maxValue, nRuns):
  minV = 0
  maxV = nRuns
  # generate input data
  data = numpy.random.randint(minV, maxV + 1, nRuns)
  # instantiate measured encoders
  encScalar = ScalarEncoder(w=21, minval=minV, maxval=maxV, resolution=1)
  encRDSE = RDSE(resolution=1)

  # profile!
  for d in data:
    encScalar.encode(d)
    encRDSE.encode(d)

  print "Scalar n=", encScalar.n, " RDSE n=", encRDSE.n
  def testGetMethods(self):
    """
    Test that the getWidth, getDescription, and getDecoderOutputFieldTypes
    methods work.
    """
    enc = RandomDistributedScalarEncoder(name='theName', resolution=1.0, n=500)
    self.assertEqual(enc.getWidth(), 500,
                     "getWidth doesn't return the correct result")

    self.assertEqual(enc.getDescription(), [('theName', 0)],
                     "getDescription doesn't return the correct result")

    self.assertEqual(enc.getDecoderOutputFieldTypes(),
                (FieldMetaType.float, ),
                "getDecoderOutputFieldTypes doesn't return the correct result")
Example #14
def profileEnc(maxValue, nRuns):
    minV = 0
    maxV = nRuns
    # generate input data
    data = numpy.random.randint(minV, maxV + 1, nRuns)
    # instantiate measured encoders
    encScalar = ScalarEncoder(w=21, minval=minV, maxval=maxV, resolution=1)
    encRDSE = RDSE(resolution=1)

    # profile!
    for d in data:
        encScalar.encode(d)
        encRDSE.encode(d)

    print("Scalar n=", encScalar.n, " RDSE n=", encRDSE.n)
 def _generateSequence():
     scalarEncoder = RandomDistributedScalarEncoder(0.88)
     sequence = []
     with open(_INPUT_FILE_PATH) as fin:
         reader = csv.reader(fin)
         reader.next()
         reader.next()
         reader.next()
         for _ in xrange(NUM_PATTERNS):
             record = reader.next()
             value = float(record[1])
             encodedValue = scalarEncoder.encode(value)
             activeBits = set(encodedValue.nonzero()[0])
             sequence.append(activeBits)
     return sequence
def smart_encode(data_fl):
    encoder_list = []
    for i in data_fl.columns:
        if data_fl[i].dtype == 'M8[ns]':
            time_delta = data_fl[i][1] - data_fl[i][0]
            if time_delta >= pd.Timedelta(1, unit='M'):
                encoder_list += [[DateEncoder(season=(5, 1))]]
            elif time_delta >= pd.Timedelta(1, unit='D'):
                encoder_list += [[
                    DateEncoder(season=(21)),
                    DateEncoder(dayOfWeek=(21, 1)),
                    DateEncoder(weekend=5)
                ]]
            else:
                encoder_list += [[
                    DateEncoder(season=(5, 1)),
                    DateEncoder(dayOfWeek=(5, 1)),
                    DateEncoder(weekend=5),
                    DateEncoder(timeOfDay=(5, 1))
                ]]
        if data_fl[i].dtype == "float":
            col_range = data_fl[i].max() - data_fl[i].min()
            res = col_range / (400 - 21)
            encoder_list += [[RandomDistributedScalarEncoder(res)]]
    return encoder_list
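
An illustrative call with made-up data (assumes pandas imported as pd, numpy as np, and a pandas version that still accepts Timedelta unit='M'). A minute-resolution timestamp column falls through to the full date-encoder group, and the float column gets one RDSE:

df = pd.DataFrame({
    "when": pd.date_range("2020-01-01", periods=10, freq="T"),  # 1-minute steps
    "value": np.linspace(0.0, 100.0, 10),
})
encoder_list = smart_encode(df)
# encoder_list[0] -> [season, dayOfWeek, weekend, timeOfDay] DateEncoders
# encoder_list[1] -> [RandomDistributedScalarEncoder] with res = 100 / (400 - 21)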
  def testGetMethods(self):
    """
    Test that the getWidth, getDescription, and getDecoderOutputFieldTypes
    methods work.
    """
    encoder = RandomDistributedScalarEncoder(name="theName", resolution=1.0, n=500)
    self.assertEqual(encoder.getWidth(), 500,
                     "getWidth doesn't return the correct result")

    self.assertEqual(encoder.getDescription(), [("theName", 0)],
                     "getDescription doesn't return the correct result")

    self.assertEqual(encoder.getDecoderOutputFieldTypes(),
                     (FieldMetaType.float, ),
                     "getDecoderOutputFieldTypes doesn't return the correct"
                     " result")
  def initialize(self):

    # Initialize the RDSE with a resolution; calculated from the data min and
    # max, the resolution is specific to the data stream.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49, ))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
      "activationThreshold": 20,
      "cellsPerColumn": 32,
      "columnDimensions": (2048,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    })

    if self.useLikelihood:
      learningPeriod = math.floor(self.probationaryPeriod / 2.0)
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100
      )
def createEncoder(rdse_resolution):
    """Create the encoder instance for our test and return it."""
    series_rdse = RandomDistributedScalarEncoder(
        rdse_resolution,
        name="rdse with resolution {}".format(rdse_resolution))
    encoder = MultiEncoder()
    encoder.addEncoder("series", series_rdse)
    return encoder
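
Assumed usage (not shown in the original): a MultiEncoder built this way encodes a dict keyed by the sub-encoder's field name:

encoder = createEncoder(rdse_resolution=0.5)
sdr = encoder.encode({"series": 3.14})
print(sdr.sum())  # number of ON bits, w for the single RDSE field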
Example #20
    def runHotgym(self, cellsPerColumn, repetitions=1):
        scalarEncoder = RandomDistributedScalarEncoder(0.88, n=2048, w=41)

        instances = self._createInstances(cellsPerColumn=cellsPerColumn)
        times = [0.0] * len(self.contestants)

        t = 0
        duration = HOTGYM_LENGTH * repetitions

        for _ in xrange(repetitions):
            with open(HOTGYM_PATH) as fin:
                reader = csv.reader(fin)
                reader.next()
                reader.next()
                reader.next()

                encodedValue = numpy.zeros(2048, dtype=numpy.int32)

                for timeStr, valueStr in reader:
                    value = float(valueStr)
                    scalarEncoder.encodeIntoArray(value, output=encodedValue)
                    activeBits = encodedValue.nonzero()[0]

                    for i in xrange(len(self.contestants)):
                        tmInstance = instances[i]
                        computeFn = self.contestants[i][2]

                        start = time.clock()
                        computeFn(tmInstance, encodedValue, activeBits)
                        times[i] += time.clock() - start

                    printProgressBar(t, duration, 50)
                    t += 1

        clearProgressBar(50)

        results = []
        for i in xrange(len(self.contestants)):
            name = self.contestants[i][3]
            results.append((
                name,
                times[i],
            ))

        return results
  def testCountOverlap(self):
    """
    Test that the internal method _countOverlap works as expected.
    """
    enc = RandomDistributedScalarEncoder(name='enc', resolution=1.0, n=500)

    r1 = numpy.array([1, 2, 3, 4, 5, 6])
    r2 = numpy.array([1, 2, 3, 4, 5, 6])
    self.assertEqual(enc._countOverlap(r1, r2), 6,
                     "_countOverlap result is incorrect")

    r1 = numpy.array([1, 2, 3, 4, 5, 6])
    r2 = numpy.array([1, 2, 3, 4, 5, 7])
    self.assertEqual(enc._countOverlap(r1, r2), 5,
                     "_countOverlap result is incorrect")

    r1 = numpy.array([1, 2, 3, 4, 5, 6])
    r2 = numpy.array([6, 5, 4, 3, 2, 1])
    self.assertEqual(enc._countOverlap(r1, r2), 6,
                     "_countOverlap result is incorrect")

    r1 = numpy.array([1, 2, 8, 4, 5, 6])
    r2 = numpy.array([1, 2, 3, 4, 9, 6])
    self.assertEqual(enc._countOverlap(r1, r2), 4,
                     "_countOverlap result is incorrect")

    r1 = numpy.array([1, 2, 3, 4, 5, 6])
    r2 = numpy.array([1, 2, 3])
    self.assertEqual(enc._countOverlap(r1, r2), 3,
                     "_countOverlap result is incorrect")

    r1 = numpy.array([7, 8, 9, 10, 11, 12])
    r2 = numpy.array([1, 2, 3, 4, 5, 6])
    self.assertEqual(enc._countOverlap(r1, r2), 0,
                     "_countOverlap result is incorrect")
Example #22
    def runSimpleSequence(self, resets, repetitions=1):
        scalarEncoder = RandomDistributedScalarEncoder(0.88, n=2048, w=41)

        instances = self._createInstances(cellsPerColumn=32)
        times = [0.0] * len(self.contestants)

        duration = 10000 * repetitions
        increment = 4
        sequenceLength = 25
        sequence = (i % (sequenceLength * 4)
                    for i in xrange(0, duration * increment, increment))
        t = 0

        encodedValue = numpy.zeros(2048, dtype=numpy.int32)

        for value in sequence:
            scalarEncoder.encodeIntoArray(value, output=encodedValue)
            activeBits = encodedValue.nonzero()[0]

            for i in xrange(len(self.contestants)):
                tmInstance = instances[i]
                computeFn = self.contestants[i][2]

                if resets:
                    if value == 0:
                        tmInstance.reset()

                start = time.clock()
                computeFn(tmInstance, encodedValue, activeBits)
                times[i] += time.clock() - start

            printProgressBar(t, duration, 50)
            t += 1

        clearProgressBar(50)

        results = []
        for i in xrange(len(self.contestants)):
            name = self.contestants[i][3]
            results.append((
                name,
                times[i],
            ))

        return results
Example #23
  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)
Example #24
    def testEncoding(self):
        """
    Test basic encoding functionality. Create encodings without crashing and
    check they contain the correct number of on and off bits. Check some
    encodings for expected overlap. Test that encodings for old values don't
    change once we generate new buckets.
    """
        # Initialize with non-default parameters and encode with a number close to
        # the offset
        encoder = RandomDistributedScalarEncoder(name="encoder",
                                                 resolution=1.0,
                                                 w=23,
                                                 n=500,
                                                 offset=0.0)
        e0 = encoder.encode(-0.1)

        self.assertEqual(e0.sum(), 23, "Number of on bits is incorrect")
        self.assertEqual(e0.size, 500, "Width of the vector is incorrect")
        self.assertEqual(
            encoder.getBucketIndices(0.0)[0], encoder._maxBuckets / 2,
            "Offset doesn't correspond to middle bucket")
        self.assertEqual(len(encoder.bucketMap), 1,
                         "Number of buckets is not 1")

        # Encode with a number that is resolution away from offset. Now we should
        # have two buckets and this encoding should be one bit away from e0
        e1 = encoder.encode(1.0)
        self.assertEqual(len(encoder.bucketMap), 2,
                         "Number of buckets is not 2")
        self.assertEqual(e1.sum(), 23, "Number of on bits is incorrect")
        self.assertEqual(e1.size, 500, "Width of the vector is incorrect")
        self.assertEqual(computeOverlap(e0, e1), 22,
                         "Overlap is not equal to w-1")

        # Encode with a number that is resolution*w away from offset. Now we should
        # have many buckets and this encoding should have very little overlap with
        # e0
        e25 = encoder.encode(25.0)
        self.assertGreater(len(encoder.bucketMap), 23,
                           "Too few buckets were created")
        self.assertEqual(e25.sum(), 23, "Number of on bits is incorrect")
        self.assertEqual(e25.size, 500, "Width of the vector is incorrect")
        self.assertLess(computeOverlap(e0, e25), 4, "Overlap is too high")

        # Test encoding consistency. The encodings for previous numbers
        # shouldn't change even though we have added additional buckets
        self.assertTrue(
            numpy.array_equal(e0, encoder.encode(-0.1)),
            "Encodings are not consistent - they have changed after new buckets "
            "have been created")
        self.assertTrue(
            numpy.array_equal(e1, encoder.encode(1.0)),
            "Encodings are not consistent - they have changed after new buckets "
            "have been created")
  def testParameterChecks(self):
    """
    Test that some bad construction parameters get handled.
    """
    # n must be >= 6*w
    with self.assertRaises(ValueError):
      RandomDistributedScalarEncoder(name="mv", resolution=1.0, n=int(5.9*21))

    # n must be an int
    with self.assertRaises(ValueError):
      RandomDistributedScalarEncoder(name="mv", resolution=1.0, n=5.9*21)

    # w can't be negative
    with self.assertRaises(ValueError):
      RandomDistributedScalarEncoder(name="mv", resolution=1.0, w=-1)

    # resolution can't be negative
    with self.assertRaises(ValueError):
      RandomDistributedScalarEncoder(name="mv", resolution=-2)
  def runHotgym(self, cellsPerColumn, repetitions=1):
    scalarEncoder = RandomDistributedScalarEncoder(0.88, n=2048, w=41)

    instances = self._createInstances(cellsPerColumn=cellsPerColumn)
    times = [0.0] * len(self.contestants)

    t = 0
    duration = HOTGYM_LENGTH * repetitions

    for _ in xrange(repetitions):
      with open(HOTGYM_PATH) as fin:
        reader = csv.reader(fin)
        reader.next()
        reader.next()
        reader.next()

        encodedValue = numpy.zeros(2048, dtype=numpy.uint32)

        for timeStr, valueStr in reader:
          value = float(valueStr)
          scalarEncoder.encodeIntoArray(value, output=encodedValue)
          activeBits = encodedValue.nonzero()[0]

          for i in xrange(len(self.contestants)):
            tmInstance = instances[i]
            computeFn = self.contestants[i][2]

            start = time.clock()
            computeFn(tmInstance, encodedValue, activeBits)
            times[i] += time.clock() - start

          printProgressBar(t, duration, 50)
          t += 1

    clearProgressBar(50)

    results = []
    for i in xrange(len(self.contestants)):
      name = self.contestants[i][3]
      results.append((name,
                      times[i],))

    return results
  def runSimpleSequence(self, resets, repetitions=1):
    scalarEncoder = RandomDistributedScalarEncoder(0.88, n=2048, w=41)

    instances = self._createInstances(cellsPerColumn=32)
    times = [0.0] * len(self.contestants)

    duration = 10000 * repetitions
    increment = 4
    sequenceLength = 25
    sequence = (i % (sequenceLength * 4)
                for i in xrange(0, duration * increment, increment))
    t = 0

    encodedValue = numpy.zeros(2048, dtype=numpy.int32)

    for value in sequence:
      scalarEncoder.encodeIntoArray(value, output=encodedValue)
      activeBits = encodedValue.nonzero()[0]

      for i in xrange(len(self.contestants)):
        tmInstance = instances[i]
        computeFn = self.contestants[i][2]

        if resets:
          if value == 0:
            tmInstance.reset()

        start = time.clock()
        computeFn(tmInstance, encodedValue, activeBits)
        times[i] += time.clock() - start

      printProgressBar(t, duration, 50)
      t += 1

    clearProgressBar(50)

    results = []
    for i in xrange(len(self.contestants)):
      name = self.contestants[i][3]
      results.append((name,
                      times[i],))

    return results
def calculateEncoderModelAccuracy(nBuckets, numCols, w, trainData, trainLabel):
    maxValue = np.max(trainData)
    minValue = np.min(trainData)

    resolution = (maxValue - minValue) / nBuckets
    encoder = RandomDistributedScalarEncoder(resolution, w=w, n=numCols)

    activeColumnsTrain = runEncoderOverDataset(encoder, trainData)
    distMatColumnTrain = calculateDistanceMatTrain(activeColumnsTrain)
    meanAccuracy, outcomeColumn = calculateAccuracy(distMatColumnTrain,
                                                    trainLabel, trainLabel)
    accuracyColumnOnly = np.mean(outcomeColumn)
    return accuracyColumnOnly
  def testOffset(self):
    """
    Test that offset is working properly
    """
    encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0)
    encoder.encode(23.0)
    self.assertEqual(encoder._offset, 23.0,
              "Offset not specified and not initialized to first input")

    encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0,
                                             offset=25.0)
    encoder.encode(23.0)
    self.assertEqual(encoder._offset, 25.0,
                     "Offset not initialized to specified constructor"
                     " parameter")
  def setUp(self):
    self.tmPy = TemporalMemoryPy(columnDimensions=[2048],
                                 cellsPerColumn=32,
                                 initialPermanence=0.5,
                                 connectedPermanence=0.8,
                                 minThreshold=10,
                                 maxNewSynapseCount=12,
                                 permanenceIncrement=0.1,
                                 permanenceDecrement=0.05,
                                 activationThreshold=15)

    self.tmCPP = TemporalMemoryCPP(columnDimensions=[2048],
                                   cellsPerColumn=32,
                                   initialPermanence=0.5,
                                   connectedPermanence=0.8,
                                   minThreshold=10,
                                   maxNewSynapseCount=12,
                                   permanenceIncrement=0.1,
                                   permanenceDecrement=0.05,
                                   activationThreshold=15)

    self.tp = TP(numberOfCols=2048,
                 cellsPerColumn=32,
                 initialPerm=0.5,
                 connectedPerm=0.8,
                 minThreshold=10,
                 newSynapseCount=12,
                 permanenceInc=0.1,
                 permanenceDec=0.05,
                 activationThreshold=15,
                 globalDecay=0, burnIn=1,
                 checkSynapseConsistency=False,
                 pamLength=1)

    self.tp10x2 = TP10X2(numberOfCols=2048,
                         cellsPerColumn=32,
                         initialPerm=0.5,
                         connectedPerm=0.8,
                         minThreshold=10,
                         newSynapseCount=12,
                         permanenceInc=0.1,
                         permanenceDec=0.05,
                         activationThreshold=15,
                         globalDecay=0, burnIn=1,
                         checkSynapseConsistency=False,
                         pamLength=1)

    self.scalarEncoder = RandomDistributedScalarEncoder(0.88)
 def testVerbosity(self):
   """
   Test that nothing is printed out when verbosity=0
   """
   _stdout = sys.stdout
   sys.stdout = _stringio = StringIO()
   encoder = RandomDistributedScalarEncoder(name="mv", resolution=1.0,
                                            verbosity=0)
   output = numpy.zeros(encoder.getWidth(), dtype=defaultDtype)
   encoder.encodeIntoArray(23.0, output)
   encoder.getBucketIndices(23.0)
   sys.stdout = _stdout
   self.assertEqual(len(_stringio.getvalue()), 0,
                    "zero verbosity doesn't lead to zero output")
  def testOverlapStatistics(self):
    """
    Check that the overlaps for the encodings are within the expected range.
    Here we ask the encoder to create a bunch of representations under somewhat
    stressful conditions, and then verify they are correct. We rely on the fact
    that the _overlapOK and _countOverlapIndices methods are working correctly.
    """
    seed = getSeed()

    # Generate about 600 encodings. Set n relatively low to increase
    # chance of false overlaps
    encoder = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150,
                                             seed=seed)
    encoder.encode(0.0)
    encoder.encode(-300.0)
    encoder.encode(300.0)
    self.assertTrue(validateEncoder(encoder, subsampling=3),
                    "Illegal overlap encountered in encoder")
    def testOffset(self):
        """
    Test that offset is working properly
    """
        encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0)
        encoder.encode(23.0)
        self.assertEqual(encoder._offset, 23.0, "Offset not specified and not initialized to first input")

        encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0, offset=25.0)
        encoder.encode(23.0)
        self.assertEqual(encoder._offset, 25.0, "Offset not initialized to specified constructor" " parameter")
 def testVerbosity(self):
     """
 Test that nothing is printed out when verbosity=0
 """
     _stdout = sys.stdout
     sys.stdout = _stringio = StringIO()
     encoder = RandomDistributedScalarEncoder(name="mv", resolution=1.0, verbosity=0)
     output = numpy.zeros(encoder.getWidth(), dtype=defaultDtype)
     encoder.encodeIntoArray(23.0, output)
     encoder.getBucketIndices(23.0)
     sys.stdout = _stdout
     self.assertEqual(len(_stringio.getvalue()), 0, "zero verbosity doesn't lead to zero output")
    def testOverlapStatistics(self):
        """
    Check that the overlaps for the encodings are within the expected range.
    Here we ask the encoder to create a bunch of representations under somewhat
    stressful conditions, and then verify they are correct. We rely on the fact
    that the _overlapOK and _countOverlapIndices methods are working correctly.
    """
        seed = getSeed()

        # Generate about 600 encodings. Set n relatively low to increase
        # chance of false overlaps
        encoder = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150, seed=seed)
        encoder.encode(0.0)
        encoder.encode(-300.0)
        encoder.encode(300.0)
        self.assertTrue(validateEncoder(encoder, subsampling=3), "Illegal overlap encountered in encoder")
Example #36
  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 5.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)
  def testEncoding(self):
    """
    Test basic encoding functionality. Create encodings without crashing and
    check they contain the correct number of on and off bits. Check some
    encodings for expected overlap. Test that encodings for old values don't
    change once we generate new buckets.
    """
    # Initialize with non-default parameters and encode with a number close to
    # the offset
    enc = RandomDistributedScalarEncoder(name='enc', resolution=1.0, w=23,
                                         n=500, offset=0.0)
    e0 = enc.encode(-0.1)

    self.assertEqual(e0.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e0.size, 500, "Width of the vector is incorrect")
    self.assertEqual(enc.getBucketIndices(0.0)[0], enc._maxBuckets / 2,
                     "Offset doesn't correspond to middle bucket")
    self.assertEqual(len(enc.bucketMap), 1, "Number of buckets is not 1")

    # Encode with a number that is resolution away from offset. Now we should
    # have two buckets and this encoding should be one bit away from e0
    e1 = enc.encode(1.0)
    self.assertEqual(len(enc.bucketMap), 2, "Number of buckets is not 2")
    self.assertEqual(e1.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e1.size, 500, "Width of the vector is incorrect")
    self.assertEqual(computeOverlap(e0, e1), 22,
                     "Overlap is not equal to w-1")

    # Encode with a number that is resolution*w away from offset. Now we should
    # have many buckets and this encoding should have very little overlap with
    # e0
    e25 = enc.encode(25.0)
    self.assertGreater(len(enc.bucketMap), 23, "Too few buckets were created")
    self.assertEqual(e25.sum(), 23, "Number of on bits is incorrect")
    self.assertEqual(e25.size, 500, "Width of the vector is incorrect")
    self.assertLess(computeOverlap(e0, e25), 4,
                     "Overlap is too high")

    # Test encoding consistency. The encodings for previous numbers
    # shouldn't change even though we have added additional buckets
    self.assertEqual((e0 == enc.encode(-0.1)).sum(), 500,
      "Encodings are not consistent - they have changed after new buckets "
      "have been created")
    self.assertEqual((e1 == enc.encode(1.0)).sum(), 500,
      "Encodings are not consistent - they have changed after new buckets "
      "have been created")
Example #38
    def runRandom(self, repetitions=1):
        scalarEncoder = RandomDistributedScalarEncoder(0.88, n=2048, w=41)

        instances = self._createInstances(cellsPerColumn=32)
        times = [0.0] * len(self.contestants)

        duration = 1000 * repetitions
        t = 0

        encodedValue = numpy.zeros(2048, dtype=numpy.int32)

        for _ in xrange(duration):
            activeBits = random.sample(xrange(2048), 40)
            encodedValue = numpy.zeros(2048, dtype=numpy.int32)
            encodedValue[activeBits] = 1

            for i in xrange(len(self.contestants)):
                tmInstance = instances[i]
                computeFn = self.contestants[i][2]

                start = time.clock()
                computeFn(tmInstance, encodedValue, activeBits)
                times[i] += time.clock() - start

            printProgressBar(t, duration, 50)
            t += 1

        clearProgressBar(50)

        results = []
        for i in xrange(len(self.contestants)):
            name = self.contestants[i][3]
            results.append((
                name,
                times[i],
            ))

        return results
Example #39
def RDSE(**kwargs):
    """
    RANDOM DISTRIBUTED SCALAR ENCODER, see definition for more info
    Parameters --

    @param resolution: inputs separated by more than the resolution will have
        different, but possibly overlapping, representations

    @param w: Number of ON bits which encode a single value; must be odd
        to avoid centering problems

    @param n: Total number of bits in the output, must be >= 6*w

    @param name: Optional string which will become part of the description

    @param offset: Floating point offset used to map scalar inputs to bucket
        indices. If set to None, the very first input that is encoded will be
        used to determine the offset.

    @param seed: Seed used by the numpy random number generator; if set to -1,
        the generator will be initialized without a fixed seed
    """
    return RandomDistributedScalarEncoder(**kwargs)
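
A minimal sketch of the wrapper in use (hypothetical parameter values):

enc = RDSE(resolution=1.0, w=21, n=400, seed=42)
a = enc.encode(10.0)   # first input becomes the offset
b = enc.encode(10.2)   # within one resolution step of 10.0
print((a * b).sum())   # overlap equals w: both values share a bucket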
  def testSeed(self):
    """
    Test that initializing twice with the same seed returns identical encodings
    and different when not specified
    """
    encoder1 = RandomDistributedScalarEncoder(name="encoder1", resolution=1.0,
                                              seed=42)
    encoder2 = RandomDistributedScalarEncoder(name="encoder2", resolution=1.0,
                                              seed=42)
    encoder3 = RandomDistributedScalarEncoder(name="encoder3", resolution=1.0,
                                              seed=-1)
    encoder4 = RandomDistributedScalarEncoder(name="encoder4", resolution=1.0,
                                              seed=-1)

    e1 = encoder1.encode(23.0)
    e2 = encoder2.encode(23.0)
    e3 = encoder3.encode(23.0)
    e4 = encoder4.encode(23.0)

    self.assertEqual((e1 == e2).sum(), encoder1.getWidth(),
        "Same seed gives rise to different encodings")

    self.assertNotEqual((e1 == e3).sum(), encoder1.getWidth(),
        "Different seeds give rise to the same encodings")

    self.assertNotEqual((e3 == e4).sum(), encoder1.getWidth(),
        "Seeds of -1 give rise to the same encodings")
Example #41
class BaseNetwork(object):
  def __init__(self, inputMin=None, inputMax=None, runSanity=False):

    self.inputMin = inputMin
    self.inputMax = inputMax
    self.runSanity = runSanity

    self.encoder = None
    self.encoderOutput = None
    self.sp = None
    self.spOutput = None
    self.spOutputNZ = None
    self.tm = None
    self.anomalyScore = None
    if runSanity:
      self.sanity = None

    self.defaultEncoderResolution = 0.0001
    self.numColumns = 2048
    self.cellsPerColumn = 32

    self.predictedActiveCells = None
    self.previouslyPredictiveCells = None


  def initialize(self):

    # Scalar Encoder
    resolution = self.getEncoderResolution()
    self.encoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encoderOutput = np.zeros(self.encoder.getWidth(), dtype=np.uint32)

    # Spatial Pooler
    spInputWidth = self.encoder.getWidth()
    self.spParams = {
      "globalInhibition": True,
      "columnDimensions": [self.numColumns],
      "inputDimensions": [spInputWidth],
      "potentialRadius": spInputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 5.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    }
    self.sp = SpatialPooler(**self.spParams)
    self.spOutput = np.zeros(self.numColumns, dtype=np.uint32)

    # Temporal Memory
    self.tmParams = {
      "activationThreshold": 20,
      "cellsPerColumn": self.cellsPerColumn,
      "columnDimensions": (self.numColumns,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    }
    self.tm = TemporalMemory(**self.tmParams)

    # Sanity
    if self.runSanity:
      self.sanity = sanity.SPTMInstance(self.sp, self.tm)


  def handleRecord(self, scalarValue, label=None, skipEncoding=False,
                   learningMode=True):
    """Process one record."""

    if self.runSanity:
      self.sanity.waitForUserContinue()

    # Encode the input data record if it hasn't already been encoded.
    if not skipEncoding:
      self.encodeValue(scalarValue)

    # Run the encoded data through the spatial pooler
    self.sp.compute(self.encoderOutput, learningMode, self.spOutput)
    self.spOutputNZ = self.spOutput.nonzero()[0]

    # WARNING: this needs to happen here, before the TM runs.
    self.previouslyPredictiveCells = self.tm.getPredictiveCells()

    # Run SP output through temporal memory
    self.tm.compute(self.spOutputNZ)
    self.predictedActiveCells = _computePredictedActiveCells(
      self.tm.getActiveCells(), self.previouslyPredictiveCells)

    # Anomaly score
    self.anomalyScore = _computeAnomalyScore(self.spOutputNZ,
                                             self.previouslyPredictiveCells,
                                             self.cellsPerColumn)

    # Run Sanity
    if self.runSanity:
      self.sanity.appendTimestep(self.getEncoderOutputNZ(),
                                 self.getSpOutputNZ(),
                                 self.previouslyPredictiveCells,
                                 {
                                   'value': scalarValue,
                                   'label':label
                                   })


  def encodeValue(self, scalarValue):
    self.encoder.encodeIntoArray(scalarValue, self.encoderOutput)


  def getEncoderResolution(self):
    """
    Compute the Random Distributed Scalar Encoder (RDSE) resolution. It's 
    calculated from the data min and max, specific to the data stream.
    """
    if self.inputMin is None or self.inputMax is None:
      return self.defaultEncoderResolution
    else:
      rangePadding = abs(self.inputMax - self.inputMin) * 0.2
      minVal = self.inputMin - rangePadding
      maxVal = (self.inputMax + rangePadding
                if self.inputMin != self.inputMax
                else self.inputMin + 1)
      numBuckets = 130.0
      return max(self.defaultEncoderResolution, (maxVal - minVal) / numBuckets)


  def getEncoderOutputNZ(self):
    return self.encoderOutput.nonzero()[0]


  def getSpOutputNZ(self):
    return self.spOutputNZ


  def getTmPredictiveCellsNZ(self):
    return self.tm.getPredictiveCells()


  def getTmActiveCellsNZ(self):
    return self.tm.getActiveCells()


  def getTmPredictedActiveCellsNZ(self):
    return self.predictedActiveCells


  def getRawAnomalyScore(self):
    return self.anomalyScore
import numpy as np
from nupic.encoders import ScalarEncoder
ScalarEncoder?

enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=False, forced=True)
print "3 =", enc.encode(3)
print "4 =", enc.encode(4)
print "5 =", enc.encode(5)
print "1000 =", enc.encode(1000)


from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder

RandomDistributedScalarEncoder?

rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)

print "3 =   ", rdse.encode(3)
print "4 =   ", rdse.encode(4)
print "5 =   ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "100000 =", rdse.encode(1000)


import datetime
from nupic.encoders.date import DateEncoder

DateEncoder?

class NumentaTMLowLevelDetector(AnomalyDetector):
  """The 'numentaTM' detector, but not using the CLAModel or network API """
  def __init__(self, *args, **kwargs):
    super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.sp = None
    self.spOutput = None
    self.tm = None
    self.anomalyLikelihood = None

    # Set this to False if you want to get results based on raw scores
    # without using AnomalyLikelihood. This will give worse results, but
    # useful for checking the efficacy of AnomalyLikelihood. You will need
    # to re-optimize the thresholds when running with this setting.
    self.useLikelihood = True


  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]


  def initialize(self):

    # Initialize the RDSE with a resolution; calculated from the data min and
    # max, the resolution is specific to the data stream.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49, ))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
      "activationThreshold": 20,
      "cellsPerColumn": 32,
      "columnDimensions": (2048,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    })

    if self.useLikelihood:
      learningPeriod = math.floor(self.probationaryPeriod / 2.0)
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100
      )


  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore)."""

    # Encode the input data record
    self.valueEncoder.encodeIntoArray(
        inputData["value"], self.encodedValue)
    self.timestampEncoder.encodeIntoArray(
        inputData["timestamp"], self.encodedTimestamp)

    # Run the encoded data through the spatial pooler
    self.sp.compute(np.concatenate((self.encodedTimestamp,
                                    self.encodedValue,)),
                    True, self.spOutput)

    # At the current state, the set of the region's active columns and the set
    # of columns that have previously-predicted cells are used to calculate the
    # raw anomaly score.
    activeColumns = set(self.spOutput.nonzero()[0].tolist())
    prevPredictedColumns = set(self.tm.columnForCell(cell)
                               for cell in self.tm.getPredictiveCells())
    rawScore = (len(activeColumns - prevPredictedColumns) /
                float(len(activeColumns)))

    self.tm.compute(activeColumns)

    if self.useLikelihood:
      # Compute the log-likelihood score
      anomalyScore = self.anomalyLikelihood.anomalyProbability(
        inputData["value"], rawScore, inputData["timestamp"])
      logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
      return (logScore, rawScore)

    return (rawScore, rawScore)
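
To make the raw-score formula in handleRecord concrete, here is a tiny standalone sketch with made-up column sets:

# rawScore = |activeColumns - prevPredictedColumns| / |activeColumns|
activeColumns = {3, 8, 15, 42}
prevPredictedColumns = {8, 42, 99}
rawScore = len(activeColumns - prevPredictedColumns) / float(len(activeColumns))
print(rawScore)  # 0.5 -> half of the active columns were unpredicted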
    def initialize(self):
        # Keep track of value range for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None

        # Time of day encoder
        self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49),
                                            name='time_enc')
        # RDSE encoder for the time series value.
        minResolution = 0.001
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = self.inputMax + rangePadding
        numBuckets = 130
        resolution = max(minResolution, (maxVal - minVal) / numBuckets)
        self.value_enc = RandomDistributedScalarEncoder(resolution=resolution,
                                                        name='value_rdse')

        # Spatial Pooler.
        encodingWidth = (self.timeOfDayEncoder.getWidth() +
                         self.value_enc.getWidth())
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(2048, ),
            potentialPct=0.8,
            potentialRadius=encodingWidth,
            globalInhibition=1,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956,
            wrapAround=True,
        )

        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=20,
            initialPermanence=.5,  # Increased to connectedPermanence.
            connectedPermanence=.5,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            predictedSegmentDecrement=0.001,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=128,  # Changed meaning. Also see connections.topology[2]
            seed=1993,
        )

        # Initialize the anomaly likelihood object
        numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numentaLearningPeriod,
            estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
            reestimationPeriod=100,
        )

        self.age = 0
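
# A worked sketch of the resolution arithmetic in initialize() above; the
# input range below is made up for illustration.
inputMin, inputMax = 10.0, 90.0
rangePadding = abs(inputMax - inputMin) * 0.2       # 16.0
minVal = inputMin - rangePadding                    # -6.0
maxVal = inputMax + rangePadding                    # 106.0
resolution = max(0.001, (maxVal - minVal) / 130)    # ~0.862
print(resolution)
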
class DendriteDetector(AnomalyDetector):
    def initialize(self):
        # Keep track of value range for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None

        # Time of day encoder
        self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49),
                                            name='time_enc')
        # RDSE encoder for the time series value.
        minResolution = 0.001
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = self.inputMax + rangePadding
        numBuckets = 130
        resolution = max(minResolution, (maxVal - minVal) / numBuckets)
        self.value_enc = RandomDistributedScalarEncoder(resolution=resolution,
                                                        name='value_rdse')

        # Spatial Pooler.
        encodingWidth = (self.timeOfDayEncoder.getWidth() +
                         self.value_enc.getWidth())
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(2048, ),
            potentialPct=0.8,
            potentialRadius=encodingWidth,
            globalInhibition=1,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956,
            wrapAround=True,
        )

        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=20,
            initialPermanence=.5,  # Increased to connectedPermanence.
            connectedPermanence=.5,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            predictedSegmentDecrement=0.001,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=128,  # Changed meaning. Also see connections.topology[2]
            seed=1993,
        )

        # Initialize the anomaly likelihood object
        numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numentaLearningPeriod,
            estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
            reestimationPeriod=100,
        )

        self.age = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def handleRecord(self, inputData):
        """
    Argument inputData is {"value": instantaneous_value, "timestamp": pandas.Timestamp}
    Returns a tuple (anomalyScore, rawScore).

    Internally to NuPIC "anomalyScore" corresponds to "likelihood_score"
    and "rawScore" corresponds to "anomaly_score". Sorry about that.
    """

        # Check for spatial anomalies and update min/max values.
        value = inputData["value"]
        spatialAnomaly = False
        if self.minVal != self.maxVal:
            tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
            maxExpected = self.maxVal + tolerance
            minExpected = self.minVal - tolerance
            if value > maxExpected or value < minExpected:
                spatialAnomaly = True
        if self.maxVal is None or value > self.maxVal:
            self.maxVal = value
        if self.minVal is None or value < self.minVal:
            self.minVal = value

        # Run the HTM stack.  First Encoders.
        timestamp = inputData["timestamp"]
        timeOfDayBits = np.zeros(self.timeOfDayEncoder.getWidth())
        self.timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
        valueBits = np.zeros(self.value_enc.getWidth())
        self.value_enc.encodeIntoArray(value, valueBits)
        encoding = np.concatenate([timeOfDayBits, valueBits])
        # Spatial Pooler.
        activeColumns = np.zeros(self.sp.getNumColumns())
        self.sp.compute(encoding, True, activeColumns)
        activeColumnIndices = np.nonzero(activeColumns)[0]
        # Temporal Memory and Anomaly.
        predictions = self.tm.getPredictiveCells()
        predictedColumns = list(self.tm.mapCellsToColumns(predictions).keys())
        self.tm.compute(activeColumnIndices, learn=True)
        activeCells = self.tm.getActiveCells()
        rawScore = anomaly.computeRawAnomalyScore(activeColumnIndices,
                                                  predictedColumns)

        # Compute log(anomaly likelihood)
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            inputData["value"], rawScore, inputData["timestamp"])
        finalScore = logScore = self.anomalyLikelihood.computeLogLikelihood(
            anomalyScore)

        if spatialAnomaly:
            finalScore = 1.0

        if False:  # Disabled diagnostic; set to True to generate these plots.
            # Plot correlation of excitement versus compartmentalization.
            if self.age == 0:
                print("Correlation Plots ENABLED.")
            # Hard-coded sampling window; the dead branch keeps an alternate range.
            if False:
                start_age = 1000
                end_age = 1800
            else:
                start_age = 4000
                end_age = 7260
            if self.age == start_age:
                import correlation
                import random
                self.cor_samplers = []
                sampled_cells = []
                while len(self.cor_samplers) < 20:
                    n = random.choice(xrange(self.tm.numberOfCells()))
                    if n in sampled_cells:
                        continue
                    else:
                        sampled_cells.append(n)
                    neuron = self.tm.connections.dataForCell(n)
                    if neuron._roots:
                        c = correlation.CorrelationSampler(neuron._roots[0])
                        c.random_sample_points(100)
                        self.cor_samplers.append(c)
                print("Created %d Correlation Samplers" %
                      len(self.cor_samplers))
            if self.age >= start_age:
                for smplr in self.cor_samplers:
                    smplr.sample()
            if self.age == end_age:
                import matplotlib.pyplot as plt
                for idx, smplr in enumerate(self.cor_samplers):
                    if smplr.num_samples == 0:
                        print("No samples, plot not shown.")
                        continue
                    plt.figure("Sample %d" % idx)
                    smplr.plot(period=64)  # Different value!
                plt.show()

        if False:  # Disabled diagnostic; set to True to plot dendrite excitement.
            # Plot excitement of a typical detection on a dendrite.
            if self.age == 7265:
                #if self.age == 1800:
                import matplotlib.pyplot as plt
                import random
                from connections import SYN_CONNECTED_ACTIVE
                sampled_cells = set()
                for figure_num in xrange(40):
                    plt.figure("(%d)" % figure_num)
                    # Find an active cell to view.
                    cell = None
                    for attempt in range(100):
                        event = random.choice(self.tm.activeEvents)
                        cell = event.cell  # This is an integer.
                        if cell is not None and cell not in sampled_cells:
                            break
                    else:
                        break
                    sampled_cells.add(cell)
                    cell = self.tm.connections.dataForCell(cell)
                    # Organize the data.
                    EPSPs = []
                    excitement = []
                    distance_to_root = 0
                    segment_offsets = {}
                    branch = cell._roots[0]
                    while True:
                        segment_offsets[branch] = distance_to_root
                        distance_to_root += len(branch._synapses)
                        excitement.extend(branch.excitement)
                        for syn in branch._synapses:
                            if syn is None:
                                EPSPs.append(0)
                            else:
                                EPSPs.append(syn.state == SYN_CONNECTED_ACTIVE)
                        if branch.children:
                            branch = random.choice(branch.children)
                        else:
                            break
                    plt.plot(
                        np.arange(distance_to_root),
                        EPSPs,
                        'r',
                        np.arange(distance_to_root),
                        excitement,
                        'b',
                    )
                    plt.title(
                        "Dendrite Activation\n Horizontal line is activation threshold, Vertical lines are segment bifurcations"
                    )
                    plt.xlabel("Distance along Dendrite")
                    plt.ylabel("EPSPs are Red, Excitement is Blue")
                    # Show lines where the excitement crosses thresholds.
                    plt.axhline(20, color='k')  # Hard coded parameter value.
                    for offset in segment_offsets.values():
                        if offset != 0:
                            plt.axvline(offset, color='k')
                print("\nShowing %d excitement plots." % len(sampled_cells))
                plt.show()

        self.age += 1

        return (finalScore, rawScore)
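
# The spatial-anomaly check in handleRecord() above, extracted as a
# standalone sketch. SPATIAL_TOLERANCE is assumed to be 0.05, as in the
# NAB-style detectors; the function name is illustrative.
SPATIAL_TOLERANCE = 0.05

def is_spatial_anomaly(value, minVal, maxVal):
    if minVal is None or maxVal is None or minVal == maxVal:
        return False
    tolerance = (maxVal - minVal) * SPATIAL_TOLERANCE
    return value > maxVal + tolerance or value < minVal - tolerance

print(is_spatial_anomaly(120.0, 0.0, 100.0))  # True: outside the padded range
print(is_spatial_anomaly(104.0, 0.0, 100.0))  # False: within tolerance
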
def runHotgym():

  timeOfDayEncoder = DateEncoder(timeOfDay=(21,1))
  weekendEncoder = DateEncoder(weekend=21)
  scalarEncoder = RandomDistributedScalarEncoder(0.88)

  encodingWidth = timeOfDayEncoder.getWidth() \
    + weekendEncoder.getWidth() \
    + scalarEncoder.getWidth()

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(2048,),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=0.85,
    # This means that the input space has no topology.
    globalInhibition=True,
    localAreaDensity=-1.0,
    # Roughly 2%, given that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195).
    numActiveColumnsPerInhArea=40.0,
    # How quickly synapses grow and degrade.
    synPermInactiveDec=0.005,
    synPermActiveInc=0.04,
    synPermConnected=0.1,
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=3.0,
    # Random number generator seed.
    seed=1956,
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(2048, ),
    # How many cells in each mini-column.
    cellsPerColumn=32,
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=16,
    initialPermanence=0.21,
    connectedPermanence=0.5,
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=12,
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=20,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=128,
    maxSynapsesPerSegment=32,
    seed=1960
  )

  classifier = SDRClassifierFactory.create()

  with open(_INPUT_FILE_PATH) as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):
      # Convert the date string into a Python datetime object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(2048)

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      probability, value = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(value, probability * 100))
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
  """The 'numenta' detector, with the following changes:

  - Use pure Temporal Memory, not the classic TP that uses backtracking.
  - Don't spatial pool the timestamp. Pass it in as distal input.
  - 1 cell per column.
  - Use w=41 in the scalar encoding, rather than w=21, to make up for the
    lost timestamp input to the spatial pooler.
  """
  def __init__(self, *args, **kwargs):
    super(DistalTimestamps1CellPerColumnDetector, self).__init__(*args,
                                                                 **kwargs)

    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.activeExternalCells = []
    self.prevActiveExternalCells = []
    self.sp = None
    self.spOutput = None
    self.etm = None
    self.anomalyLikelihood = None


  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]


  def initialize(self):
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                       w=41,
                                                       seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    self.timestampEncoder = DateEncoder(timeOfDay=(21,9.49,))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    inputWidth = self.valueEncoder.getWidth()

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.etm = ExtendedTemporalMemory(**{
      "activationThreshold": 13,
      "cellsPerColumn": 1,
      "columnDimensions": (2048,),
      "basalInputDimensions": (self.timestampEncoder.getWidth(),),
      "initialPermanence": 0.21,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 32,
      "minThreshold": 10,
      "maxNewSynapseCount": 20,
      "permanenceDecrement": 0.1,
      "permanenceIncrement": 0.1,
      "seed": 1960,
      "checkInputs": False,
    })

    learningPeriod = math.floor(self.probationaryPeriod / 2.0)
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
      claLearningPeriod=learningPeriod,
      estimationSamples=self.probationaryPeriod - learningPeriod,
      reestimationPeriod=100
    )


  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore)."""

    self.valueEncoder.encodeIntoArray(inputData["value"],
                                      self.encodedValue)

    self.timestampEncoder.encodeIntoArray(inputData["timestamp"],
                                          self.encodedTimestamp)
    self.prevActiveExternalCells = self.activeExternalCells
    self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

    self.sp.compute(self.encodedValue, True, self.spOutput)

    activeColumns = self.spOutput.nonzero()[0]
    activeColumnsSet = set(activeColumns.tolist())
    prevPredictedColumns = set(self.etm.columnForCell(cell)
                               for cell in self.etm.getPredictiveCells())

    rawScore = (len(activeColumnsSet - prevPredictedColumns) /
                float(len(activeColumns)))
    anomalyScore = self.anomalyLikelihood.anomalyProbability(
      inputData["value"], rawScore, inputData["timestamp"])
    logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

    self.etm.compute(activeColumns,
                     activeCellsExternalBasal=self.activeExternalCells,
                     reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
                     growthCandidatesExternalBasal=self.prevActiveExternalCells)

    return (logScore, rawScore)
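
# The distal-timestamp idea above boils down to: encode the timestamp, then
# hand its on-bit indices to the temporal memory as external basal input.
# A toy binary encoding makes the nonzero() step concrete:
import numpy as np

encodedTimestamp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.uint32)
activeExternalCells = encodedTimestamp.nonzero()[0]
print(activeExternalCells)  # [1 4 5]
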
  def testMapBucketIndexToNonZeroBits(self):
    """
    Test that mapBucketIndexToNonZeroBits works and that max buckets and
    clipping are handled properly.
    """
    encoder = RandomDistributedScalarEncoder(resolution=1.0, w=11, n=150)
    # Set a low number of max buckets
    encoder._initializeBucketMap(10, None)
    encoder.encode(0.0)
    encoder.encode(-7.0)
    encoder.encode(7.0)

    self.assertEqual(len(encoder.bucketMap), encoder._maxBuckets,
      "_maxBuckets exceeded")
    self.assertTrue(
      numpy.array_equal(encoder.mapBucketIndexToNonZeroBits(-1),
                        encoder.bucketMap[0]),
      "mapBucketIndexToNonZeroBits did not handle negative index")
    self.assertTrue(
      numpy.array_equal(encoder.mapBucketIndexToNonZeroBits(1000),
                        encoder.bucketMap[9]),
      "mapBucketIndexToNonZeroBits did not handle index larger than"
      " _maxBuckets")

    e23 = encoder.encode(23.0)
    e6  = encoder.encode(6)
    self.assertEqual((e23 == e6).sum(), encoder.getWidth(),
      "Values not clipped correctly during encoding")

    ep8 = encoder.encode(-8)
    ep7  = encoder.encode(-7)
    self.assertEqual((ep8 == ep7).sum(), encoder.getWidth(),
      "Values not clipped correctly during encoding")

    self.assertEqual(encoder.getBucketIndices(-8)[0], 0,
                     "getBucketIndices returned negative bucket index")
    self.assertEqual(encoder.getBucketIndices(23)[0], encoder._maxBuckets-1,
                     "getBucketIndices returned bucket index that is too"
                     " large")
  def testEncodeInvalidInputType(self):
    encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0,
                                             verbosity=0)
    with self.assertRaises(TypeError):
      encoder.encode("String")
class TemporalMemoryPerformanceTest(unittest.TestCase):

  def setUp(self):
    self.tmPy = TemporalMemoryPy(columnDimensions=[2048],
                                 cellsPerColumn=32,
                                 initialPermanence=0.5,
                                 connectedPermanence=0.8,
                                 minThreshold=10,
                                 maxNewSynapseCount=12,
                                 permanenceIncrement=0.1,
                                 permanenceDecrement=0.05,
                                 activationThreshold=15)

    self.tmCPP = TemporalMemoryCPP(columnDimensions=[2048],
                                   cellsPerColumn=32,
                                   initialPermanence=0.5,
                                   connectedPermanence=0.8,
                                   minThreshold=10,
                                   maxNewSynapseCount=12,
                                   permanenceIncrement=0.1,
                                   permanenceDecrement=0.05,
                                   activationThreshold=15)

    self.tp = TP(numberOfCols=2048,
                 cellsPerColumn=32,
                 initialPerm=0.5,
                 connectedPerm=0.8,
                 minThreshold=10,
                 newSynapseCount=12,
                 permanenceInc=0.1,
                 permanenceDec=0.05,
                 activationThreshold=15,
                 globalDecay=0, burnIn=1,
                 checkSynapseConsistency=False,
                 pamLength=1)

    self.tp10x2 = TP10X2(numberOfCols=2048,
                         cellsPerColumn=32,
                         initialPerm=0.5,
                         connectedPerm=0.8,
                         minThreshold=10,
                         newSynapseCount=12,
                         permanenceInc=0.1,
                         permanenceDec=0.05,
                         activationThreshold=15,
                         globalDecay=0, burnIn=1,
                         checkSynapseConsistency=False,
                         pamLength=1)

    self.scalarEncoder = RandomDistributedScalarEncoder(0.88)


  def testSingleSequence(self):
    print "Test: Single sequence"

    sequence = self._generateSequence()
    times = self._feedAll(sequence)

    self.assertTrue(times[1] < times[0])
    self.assertTrue(times[3] < times[2])


  # ==============================
  # Helper functions
  # ==============================

  def _generateSequence(self):
    sequence = []
    with open(_INPUT_FILE_PATH) as fin:
      reader = csv.reader(fin)
      reader.next()
      reader.next()
      reader.next()
      for _ in xrange(NUM_PATTERNS):
        record = reader.next()
        value = float(record[1])
        encodedValue = self.scalarEncoder.encode(value)
        activeBits = set(encodedValue.nonzero()[0])
        sequence.append(activeBits)
    return sequence


  def _feedAll(self, sequence, learn=True, num=1):
    repeatedSequence = sequence * num

    def tmComputeFn(pattern, instance):
      instance.compute(pattern, learn)

    def tpComputeFn(pattern, instance):
      array = self._patternToNumpyArray(pattern)
      instance.compute(array, enableLearn=learn, computeInfOutput=True)

    modelParams = [
      (self.tmPy, tmComputeFn),
      (self.tmCPP, tmComputeFn),
      (self.tp, tpComputeFn),
      (self.tp10x2, tpComputeFn)
    ]
    times = [0] * len(modelParams)

    for patNum, pattern in enumerate(repeatedSequence):
      for ix, params in enumerate(modelParams):
        times[ix] += self._feedOne(pattern, *params)
      self._printProgressBar(patNum, len(repeatedSequence), 50)

    print
    print "TM (py):\t{0}s".format(times[0])
    print "TM (C++):\t{0}s".format(times[1])
    print "TP:\t\t{0}s".format(times[2])
    print "TP10X2:\t\t{0}s".format(times[3])

    return times


  @staticmethod
  def _feedOne(pattern, instance, computeFn):
    start = time.clock()

    if pattern is None:
      instance.reset()
    else:
      computeFn(pattern, instance)

    elapsed = time.clock() - start

    return elapsed


  @staticmethod
  def _patternToNumpyArray(pattern):
    array = numpy.zeros(2048, dtype='int32')
    array[list(pattern)] = 1

    return array


  @staticmethod
  def _printProgressBar(completed, total, nDots):
    def numberOfDots(n):
      return (n * nDots) // total
    completedDots = numberOfDots(completed)
    if completedDots != numberOfDots(completed - 1):
      print "\r|" + ("." * completedDots) + (" " * (nDots - completedDots)) + "|",
      sys.stdout.flush()
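
# The benchmark above follows a simple pattern: time each compute call and
# accumulate a per-implementation total. A self-contained sketch of that
# harness (time.time() stands in for the Python-2-era time.clock()):
import time

def timed(fn, *args):
    start = time.time()
    fn(*args)
    return time.time() - start

total = 0.0
for _ in range(100):
    total += timed(sum, range(1000))
print("total: {0}s".format(total))
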
  def testOverlapOK(self):
    """
    Test that the internal method _overlapOK works as expected.
    """
    # Create a fake set of encodings.
    encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0,
                                             w=5, n=5*20)
    midIdx = encoder._maxBuckets/2
    encoder.bucketMap[midIdx-3] = numpy.array(range(4, 9)) # Not ok with
                                                           # midIdx-1
    encoder.bucketMap[midIdx-2] = numpy.array(range(3, 8))
    encoder.bucketMap[midIdx-1] = numpy.array(range(4, 9))
    encoder.bucketMap[midIdx]   = numpy.array(range(5, 10))
    encoder.bucketMap[midIdx+1] = numpy.array(range(6, 11))
    encoder.bucketMap[midIdx+2] = numpy.array(range(7, 12))
    encoder.bucketMap[midIdx+3] = numpy.array(range(8, 13))
    encoder.minIndex = midIdx - 3
    encoder.maxIndex = midIdx + 3

    self.assertTrue(encoder._overlapOK(midIdx, midIdx-1),
                    "_overlapOK didn't work")
    self.assertTrue(encoder._overlapOK(midIdx-2, midIdx+3),
                    "_overlapOK didn't work")
    self.assertFalse(encoder._overlapOK(midIdx-3, midIdx-1),
                    "_overlapOK didn't work")

    # We'll just use our own numbers
    self.assertTrue(encoder._overlapOK(100, 50, 0),
                    "_overlapOK didn't work for far values")
    self.assertTrue(encoder._overlapOK(100, 50, encoder._maxOverlap),
                    "_overlapOK didn't work for far values")
    self.assertFalse(encoder._overlapOK(100, 50, encoder._maxOverlap+1),
                     "_overlapOK didn't work for far values")
    self.assertTrue(encoder._overlapOK(50, 50, 5),
                    "_overlapOK didn't work for near values")
    self.assertTrue(encoder._overlapOK(48, 50, 3),
                    "_overlapOK didn't work for near values")
    self.assertTrue(encoder._overlapOK(46, 50, 1),
                    "_overlapOK didn't work for near values")
    self.assertTrue(encoder._overlapOK(45, 50, encoder._maxOverlap),
                    "_overlapOK didn't work for near values")
    self.assertFalse(encoder._overlapOK(48, 50, 4),
                     "_overlapOK didn't work for near values")
    self.assertFalse(encoder._overlapOK(48, 50, 2),
                     "_overlapOK didn't work for near values")
    self.assertFalse(encoder._overlapOK(46, 50, 2),
                     "_overlapOK didn't work for near values")
    self.assertFalse(encoder._overlapOK(50, 50, 6),
                     "_overlapOK didn't work for near values")
  def testCountOverlapIndices(self):
    """
    Test that the internal method _countOverlapIndices works as expected.
    """
    # Create a fake set of encodings.
    encoder = RandomDistributedScalarEncoder(name="encoder", resolution=1.0,
                                             w=5, n=5*20)
    midIdx = encoder._maxBuckets/2
    encoder.bucketMap[midIdx-2] = numpy.array(range(3, 8))
    encoder.bucketMap[midIdx-1] = numpy.array(range(4, 9))
    encoder.bucketMap[midIdx]   = numpy.array(range(5, 10))
    encoder.bucketMap[midIdx+1] = numpy.array(range(6, 11))
    encoder.bucketMap[midIdx+2] = numpy.array(range(7, 12))
    encoder.bucketMap[midIdx+3] = numpy.array(range(8, 13))
    encoder.minIndex = midIdx - 2
    encoder.maxIndex = midIdx + 3

    # Indices must exist
    with self.assertRaises(ValueError):
      encoder._countOverlapIndices(midIdx-3, midIdx-2)
    with self.assertRaises(ValueError):
      encoder._countOverlapIndices(midIdx-2, midIdx-3)

    # Test some overlaps
    self.assertEqual(encoder._countOverlapIndices(midIdx-2, midIdx-2), 5,
                     "_countOverlapIndices didn't work")
    self.assertEqual(encoder._countOverlapIndices(midIdx-1, midIdx-2), 4,
                     "_countOverlapIndices didn't work")
    self.assertEqual(encoder._countOverlapIndices(midIdx+1, midIdx-2), 2,
                     "_countOverlapIndices didn't work")
    self.assertEqual(encoder._countOverlapIndices(midIdx-2, midIdx+3), 0,
                     "_countOverlapIndices didn't work")
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True
  )

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):

      if count >= numRecords: break

      # Convert the date string into a Python datetime object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
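
# The runHotgym variants here all pull their hyperparameters out of a YAML
# model-params file. A self-contained sketch of that access pattern; the
# inline YAML below is illustrative, not the real params file:
import yaml

params_text = """
modelParams:
  spParams:
    columnCount: 2048
  tmParams:
    cellsPerColumn: 32
"""
modelParams = yaml.safe_load(params_text)["modelParams"]
print(modelParams["spParams"]["columnCount"])  # 2048
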
def runHotgym(numRecords):
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    scalarEncoder = RandomDistributedScalarEncoder(
        enParams["consumption"]["resolution"])
    scalarEncoder2 = RandomDistributedScalarEncoder(
        enParams["consumption2"]["resolution"])

    encodingWidth = (scalarEncoder.getWidth() + scalarEncoder2.getWidth())

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = reader.next()
        reader.next()
        reader.next()

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)

        for count, record in enumerate(reader):

            if count >= numRecords: break

            # Date parsing is skipped in this variant; the original line is
            # kept for reference:
            # dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
            # Convert the data value strings into floats.
            prediction = float(record[1])
            prediction2 = float(record[2])

            # To encode, we need to provide zero-filled numpy arrays for the encoders
            # to populate.
            consumptionBits = numpy.zeros(scalarEncoder.getWidth())
            consumptionBits2 = numpy.zeros(scalarEncoder2.getWidth())

            # Now we call the encoders to create bit representations for each value.
            scalarEncoder.encodeIntoArray(prediction, consumptionBits)
            scalarEncoder2.encodeIntoArray(prediction2, consumptionBits2)

            # Concatenate all these encodings into one large encoding for Spatial
            # Pooling.
            encoding = numpy.concatenate([consumptionBits, consumptionBits2])

            # Create an array to represent active columns, all initially zero. This
            # will be populated by the compute method below. It must have the same
            # dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = scalarEncoder.getBucketIndices(prediction)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(recordNum=count,
                                                  patternNZ=activeCells,
                                                  classification={
                                                      "bucketIdx": bucketIdx,
                                                      "actValue": prediction
                                                  },
                                                  learn=True,
                                                  infer=True)

            # Select the best prediction for 1 step out.
            oneStepConfidence, oneStep = sorted(
                zip(classifierResult[1], classifierResult["actualValues"]),
                reverse=True)[0]
            # print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
            # results.append([oneStep, oneStepConfidence * 100, None, None])
            results.append(
                [record[0], prediction, oneStep, oneStepConfidence * 100])
            output.write(record[0], prediction, oneStep,
                         oneStepConfidence * 100)

        output.close()
        return results
def runHotgym(numRecords):
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(spParams["columnCount"],),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=spParams["potentialPct"],
    # This means that the input space has no topology.
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    # Roughly 2%, given that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195).
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    # How quickly synapses grow and degrade.
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=spParams["boostStrength"],
    # Random number generator seed.
    seed=spParams["seed"],
    # TODO: is this useful?
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(tmParams["columnCount"],),
    # How many cells in each mini-column.
    cellsPerColumn=tmParams["cellsPerColumn"],
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # TODO: This comes from the SP params; is this normal?
    connectedPermanence=spParams["synPermConnected"],
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=tmParams["minThreshold"],
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = reader.next()
    reader.next()
    reader.next()

    for count, record in enumerate(reader):

      if count >= numRecords: break

      # Convert the date string into a Python datetime object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
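
# A quick interactive check of the encoder behavior these examples rely on,
# assuming nupic is installed. Exact bucket indices depend on the encoder's
# internal offset and defaults, so only same/different is checked here.
from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder

enc = RandomDistributedScalarEncoder(resolution=0.88)
b1 = enc.getBucketIndices(35.2)  # the first value seen anchors the offset
b2 = enc.getBucketIndices(35.5)  # within resolution/2 of 35.2: same bucket
b3 = enc.getBucketIndices(36.5)  # ~1.5 resolutions away: a different bucket
print(b1 == b2, b1 == b3)  # True False
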