Example #1
0
    def initialize(self):
        """Create the encoders, spatial pooler, extended temporal memory and
        anomaly-likelihood tracker, storing all of them on ``self``.

        Reads ``self.inputMin``, ``self.inputMax`` and
        ``self.probationaryPeriod``.
        """
        # Scalar encoder resolution: the input range, padded by 20% on both
        # sides, split into 130 buckets and never finer than 0.001.
        padding = 0.2 * abs(self.inputMax - self.inputMin)
        lowerBound = self.inputMin - padding
        if self.inputMin != self.inputMax:
            upperBound = self.inputMax + padding
        else:
            # Constant input: use a range of width 1 instead of width 0.
            upperBound = self.inputMin + 1
        bucketCount = 130.0
        encoderResolution = max(0.001,
                                (upperBound - lowerBound) / bucketCount)

        self.valueEncoder = RandomDistributedScalarEncoder(encoderResolution,
                                                           w=41,
                                                           seed=42)
        valueWidth = self.valueEncoder.getWidth()
        self.encodedValue = np.zeros(valueWidth, dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        timestampWidth = self.timestampEncoder.getWidth()
        self.encodedTimestamp = np.zeros(timestampWidth, dtype=np.uint32)

        # The spatial pooler is sized for the value encoding only; the
        # timestamp width is used below as the ETM's basal input dimension.
        columnCount = 2048
        self.sp = SpatialPooler(
            inputDimensions=[valueWidth],
            columnDimensions=[columnCount],
            potentialRadius=valueWidth,
            potentialPct=0.8,
            globalInhibition=True,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956)
        self.spOutput = np.zeros(columnCount, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(
            columnDimensions=(columnCount, ),
            basalInputDimensions=(timestampWidth, ),
            cellsPerColumn=1,
            activationThreshold=13,
            minThreshold=10,
            initialPermanence=0.21,
            permanenceIncrement=0.1,
            permanenceDecrement=0.1,
            maxNewSynapseCount=20,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=32,
            seed=1960,
            checkInputs=False)

        # Half of the probationary period (rounded down) is the learning
        # period; the remainder is used as estimation samples.
        halfProbation = math.floor(self.probationaryPeriod / 2.0)
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=halfProbation,
            estimationSamples=self.probationaryPeriod - halfProbation,
            reestimationPeriod=100)
Example #2
0
    def setUp(self):
        """Build the encoder under test, a reference datetime, its encoding
        and the bit pattern that encoding is expected to match."""
        # 3 bits for season, 1 bit for day of week, 1 for weekend, 5 for time
        # of day.
        self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)

        # 4th Nov 2010, 14:55 -- middle of fall, a Thursday, not a weekend,
        # in the afternoon.
        self._d = datetime.datetime(2010, 11, 4, 14, 55)
        self._bits = self._e.encode(self._d)

        # Season is aaabbbcccddd (1 bit/month) # TODO should be <<3?
        # Expected 000000000111 (centered on month 11 - Nov).
        seasonBits = [0] * 9 + [1] * 3

        # Week is MTWTFSS. Contrary to localtime documentation, Monday = 0
        # (for python datetime.datetime.timetuple()), so Thursday is index 3.
        weekdayBits = [0] * 7
        weekdayBits[3] = 1

        # Not a weekend, so it should be "False".
        weekendBits = [1, 0]

        # Time of day has radius of 4 hours and w of 5, so each bit covers
        # 240/5 min = 48 min. 14:55 is minute 14*60 + 55 = 895 and
        # 895/48 = bit 18.6. 30 bits total (30 * 48 minutes = 24 hours).
        timeOfDayBits = [0] * 16 + [1] * 5 + [0] * 9

        allBits = seasonBits + weekdayBits + weekendBits + timeOfDayBits
        self._expected = numpy.array(allBits, dtype=defaultDtype)
Example #3
0
def smart_encode(data_fl):
    """Choose encoders for every datetime and float column of a DataFrame.

    For a ``datetime64[ns]`` column the sampling interval is estimated from
    its first two rows (by position) and selects the date sub-encoders:

    * interval >= one average month: season only;
    * interval >= one day: season, day of week and weekend;
    * anything finer: season, day of week, weekend and time of day.

    A ``float64`` column gets a ``RandomDistributedScalarEncoder`` whose
    resolution is the column's value range divided by ``400 - 21``
    (presumably the encoder's default ``n`` and ``w`` -- confirm).
    Columns of any other dtype are skipped.

    :param data_fl: pandas DataFrame to inspect.
    :return: list with one entry per encoded column, in column order; each
        entry is itself a list of encoder instances.
    :raises IndexError: if a datetime column has fewer than two rows.
    """
    # One average Gregorian month (365.2425 / 12 days). Written in days
    # because pandas no longer accepts the ambiguous unit 'M' for Timedelta;
    # this is the value that unit used to resolve to.
    one_month = pd.Timedelta(days=30.436875)
    one_day = pd.Timedelta(days=1)

    encoder_list = []
    for column in data_fl.columns:
        series = data_fl[column]
        if series.dtype == 'M8[ns]':
            # Positional access: label lookups such as series[0] fail (or
            # pick the wrong rows) on any index other than the default 0..n-1.
            time_delta = series.iloc[1] - series.iloc[0]
            if time_delta >= one_month:
                encoder_list.append([DateEncoder(season=(5, 1))])
            elif time_delta >= one_day:
                encoder_list.append([
                    DateEncoder(season=(21)),
                    DateEncoder(dayOfWeek=(21, 1)),
                    DateEncoder(weekend=5),
                ])
            else:
                encoder_list.append([
                    DateEncoder(season=(5, 1)),
                    DateEncoder(dayOfWeek=(5, 1)),
                    DateEncoder(weekend=5),
                    DateEncoder(timeOfDay=(5, 1)),
                ])
        elif series.dtype == "float":
            col_range = series.max() - series.min()
            res = col_range / (400 - 21)
            encoder_list.append([RandomDistributedScalarEncoder(res)])
    return encoder_list
Example #4
0
  def setUp(self):
    """Build the encoder under test, a reference datetime, its encoding and
    the bit pattern that encoding is expected to match."""
    # 3 bits for season, 1 bit for day of week, 1 for weekend, 5 for time of
    # day.
    self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)

    # 4th Nov 2010, 14:55 -- middle of fall, a Thursday, not a weekend, in
    # the afternoon.
    self._d = datetime.datetime(2010, 11, 4, 14, 55)
    self._bits = self._e.encode(self._d)

    # Season is aaabbbcccddd (1 bit/month) # TODO should be <<3?
    # Expected 000000000111 (centered on month 11 - Nov).
    seasonBits = [0] * 9 + [1] * 3

    # Week is MTWTFSS. Contrary to localtime documentation, Monday = 0 (for
    # python datetime.datetime.timetuple()), so Thursday is index 3.
    weekdayBits = [0] * 7
    weekdayBits[3] = 1

    # Not a weekend, so it should be "False".
    weekendBits = [1, 0]

    # Time of day has radius of 4 hours and w of 5, so each bit covers
    # 240/5 min = 48 min. 14:55 is minute 14*60 + 55 = 895 and
    # 895/48 = bit 18.6. 30 bits total (30 * 48 minutes = 24 hours).
    timeOfDayBits = [0] * 16 + [1] * 5 + [0] * 9

    allBits = seasonBits + weekdayBits + weekendBits + timeOfDayBits
    self._expected = numpy.array(allBits, dtype=defaultDtype)
  def initialize(self):
    """Create the encoders, spatial pooler and temporal memory, plus the
    anomaly-likelihood tracker when ``self.useLikelihood`` is set.

    Reads ``self.inputMin``, ``self.inputMax``, ``self.useLikelihood`` and
    ``self.probationaryPeriod``; everything created is stored on ``self``.
    """
    # The RDSE resolution is specific to the data stream: the input range,
    # padded by 20% on both sides, split into 130 buckets and never finer
    # than 0.001.
    padding = 0.2 * abs(self.inputMax - self.inputMin)
    lowerBound = self.inputMin - padding
    if self.inputMin != self.inputMax:
      upperBound = self.inputMax + padding
    else:
      # Constant input: use a range of width 1 instead of width 0.
      upperBound = self.inputMin + 1
    bucketCount = 130.0
    encoderResolution = max(0.001, (upperBound - lowerBound) / bucketCount)

    self.valueEncoder = RandomDistributedScalarEncoder(encoderResolution,
                                                       seed=42)
    valueWidth = self.valueEncoder.getWidth()
    self.encodedValue = np.zeros(valueWidth, dtype=np.uint32)

    # Timestamp encoder (time of day only).
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
    timestampWidth = self.timestampEncoder.getWidth()
    self.encodedTimestamp = np.zeros(timestampWidth, dtype=np.uint32)

    # The spatial pooler input is sized for both encodings together.
    combinedWidth = timestampWidth + valueWidth
    columnCount = 2048

    self.sp = SpatialPooler(
      inputDimensions=[combinedWidth],
      columnDimensions=[columnCount],
      potentialRadius=combinedWidth,
      potentialPct=0.8,
      globalInhibition=True,
      numActiveColumnsPerInhArea=40,
      synPermInactiveDec=0.0005,
      synPermActiveInc=0.003,
      synPermConnected=0.2,
      boostStrength=0.0,
      seed=1956)
    self.spOutput = np.zeros(columnCount, dtype=np.float32)

    self.tm = TemporalMemory(
      columnDimensions=(columnCount,),
      cellsPerColumn=32,
      activationThreshold=20,
      minThreshold=13,
      initialPermanence=0.24,
      permanenceIncrement=0.04,
      permanenceDecrement=0.008,
      maxNewSynapseCount=31,
      maxSegmentsPerCell=128,
      maxSynapsesPerSegment=128,
      seed=1960)

    if not self.useLikelihood:
      return

    # Half of the probationary period (rounded down) is the learning period;
    # the remainder is used as estimation samples.
    halfProbation = math.floor(self.probationaryPeriod / 2.0)
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
      claLearningPeriod=halfProbation,
      estimationSamples=self.probationaryPeriod - halfProbation,
      reestimationPeriod=100)
Example #6
0
  def testHoliday(self):
    """Look at holiday more carefully because of the smooth transition."""
    e = DateEncoder(holiday=5)
    holiday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype='uint8')
    notholiday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype='uint8')
    # Pattern expected part-way between "not holiday" and "holiday".
    holiday2 = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype='uint8')

    cases = (
      (datetime.datetime(2010, 12, 25, 4, 55), holiday),
      (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
      (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
      (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
    )
    for d, expected in cases:
      # array_equal also compares shapes, so a wrong-length encoding fails
      # the assertion instead of erroring out or passing via broadcasting.
      self.assertTrue(numpy.array_equal(e.encode(d), expected),
                      "unexpected holiday encoding for %s" % d)
Example #7
0
  def testHoliday(self):
    """Look at holiday more carefully because of the smooth transition."""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    e = DateEncoder(holiday=5, forced=True)
    holiday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype='uint8')
    notholiday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype='uint8')
    # Pattern expected part-way between "not holiday" and "holiday".
    holiday2 = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype='uint8')

    cases = (
      (datetime.datetime(2010, 12, 25, 4, 55), holiday),
      (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
      (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
      (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
    )
    for d, expected in cases:
      # array_equal also compares shapes, so a wrong-length encoding fails
      # the assertion instead of erroring out or passing via broadcasting.
      self.assertTrue(numpy.array_equal(e.encode(d), expected),
                      "unexpected holiday encoding for %s" % d)
Example #8
0
  def testHoliday(self):
    """Check holiday encodings on, near and away from 25 December, since the
    encoder transitions smoothly between the two states."""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    encoder = DateEncoder(holiday=5, forced=True)

    onHoliday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype="uint8")
    offHoliday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype="uint8")
    # Pattern expected part-way between the two states above.
    inTransition = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype="uint8")

    expectations = [
      (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
      (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
      (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
      (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
    ]
    for moment, expectedBits in expectations:
      encoded = encoder.encode(moment)
      self.assertTrue(numpy.array_equal(encoded, expectedBits))
Example #9
0
  def testWeekend(self):
    """Compare a Fri/Sat/Sun customDays encoder with the weekend encoder over
    301 consecutive days, and check a Monday-only encoder's decoding."""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    customWeekend = DateEncoder(customDays=(21, ["sat", "sun", "fri"]),
                                forced=True)
    mondayOnly = DateEncoder(customDays=(21, "Monday"), forced=True)
    builtinWeekend = DateEncoder(weekend=(21, 1), forced=True)

    firstDay = datetime.datetime(1988, 5, 29, 20, 0)
    oneDay = datetime.timedelta(days=1)

    self.assertTrue(numpy.array_equal(customWeekend.encode(firstDay),
                                      builtinWeekend.encode(firstDay)))

    for offset in range(1, 301):
      day = firstDay + offset * oneDay
      self.assertTrue(numpy.array_equal(customWeekend.encode(day),
                                        builtinWeekend.encode(day)))

      # The Monday-only encoder must decode to 1.0 exactly on Mondays
      # (weekday() == 0).
      decodedFields = mondayOnly.decode(mondayOnly.encode(day))[0]
      decodedAsMonday = decodedFields["Monday"][0][0][0] == 1.0
      if decodedAsMonday:
        self.assertEqual(day.weekday(), 0)
      else:
        self.assertNotEqual(day.weekday(), 0)
Example #10
0
 def __init__(self):
     """Create one encoder per input field and then call ``self.run()``."""
     # Settings shared by the non-periodic scalar fields.
     narrowScalar = dict(w=3, periodic=False)

     # Position: latitude is bounded, longitude is declared periodic.
     self.lat = ScalarEncoder(name='latitude', n=100,
                              minval=-90, maxval=90, **narrowScalar)
     self.long = ScalarEncoder(name='longitude', w=3, n=100,
                               minval=-180, maxval=180, periodic=True)

     # Timestamp: day of week, weekend and time of day; season is passed
     # as 0 (presumably disabling that sub-encoder -- confirm).
     self.timeenc = DateEncoder(season=0, dayOfWeek=1, weekend=3,
                                timeOfDay=5)

     # Counters.
     self.likes = ScalarEncoder(name='likes', n=50,
                                minval=0, maxval=100000, **narrowScalar)
     self.people = ScalarEncoder(name='numpeople', n=20,
                                 minval=0, maxval=100, **narrowScalar)

     # Categories; no category list is supplied up front.
     self.categories = SDRCategoryEncoder(n=87, w=3, categoryList=None,
                                          name="cats", verbosity=0)

     self.run()
Example #11
0
    def testHoliday(self):
        """Look at holiday more carefully because of the smooth transition."""
        e = DateEncoder(holiday=5)
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype='uint8')
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype='uint8')
        # Pattern expected part-way between "not holiday" and "holiday".
        holiday2 = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype='uint8')

        cases = (
            (datetime.datetime(2010, 12, 25, 4, 55), holiday),
            (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
            (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
            (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
        )
        for d, expected in cases:
            # array_equal also compares shapes, so a wrong-length encoding
            # fails the assertion instead of erroring out or passing via
            # broadcasting.
            self.assertTrue(numpy.array_equal(e.encode(d), expected),
                            "unexpected holiday encoding for %s" % d)
Example #12
0
  def testReadWrite(self):
    """Serialize the encoder through a proto written to a temp file and
    check that the encoder read back matches the original."""
    sampleTime = datetime.datetime(1997, 8, 29, 2, 14)
    sampleEncoding = self._e.encode(sampleTime)

    outgoing = DateEncoderProto.new_message()
    self._e.write(outgoing)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as tmp:
      outgoing.write(tmp)
      tmp.seek(0)
      incoming = DateEncoderProto.read(tmp)

    restored = DateEncoder.read(incoming)
    self.assertIsInstance(restored, DateEncoder)

    # Plain attributes must survive the round trip unchanged.
    for attrName in ("width", "weekendOffset", "timeOfDayOffset",
                     "seasonOffset", "dayOfWeekOffset"):
      self.assertEqual(getattr(restored, attrName),
                       getattr(self._e, attrName))

    # Each sub-encoder must come back as the same class as the original's.
    for encoderName in ("customDaysEncoder", "dayOfWeekEncoder",
                        "seasonEncoder", "timeOfDayEncoder",
                        "weekendEncoder"):
      self.assertIsInstance(getattr(restored, encoderName),
                            getattr(self._e, encoderName).__class__)

    # Encodings produced by the restored encoder match the original ones.
    self.assertTrue(numpy.array_equal(self._bits, restored.encode(self._d)))
    self.assertTrue(numpy.array_equal(restored.encode(sampleTime),
                                      sampleEncoding))

    # Decoding is interchangeable between the two encoders.
    decodedByOriginal = self._e.decode(restored.encode(self._d))
    decodedByRestored = restored.decode(self._e.encode(self._d))
    self.assertEqual(decodedByOriginal, decodedByRestored)
Example #13
0
    def testReadWrite(self):
        """Serialize the encoder through a proto written to a temp file and
        check that the encoder read back matches the original."""
        sampleTime = datetime.datetime(1997, 8, 29, 2, 14)
        sampleEncoding = self._e.encode(sampleTime)

        outgoing = DateEncoderProto.new_message()
        self._e.write(outgoing)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as tmp:
            outgoing.write(tmp)
            tmp.seek(0)
            incoming = DateEncoderProto.read(tmp)

        restored = DateEncoder.read(incoming)
        self.assertIsInstance(restored, DateEncoder)

        # Plain attributes must survive the round trip unchanged.
        for attrName in ("width", "weekendOffset", "timeOfDayOffset",
                         "seasonOffset", "dayOfWeekOffset"):
            self.assertEqual(getattr(restored, attrName),
                             getattr(self._e, attrName))

        # Each sub-encoder must come back as the same class as the
        # original's.
        for encoderName in ("customDaysEncoder", "dayOfWeekEncoder",
                            "seasonEncoder", "timeOfDayEncoder",
                            "weekendEncoder"):
            self.assertIsInstance(getattr(restored, encoderName),
                                  getattr(self._e, encoderName).__class__)

        # Encodings produced by the restored encoder match the original ones.
        self.assertTrue(
            numpy.array_equal(self._bits, restored.encode(self._d)))
        self.assertTrue(
            numpy.array_equal(restored.encode(sampleTime), sampleEncoding))

        # Decoding is interchangeable between the two encoders.
        decodedByOriginal = self._e.decode(restored.encode(self._d))
        decodedByRestored = restored.decode(self._e.encode(self._d))
        self.assertEqual(decodedByOriginal, decodedByRestored)
Example #14
0
    def testHoliday(self):
        """Look at holiday more carefully because of the smooth transition."""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        e = DateEncoder(holiday=5, forced=True)
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype='uint8')
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype='uint8')
        # Pattern expected part-way between "not holiday" and "holiday".
        holiday2 = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype='uint8')

        cases = (
            (datetime.datetime(2010, 12, 25, 4, 55), holiday),
            (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
            (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
            (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
        )
        for d, expected in cases:
            # array_equal also compares shapes, so a wrong-length encoding
            # fails the assertion instead of erroring out or passing via
            # broadcasting.
            self.assertTrue(numpy.array_equal(e.encode(d), expected),
                            "unexpected holiday encoding for %s" % d)
  def testHolidayMultiple(self):
    """Check the holiday encoding when several holidays are configured, given
    both as (month, day) and as (year, month, day)."""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    configuredHolidays = [(12, 25), (2018, 4, 1), (2017, 4, 16)]
    encoder = DateEncoder(holiday=5, forced=True,
                          holidays=configuredHolidays)

    onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
    offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")

    expectations = [
      (datetime.datetime(2011, 12, 25, 4, 55), onHoliday),
      (datetime.datetime(2007, 12, 2, 4, 55), offHoliday),
      (datetime.datetime(2018, 4, 1, 16, 10), onHoliday),
      (datetime.datetime(2017, 4, 16, 16, 10), onHoliday),
    ]
    for moment, expectedBits in expectations:
      encoded = encoder.encode(moment)
      self.assertTrue(numpy.array_equal(encoded, expectedBits))
Example #16
0
    def testHoliday(self):
        """Check holiday encodings on, near and away from 25 December, since
        the encoder transitions smoothly between the two states."""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        encoder = DateEncoder(holiday=5, forced=True)

        onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                                dtype="uint8")
        offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                                 dtype="uint8")
        # Pattern expected part-way between the two states above.
        inTransition = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
                                   dtype="uint8")

        expectations = [
            (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
            (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
            (datetime.datetime(1999, 12, 26, 8, 0), inTransition),
            (datetime.datetime(2011, 12, 24, 16, 0), inTransition),
        ]
        for moment, expectedBits in expectations:
            encoded = encoder.encode(moment)
            self.assertTrue(numpy.array_equal(encoded, expectedBits))
Example #17
0
def HTM_AD(
        Data='Test',
        vars={'value': ['num']},
        prec_param=5,
        pooler_out=2024,  # Number of columns of the pooler output
        cell_col=5,  # HTM cells per column
        W=72,  # Window parameter
        W_prim=5,  # Local window for anomaly detection likelihood
        eps=1e-6,  # to Avoid by zero divisions
        athreshold=0.95):
    """
    Perform HTM-based anomaly detection on a time series.

    :param Data: DataFrame holding the series, or the string 'Test' to load
        the bundled sample CSV (resampled hourly). A DataFrame passed in is
        modified in place (result columns are added) and also returned.
    :param vars: Mapping of column name -> list of encoder kinds for that
        column. Possible kinds: num, tod, weekend. The default dict is shared
        between calls and is never mutated here.
    :param prec_param: A parameter that defines how much precision the number
        encoder has. The encoder resolution is the column's standard
        deviation divided by this value, so a high precision might mean a
        high error at predicting the variable value in noisy variables.
    :param pooler_out: Number of columns of the pooler output
    :param cell_col: HTM cells per column
    :param W: Window parameter
    :param W_prim: Local window for anomaly detection likelihood
    :param eps: Lower bound for the standard deviation, to avoid divisions by
        zero
    :param athreshold: Anomaly likelihood above which a row is flagged as
        anomalous
    :return: ``{"error": ...}`` if an encoder kind is not recognized;
        otherwise the Data + 3 columns
        Anomaly: fraction of active columns that were not predicted
        Anomaly_Likelihood: likelihood of the data being anomalous
        Anomaly_flag: classifies the data in anomalous vs non anomalous
    """

    # Only a string can be the 'Test' sentinel. Comparing a DataFrame with
    # == is element-wise and cannot be used as an ``if`` condition.
    if isinstance(Data, str) and Data == 'Test':
        # No data supplied: load the temperature benchmark dataset.
        Data = pd.read_csv('anomaly_API/Data/sample.csv',
                           parse_dates=True,
                           index_col='timestamp')
        Data = Data.resample('H').bfill().interpolate()

    # Reject unknown encoder kinds before any encoder (or module-level
    # RDSE_* global) has been created.
    for kinds in vars.values():
        for kind in kinds:
            if kind not in ('num', 'weekend', 'tod'):
                return {"error": "Variable encoder type is not recognized "}

    # NOTE: these two local names are referenced by name from the exec()
    # calls below (through locals()); do not rename them.
    TODE = DateEncoder(timeOfDay=(21, 1))
    WENDE = DateEncoder(weekend=21)

    var_encoders = set()
    for x in vars:
        for y in vars[x]:
            if y == 'num':
                # NOTE(review): the column name is spliced into executed
                # source and the encoder is published as a module-level
                # global named RDSE_<column>. Never pass untrusted column
                # names; the helpers defined elsewhere presumably look the
                # encoder up by that name, so the mechanism is kept as is.
                exec(
                    "RDSE_" + x +
                    " = RandomDistributedScalarEncoder(resolution=Data['" + x +
                    "'].std()/prec_param)", locals(), globals())
                var_encoders.add(Encoder(x, ["RDSE_" + x]))
            elif y == 'weekend':
                var_encoders.add(Encoder(x, ["WENDE"]))
            elif y == 'tod':
                var_encoders.add(Encoder(x, ["TODE"]))

    # Total input width: sum of the widths of every encoder in use. Each
    # exec() stores its result in the module-level name ``s``.
    encoder_width = 0
    for var_encoder in var_encoders:
        for encoder_name in var_encoder.encoders:
            exec("s = " + encoder_name + ".getWidth()", locals(), globals())
            encoder_width += s

    SP = SpatialPooler(
        inputDimensions=encoder_width,
        columnDimensions=pooler_out,
        potentialPct=0.8,
        globalInhibition=True,
        numActiveColumnsPerInhArea=pooler_out //
        50,  # Gets 2% of the total area
        boostStrength=1.0,
        wrapAround=False)
    TM = TemporalMemory(columnDimensions=(pooler_out, ),
                        cellsPerColumn=cell_col)

    Data['Anomaly'] = 0.0
    Data['Anomaly_Likelihood'] = 0.0

    n_rows = len(Data)

    # Train Spatial Pooler
    print("Spatial pooler learning")

    start = time.time()

    active_columns = np.zeros(pooler_out)

    for row in range(n_rows):
        encoder = multiencode(var_encoders, Data, row)
        SP.compute(encoder, True, active_columns)

    end = time.time()
    print(end - start)

    # Temporal pooler
    print("Temporal pooler learning")

    start = time.time()

    A_score = np.zeros(n_rows)
    Prev_pred_col = set()
    for row in range(n_rows):
        encoder = multiencode(var_encoders, Data, row)
        SP.compute(encoder, False, active_columns)
        col_index = active_columns.nonzero()[0]
        TM.compute(col_index, learn=True)
        if row > 0:
            active_l = len(col_index)
            # With no active columns the score stays 0.0 instead of
            # dividing by zero.
            if active_l:
                inter_l = len(Prev_pred_col.intersection(col_index))
                # float() keeps this a true division on Python 2 as well.
                A_score[row] = 1 - float(inter_l) / active_l
        # Columns predicted for the next row: cell index -> column index.
        Prev_pred_col = set(
            cell // cell_col for cell in TM.getPredictiveCells())

    # Written by column name, so the result does not depend on where the
    # column sits in the frame. Row 0 keeps its initial score of 0.0.
    Data['Anomaly'] = A_score

    end = time.time()
    print(end - start)

    AL_score = np.zeros(n_rows)
    # Computes the likelihood of the anomaly: how far the short-window mean
    # is from the long-window mean, in long-window standard deviations.
    for row in range(1, n_rows):
        W_vec = A_score[max(0, row - W):row]
        W_prim_vec = A_score[max(0, row - W_prim):row]
        AL_score[row] = 1 - 2 * norm.sf(
            abs(np.mean(W_vec) - np.mean(W_prim_vec)) /
            max(np.std(W_vec), eps))
    Data['Anomaly_Likelihood'] = AL_score

    Data['Anomaly_flag'] = athreshold < Data['Anomaly_Likelihood']

    return Data
from nupic.encoders.date import DateEncoder
from nupic.encoders.random_distributed_scalar import \
    RandomDistributedScalarEncoder

# Module-level encoders: one for the time of day, one for the weekend flag
# and one random-distributed scalar encoder with a resolution of 0.88.
timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1))
weekendEncoder = DateEncoder(weekend=21)
scalarEncoder = RandomDistributedScalarEncoder(resolution=0.88)
Example #19
0
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
    """The 'numenta' detector, with the following changes:

    - Use pure Temporal Memory, not the classic TP that uses backtracking.
    - Don't spatial pool the timestamp. Pass it in as distal input.
    - 1 cell per column.
    - Use w=41 in the scalar encoding, rather than w=21, to make up for the
      lost timestamp input to the spatial pooler.
    """
    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base detector and declare the
        attributes that ``initialize`` fills in later."""
        super(DistalTimestamps1CellPerColumnDetector,
              self).__init__(*args, **kwargs)

        # Encoders and their reusable output buffers.
        self.valueEncoder = None
        self.encodedValue = None
        self.timestampEncoder = None
        self.encodedTimestamp = None

        # Active timestamp bits for the current and the previous record.
        self.activeExternalCells = []
        self.prevActiveExternalCells = []

        # Algorithm instances and the spatial pooler's output buffer.
        self.sp = None
        self.spOutput = None
        self.etm = None
        self.anomalyLikelihood = None

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def initialize(self):
        """Create the encoders, spatial pooler, extended temporal memory and
        anomaly-likelihood tracker.

        Reads ``self.inputMin``, ``self.inputMax`` and
        ``self.probationaryPeriod``.
        """
        # Scalar encoder resolution: the input range, padded by 20% on both
        # sides, split into 130 buckets and never finer than 0.001.
        padding = 0.2 * abs(self.inputMax - self.inputMin)
        lowerBound = self.inputMin - padding
        if self.inputMin != self.inputMax:
            upperBound = self.inputMax + padding
        else:
            # Constant input: use a range of width 1 instead of width 0.
            upperBound = self.inputMin + 1
        bucketCount = 130.0
        encoderResolution = max(0.001,
                                (upperBound - lowerBound) / bucketCount)

        self.valueEncoder = RandomDistributedScalarEncoder(encoderResolution,
                                                           w=41,
                                                           seed=42)
        valueWidth = self.valueEncoder.getWidth()
        self.encodedValue = np.zeros(valueWidth, dtype=np.uint32)

        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        timestampWidth = self.timestampEncoder.getWidth()
        self.encodedTimestamp = np.zeros(timestampWidth, dtype=np.uint32)

        # Only the value encoding goes through the spatial pooler; the
        # timestamp encoding is the ETM's basal input.
        columnCount = 2048
        self.sp = SpatialPooler(
            inputDimensions=[valueWidth],
            columnDimensions=[columnCount],
            potentialRadius=valueWidth,
            potentialPct=0.8,
            globalInhibition=True,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956)
        self.spOutput = np.zeros(columnCount, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(
            columnDimensions=(columnCount, ),
            basalInputDimensions=(timestampWidth, ),
            cellsPerColumn=1,
            activationThreshold=13,
            minThreshold=10,
            initialPermanence=0.21,
            permanenceIncrement=0.1,
            permanenceDecrement=0.1,
            maxNewSynapseCount=20,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=32,
            seed=1960,
            checkInputs=False)

        # Half of the probationary period (rounded down) is the learning
        # period; the remainder is used as estimation samples.
        halfProbation = math.floor(self.probationaryPeriod / 2.0)
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=halfProbation,
            estimationSamples=self.probationaryPeriod - halfProbation,
            reestimationPeriod=100)

    def handleRecord(self, inputData):
        """Process one record and return ``(logScore, rawScore)``.

        ``inputData`` must provide the keys "value" and "timestamp".
        ``rawScore`` is the fraction of active columns that were not
        predicted; ``logScore`` is the log-likelihood computed from the
        anomaly probability of that raw score.
        """
        value = inputData["value"]
        timestamp = inputData["timestamp"]

        self.valueEncoder.encodeIntoArray(value, self.encodedValue)
        self.timestampEncoder.encodeIntoArray(timestamp,
                                              self.encodedTimestamp)

        # Shift the timestamp bits: the current ones become the previous
        # ones.
        self.prevActiveExternalCells = self.activeExternalCells
        self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

        self.sp.compute(self.encodedValue, True, self.spOutput)
        activeColumns = self.spOutput.nonzero()[0]

        # Columns the ETM predicted before seeing this record.
        predictedColumns = set()
        for cell in self.etm.getPredictiveCells():
            predictedColumns.add(self.etm.columnForCell(cell))

        unpredictedColumns = set(activeColumns.tolist()) - predictedColumns
        rawScore = len(unpredictedColumns) / float(len(activeColumns))

        probability = self.anomalyLikelihood.anomalyProbability(
            value, rawScore, timestamp)
        logScore = self.anomalyLikelihood.computeLogLikelihood(probability)

        # The ETM is advanced only after scoring, so the score reflects
        # predictions made before this record was seen.
        self.etm.compute(
            activeColumns,
            activeCellsExternalBasal=self.activeExternalCells,
            reinforceCandidatesExternalBasal=self.prevActiveExternalCells,
            growthCandidatesExternalBasal=self.prevActiveExternalCells)

        return (logScore, rawScore)
Example #20
0
def runHotgym():
  """Run the encoders -> Spatial Pooler -> Temporal Memory -> SDR classifier
  pipeline over the CSV file at ``_INPUT_FILE_PATH``.

  The first three lines of the file are skipped. Every remaining record is
  expected to hold a "%m/%d/%y %H:%M" timestamp in column 0 and a float in
  column 1. For each record the best 1-step prediction and its probability
  are printed.
  """
  columnCount = 2048

  timeOfDayEncoder = DateEncoder(timeOfDay=(21,1))
  weekendEncoder = DateEncoder(weekend=21)
  scalarEncoder = RandomDistributedScalarEncoder(0.88)

  timeOfDayWidth = timeOfDayEncoder.getWidth()
  weekendWidth = weekendEncoder.getWidth()
  scalarWidth = scalarEncoder.getWidth()
  encodingWidth = timeOfDayWidth + weekendWidth + scalarWidth

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(columnCount,),
    # What percent of the columns's receptive field is available for potential
    # synapses?
    potentialPct=0.85,
    # This means that the input space has no topology.
    globalInhibition=True,
    localAreaDensity=-1.0,
    # Roughly 2%, giving that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195)
    numActiveColumnsPerInhArea=40.0,
    # How quickly synapses grow and degrade.
    synPermInactiveDec=0.005,
    synPermActiveInc=0.04,
    synPermConnected=0.1,
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=3.0,
    # Random number generator seed.
    seed=1956,
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(columnCount,),
    # How many cells in each mini-column.
    cellsPerColumn=32,
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=16,
    initialPermanence=0.21,
    connectedPermanence=0.5,
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=12,
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=20,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=128,
    maxSynapsesPerSegment=32,
    seed=1960
  )

  classifier = SDRClassifierFactory.create()

  with open(_INPUT_FILE_PATH) as fin:
    reader = csv.reader(fin)
    # Skip the three leading (presumably header) lines. The built-in next()
    # works on Python 2 and 3, unlike the Python-2-only reader.next().
    for _ in range(3):
      next(reader)

    for count, record in enumerate(reader):
      # Convert data string into Python date object.
      timestamp = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayWidth)
      weekendBits = numpy.zeros(weekendWidth)
      consumptionBits = numpy.zeros(scalarWidth)

      # Now we call the encoders create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
      weekendEncoder.encodeIntoArray(timestamp, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(columnCount)

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out.
      probability, value = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(value, probability * 100))
class DistalTimestamps1CellPerColumnDetector(AnomalyDetector):
  """A variation on the 'numenta' detector.

  Differences from the baseline, as configured in this class:

  - Pure Temporal Memory is used instead of the classic TP with backtracking.
  - The timestamp is not spatially pooled; it is handed to the temporal
    memory as external basal (distal) input.
  - Each column has a single cell.
  - The scalar encoder uses w=41 rather than w=21, to make up for the
    timestamp bits that no longer reach the spatial pooler.
  """
  def __init__(self, *args, **kwargs):
    super(DistalTimestamps1CellPerColumnDetector, self).__init__(*args,
                                                                 **kwargs)

    # Encoders and the buffers they write into; created by initialize().
    self.valueEncoder = None
    self.timestampEncoder = None
    self.encodedValue = None
    self.encodedTimestamp = None

    # Indices of the active timestamp bits for the current and the previous
    # record; handleRecord() shifts current -> previous on every call.
    self.activeExternalCells = []
    self.prevActiveExternalCells = []

    # Algorithm instances and the SP output buffer; created by initialize().
    self.sp = None
    self.spOutput = None
    self.etm = None
    self.anomalyLikelihood = None


  def getAdditionalHeaders(self):
    """Returns the names of the extra output columns (a list of strings)."""
    extraHeaders = ["raw_score"]
    return extraHeaders


  def initialize(self):
    """Creates the encoders, spatial pooler, temporal memory and the
    anomaly-likelihood estimator from inputMin/inputMax/probationaryPeriod.
    """
    # Widen the observed range by 20% on each side. A degenerate range
    # (min == max) gets an upper bound of min + 1 instead.
    padding = abs(self.inputMax - self.inputMin) * 0.2
    lowerBound = self.inputMin - padding
    if self.inputMin != self.inputMax:
      upperBound = self.inputMax + padding
    else:
      upperBound = self.inputMin + 1

    # Spread the padded range over 130 buckets, but never go finer than 0.001.
    bucketCount = 130.0
    resolution = max(0.001, (upperBound - lowerBound) / bucketCount)

    self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                       w=41,
                                                       seed=42)
    inputWidth = self.valueEncoder.getWidth()
    self.encodedValue = np.zeros(inputWidth, dtype=np.uint32)

    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
    timestampWidth = self.timestampEncoder.getWidth()
    self.encodedTimestamp = np.zeros(timestampWidth, dtype=np.uint32)

    columnCount = 2048

    # Only the scalar value is spatially pooled.
    self.sp = SpatialPooler(
      globalInhibition=True,
      columnDimensions=[columnCount],
      inputDimensions=[inputWidth],
      potentialRadius=inputWidth,
      numActiveColumnsPerInhArea=40,
      seed=1956,
      potentialPct=0.8,
      boostStrength=0.0,
      synPermActiveInc=0.003,
      synPermConnected=0.2,
      synPermInactiveDec=0.0005,
    )
    self.spOutput = np.zeros(columnCount, dtype=np.float32)

    # The timestamp encoding is the temporal memory's basal input.
    self.etm = ExtendedTemporalMemory(
      activationThreshold=13,
      cellsPerColumn=1,
      columnDimensions=(columnCount,),
      basalInputDimensions=(timestampWidth,),
      initialPermanence=0.21,
      maxSegmentsPerCell=128,
      maxSynapsesPerSegment=32,
      minThreshold=10,
      maxNewSynapseCount=20,
      permanenceDecrement=0.1,
      permanenceIncrement=0.1,
      seed=1960,
      checkInputs=False,
    )

    # First half of the probationary period is the learning period; the
    # remainder is used as the number of estimation samples.
    learningPeriod = math.floor(self.probationaryPeriod / 2.0)
    estimationSamples = self.probationaryPeriod - learningPeriod
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
      claLearningPeriod=learningPeriod,
      estimationSamples=estimationSamples,
      reestimationPeriod=100
    )


  def handleRecord(self, inputData):
    """Processes one record and returns a tuple (logScore, rawScore).

    rawScore is the fraction of active columns that were not predicted by the
    temporal memory before this record was fed to it. logScore is the
    log-likelihood computed from the anomaly probability of that raw score.

    @param inputData (dict) must provide "value" and "timestamp".
    """
    value = inputData["value"]
    timestamp = inputData["timestamp"]

    self.valueEncoder.encodeIntoArray(value, self.encodedValue)
    self.timestampEncoder.encodeIntoArray(timestamp, self.encodedTimestamp)

    # Keep the previous timestamp bits around; they are the learning
    # candidates for the basal connections below.
    self.prevActiveExternalCells = self.activeExternalCells
    self.activeExternalCells = self.encodedTimestamp.nonzero()[0]

    self.sp.compute(self.encodedValue, True, self.spOutput)
    activeColumns = self.spOutput.nonzero()[0]

    # Predictions are read before etm.compute(), i.e. they are the ones made
    # on the previous record.
    predictedColumns = set()
    for cell in self.etm.getPredictiveCells():
      predictedColumns.add(self.etm.columnForCell(cell))

    unpredictedColumns = set(activeColumns.tolist()) - predictedColumns
    rawScore = len(unpredictedColumns) / float(len(activeColumns))

    anomalyScore = self.anomalyLikelihood.anomalyProbability(
      value, rawScore, timestamp)
    logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

    previousExternal = self.prevActiveExternalCells
    self.etm.compute(activeColumns,
                     activeCellsExternalBasal=self.activeExternalCells,
                     reinforceCandidatesExternalBasal=previousExternal,
                     growthCandidatesExternalBasal=previousExternal)

    return (logScore, rawScore)
Example #22
0
def go():
    """Run the rec-center CSV through an SP + TM pair, one row per timestep.

    Each data row is encoded (timestamp and consumption concatenated), fed
    to the spatial pooler and then the temporal memory, and the resulting
    timestep is appended to a ``sanity.SPTMInstance``. Before every row the
    function blocks in ``sanityInstance.waitForUserContinue()``.

    The input file ``data/rec-center-hourly.csv`` is resolved relative to
    this module and is closed when the function exits, including on error.
    """
    valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
    timestampEncoder = DateEncoder(timeOfDay=(
        21,
        9.49,
    ))

    inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

    sp = SpatialPooler(
        **{
            "globalInhibition": True,
            "columnDimensions": [2048],
            "inputDimensions": [inputWidth],
            "potentialRadius": inputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        })

    tm = TemporalMemory(
        **{
            "activationThreshold": 20,
            "cellsPerColumn": 32,
            "columnDimensions": (2048, ),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1961,
        })

    inputPath = os.path.join(os.path.dirname(__file__),
                             "data/rec-center-hourly.csv")

    # The original left this handle open for the life of the process; the
    # context manager closes it even if a row fails to parse.
    # NOTE(review): "rb" is kept as in the original (the mode the Python 2
    # csv module expects); a Python 3 port would need text mode instead.
    with open(inputPath, "rb") as inputFile:
        csvReader = csv.reader(inputFile)

        # The first three rows are not data and are skipped.
        # (presumably NuPIC-style header rows -- confirm against the file)
        for _ in range(3):
            next(csvReader)

        # Output buffers reused for every row.
        encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
        encodedTimestamp = np.zeros(timestampEncoder.getWidth(),
                                    dtype=np.uint32)
        spOutput = np.zeros(2048, dtype=np.float32)

        sanityInstance = sanity.SPTMInstance(sp, tm)

        for timestampStr, consumptionStr in csvReader:

            sanityInstance.waitForUserContinue()

            timestamp = datetime.datetime.strptime(timestampStr,
                                                   "%m/%d/%y %H:%M")
            consumption = float(consumptionStr)

            timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
            valueEncoder.encodeIntoArray(consumption, encodedValue)

            sensoryInput = np.concatenate((
                encodedTimestamp,
                encodedValue,
            ))
            sp.compute(sensoryInput, True, spOutput)

            activeColumns = np.flatnonzero(spOutput)
            # Read the predictions *before* tm.compute(): these are the
            # cells that were predicted for the current timestep.
            predictedCells = tm.getPredictiveCells()
            tm.compute(activeColumns)

            activeInputBits = np.flatnonzero(sensoryInput)
            displayText = {
                "timestamp": timestampStr,
                "consumption": consumptionStr
            }

            sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                          predictedCells, displayText)
Example #23
0
from nupic.encoders import ScalarEncoder
from nupic.encoders.date import DateEncoder
from nupic.encoders.category import CategoryEncoder
#from nupic.research.spatial_pooler import SpatialPooler
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.anomaly import Anomaly
from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood
import datetime
from scipy.stats import norm
import numpy as np
import math

de = DateEncoder(season=5)
file1 = open("time_file.txt", "r")
cpt = 0
for line in file1:
    if cpt == 0:
        line = line.replace("/", "-")
        line = line.replace("19-", "2019-")
        #print line,
        lines = '2019-06-03 00:00:16'
        now = datetime.datetime.strptime(lines, "%Y-%m-%d %H:%M:%S")
        print "now =       ", de.encode(now)
    cpt += 1

categories = ('info', 'error', 'warning')
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
info = encoder.encode("info")
error = encoder.encode("error")
warning = encoder.encode("warning")
#print "info =       ", info
Example #24
0
# Pre-process the chosen column, add the result columns, create the
# encoders and compute the total encoder output width.
var_chosen = 'value'
Data = ma_preprocess(Data[var_chosen], 4).rename(columns={0: var_chosen})

# Result columns, filled in later.
Data['Anomaly'] = 0.0
Data['Anomaly_Likelihood'] = 0.0

prec_param = 5
pooler_out = 2024
cell_col = 5

# Value encoder resolution: first entry of the per-column standard
# deviation divided by the precision parameter.
Res = Data.std()[0] / prec_param
RDSE = RandomDistributedScalarEncoder(resolution=Res)
# Date encoders: time of day and weekend.
TODE = DateEncoder(timeOfDay=(21, 1))
WENDE = DateEncoder(weekend=21)

# Spatial Pooler Parameters

var_encoders = {Encoder('value', ['RDSE'])}
# Encoder('_index', ['TODE'])}

# Sum the widths of every encoder referenced by name in var_encoders.
encoder_width = 0
for x in var_encoders:
    for y in x.encoders:
        # Look the encoder up by its module-level name instead of building
        # and exec()-ing source text: same result for the names used here,
        # without executing arbitrary strings.
        s = globals()[y].getWidth()
        encoder_width += s

SP = SpatialPooler(
    inputDimensions=encoder_width,
Example #25
0
class DateEncoderTest(unittest.TestCase):
  '''Unit tests for DateEncoder class'''

  
  def setUp(self):
    ##TODO: comment and code don't match - weekend?!!
    # 3 bits for season, 1 bit for day of week, 2 for weekend, 5 for time of day
    self._e = DateEncoder(season=3, dayOfWeek=1, weekend=3, timeOfDay=5)
    # in the middle of fall, Thursday, not a weekend, afternoon - 4th Nov, 2010, 14:55
    self._d = datetime.datetime(2010, 11, 4, 14, 55)
    self._bits = self._e.encode(self._d)
    # season is aaabbbcccddd (1 bit/month) # TODO should be <<3?
    # should be 000000000111 (centered on month 11 - Nov)
    seasonExpected = [0,0,0,0,0,0,0,0,0,1,1,1]

    # week is MTWTFSS
    # contrary to localtime documentation, Monday = 0 (for python
    #  datetime.datetime.timetuple()
    dayOfWeekExpected = [0,0,0,1,0,0,0]

    # not a weekend, so it should be "False"
    weekendExpected = [1,1,1,0,0,0]

    # time of day has radius of 4 hours and w of 5 so each bit = 240/5 min = 48min
    # 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6
    # should be 30 bits total (30 * 48 minutes = 24 hours)
    timeOfDayExpected = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0]
    self._expected = numpy.array(seasonExpected + dayOfWeekExpected + weekendExpected \
                          + timeOfDayExpected, dtype=defaultDtype)
   
  def testDateEncoder(self):
    '''creating date encoder instance'''
    self.assertEqual(self._e.getDescription(), [("season", 0), ("day of week", 12),
                                ("weekend", 19), ("time of day", 25)])

    self.assertTrue((self._expected == self._bits).all())

    print
    self._e.pprintHeader()
    self._e.pprint(self._bits)
    print

  def testMissingValues(self):
    '''missing values'''
    mvOutput = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    self.assertEqual(sum(mvOutput), 0)

  def testDecoding(self):
    '''decoding date'''
    decoded = self._e.decode(self._bits)

    (fieldsDict, fieldNames) = decoded
    self.assertEqual(len(fieldsDict), 4)

    (ranges, desc) = fieldsDict['season']
    self.assertEqual(len(ranges), 1)
    self.assertSequenceEqual(ranges[0], [305, 305])
    
    (ranges, desc) = fieldsDict['time of day']
    self.assertEqual(len(ranges), 1)
    self.assertSequenceEqual(ranges[0], [14.4, 14.4])
    
    (ranges, desc) = fieldsDict['day of week']
    self.assertEqual(len(ranges), 1)
    self.assertSequenceEqual(ranges[0], [3, 3])
    
    (ranges, desc) = fieldsDict['weekend']
    self.assertEqual(len(ranges), 1)
    self.assertSequenceEqual(ranges[0], [0, 0])
    
    print decoded
    print "decodedToStr=>", self._e.decodedToStr(decoded)

  def testTopDownCompute(self):
    '''Check topDownCompute'''
    topDown = self._e.topDownCompute(self._bits)
    topDownValues = numpy.array([elem.value for elem in topDown])
    errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8])
    self.assertAlmostEqual(errs.max(), 0, 4)

  def testBucketIndexSupport(self):
    '''Check bucket index support'''
    bucketIndices = self._e.getBucketIndices(self._d)
    print "bucket indices:", bucketIndices
    topDown = self._e.getBucketInfo(bucketIndices)
    topDownValues = numpy.array([elem.value for elem in topDown])
    errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8])
    self.assertAlmostEqual(errs.max(), 0, 4)

    encodings = []
    for x in topDown:
      encodings.extend(x.encoding)
    self.assertTrue((encodings == self._expected).all())

  def testHoliday(self):
    '''look at holiday more carefully because of the smooth transition'''
    e = DateEncoder(holiday=5)
    holiday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype='uint8')
    notholiday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype='uint8')
    holiday2 = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype='uint8')

    d = datetime.datetime(2010, 12, 25, 4, 55)
    self.assertTrue((e.encode(d) == holiday).all())

    d = datetime.datetime(2008, 12, 27, 4, 55)
    self.assertTrue((e.encode(d) == notholiday).all())

    d = datetime.datetime(1999, 12, 26, 8, 00)
    self.assertTrue((e.encode(d) == holiday2).all())

    d = datetime.datetime(2011, 12, 24, 16, 00)
    self.assertTrue((e.encode(d) == holiday2).all())

  def testWeekend(self):
    '''Test weekend encoder'''
    e = DateEncoder(customDays = (21,["sat","sun","fri"]))
    mon = DateEncoder(customDays = (21,"Monday"))

    e2 = DateEncoder(weekend=(21,1))
    d = datetime.datetime(1988,5,29,20,00)
    self.assertTrue((e.encode(d) == e2.encode(d)).all())
    for _ in range(300):
      d = d+datetime.timedelta(days=1)
      self.assertTrue((e.encode(d) == e2.encode(d)).all())
      print mon.decode(mon.encode(d))
      #Make sure
      if mon.decode(mon.encode(d))[0]["Monday"][0][0][0]==1.0:
        self.assertEqual(d.weekday(), 0)
      else:
        self.assertFalse(d.weekday()==0)
Example #26
0
  def testWeekend(self):
    '''Test weekend encoder'''
    e = DateEncoder(customDays = (21,["sat","sun","fri"]))
    mon = DateEncoder(customDays = (21,"Monday"))

    e2 = DateEncoder(weekend=(21,1))
    d = datetime.datetime(1988,5,29,20,00)
    self.assertTrue((e.encode(d) == e2.encode(d)).all())
    for _ in range(300):
      d = d+datetime.timedelta(days=1)
      self.assertTrue((e.encode(d) == e2.encode(d)).all())
      print mon.decode(mon.encode(d))
      #Make sure
      if mon.decode(mon.encode(d))[0]["Monday"][0][0][0]==1.0:
        self.assertEqual(d.weekday(), 0)
      else:
        self.assertFalse(d.weekday()==0)
Example #27
0
class DateEncoderTest(unittest.TestCase):
  """Unit tests for the DateEncoder class"""


  def setUp(self):
    """Build a four-field encoder, a reference date and its expected bits"""
    # Arguments: season=3, dayOfWeek=1, weekend=1, timeOfDay=5
    self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)
    # Reference date: 4th Nov 2010, 14:55 -- mid-fall, a Thursday (so not a
    # weekend), in the afternoon.
    self._d = datetime.datetime(2010, 11, 4, 14, 55)
    self._bits = self._e.encode(self._d)

    # Season is aaabbbcccddd (1 bit/month) # TODO should be <<3?
    # Expected 000000000111, centered on month 11 (Nov).
    seasonBits = [0,0,0,0,0,0,0,0,0,1,1,1]

    # Week is MTWTFSS. Contrary to the localtime documentation, Monday = 0
    # for python datetime.datetime.timetuple().
    dayOfWeekBits = [0,0,0,1,0,0,0]

    # Not a weekend, so the "False" bit is set.
    weekendBits = [1, 0]

    # Time of day has a radius of 4 hours and w of 5, so each bit is
    # 240/5 min = 48 min. 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6.
    # 30 bits total (30 * 48 minutes = 24 hours).
    timeOfDayBits = (
      [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0])

    allBits = seasonBits + dayOfWeekBits + weekendBits + timeOfDayBits
    self._expected = numpy.array(allBits, dtype=defaultDtype)


  def testDateEncoder(self):
    """Field offsets and the encoding of the reference date"""
    expectedDescription = [
      ("season", 0),
      ("day of week", 12),
      ("weekend", 19),
      ("time of day", 21),
    ]
    self.assertSequenceEqual(self._e.getDescription(), expectedDescription)
    self.assertTrue(numpy.array_equal(self._expected, self._bits))


  def testMissingValues(self):
    """The missing-data sentinel encodes to all zeros"""
    encoded = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
    self.assertEqual(sum(encoded), 0)


  def testDecoding(self):
    """Decoding the reference bits yields one range per field"""
    (fieldsDict, _) = self._e.decode(self._bits)
    self.assertEqual(len(fieldsDict), 4)

    # (field name, the single expected range), checked in this order.
    expectedRanges = [
      ('season', [305, 305]),
      ('time of day', [14.4, 14.4]),
      ('day of week', [3, 3]),
      ('weekend', [0, 0]),
    ]
    for name, expectedRange in expectedRanges:
      (ranges, _) = fieldsDict[name]
      self.assertEqual(len(ranges), 1)
      self.assertSequenceEqual(ranges[0], expectedRange)


  def testTopDownCompute(self):
    """topDownCompute values for the reference bits"""
    results = self._e.topDownCompute(self._bits)
    values = numpy.array([result.value for result in results])
    reference = numpy.array([320.25, 3.5, .167, 14.8])
    errs = values - reference
    self.assertAlmostEqual(errs.max(), 0, 4)


  def testBucketIndexSupport(self):
    """Bucket indices map back to the expected values and encoding"""
    bucketInfos = self._e.getBucketInfo(self._e.getBucketIndices(self._d))

    values = numpy.array([info.value for info in bucketInfos])
    reference = numpy.array([320.25, 3.5, .167, 14.8])
    errs = values - reference
    self.assertAlmostEqual(errs.max(), 0, 4)

    # The per-field encodings, concatenated, must equal the full encoding.
    encodings = [bit for info in bucketInfos for bit in info.encoding]
    self.assertTrue(numpy.array_equal(encodings, self._expected))


  def testHoliday(self):
    """Default holiday encoding, including the smooth transition around it"""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    e = DateEncoder(holiday=5, forced=True)
    holiday = numpy.array([0,0,0,0,0,1,1,1,1,1], dtype="uint8")
    notholiday = numpy.array([1,1,1,1,1,0,0,0,0,0], dtype="uint8")
    holiday2 = numpy.array([0,0,0,1,1,1,1,1,0,0], dtype="uint8")

    cases = [
      (datetime.datetime(2010, 12, 25, 4, 55), holiday),
      (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
      (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
      (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
    ]
    for when, expected in cases:
      self.assertTrue(numpy.array_equal(e.encode(when), expected))


  def testHolidayMultiple(self):
    """Holiday encoding with an explicit list of holidays"""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    holidayDates = [(12, 25), (2018, 4, 1), (2017, 4, 16)]
    e = DateEncoder(holiday=5, forced=True, holidays=holidayDates)
    holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
    notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")

    cases = [
      (datetime.datetime(2011, 12, 25, 4, 55), holiday),
      (datetime.datetime(2007, 12, 2, 4, 55), notholiday),
      (datetime.datetime(2018, 4, 1, 16, 10), holiday),
      (datetime.datetime(2017, 4, 16, 16, 10), holiday),
    ]
    for when, expected in cases:
      self.assertTrue(numpy.array_equal(e.encode(when), expected))


  def testWeekend(self):
    """customDays for fri/sat/sun must encode like the weekend field"""
    # use of forced is not recommended, used here for readability, see scalar.py
    customEnc = DateEncoder(customDays=(21, ["sat", "sun", "fri"]),
                            forced=True)
    mondayEnc = DateEncoder(customDays=(21, "Monday"), forced=True)
    weekendEnc = DateEncoder(weekend=(21, 1), forced=True)

    day = datetime.datetime(1988, 5, 29, 20, 0)
    self.assertTrue(
      numpy.array_equal(customEnc.encode(day), weekendEnc.encode(day)))

    oneDay = datetime.timedelta(days=1)
    for _ in range(300):
      day = day + oneDay
      self.assertTrue(
        numpy.array_equal(customEnc.encode(day), weekendEnc.encode(day)))

      # The "Monday" custom-day field must decode to 1.0 exactly on Mondays.
      decodedMonday = mondayEnc.decode(mondayEnc.encode(day))
      decodedAsMonday = decodedMonday[0]["Monday"][0][0][0] == 1.0
      if decodedAsMonday:
        self.assertEqual(day.weekday(), 0)
      else:
        self.assertNotEqual(day.weekday(), 0)


  @unittest.skipUnless(
      capnp, "pycapnp is not installed, skipping serialization test.")
  def testReadWrite(self):
    """An encoder written to a proto and read back behaves like the original"""
    originalTS = datetime.datetime(1997, 8, 29, 2, 14)
    originalValue = self._e.encode(originalTS)

    proto1 = DateEncoderProto.new_message()
    self._e.write(proto1)

    # Round-trip the proto through a temp file into a fresh proto.
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = DateEncoderProto.read(f)

    encoder = DateEncoder.read(proto2)
    self.assertIsInstance(encoder, DateEncoder)

    # Width and field offsets must survive the round trip.
    for attr in ("width", "weekendOffset", "timeOfDayOffset",
                 "seasonOffset", "dayOfWeekOffset"):
      self.assertEqual(getattr(encoder, attr), getattr(self._e, attr))

    # Each sub-encoder must come back as the same class as the original's.
    for attr in ("customDaysEncoder", "dayOfWeekEncoder", "seasonEncoder",
                 "timeOfDayEncoder", "weekendEncoder"):
      self.assertIsInstance(getattr(encoder, attr),
                            getattr(self._e, attr).__class__)

    # Encoding and decoding must agree between the two encoders.
    self.assertTrue(numpy.array_equal(self._bits, encoder.encode(self._d)))
    self.assertTrue(numpy.array_equal(encoder.encode(originalTS),
                                      originalValue))
    self.assertEqual(self._e.decode(encoder.encode(self._d)),
                     encoder.decode(self._e.encode(self._d)))
def runHotgym(numRecords):
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    CtEncoder = RandomDistributedScalarEncoder(enParams["Ct"]["resolution"])
    ZIP_10467Encoder = RandomDistributedScalarEncoder(
        enParams["ZIP_10467"]["resolution"])
    #  ZIP_10462Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10462"]["resolution"])
    #  ZIP_10475Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10475"]["resolution"])
    #  ZIP_10466Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10466"]["resolution"])
    #  ZIP_10469Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10469"]["resolution"])
    #  DEPT_11Encoder = RandomDistributedScalarEncoder(enParams["DEPT_11"]["resolution"])
    #  DEPT_24Encoder = RandomDistributedScalarEncoder(enParams["DEPT_24"]["resolution"])
    #  DEPT_41Encoder = RandomDistributedScalarEncoder(enParams["DEPT_41"]["resolution"])
    #  DEPT_34Encoder = RandomDistributedScalarEncoder(enParams["DEPT_34"]["resolution"])
    #  DEPT_31Encoder = RandomDistributedScalarEncoder(enParams["DEPT_31"]["resolution"])
    #  DEPT_60Encoder = RandomDistributedScalarEncoder(enParams["DEPT_60"]["resolution"])
    #  AGE_0_9Encoder = RandomDistributedScalarEncoder(enParams["AGE_0_9"]["resolution"])
    #  AGE_10_19Encoder = RandomDistributedScalarEncoder(enParams["AGE_10_19"]["resolution"])
    #  AGE_20_29Encoder = RandomDistributedScalarEncoder(enParams["AGE_20_29"]["resolution"])
    #  AGE_30_39Encoder = RandomDistributedScalarEncoder(enParams["AGE_30_39"]["resolution"])
    #  AGE_40_49Encoder = RandomDistributedScalarEncoder(enParams["AGE_40_49"]["resolution"])
    #  AGE_50_59Encoder = RandomDistributedScalarEncoder(enParams["AGE_50_59"]["resolution"])
    #  AGE_60_69Encoder = RandomDistributedScalarEncoder(enParams["AGE_60_69"]["resolution"])
    #  AGE_70_79Encoder = RandomDistributedScalarEncoder(enParams["AGE_70_79"]["resolution"])
    #  AGE_80_89Encoder = RandomDistributedScalarEncoder(enParams["AGE_80_89"]["resolution"])
    #  AGE_90_99Encoder = RandomDistributedScalarEncoder(enParams["AGE_90_99"]["resolution"])
    #  DIST_1_7Encoder = RandomDistributedScalarEncoder(enParams["DIST_1_7"]["resolution"])
    #  DIST_8_14Encoder = RandomDistributedScalarEncoder(enParams["DIST_8_14"]["resolution"])
    #  DIST_15_21Encoder = RandomDistributedScalarEncoder(enParams["DIST_15_21"]["resolution"])
    #  DIST_22_28Encoder = RandomDistributedScalarEncoder(enParams["DIST_22_28"]["resolution"])
    #  DIST_29_35Encoder = RandomDistributedScalarEncoder(enParams["DIST_29_35"]["resolution"])
    #  DIST_36_42Encoder = RandomDistributedScalarEncoder(enParams["DIST_36_42"]["resolution"])
    #  DIST_43_49Encoder = RandomDistributedScalarEncoder(enParams["DIST_43_49"]["resolution"])
    #  DIST_50_56Encoder = RandomDistributedScalarEncoder(enParams["DIST_50_56"]["resolution"])
    #  DIST_57_63Encoder = RandomDistributedScalarEncoder(enParams["DIST_57_63"]["resolution"])
    #  DIST_64_70Encoder = RandomDistributedScalarEncoder(enParams["DIST_64_70"]["resolution"])

    encodingWidth = (timeOfDayEncoder.getWidth() + weekendEncoder.getWidth() +
                     CtEncoder.getWidth() * 2)

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = reader.next()
        reader.next()
        reader.next()

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)

        for count, record in enumerate(reader):

            if count >= numRecords: break

            # Convert data string into Python date object.
            dateString = datetime.datetime.strptime(record[0],
                                                    "%Y-%m-%d %H:%M:%S")
            # Convert data value string into float.
            Ct = float(record[1])
            ZIP_10467 = float(record[2])
            #      ZIP_10462 = float(record[3])
            #      ZIP_10475 = float(record[4])
            #      ZIP_10466 = float(record[5])
            #      ZIP_10469 = float(record[6])
            #      DEPT_11 = float(record[7])
            #      DEPT_24 = float(record[8])
            #      DEPT_41 = float(record[9])
            #      DEPT_34 = float(record[10])
            #      DEPT_31 = float(record[11])
            #      DEPT_60 = float(record[12])
            #      AGE_0_9 = float(record[13])
            #      AGE_10_19 = float(record[14])
            #      AGE_20_29 = float(record[15])
            #      AGE_30_39 = float(record[16])
            #      AGE_40_49 = float(record[17])
            #      AGE_50_59 = float(record[18])
            #      AGE_60_69 = float(record[19])
            #      AGE_70_79 = float(record[20])
            #      AGE_80_89 = float(record[21])
            #      AGE_90_99 = float(record[22])
            #      DIST_1_7 = float(record[23])
            #      DIST_8_14 = float(record[24])
            #      DIST_15_21 = float(record[25])
            #      DIST_22_28 = float(record[26])
            #      DIST_29_35 = float(record[27])
            #      DIST_36_42 = float(record[28])
            #      DIST_43_49 = float(record[29])
            #      DIST_50_56 = float(record[30])
            #      DIST_57_63 = float(record[31])
            #      DIST_64_70 = float(record[31])

            # To encode, we need to provide zero-filled numpy arrays for the encoders
            # to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            CtBits = numpy.zeros(CtEncoder.getWidth())
            ZIP_10467Bits = numpy.zeros(ZIP_10467Encoder.getWidth())
            #      ZIP_10462Bits = numpy.zeros(ZIP_10462Encoder.getWidth())
            #      ZIP_10475Bits = numpy.zeros(ZIP_10475Encoder.getWidth())
            #      ZIP_10466Bits = numpy.zeros(ZIP_10466Encoder.getWidth())
            #      ZIP_10469Bits = numpy.zeros(ZIP_10469Encoder.getWidth())
            #      DEPT_11Bits = numpy.zeros(DEPT_11Encoder.getWidth())
            #      DEPT_24Bits = numpy.zeros(DEPT_24Encoder.getWidth())
            #      DEPT_41Bits = numpy.zeros(DEPT_41Encoder.getWidth())
            #      DEPT_34Bits = numpy.zeros(DEPT_34Encoder.getWidth())
            #      DEPT_31Bits = numpy.zeros(DEPT_31Encoder.getWidth())
            #      DEPT_60Bits = numpy.zeros(DEPT_60Encoder.getWidth())
            #      AGE_0_9Bits = numpy.zeros(AGE_0_9Encoder.getWidth())
            #      AGE_10_19Bits = numpy.zeros(AGE_10_19Encoder.getWidth())
            #      AGE_20_29Bits = numpy.zeros(AGE_20_29Encoder.getWidth())
            #      AGE_30_39Bits = numpy.zeros(AGE_30_39Encoder.getWidth())
            #      AGE_40_49Bits = numpy.zeros(AGE_40_49Encoder.getWidth())
            #      AGE_50_59Bits = numpy.zeros(AGE_50_59Encoder.getWidth())
            #      AGE_60_69Bits = numpy.zeros(AGE_60_69Encoder.getWidth())
            #      AGE_70_79Bits = numpy.zeros(AGE_70_79Encoder.getWidth())
            #      AGE_80_89Bits = numpy.zeros(AGE_80_89Encoder.getWidth())
            #      AGE_90_99Bits = numpy.zeros(AGE_90_99Encoder.getWidth())
            #      DIST_1_7Bits = numpy.zeros(DIST_1_7Encoder.getWidth())
            #      DIST_8_14Bits = numpy.zeros(DIST_8_14Encoder.getWidth())
            #      DIST_15_21Bits = numpy.zeros(DIST_15_21Encoder.getWidth())
            #      DIST_22_28Bits = numpy.zeros(DIST_22_28Encoder.getWidth())
            #      DIST_29_35Bits = numpy.zeros(DIST_29_35Encoder.getWidth())
            #      DIST_36_42Bits = numpy.zeros(DIST_36_42Encoder.getWidth())
            #      DIST_43_49Bits = numpy.zeros(DIST_43_49Encoder.getWidth())
            #      DIST_50_56Bits = numpy.zeros(DIST_50_56Encoder.getWidth())
            #      DIST_57_63Bits = numpy.zeros(DIST_57_63Encoder.getWidth())
            #      DIST_64_70Bits = numpy.zeros(DIST_64_70Encoder.getWidth())

            # Now we call the encoders to create bit representations for each value.
            timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
            weekendEncoder.encodeIntoArray(dateString, weekendBits)
            CtEncoder.encodeIntoArray(Ct, CtBits)
            ZIP_10467Encoder.encodeIntoArray(ZIP_10467, ZIP_10467Bits)
            #      ZIP_10462Encoder.encodeIntoArray(ZIP_10462, ZIP_10462Bits)
            #      ZIP_10475Encoder.encodeIntoArray(ZIP_10475, ZIP_10475Bits)
            #      ZIP_10466Encoder.encodeIntoArray(ZIP_10466, ZIP_10466Bits)
            #      ZIP_10469Encoder.encodeIntoArray(ZIP_10469, ZIP_10469Bits)
            #      DEPT_11Encoder.encodeIntoArray(DEPT_11, DEPT_11Bits)
            #      DEPT_24Encoder.encodeIntoArray(DEPT_24, DEPT_24Bits)
            #      DEPT_41Encoder.encodeIntoArray(DEPT_41, DEPT_41Bits)
            #      DEPT_34Encoder.encodeIntoArray(DEPT_34, DEPT_34Bits)
            #      DEPT_31Encoder.encodeIntoArray(DEPT_31, DEPT_31Bits)
            #      DEPT_60Encoder.encodeIntoArray(DEPT_60, DEPT_60Bits)
            #      AGE_0_9Encoder.encodeIntoArray(AGE_0_9, AGE_0_9Bits)
            #      AGE_10_19Encoder.encodeIntoArray(AGE_10_19, AGE_10_19Bits)
            #      AGE_20_29Encoder.encodeIntoArray(AGE_20_29, AGE_20_29Bits)
            #      AGE_30_39Encoder.encodeIntoArray(AGE_30_39, AGE_30_39Bits)
            #      AGE_40_49Encoder.encodeIntoArray(AGE_40_49, AGE_40_49Bits)
            #      AGE_50_59Encoder.encodeIntoArray(AGE_50_59, AGE_50_59Bits)
            #      AGE_60_69Encoder.encodeIntoArray(AGE_60_69, AGE_60_69Bits)
            #      AGE_70_79Encoder.encodeIntoArray(AGE_70_79, AGE_70_79Bits)
            #      AGE_80_89Encoder.encodeIntoArray(AGE_80_89, AGE_80_89Bits)
            #      AGE_90_99Encoder.encodeIntoArray(AGE_90_99, AGE_90_99Bits)
            #      DIST_1_7Encoder.encodeIntoArray(DIST_1_7, DIST_1_7Bits)
            #      DIST_8_14Encoder.encodeIntoArray(DIST_8_14, DIST_8_14Bits)
            #      DIST_15_21Encoder.encodeIntoArray(DIST_15_21, DIST_15_21Bits)
            #      DIST_22_28Encoder.encodeIntoArray(DIST_22_28, DIST_22_28Bits)
            #      DIST_29_35Encoder.encodeIntoArray(DIST_29_35, DIST_29_35Bits)
            #      DIST_36_42Encoder.encodeIntoArray(DIST_36_42, DIST_36_42Bits)
            #      DIST_43_49Encoder.encodeIntoArray(DIST_43_49, DIST_43_49Bits)
            #      DIST_50_56Encoder.encodeIntoArray(DIST_50_56, DIST_50_56Bits)
            #      DIST_57_63Encoder.encodeIntoArray(DIST_57_63, DIST_57_63Bits)
            #      DIST_64_70Encoder.encodeIntoArray(DIST_64_70, DIST_64_70Bits)
            # Concatenate all these encodings into one large encoding for Spatial
            # Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, CtBits, ZIP_10467Bits])
            #      encoding = numpy.concatenate(
            #        [timeOfDayBits, weekendBits, CtBits,
            #         ZIP_10467Bits, ZIP_10462Bits, ZIP_10475Bits, ZIP_10466Bits, ZIP_10469Bits,
            #         DEPT_11Bits, DEPT_24Bits, DEPT_41Bits, DEPT_34Bits, DEPT_31Bits,
            #         DEPT_60Bits, AGE_0_9Bits, AGE_10_19Bits, AGE_20_29Bits, AGE_30_39Bits,
            #         AGE_40_49Bits, AGE_50_59Bits, AGE_60_69Bits, AGE_70_79Bits, AGE_80_89Bits,
            #         AGE_90_99Bits, DIST_1_7Bits, DIST_8_14Bits, DIST_15_21Bits, DIST_22_28Bits,
            #         DIST_29_35Bits, DIST_36_42Bits, DIST_43_49Bits, DIST_50_56Bits, DIST_57_63Bits,
            #         DIST_64_70Bits])

            # Create an array to represent active columns, all initially zero. This
            # will be populated by the compute method below. It must have the same
            # dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = CtEncoder.getBucketIndices(Ct)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(recordNum=count,
                                                  patternNZ=activeCells,
                                                  classification={
                                                      "bucketIdx": bucketIdx,
                                                      "actValue": Ct
                                                  },
                                                  learn=True,
                                                  infer=True)

            # Print the best prediction for 1 step out.
            oneStepConfidence, oneStep = sorted(zip(
                classifierResult[1], classifierResult["actualValues"]),
                                                reverse=True)[0]
            # print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
            #      results.append([oneStep, oneStepConfidence * 100, None, None])
            results.append([record[0], Ct, oneStep, oneStepConfidence * 100])
            output.write(record[0], Ct, oneStep, oneStepConfidence * 100)

        output.close()
        return results
    def initialize(self):
        """Build the encoders, Spatial Pooler, Temporal Memory and the
        anomaly-likelihood estimator, storing each of them on ``self``.

        Reads ``self.inputMin``, ``self.inputMax`` and
        ``self.probationaryPeriod``.
        """
        # Value range tracked for spatial anomaly detection; unset until
        # something assigns it.
        self.minVal = self.maxVal = None

        # Encoder for the time-of-day part of the timestamp.
        self.timeOfDayEncoder = DateEncoder(name='time_enc',
                                            timeOfDay=(21, 9.49))

        # RDSE for the time series value. The input range is widened by 20%
        # on both sides and split into 130 buckets; the bucket width is the
        # encoder resolution, with a floor of 0.001.
        padding = abs(self.inputMax - self.inputMin) * 0.2
        paddedMin = self.inputMin - padding
        paddedMax = self.inputMax + padding
        bucketCount = 130
        bucketWidth = (paddedMax - paddedMin) / bucketCount
        self.value_enc = RandomDistributedScalarEncoder(
            name='value_rdse', resolution=max(0.001, bucketWidth))

        # Spatial Pooler over the combined width of both encoders.
        inputWidth = (self.timeOfDayEncoder.getWidth() +
                      self.value_enc.getWidth())
        self.sp = SpatialPooler(
            inputDimensions=(inputWidth, ),
            columnDimensions=(2048, ),
            potentialRadius=inputWidth,
            potentialPct=0.8,
            globalInhibition=1,
            numActiveColumnsPerInhArea=40,
            synPermActiveInc=0.003,
            synPermInactiveDec=0.0005,
            synPermConnected=0.2,
            boostStrength=0.0,
            wrapAround=True,
            seed=1956,
        )

        # Temporal Memory over the same 2048 columns.
        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=20,
            minThreshold=13,
            # initialPermanence was increased to equal connectedPermanence.
            initialPermanence=.5,
            connectedPermanence=.5,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            predictedSegmentDecrement=0.001,
            maxNewSynapseCount=31,
            maxSegmentsPerCell=128,
            # Changed meaning. Also see connections.topology[2]
            maxSynapsesPerSegment=128,
            seed=1993,
        )

        # Anomaly likelihood: the first half of the probationary period
        # (rounded down) is the learning period, the remainder is the number
        # of estimation samples.
        learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=learningPeriod,
            estimationSamples=self.probationaryPeriod - learningPeriod,
            reestimationPeriod=100,
        )

        # NOTE(review): presumably the number of records handled so far --
        # confirm against the record handler.
        self.age = 0
Example #30
0
    def testWeekend(self):
        """Test weekend encoder"""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        customEncoder = DateEncoder(customDays=(21, ["sat", "sun", "fri"]),
                                    forced=True)
        mondayEncoder = DateEncoder(customDays=(21, "Monday"), forced=True)
        weekendEncoder = DateEncoder(weekend=(21, 1), forced=True)

        # The custom fri/sat/sun encoder must agree with the weekend encoder
        # on the starting timestamp ...
        start = datetime.datetime(1988, 5, 29, 20, 0)
        self.assertTrue(
            numpy.array_equal(customEncoder.encode(start),
                              weekendEncoder.encode(start)))

        # ... and on each of the following 300 days.
        for offset in range(1, 301):
            day = start + datetime.timedelta(days=offset)
            self.assertTrue(
                numpy.array_equal(customEncoder.encode(day),
                                  weekendEncoder.encode(day)))

            # The "Monday" encoder must decode to 1.0 exactly on Mondays
            # (weekday() == 0).
            decodedFields = mondayEncoder.decode(mondayEncoder.encode(day))[0]
            decodedValue = decodedFields["Monday"][0][0][0]
            if decodedValue == 1.0:
                self.assertEqual(day.weekday(), 0)
            else:
                self.assertNotEqual(day.weekday(), 0)
Example #31
0
    def initialize(self):
        """Create the value and timestamp encoders, their output buffers,
        the Spatial Pooler, the Temporal Memory and, when
        ``self.useLikelihood`` is set, the anomaly-likelihood estimator.

        Reads ``self.inputMin``, ``self.inputMax``, ``self.useLikelihood``
        and ``self.probationaryPeriod``.
        """
        # The RDSE resolution is specific to the data stream: the input range
        # is padded by 20% on both sides (or, for a constant stream, the
        # upper bound is set to inputMin + 1) and divided into 130 buckets,
        # with a floor of 0.001.
        padding = abs(self.inputMax - self.inputMin) * 0.2
        lowerBound = self.inputMin - padding
        if self.inputMin != self.inputMax:
            upperBound = self.inputMax + padding
        else:
            upperBound = self.inputMin + 1
        bucketCount = 130.0
        bucketWidth = (upperBound - lowerBound) / bucketCount
        self.valueEncoder = RandomDistributedScalarEncoder(
            max(0.001, bucketWidth), seed=42)
        self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                     dtype=np.uint32)

        # Timestamp encoder (time of day only) and its output buffer.
        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                         dtype=np.uint32)

        # The Spatial Pooler input is as wide as both encodings together.
        inputWidth = (self.timestampEncoder.getWidth() +
                      self.valueEncoder.getWidth())

        self.sp = SpatialPooler(
            inputDimensions=[inputWidth],
            columnDimensions=[2048],
            potentialRadius=inputWidth,
            potentialPct=0.8,
            globalInhibition=True,
            numActiveColumnsPerInhArea=40,
            synPermActiveInc=0.003,
            synPermInactiveDec=0.0005,
            synPermConnected=0.2,
            maxBoost=1.0,
            seed=1956,
        )
        # Buffer sized to the Spatial Pooler's 2048 columns.
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=20,
            minThreshold=13,
            initialPermanence=0.24,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            maxNewSynapseCount=31,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=128,
            seed=1960,
        )

        if not self.useLikelihood:
            return

        # Half of the probationary period (rounded down) is used for
        # learning, the rest for estimation.
        halfPeriod = math.floor(self.probationaryPeriod / 2.0)
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=halfPeriod,
            estimationSamples=self.probationaryPeriod - halfPeriod,
            reestimationPeriod=100)
Example #32
0
class DateEncoderTest(unittest.TestCase):
    """Unit tests for DateEncoder class"""
    def setUp(self):
        # Encoder under test: 3 bits for season, 1 bit for day of week, 1 for
        # weekend, 5 for time of day.
        self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)

        # Reference timestamp: in the middle of fall, Thursday, not a
        # weekend, afternoon - 4th Nov, 2010, 14:55
        self._d = datetime.datetime(2010, 11, 4, 14, 55)
        self._bits = self._e.encode(self._d)

        # Season is aaabbbcccddd (1 bit/month) # TODO should be <<3?
        # 12 bits, the last three set (centered on month 11 - Nov).
        seasonBits = [0] * 9 + [1] * 3

        # Week is MTWTFSS; Monday = 0 (as in datetime.datetime.timetuple()),
        # so Thursday is index 3.
        dayOfWeekBits = [0, 0, 0, 1, 0, 0, 0]

        # Not a weekend, so the "False" bit is the one that is set.
        weekendBits = [1, 0]

        # Time of day has radius of 4 hours and w of 5, so each bit covers
        # 240/5 min = 48 min and there are 30 bits in total
        # (30 * 48 minutes = 24 hours). 14:55 is minute 895; 895/48 = bit
        # 18.6, giving five set bits at positions 16..20.
        timeOfDayBits = [0] * 16 + [1] * 5 + [0] * 9

        self._expected = numpy.array(
            seasonBits + dayOfWeekBits + weekendBits + timeOfDayBits,
            dtype=defaultDtype)

    def testDateEncoder(self):
        """creating date encoder instance"""
        expectedDescription = [("season", 0), ("day of week", 12),
                               ("weekend", 19), ("time of day", 21)]
        self.assertSequenceEqual(self._e.getDescription(),
                                 expectedDescription)
        self.assertTrue(numpy.array_equal(self._expected, self._bits))

    def testMissingValues(self):
        """missing values"""
        # Encoding the missing-data sentinel must set no bits at all.
        missingBits = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(sum(missingBits), 0)

    def testDecoding(self):
        """decoding date"""
        fieldsDict, _ = self._e.decode(self._bits)
        self.assertEqual(len(fieldsDict), 4)

        # Every field must decode to exactly one range with these bounds.
        expectedRanges = (
            ('season', [305, 305]),
            ('time of day', [14.4, 14.4]),
            ('day of week', [3, 3]),
            ('weekend', [0, 0]),
        )
        for fieldName, bounds in expectedRanges:
            ranges, _ = fieldsDict[fieldName]
            self.assertEqual(len(ranges), 1)
            self.assertSequenceEqual(ranges[0], bounds)

    def testTopDownCompute(self):
        """Check topDownCompute"""
        results = self._e.topDownCompute(self._bits)
        actual = numpy.array([item.value for item in results])
        expected = numpy.array([320.25, 3.5, .167, 14.8])
        errs = actual - expected
        self.assertAlmostEqual(errs.max(), 0, 4)

    def testBucketIndexSupport(self):
        """Check bucket index support"""
        bucketInfo = self._e.getBucketInfo(self._e.getBucketIndices(self._d))
        actual = numpy.array([item.value for item in bucketInfo])
        expected = numpy.array([320.25, 3.5, .167, 14.8])
        errs = actual - expected
        self.assertAlmostEqual(errs.max(), 0, 4)

        # The per-field encodings, joined in order, must reproduce the full
        # expected bit vector.
        encodings = [bit for item in bucketInfo for bit in item.encoding]
        self.assertTrue(numpy.array_equal(encodings, self._expected))

    def testHoliday(self):
        """look at holiday more carefully because of the smooth transition"""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        e = DateEncoder(holiday=5, forced=True)
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")
        holiday2 = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype="uint8")

        # (timestamp, expected encoding) pairs; the last two are expected to
        # produce the in-between pattern.
        cases = (
            (datetime.datetime(2010, 12, 25, 4, 55), holiday),
            (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
            (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
            (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
        )
        for when, expectedBits in cases:
            self.assertTrue(numpy.array_equal(e.encode(when), expectedBits))

    def testHolidayMultiple(self):
        """look at holiday more carefully because of the smooth transition"""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        e = DateEncoder(holiday=5,
                        forced=True,
                        holidays=[(12, 25), (2018, 4, 1), (2017, 4, 16)])
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")

        # (timestamp, expected encoding) pairs covering each configured
        # holiday plus one date that is not in the list.
        cases = (
            (datetime.datetime(2011, 12, 25, 4, 55), holiday),
            (datetime.datetime(2007, 12, 2, 4, 55), notholiday),
            (datetime.datetime(2018, 4, 1, 16, 10), holiday),
            (datetime.datetime(2017, 4, 16, 16, 10), holiday),
        )
        for when, expectedBits in cases:
            self.assertTrue(numpy.array_equal(e.encode(when), expectedBits))

    def testWeekend(self):
        """Test weekend encoder"""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        customEncoder = DateEncoder(customDays=(21, ["sat", "sun", "fri"]),
                                    forced=True)
        mondayEncoder = DateEncoder(customDays=(21, "Monday"), forced=True)
        weekendEncoder = DateEncoder(weekend=(21, 1), forced=True)

        # The custom fri/sat/sun encoder must agree with the weekend encoder
        # on the starting timestamp and on each of the following 300 days.
        start = datetime.datetime(1988, 5, 29, 20, 0)
        self.assertTrue(
            numpy.array_equal(customEncoder.encode(start),
                              weekendEncoder.encode(start)))
        for offset in range(1, 301):
            day = start + datetime.timedelta(days=offset)
            self.assertTrue(
                numpy.array_equal(customEncoder.encode(day),
                                  weekendEncoder.encode(day)))

            # The "Monday" encoder must decode to 1.0 exactly on Mondays
            # (weekday() == 0).
            decodedFields = mondayEncoder.decode(mondayEncoder.encode(day))[0]
            if decodedFields["Monday"][0][0][0] == 1.0:
                self.assertEqual(day.weekday(), 0)
            else:
                self.assertNotEqual(day.weekday(), 0)

    @unittest.skipUnless(
        capnp, "pycapnp is not installed, skipping serialization test.")
    def testReadWrite(self):
        """Round-trip the encoder through its proto form and compare."""
        originalTS = datetime.datetime(1997, 8, 29, 2, 14)
        originalValue = self._e.encode(originalTS)

        proto1 = DateEncoderProto.new_message()
        self._e.write(proto1)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = DateEncoderProto.read(f)

        encoder = DateEncoder.read(proto2)
        self.assertIsInstance(encoder, DateEncoder)

        # Scalar attributes must survive the round trip unchanged.
        for attrName in ("width", "weekendOffset", "timeOfDayOffset",
                         "seasonOffset", "dayOfWeekOffset"):
            self.assertEqual(getattr(encoder, attrName),
                             getattr(self._e, attrName))

        # Each sub-encoder must come back as the same class as the original.
        for attrName in ("customDaysEncoder", "dayOfWeekEncoder",
                         "seasonEncoder", "timeOfDayEncoder",
                         "weekendEncoder"):
            self.assertIsInstance(getattr(encoder, attrName),
                                  getattr(self._e, attrName).__class__)

        # The restored encoder must encode and decode like the original.
        self.assertTrue(numpy.array_equal(self._bits, encoder.encode(self._d)))
        self.assertTrue(
            numpy.array_equal(encoder.encode(originalTS), originalValue))
        self.assertEqual(self._e.decode(encoder.encode(self._d)),
                         encoder.decode(self._e.encode(self._d)))
Example #33
0
class DateEncoderTest(unittest.TestCase):
    '''Unit tests for DateEncoder class'''
    def setUp(self):
        # 3 bits for season, 1 bit for day of week, 3 for weekend, 5 for time
        # of day. (The weekend field is 6 bits wide in the expected output
        # below: offsets 19..24.)
        self._e = DateEncoder(season=3, dayOfWeek=1, weekend=3, timeOfDay=5)
        # in the middle of fall, Thursday, not a weekend, afternoon - 4th
        # Nov, 2010, 14:55
        self._d = datetime.datetime(2010, 11, 4, 14, 55)
        self._bits = self._e.encode(self._d)
        # season is aaabbbcccddd (1 bit/month) # TODO should be <<3?
        # should be 000000000111 (centered on month 11 - Nov)
        seasonExpected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

        # week is MTWTFSS
        # contrary to localtime documentation, Monday = 0 (for python
        #  datetime.datetime.timetuple()
        dayOfWeekExpected = [0, 0, 0, 1, 0, 0, 0]

        # not a weekend, so it should be "False"
        weekendExpected = [1, 1, 1, 0, 0, 0]

        # time of day has radius of 4 hours and w of 5 so each bit = 240/5
        # min = 48min; 14:55 is minute 14*60 + 55 = 895; 895/48 = bit 18.6
        # should be 30 bits total (30 * 48 minutes = 24 hours)
        timeOfDayExpected = [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
            0, 0, 0, 0, 0, 0, 0, 0
        ]
        self._expected = numpy.array(seasonExpected + dayOfWeekExpected +
                                     weekendExpected + timeOfDayExpected,
                                     dtype=defaultDtype)

    def testDateEncoder(self):
        '''creating date encoder instance'''
        self.assertEqual(self._e.getDescription(), [("season", 0),
                                                    ("day of week", 12),
                                                    ("weekend", 19),
                                                    ("time of day", 25)])

        # array_equal fails cleanly on a shape mismatch, where `==` followed
        # by `.all()` could raise instead.
        self.assertTrue(numpy.array_equal(self._expected, self._bits))

        # Exercise the pretty-printers; the single-argument print() form
        # behaves the same on Python 2 and Python 3.
        print("")
        self._e.pprintHeader()
        self._e.pprint(self._bits)
        print("")

    def testMissingValues(self):
        '''missing values'''
        mvOutput = self._e.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(sum(mvOutput), 0)

    def testDecoding(self):
        '''decoding date'''
        decoded = self._e.decode(self._bits)

        (fieldsDict, _) = decoded
        self.assertEqual(len(fieldsDict), 4)

        # Every field must decode to exactly one range with these bounds.
        expectedRanges = (
            ('season', [305, 305]),
            ('time of day', [14.4, 14.4]),
            ('day of week', [3, 3]),
            ('weekend', [0, 0]),
        )
        for fieldName, bounds in expectedRanges:
            (ranges, _) = fieldsDict[fieldName]
            self.assertEqual(len(ranges), 1)
            self.assertSequenceEqual(ranges[0], bounds)

        print(decoded)
        print("decodedToStr=> %s" % (self._e.decodedToStr(decoded),))

    def testTopDownCompute(self):
        '''Check topDownCompute'''
        topDown = self._e.topDownCompute(self._bits)
        topDownValues = numpy.array([elem.value for elem in topDown])
        errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8])
        self.assertAlmostEqual(errs.max(), 0, 4)

    def testBucketIndexSupport(self):
        '''Check bucket index support'''
        bucketIndices = self._e.getBucketIndices(self._d)
        print("bucket indices: %s" % (bucketIndices,))
        topDown = self._e.getBucketInfo(bucketIndices)
        topDownValues = numpy.array([elem.value for elem in topDown])
        errs = topDownValues - numpy.array([320.25, 3.5, .167, 14.8])
        self.assertAlmostEqual(errs.max(), 0, 4)

        # The per-field encodings, joined in order, must reproduce the full
        # expected bit vector. `encodings` is a plain list, so comparing it
        # with `==` against an array of a different length would yield a
        # bare bool with no `.all()`; array_equal handles that case.
        encodings = []
        for x in topDown:
            encodings.extend(x.encoding)
        self.assertTrue(numpy.array_equal(encodings, self._expected))

    def testHoliday(self):
        '''look at holiday more carefully because of the smooth transition'''
        e = DateEncoder(holiday=5)
        holiday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype='uint8')
        notholiday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype='uint8')
        holiday2 = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype='uint8')

        # (timestamp, expected encoding) pairs; the last two are expected to
        # produce the in-between pattern.
        cases = (
            (datetime.datetime(2010, 12, 25, 4, 55), holiday),
            (datetime.datetime(2008, 12, 27, 4, 55), notholiday),
            (datetime.datetime(1999, 12, 26, 8, 0), holiday2),
            (datetime.datetime(2011, 12, 24, 16, 0), holiday2),
        )
        for d, expectedBits in cases:
            self.assertTrue(numpy.array_equal(e.encode(d), expectedBits))

    def testWeekend(self):
        '''Test weekend encoder'''
        e = DateEncoder(customDays=(21, ["sat", "sun", "fri"]))
        mon = DateEncoder(customDays=(21, "Monday"))

        e2 = DateEncoder(weekend=(21, 1))
        d = datetime.datetime(1988, 5, 29, 20, 0)
        self.assertTrue(numpy.array_equal(e.encode(d), e2.encode(d)))
        for _ in range(300):
            d = d + datetime.timedelta(days=1)
            self.assertTrue(numpy.array_equal(e.encode(d), e2.encode(d)))

            decodedMonday = mon.decode(mon.encode(d))
            print(decodedMonday)
            # The "Monday" encoder must decode to 1.0 exactly on Mondays
            # (weekday() == 0).
            if decodedMonday[0]["Monday"][0][0][0] == 1.0:
                self.assertEqual(d.weekday(), 0)
            else:
                self.assertNotEqual(d.weekday(), 0)
Example #34
0
class FourSquareAnomalyDetector():
    """Encodes check-ins into a single bit vector and scores them with a
    model created from ``model_params.MODEL_PARAMS``.

    Constructing an instance builds the encoders and immediately calls
    ``run()``.
    """
    def __init__(self):
        # One encoder per check-in attribute.
        self.lat = ScalarEncoder(name='latitude', w=3, n=100, minval=-90,
                                 maxval=90, periodic=False)
        self.long = ScalarEncoder(name='longitude', w=3, n=100, minval=-180,
                                  maxval=180, periodic=True)
        self.timeenc = DateEncoder(season=0, dayOfWeek=1, weekend=3,
                                   timeOfDay=5)
        self.likes = ScalarEncoder(name='likes', w=3, n=50, minval=0,
                                   maxval=100000, periodic=False)
        self.people = ScalarEncoder(name='numpeople', w=3, n=20, minval=0,
                                    maxval=100, periodic=False)
        self.categories = SDRCategoryEncoder(n=87, w=3, categoryList=None,
                                             name="cats", verbosity=0)
        self.run()

    def run(self):
        """Encode eight sample check-ins and print the last anomaly score."""
        # Eight check-ins that differ only in the timestamp taken when each
        # one is created.
        # NOTE(review): the categories argument is the string "cafe";
        # encode() iterates over `checkin.categories`, so unless Checkin
        # wraps it in a list this iterates character by character -- confirm.
        list_of_unencoded_checkins = [
            Checkin(10, 100, datetime.datetime.utcnow(), 12, 5, "cafe")
            for _ in range(8)
        ]
        list_of_encoded_checkins = []
        for check in list_of_unencoded_checkins:
            print(check)
            list_of_encoded_checkins.append(self.encode(check))
        print(self.LastAnomalyScore(list_of_encoded_checkins))

    def createModel(self):
        """Return a new model built from ``model_params.MODEL_PARAMS``."""
        return ModelFactory.create(model_params.MODEL_PARAMS)

    def encode(self, checkin):
        """Return the concatenated encoding of ``checkin``.

        The result joins, in order, the latitude, longitude, time, likes,
        people and category encodings. The category part is the logical OR
        of the encodings of every entry in ``checkin.categories``; when
        there are no categories it is an all-zero vector of the category
        encoder's width.
        """
        print(checkin)
        latenc = self.lat.encode(checkin.latitude)
        longenc = self.long.encode(checkin.longitude)
        timenc = self.timeenc.encode(checkin.time)
        likeenc = self.likes.encode(checkin.likes)
        peoplenc = self.people.encode(checkin.people)

        # Union of all category encodings. `None` marks "nothing seen yet",
        # which avoids relying on an exception for the first iteration.
        catenc = None
        for cat in checkin.categories:
            encoded = self.categories.encode(cat)
            if catenc is None:
                catenc = encoded
            else:
                catenc = numpy.logical_or(catenc, encoded)
        if catenc is None:
            # No categories at all: contribute no active bits.
            catenc = numpy.zeros(self.categories.getWidth(), dtype=bool)

        checkinsdr = numpy.concatenate(
            (latenc, longenc, timenc, likeenc, peoplenc, catenc))
        print(checkinsdr)
        print(type(checkinsdr))
        return checkinsdr

    def LastAnomalyScore(self, checkin_list):
        """Run every encoded check-in through a fresh model and return the
        anomaly score of the last one (0 when ``checkin_list`` is empty)."""
        model = self.createModel()
        model.enableInference({'predictedField': 'checkin'})
        last_anomaly = 0
        for record in checkin_list:
            result = model.run({"checkin": record})
            last_anomaly = result.inferences['anomalyScore']
        return last_anomaly
Example #35
0
def runHotgym(numRecords):
  """Run up to ``numRecords`` CSV rows through the encoders, Spatial Pooler,
  Temporal Memory and SDR classifier.

  Model parameters are read from ``_PARAMS_PATH`` and input rows from
  ``_INPUT_FILE_PATH`` (the first three rows are skipped). Each processed
  row is also written to a ``NuPICFileOutput`` opened on ``_FILE_NAME``.

  :param numRecords: maximum number of data rows to process.
  :returns: a list with one entry per processed row:
            ``[timestamp string, actual value, one-step prediction,
            prediction confidence in percent]``.
  """
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
  enParams = modelParams["sensorParams"]["encoders"]
  spParams = modelParams["spParams"]
  tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])
  scalarEncoder2 = RandomDistributedScalarEncoder(
    enParams["consumption2"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth()
                   + scalarEncoder2.getWidth())

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True
  )

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # The connected threshold is taken from the Spatial Pooler parameters.
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three leading rows. The builtin next() works on both
    # Python 2 and Python 3 iterators.
    for _ in range(3):
      next(reader)

    output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)
    try:
      for count, record in enumerate(reader):

        if count >= numRecords: break

        # Convert date string into Python date object.
        timestamp = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
        # Convert data value strings into floats.
        prediction = float(record[1])
        prediction2 = float(record[2])

        # To encode, we need to provide zero-filled numpy arrays for the
        # encoders to populate.
        timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
        weekendBits = numpy.zeros(weekendEncoder.getWidth())
        consumptionBits = numpy.zeros(scalarEncoder.getWidth())
        consumptionBits2 = numpy.zeros(scalarEncoder2.getWidth())

        # Now we call the encoders to create bit representations for each
        # value.
        timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
        weekendEncoder.encodeIntoArray(timestamp, weekendBits)
        scalarEncoder.encodeIntoArray(prediction, consumptionBits)
        scalarEncoder2.encodeIntoArray(prediction2, consumptionBits2)

        # Concatenate all these encodings into one large encoding for Spatial
        # Pooling.
        encoding = numpy.concatenate(
          [timeOfDayBits, weekendBits, consumptionBits, consumptionBits2]
        )

        # Create an array to represent active columns, all initially zero.
        # This will be populated by the compute method below. It must have
        # the same dimensions as the Spatial Pooler.
        activeColumns = numpy.zeros(spParams["columnCount"])

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        activeColumnIndices = numpy.nonzero(activeColumns)[0]

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumnIndices, learn=True)

        activeCells = tm.getActiveCells()

        # Get the bucket info for this input value for classification.
        bucketIdx = scalarEncoder.getBucketIndices(prediction)[0]

        # Run classifier to translate active cells back to scalar value.
        classifierResult = classifier.compute(
          recordNum=count,
          patternNZ=activeCells,
          classification={
            "bucketIdx": bucketIdx,
            "actValue": prediction
          },
          learn=True,
          infer=True
        )

        # Pick the best prediction for 1 step out: the (confidence, value)
        # pair that sorts highest.
        oneStepConfidence, oneStep = sorted(
          zip(classifierResult[1], classifierResult["actualValues"]),
          reverse=True
        )[0]
        results.append(
          [record[0], prediction, oneStep, oneStepConfidence * 100])
        output.write(record[0], prediction, oneStep, oneStepConfidence * 100)
    finally:
      # Close the output even when a record fails part-way through.
      output.close()

    return results
Example #36
0
class NumentaTMLowLevelDetector(AnomalyDetector):
  """The 'numentaTM' detector, but not using the CLAModel or network API.

  The encoders, SpatialPooler, TemporalMemory and (optionally)
  AnomalyLikelihood are instantiated and driven directly: initialize() builds
  them and handleRecord() pushes a single record through them.
  """
  def __init__(self, *args, **kwargs):
    super(NumentaTMLowLevelDetector, self).__init__(*args, **kwargs)

    # All of the following are created in initialize().
    self.valueEncoder = None
    self.encodedValue = None
    self.timestampEncoder = None
    self.encodedTimestamp = None
    self.sp = None
    self.spOutput = None
    self.tm = None
    self.anomalyLikelihood = None

    # Set this to False if you want to get results based on raw scores
    # without using AnomalyLikelihood. This will give worse results, but
    # useful for checking the efficacy of AnomalyLikelihood. You will need
    # to re-optimize the thresholds when running with this setting.
    self.useLikelihood = True


  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]


  def initialize(self):
    """Build the encoders, SpatialPooler, TemporalMemory and, when
    self.useLikelihood is set, the AnomalyLikelihood helper.

    Reads self.inputMin, self.inputMax and self.probationaryPeriod.
    """

    # Initialize the RDSE with a resolution; calculated from the data min and
    # max, the resolution is specific to the data stream.
    rangePadding = abs(self.inputMax - self.inputMin) * 0.2
    minVal = self.inputMin - rangePadding
    maxVal = (self.inputMax + rangePadding
              if self.inputMin != self.inputMax
              else self.inputMin + 1)
    numBuckets = 130.0
    resolution = max(0.001, (maxVal - minVal) / numBuckets)
    self.valueEncoder = RandomDistributedScalarEncoder(resolution, seed=42)
    self.encodedValue = np.zeros(self.valueEncoder.getWidth(),
                                 dtype=np.uint32)

    # Initialize the timestamp encoder
    self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49, ))
    self.encodedTimestamp = np.zeros(self.timestampEncoder.getWidth(),
                                     dtype=np.uint32)

    # The SP input is the timestamp encoding followed by the value encoding
    # (see the concatenation in handleRecord).
    inputWidth = (self.timestampEncoder.getWidth() +
                  self.valueEncoder.getWidth())

    self.sp = SpatialPooler(**{
      "globalInhibition": True,
      "columnDimensions": [2048],
      "inputDimensions": [inputWidth],
      "potentialRadius": inputWidth,
      "numActiveColumnsPerInhArea": 40,
      "seed": 1956,
      "potentialPct": 0.8,
      "boostStrength": 0.0,
      "synPermActiveInc": 0.003,
      "synPermConnected": 0.2,
      "synPermInactiveDec": 0.0005,
    })
    self.spOutput = np.zeros(2048, dtype=np.float32)

    self.tm = TemporalMemory(**{
      "activationThreshold": 20,
      "cellsPerColumn": 32,
      "columnDimensions": (2048,),
      "initialPermanence": 0.24,
      "maxSegmentsPerCell": 128,
      "maxSynapsesPerSegment": 128,
      "minThreshold": 13,
      "maxNewSynapseCount": 31,
      "permanenceDecrement": 0.008,
      "permanenceIncrement": 0.04,
      "seed": 1960,
    })

    if self.useLikelihood:
      # math.floor() returns a float on Python 2; cast so that both periods
      # handed to AnomalyLikelihood are whole record counts.
      learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
        claLearningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=100
      )


  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore).

    inputData is indexed with the keys "value" and "timestamp". When
    self.useLikelihood is False the raw score is returned in both positions.
    """

    # Encode the input data record
    self.valueEncoder.encodeIntoArray(
        inputData["value"], self.encodedValue)
    self.timestampEncoder.encodeIntoArray(
        inputData["timestamp"], self.encodedTimestamp)

    # Run the encoded data through the spatial pooler
    self.sp.compute(np.concatenate((self.encodedTimestamp,
                                    self.encodedValue,)),
                    True, self.spOutput)

    # At the current state, the set of the region's active columns and the set
    # of columns that have previously-predicted cells are used to calculate the
    # raw anomaly score.
    activeColumns = set(self.spOutput.nonzero()[0].tolist())
    prevPredictedColumns = set(self.tm.columnForCell(cell)
                               for cell in self.tm.getPredictiveCells())
    numActive = len(activeColumns)
    if numActive > 0:
      rawScore = (len(activeColumns - prevPredictedColumns) /
                  float(numActive))
    else:
      # No active columns: nothing was unpredicted, and dividing by zero
      # would abort the whole run.
      rawScore = 0.0

    self.tm.compute(activeColumns)

    if self.useLikelihood:
      # Compute the log-likelihood score
      anomalyScore = self.anomalyLikelihood.anomalyProbability(
        inputData["value"], rawScore, inputData["timestamp"])
      logScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)
      return (logScore, rawScore)

    return (rawScore, rawScore)
Example #37
0
from nupic.algorithms.sdr_classifier_factory import SDRClassifierFactory
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.temporal_memory import TemporalMemory
from nupic.encoders.adaptive_scalar import AdaptiveScalarEncoder
from nupic.encoders.date import DateEncoder
from nupic.encoders.random_distributed_scalar \
     import RandomDistributedScalarEncoder
from nupic.encoders.scalar import ScalarEncoder
from simhash_distributed_scalar import SimHashDistributedScalarEncoder
from stats import mae, mape, nll, rmse

# setup
# Module-level constant and encoder instances created at import time.

# NOTE(review): presumably the Spatial Pooler / Temporal Memory column count;
# its use lies outside this excerpt -- confirm.
COL_WIDTH = 2048

# Two DateEncoder instances: one configured with `timeOfDay`, one with
# `weekend`.
timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1))
weekendEncoder = DateEncoder(weekend=21)
# Alternative encoders for the consumption value, kept commented out. Only the
# SimHashDistributedScalarEncoder assignment at the end is active.
#consumeEncoder = RandomDistributedScalarEncoder(
#  n=400,
#  w=21,
#  resolution=0.4)   # best, 0.88 original
#consumeEncoder = ScalarEncoder(
#  n=400,
#  w=21,
#  minval=0,
#  maxval=100)
#consumeEncoder = AdaptiveScalarEncoder(
#  n=400,
#  w=21)
consumeEncoder = SimHashDistributedScalarEncoder(n=400, w=21, resolution=0.25)
encodingWidth = (timeOfDayEncoder.getWidth() + weekendEncoder.getWidth() +
Example #38
0
def runHotgym(numRecords):
  """Run hot gym records through hand-wired encoders, SP, TM and classifier.

  Model parameters are read from the YAML file at _PARAMS_PATH and input
  records from the CSV file at _INPUT_FILE_PATH (its first three rows are
  skipped). Every record is encoded, passed through the SpatialPooler and the
  TemporalMemory with learning enabled, and the classifier's most confident
  inference is printed.

  :param numRecords: maximum number of data records to process.
  :returns: list holding one ``[oneStep, confidencePercent, None, None]``
            entry per processed record.
  """
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  # NOTE(review): potentialRadius is not passed, so the library default
  # applies -- confirm that is intended for a topology-free input.
  sp = SpatialPooler(
    # How large the input encoding will be. The trailing comma matters:
    # without it the parentheses yield a bare int, not a 1-tuple.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(spParams["columnCount"],),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=spParams["potentialPct"],
    # This means that the input space has no topology.
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    # Roughly 2%, giving that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195)
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    # How quickly synapses grow and degrade.
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=spParams["boostStrength"],
    # Random number generator seed.
    seed=spParams["seed"],
    # TODO: is this useful?
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(tmParams["columnCount"],),
    # How many cells in each mini-column.
    cellsPerColumn=tmParams["cellsPerColumn"],
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # TODO: This comes from the SP params, is this normal
    connectedPermanence=spParams["synPermConnected"],
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=tmParams["minThreshold"],
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three header rows. The next() builtin works on both Python 2
    # and Python 3 iterators, unlike the Python-2-only reader.next().
    next(reader)
    next(reader)
    next(reader)

    for count, record in enumerate(reader):

      if count >= numRecords:
        break

      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out. Only the top
      # (confidence, value) pair is needed, so take the max rather than
      # sorting the whole list.
      oneStepConfidence, oneStep = max(
        zip(classifierResult[1], classifierResult["actualValues"])
      )
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
Example #39
0
def runHotgym(numRecords):
  """Feed hot gym records through encoders, SP, TM and an SDR classifier.

  Parameters come from the YAML file at _PARAMS_PATH; records come from the
  CSV file at _INPUT_FILE_PATH, whose first three rows are skipped.

  :param numRecords: upper bound on the number of data records processed.
  :returns: list of ``[oneStep, confidencePercent, None, None]`` entries, one
            per processed record.
  """
  with open(_PARAMS_PATH, "r") as paramsFile:
    modelParams = yaml.safe_load(paramsFile)["modelParams"]
  encoderCfg = modelParams["sensorParams"]["encoders"]
  spCfg = modelParams["spParams"]
  tmCfg = modelParams["tmParams"]

  # One encoder per input field.
  timeOfDayEncoder = DateEncoder(
    timeOfDay=encoderCfg["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=encoderCfg["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    encoderCfg["consumption"]["resolution"])

  # The SP input is the three encodings laid end to end.
  encodingWidth = sum([
    timeOfDayEncoder.getWidth(),
    weekendEncoder.getWidth(),
    scalarEncoder.getWidth(),
  ])

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spCfg["columnCount"],),
    potentialPct=spCfg["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spCfg["globalInhibition"],
    localAreaDensity=spCfg["localAreaDensity"],
    numActiveColumnsPerInhArea=spCfg["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spCfg["synPermInactiveDec"],
    synPermActiveInc=spCfg["synPermActiveInc"],
    synPermConnected=spCfg["synPermConnected"],
    boostStrength=spCfg["boostStrength"],
    seed=spCfg["seed"],
    wrapAround=True
  )

  tm = TemporalMemory(
    columnDimensions=(tmCfg["columnCount"],),
    cellsPerColumn=tmCfg["cellsPerColumn"],
    activationThreshold=tmCfg["activationThreshold"],
    initialPermanence=tmCfg["initialPerm"],
    # The connected threshold is taken from the SP parameters.
    connectedPermanence=spCfg["synPermConnected"],
    minThreshold=tmCfg["minThreshold"],
    maxNewSynapseCount=tmCfg["newSynapseCount"],
    permanenceIncrement=tmCfg["permanenceInc"],
    permanenceDecrement=tmCfg["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmCfg["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmCfg["maxSynapsesPerSegment"],
    seed=tmCfg["seed"]
  )

  def encodeBits(encoder, value):
    # Encoders write into a caller-supplied zero-filled array.
    bits = numpy.zeros(encoder.getWidth())
    encoder.encodeIntoArray(value, bits)
    return bits

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as csvFile:
    rows = csv.reader(csvFile)
    # Discard the three header rows before the data starts.
    for _ in range(3):
      rows.next()

    for recordNum, row in enumerate(rows):
      if recordNum >= numRecords:
        break

      # Column 0 is the timestamp string, column 1 the consumption value.
      timestamp = datetime.datetime.strptime(row[0], "%m/%d/%y %H:%M")
      consumption = float(row[1])

      # Single flat encoding for the Spatial Pooler.
      encoding = numpy.concatenate([
        encodeBits(timeOfDayEncoder, timestamp),
        encodeBits(weekendEncoder, timestamp),
        encodeBits(scalarEncoder, consumption),
      ])

      # sp.compute() fills this array; it has one slot per SP column.
      columnFlags = numpy.zeros(spCfg["columnCount"])
      sp.compute(encoding, True, columnFlags)

      # Temporal Memory runs over the indices of the active mini-columns.
      tm.compute(columnFlags.nonzero()[0], learn=True)

      # The classifier maps active cells back to a scalar value; it is told
      # which bucket the actual value fell into.
      classification = {
        "bucketIdx": scalarEncoder.getBucketIndices(consumption)[0],
        "actValue": consumption
      }
      inference = classifier.compute(
        recordNum=recordNum,
        patternNZ=tm.getActiveCells(),
        classification=classification,
        learn=True,
        infer=True
      )

      # Rank (confidence, value) pairs, highest first, and report the top one.
      ranked = sorted(zip(inference[1], inference["actualValues"]),
                      reverse=True)
      oneStepConfidence, oneStep = ranked[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

  return results
class DendriteDetector(AnomalyDetector):
    """Anomaly detector chaining a time-of-day DateEncoder and a value RDSE
    into a SpatialPooler and a TemporalMemory, scored with AnomalyLikelihood.

    handleRecord() also forces the final score to 1.0 when a value falls
    outside the tolerated range of the values seen so far.
    """

    # Developer diagnostics, off by default. Set to True on the class or on an
    # instance to enable them; they import matplotlib (and project-local
    # modules) lazily, so nothing extra is required while they stay disabled.
    PLOT_CORRELATIONS = False
    PLOT_EXCITEMENT = False
    # (start, end) record ages between which correlation samples are taken.
    # (1000, 1800) is the alternative window used during development.
    CORRELATION_WINDOW = (4000, 7260)
    # Record age at which the excitement plots are drawn (1800 is the
    # alternative used during development).
    EXCITEMENT_PLOT_AGE = 7265

    def initialize(self):
        """Create the encoders, SpatialPooler, TemporalMemory and
        AnomalyLikelihood, and reset the running state.

        Reads self.inputMin, self.inputMax and self.probationaryPeriod.
        """
        # Keep track of value range for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None

        # Time of day encoder
        self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49),
                                            name='time_enc')
        # RDSE encoder for the time series value.
        minResolution = 0.001
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        maxVal = self.inputMax + rangePadding
        numBuckets = 130
        resolution = max(minResolution, (maxVal - minVal) / numBuckets)
        self.value_enc = RandomDistributedScalarEncoder(resolution=resolution,
                                                        name='value_rdse')

        # Spatial Pooler.
        encodingWidth = (self.timeOfDayEncoder.getWidth() +
                         self.value_enc.getWidth())
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(2048, ),
            potentialPct=0.8,
            potentialRadius=encodingWidth,
            globalInhibition=1,
            numActiveColumnsPerInhArea=40,
            synPermInactiveDec=0.0005,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956,
            wrapAround=True,
        )

        self.tm = TemporalMemory(
            columnDimensions=(2048, ),
            cellsPerColumn=32,
            activationThreshold=20,
            initialPermanence=.5,  # Increased to connectedPermanence.
            connectedPermanence=.5,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceIncrement=0.04,
            permanenceDecrement=0.008,
            predictedSegmentDecrement=0.001,
            maxSegmentsPerCell=128,
            # Changed meaning. Also see connections.topology[2]
            maxSynapsesPerSegment=128,
            seed=1993,
        )

        # Initialize the anomaly likelihood object
        numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numentaLearningPeriod,
            estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
            reestimationPeriod=100,
        )

        # Number of records handled so far.
        self.age = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]

    def handleRecord(self, inputData):
        """
        Argument inputData is {"value": instantaneous_value,
        "timestamp": pandas.Timestamp}.
        Returns a tuple (anomalyScore, rawScore).

        Internally to NuPIC "anomalyScore" corresponds to "likelihood_score"
        and "rawScore" corresponds to "anomaly_score". Sorry about that.
        """
        # Check for spatial anomalies and update min/max values.
        value = inputData["value"]
        spatialAnomaly = self._checkSpatialAnomaly(value)

        # Run the HTM stack.  First Encoders.
        timestamp = inputData["timestamp"]
        timeOfDayBits = np.zeros(self.timeOfDayEncoder.getWidth())
        self.timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
        valueBits = np.zeros(self.value_enc.getWidth())
        self.value_enc.encodeIntoArray(value, valueBits)
        encoding = np.concatenate([timeOfDayBits, valueBits])
        # Spatial Pooler.
        activeColumns = np.zeros(self.sp.getNumColumns())
        self.sp.compute(encoding, True, activeColumns)
        activeColumnIndices = np.nonzero(activeColumns)[0]
        # Temporal Memory and Anomaly. The predictions must be read before
        # tm.compute() so they are the ones made for the current record.
        predictions = self.tm.getPredictiveCells()
        predictedColumns = list(self.tm.mapCellsToColumns(predictions).keys())
        self.tm.compute(activeColumnIndices, learn=True)
        rawScore = anomaly.computeRawAnomalyScore(activeColumnIndices,
                                                  predictedColumns)

        # Compute log(anomaly likelihood)
        anomalyScore = self.anomalyLikelihood.anomalyProbability(
            value, rawScore, timestamp)
        finalScore = self.anomalyLikelihood.computeLogLikelihood(anomalyScore)

        if spatialAnomaly:
            finalScore = 1.0

        if self.PLOT_CORRELATIONS:
            self._plotCorrelations()
        if self.PLOT_EXCITEMENT:
            self._plotExcitement()

        self.age += 1

        return (finalScore, rawScore)

    def _checkSpatialAnomaly(self, value):
        """Return True if value lies outside the tolerated range of the values
        seen so far, then fold value into the running min/max."""
        spatialAnomaly = False
        # Both bounds are None before the first record and equal after it, so
        # the check only starts once two distinct values have been seen.
        if self.minVal != self.maxVal:
            tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
            maxExpected = self.maxVal + tolerance
            minExpected = self.minVal - tolerance
            if value > maxExpected or value < minExpected:
                spatialAnomaly = True
        if self.maxVal is None or value > self.maxVal:
            self.maxVal = value
        if self.minVal is None or value < self.minVal:
            self.minVal = value
        return spatialAnomaly

    def _plotCorrelations(self):
        """Diagnostic: plot correlation of excitement versus
        compartmentalization for a random sample of cells."""
        if self.age == 0:
            print("Correlation Plots ENABLED.")
        start_age, end_age = self.CORRELATION_WINDOW
        if self.age == start_age:
            import correlation
            import random
            self.cor_samplers = []
            sampled_cells = []
            # Keep drawing distinct random cells until 20 samplers exist;
            # cells without dendrite roots are skipped.
            while len(self.cor_samplers) < 20:
                n = random.randrange(self.tm.numberOfCells())
                if n in sampled_cells:
                    continue
                sampled_cells.append(n)
                neuron = self.tm.connections.dataForCell(n)
                if neuron._roots:
                    c = correlation.CorrelationSampler(neuron._roots[0])
                    c.random_sample_points(100)
                    self.cor_samplers.append(c)
            print("Created %d Correlation Samplers" %
                  len(self.cor_samplers))
        if self.age >= start_age:
            for smplr in self.cor_samplers:
                smplr.sample()
        if self.age == end_age:
            import matplotlib.pyplot as plt
            for idx, smplr in enumerate(self.cor_samplers):
                if smplr.num_samples == 0:
                    print("No samples, plot not shown.")
                    continue
                plt.figure("Sample %d" % idx)
                smplr.plot(period=64)  # Different value!
            plt.show()

    def _plotExcitement(self):
        """Diagnostic: plot excitement of a typical detection on a dendrite,
        once, when the record age reaches EXCITEMENT_PLOT_AGE."""
        if self.age != self.EXCITEMENT_PLOT_AGE:
            return
        import matplotlib.pyplot as plt
        import random
        from connections import SYN_CONNECTED_ACTIVE
        sampled_cells = set()
        for figure_num in range(40):
            plt.figure("(%d)" % figure_num)
            # Find an active cell to view.
            cell = None
            for attempt in range(100):
                event = random.choice(self.tm.activeEvents)
                cell = event.cell  # This is an integer.
                if cell is not None and cell not in sampled_cells:
                    break
            else:
                # No fresh cell found in 100 attempts: stop making figures.
                break
            sampled_cells.add(cell)
            cell = self.tm.connections.dataForCell(cell)
            # Organize the data by walking one random root-to-leaf path.
            EPSPs = []
            excitement = []
            distance_to_root = 0
            segment_offsets = {}
            branch = cell._roots[0]
            while True:
                segment_offsets[branch] = distance_to_root
                distance_to_root += len(branch._synapses)
                excitement.extend(branch.excitement)
                for syn in branch._synapses:
                    if syn is None:
                        EPSPs.append(0)
                    else:
                        EPSPs.append(syn.state == SYN_CONNECTED_ACTIVE)
                if branch.children:
                    branch = random.choice(branch.children)
                else:
                    break
            plt.plot(
                np.arange(distance_to_root),
                EPSPs,
                'r',
                np.arange(distance_to_root),
                excitement,
                'b',
            )
            plt.title(
                "Dendrite Activation\n Horizontal line is activation threshold, Vertical lines are segment bifurcations"
            )
            plt.xlabel("Distance along Dendrite", )
            plt.ylabel("EPSPs are Red, Excitement is Blue")
            # Show lines where the excitement crosses thresholds.
            plt.axhline(20, color='k')  # Hard coded parameter value.
            for offset in segment_offsets.values():
                if offset != 0:
                    plt.axvline(offset, color='k')
        print("\nShowing %d excitement plots." % len(sampled_cells))
        plt.show()
Example #41
0
    def testWeekend(self):
        '''Test weekend encoder.

        A custom-days encoder for fri/sat/sun must produce the same encoding
        as the built-in weekend encoder over 300 consecutive days, and a
        "Monday" custom-days encoder must decode to 1.0 only on Mondays.
        '''
        e = DateEncoder(customDays=(21, ["sat", "sun", "fri"]))
        mon = DateEncoder(customDays=(21, "Monday"))

        e2 = DateEncoder(weekend=(21, 1))
        d = datetime.datetime(1988, 5, 29, 20, 0)
        self.assertTrue((e.encode(d) == e2.encode(d)).all())
        for _ in range(300):
            d = d + datetime.timedelta(days=1)
            self.assertTrue((e.encode(d) == e2.encode(d)).all())
            # Make sure the "Monday" encoder reports 1.0 exactly when the
            # date really is a Monday (weekday() == 0).
            decoded = mon.decode(mon.encode(d))
            if decoded[0]["Monday"][0][0][0] == 1.0:
                self.assertEqual(d.weekday(), 0)
            else:
                self.assertNotEqual(d.weekday(), 0)
print "3 =   ", rdse.encode(3)
print "4 =   ", rdse.encode(4)
print "5 =   ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "100000 =", rdse.encode(1000)


import datetime
from nupic.encoders.date import DateEncoder

DateEncoder?


de = DateEncoder(season=5)

now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now =       ", de.encode(now)
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas =      ", de.encode(xmas)


from nupic.encoders.category import CategoryEncoder

categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)
cat = encoder.encode("cat")
dog = encoder.encode("dog")
Example #43
0
def runHotgym(numRecords):
  """Run the encoders -> Spatial Pooler -> Temporal Memory -> classifier loop.

  Model parameters are read from the YAML file at _PARAMS_PATH and input rows
  from the CSV file at _INPUT_FILE_PATH. Each data row is encoded (time of
  day, weekend flag and consumption value), fed through the Spatial Pooler and
  the Temporal Memory with learning enabled, and the resulting active cells
  are handed to the SDR classifier, which both learns and infers on every row.
  The best 1-step prediction for every row is printed.

  :param numRecords: maximum number of data rows to process; processing also
                     stops when the input file is exhausted.
  :returns: a list with one entry per processed row, of the form
            [predictedValue, confidencePercent, None, None], where the first
            two items describe the most confident 1-step prediction.
  """
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
  enParams = modelParams["sensorParams"]["encoders"]
  spParams = modelParams["spParams"]
  tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    # How large the input encoding will be. Passed as a one-element tuple,
    # like the Temporal Memory dimensions below.
    inputDimensions=(encodingWidth,),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(spParams["columnCount"],),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=spParams["potentialPct"],
    # This means that the input space has no topology.
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    # Roughly 2%, giving that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195)
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    # How quickly synapses grow and degrade.
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=spParams["boostStrength"],
    # Random number generator seed.
    seed=spParams["seed"],
    # TODO: is this useful?
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(tmParams["columnCount"],),
    # How many cells in each mini-column.
    cellsPerColumn=tmParams["cellsPerColumn"],
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # TODO: This comes from the SP params, is this normal
    connectedPermanence=spParams["synPermConnected"],
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=tmParams["minThreshold"],
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three rows that precede the data. The built-in next() works on
    # both Python 2 and Python 3 iterators.
    for _ in range(3):
      next(reader)

    for count, record in enumerate(reader):

      if count >= numRecords:
        break

      # Convert the date string into a Python datetime object.
      timestamp = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
      weekendEncoder.encodeIntoArray(timestamp, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Pick the best prediction for 1 step out: the (confidence, value) pair
      # with the highest confidence. max() selects the same pair as sorting in
      # descending order and taking the first element.
      oneStepConfidence, oneStep = max(
        zip(classifierResult[1], classifierResult["actualValues"])
      )
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

  return results