Example #1
0
def smart_encode(data_fl):
    """Pick encoders for each column of ``data_fl`` based on the column dtype.

    Datetime (``M8[ns]``) columns get a list of ``DateEncoder`` instances
    chosen from the time step between the first two rows.  ``float`` columns
    get a single ``RandomDistributedScalarEncoder`` whose resolution is the
    column's value range divided by ``400 - 21``.  Columns of any other dtype
    are skipped.

    :param data_fl: pandas DataFrame; datetime columns need at least two rows.
    :return: list holding one inner list of encoders per encoded column, in
        column order.
    """
    encoder_list = []
    for col_name in data_fl.columns:
        column = data_fl[col_name]
        if column.dtype == 'M8[ns]':
            # Use positional access: plain ``column[1]`` is a label lookup on
            # integer indexes and fails or picks the wrong rows when the frame
            # does not carry the default 0..n-1 index.
            time_delta = column.iloc[1] - column.iloc[0]
            if time_delta >= pd.Timedelta(1, unit='M'):
                # Monthly (or coarser) data: only the season is encoded.
                encoders = [DateEncoder(season=(5, 1))]
            elif time_delta >= pd.Timedelta(1, unit='D'):
                # Daily data: season, day of week and weekend.
                encoders = [
                    DateEncoder(season=21),
                    DateEncoder(dayOfWeek=(21, 1)),
                    DateEncoder(weekend=5),
                ]
            else:
                # Sub-daily data: additionally encode the time of day.
                encoders = [
                    DateEncoder(season=(5, 1)),
                    DateEncoder(dayOfWeek=(5, 1)),
                    DateEncoder(weekend=5),
                    DateEncoder(timeOfDay=(5, 1)),
                ]
            encoder_list.append(encoders)
        elif column.dtype == "float":
            col_range = column.max() - column.min()
            # NOTE(review): 400 and 21 look like the encoder's total and
            # active bit counts -- confirm against the encoder defaults.
            res = col_range / (400 - 21)
            encoder_list.append([RandomDistributedScalarEncoder(res)])
    return encoder_list
Example #2
0
    def setUp(self):
        """Encode one fixed timestamp and build the bit pattern expected for it."""
        # Widths: 3 bits for season, 1 bit for day of week, 1 for weekend and
        # 5 for time of day.
        self._e = DateEncoder(season=3, dayOfWeek=1, weekend=1, timeOfDay=5)
        # 4th Nov 2010, 14:55 -- middle of fall, a Thursday, not a weekend,
        # in the afternoon.
        self._d = datetime.datetime(2010, 11, 4, 14, 55)
        self._bits = self._e.encode(self._d)

        # Season is laid out as aaabbbcccddd (1 bit/month)
        # TODO should be <<3?
        # Expected 000000000111 (centered on month 11 - Nov).
        seasonBits = [0] * 9 + [1] * 3

        # Week is MTWTFSS; contrary to the localtime documentation Monday = 0
        # (for python datetime.datetime.timetuple()), so Thursday is index 3.
        dayOfWeekBits = [0, 0, 0, 1, 0, 0, 0]

        # Not a weekend, so the "False" bit is the one that is set.
        weekendBits = [1, 0]

        # Time of day has a radius of 4 hours and w of 5, so each bit covers
        # 240/5 = 48 minutes.  14:55 is minute 14*60 + 55 = 895 and
        # 895/48 = bit 18.6.  30 bits in total (30 * 48 minutes = 24 hours).
        timeOfDayBits = [0] * 16 + [1] * 5 + [0] * 9

        allBits = seasonBits + dayOfWeekBits + weekendBits + timeOfDayBits
        self._expected = numpy.array(allBits, dtype=defaultDtype)
Example #3
0
    def initialize(self):
        """Create the encoders, spatial pooler, extended temporal memory and
        anomaly-likelihood helper, plus the buffers they write into."""
        # Value encoder: pad the input range by 20% on both sides; when the
        # range is degenerate (min == max) use min + 1 as the upper bound.
        padding = abs(self.inputMax - self.inputMin) * 0.2
        lowerBound = self.inputMin - padding
        if self.inputMin != self.inputMax:
            upperBound = self.inputMax + padding
        else:
            upperBound = self.inputMin + 1
        bucketCount = 130.0
        resolution = max(0.001, (upperBound - lowerBound) / bucketCount)
        self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                           w=41,
                                                           seed=42)
        valueWidth = self.valueEncoder.getWidth()
        self.encodedValue = np.zeros(valueWidth, dtype=np.uint32)

        # Timestamp encoder: only the time of day is encoded.
        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        timestampWidth = self.timestampEncoder.getWidth()
        self.encodedTimestamp = np.zeros(timestampWidth, dtype=np.uint32)

        # The spatial pooler is sized for the value encoding only; the
        # timestamp width is used for the temporal memory's basal input.
        self.sp = SpatialPooler(
            inputDimensions=[valueWidth],
            columnDimensions=[2048],
            potentialRadius=valueWidth,
            potentialPct=0.8,
            globalInhibition=True,
            numActiveColumnsPerInhArea=40,
            synPermActiveInc=0.003,
            synPermInactiveDec=0.0005,
            synPermConnected=0.2,
            boostStrength=0.0,
            seed=1956,
        )
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.etm = ExtendedTemporalMemory(
            columnDimensions=(2048, ),
            basalInputDimensions=(timestampWidth, ),
            cellsPerColumn=1,
            activationThreshold=13,
            minThreshold=10,
            initialPermanence=0.21,
            permanenceIncrement=0.1,
            permanenceDecrement=0.1,
            maxNewSynapseCount=20,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=32,
            seed=1960,
            checkInputs=False,
        )

        # Half of the probationary period is used for learning, the rest for
        # estimation samples.
        learningPeriod = math.floor(self.probationaryPeriod / 2.0)
        estimationSamples = self.probationaryPeriod - learningPeriod
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            claLearningPeriod=learningPeriod,
            estimationSamples=estimationSamples,
            reestimationPeriod=100)
Example #4
0
    def testWeekend(self):
        '''Test weekend encoder'''
        # A customDays encoder listing Fri/Sat/Sun is compared against the
        # built-in weekend encoder for every date checked below.
        e = DateEncoder(customDays=(21, ["sat", "sun", "fri"]))
        mon = DateEncoder(customDays=(21, "Monday"))

        e2 = DateEncoder(weekend=(21, 1))
        d = datetime.datetime(1988, 5, 29, 20, 00)
        self.assertTrue((e.encode(d) == e2.encode(d)).all())
        for _ in range(300):
            d = d + datetime.timedelta(days=1)
            self.assertTrue((e.encode(d) == e2.encode(d)).all())

            # Make sure the "Monday" encoder decodes to 1.0 on Mondays and
            # only on Mondays (weekday() == 0).
            decoded = mon.decode(mon.encode(d))
            if decoded[0]["Monday"][0][0][0] == 1.0:
                self.assertEqual(d.weekday(), 0)
            else:
                self.assertNotEqual(d.weekday(), 0)
Example #5
0
    def testWeekend(self):
        """Compare the weekend encoder with an equivalent customDays encoder."""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        customDaysEncoder = DateEncoder(customDays=(21, ["sat", "sun", "fri"]),
                                        forced=True)
        mondayEncoder = DateEncoder(customDays=(21, "Monday"), forced=True)
        weekendEncoder = DateEncoder(weekend=(21, 1), forced=True)

        oneDay = datetime.timedelta(days=1)
        day = datetime.datetime(1988, 5, 29, 20, 00)
        self.assertTrue(
            numpy.array_equal(customDaysEncoder.encode(day),
                              weekendEncoder.encode(day)))
        for _ in range(300):
            day = day + oneDay
            self.assertTrue(
                numpy.array_equal(customDaysEncoder.encode(day),
                                  weekendEncoder.encode(day)))

            # The "Monday" encoder must decode to 1.0 exactly on Mondays.
            decoded = mondayEncoder.decode(mondayEncoder.encode(day))
            decodedAsMonday = decoded[0]["Monday"][0][0][0] == 1.0
            if decodedAsMonday:
                self.assertEqual(day.weekday(), 0)
            else:
                self.assertNotEqual(day.weekday(), 0)
Example #6
0
    def testHoliday(self):
        '''Check the holiday encoding, including its smooth transition.'''
        encoder = DateEncoder(holiday=5)
        onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype='uint8')
        offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype='uint8')
        # Pattern expected part-way through the transition in or out.
        inTransition = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
                                   dtype='uint8')

        # (timestamp, expected encoding), checked in this order.
        cases = [
            (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
            (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
            (datetime.datetime(1999, 12, 26, 8, 00), inTransition),
            (datetime.datetime(2011, 12, 24, 16, 00), inTransition),
        ]
        for when, expected in cases:
            self.assertTrue((encoder.encode(when) == expected).all())
  def testHolidayMultiple(self):
    """Check the holiday encoding when several holidays are configured."""
    # use of forced is not recommended, used here for readability, see
    # scalar.py
    configuredHolidays = [(12, 25), (2018, 4, 1), (2017, 4, 16)]
    encoder = DateEncoder(holiday=5, forced=True, holidays=configuredHolidays)
    onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
    offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")

    # (timestamp, expected encoding), checked in this order.
    cases = (
        (datetime.datetime(2011, 12, 25, 4, 55), onHoliday),
        (datetime.datetime(2007, 12, 2, 4, 55), offHoliday),
        (datetime.datetime(2018, 4, 1, 16, 10), onHoliday),
        (datetime.datetime(2017, 4, 16, 16, 10), onHoliday),
    )
    for when, expected in cases:
      self.assertTrue(numpy.array_equal(encoder.encode(when), expected))
Example #8
0
    def testHoliday(self):
        '''Check the holiday encoding, including its smooth transition.'''
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        encoder = DateEncoder(holiday=5, forced=True)
        expectedByName = {
            'holiday': numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                                   dtype='uint8'),
            'notholiday': numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                                      dtype='uint8'),
            'holiday2': numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
                                    dtype='uint8'),
        }

        # (timestamp, name of the expected pattern), checked in this order.
        checks = [
            (datetime.datetime(2010, 12, 25, 4, 55), 'holiday'),
            (datetime.datetime(2008, 12, 27, 4, 55), 'notholiday'),
            (datetime.datetime(1999, 12, 26, 8, 00), 'holiday2'),
            (datetime.datetime(2011, 12, 24, 16, 00), 'holiday2'),
        ]
        for when, patternName in checks:
            matches = encoder.encode(when) == expectedByName[patternName]
            self.assertTrue(matches.all())
Example #9
0
    def testHoliday(self):
        """Check the holiday encoding, including its smooth transition."""
        # use of forced is not recommended, used here for readability, see
        # scalar.py
        encoder = DateEncoder(holiday=5, forced=True)
        onHoliday = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype="uint8")
        offHoliday = numpy.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype="uint8")
        # Pattern expected part-way through the transition in or out.
        inTransition = numpy.array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
                                   dtype="uint8")

        # (timestamp, expected encoding), checked in this order.
        cases = (
            (datetime.datetime(2010, 12, 25, 4, 55), onHoliday),
            (datetime.datetime(2008, 12, 27, 4, 55), offHoliday),
            (datetime.datetime(1999, 12, 26, 8, 00), inTransition),
            (datetime.datetime(2011, 12, 24, 16, 00), inTransition),
        )
        for when, expected in cases:
            encoded = encoder.encode(when)
            self.assertTrue(numpy.array_equal(encoded, expected))
    def initialize(self):
        """Create the encoders, spatial pooler, temporal memory and
        anomaly-likelihood helper, and reset the detector's counters."""
        # Value range seen so far, kept for spatial anomaly detection.
        self.minVal = None
        self.maxVal = None

        # Encoder for the time of day of each record.
        self.timeOfDayEncoder = DateEncoder(timeOfDay=(21, 9.49),
                                            name='time_enc')

        # RDSE for the time series value: the input range is padded by 20% on
        # both sides and split into 130 buckets, with a floor on resolution.
        padding = abs(self.inputMax - self.inputMin) * 0.2
        paddedMin = self.inputMin - padding
        paddedMax = self.inputMax + padding
        bucketCount = 130
        resolutionFloor = 0.001
        resolution = max(resolutionFloor,
                         (paddedMax - paddedMin) / bucketCount)
        self.value_enc = RandomDistributedScalarEncoder(resolution=resolution,
                                                        name='value_rdse')

        # Spatial pooler over the concatenated time-of-day + value encoding.
        encodingWidth = (self.timeOfDayEncoder.getWidth() +
                         self.value_enc.getWidth())
        spArgs = {
            "inputDimensions": (encodingWidth, ),
            "columnDimensions": (2048, ),
            "potentialPct": 0.8,
            "potentialRadius": encodingWidth,
            "globalInhibition": 1,
            "numActiveColumnsPerInhArea": 40,
            "synPermInactiveDec": 0.0005,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "boostStrength": 0.0,
            "seed": 1956,
            "wrapAround": True,
        }
        self.sp = SpatialPooler(**spArgs)

        tmArgs = {
            "columnDimensions": (2048, ),
            "cellsPerColumn": 32,
            "activationThreshold": 20,
            # Increased to connectedPermanence.
            "initialPermanence": .5,
            "connectedPermanence": .5,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceIncrement": 0.04,
            "permanenceDecrement": 0.008,
            "predictedSegmentDecrement": 0.001,
            "maxSegmentsPerCell": 128,
            # Changed meaning. Also see connections.topology[2]
            "maxSynapsesPerSegment": 128,
            "seed": 1993,
        }
        self.tm = TemporalMemory(**tmArgs)

        # Anomaly likelihood: half of the probationary period is used for
        # learning, the remainder for estimation samples.
        numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        estimationSamples = self.probationaryPeriod - numentaLearningPeriod
        self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
            learningPeriod=numentaLearningPeriod,
            estimationSamples=estimationSamples,
            reestimationPeriod=100,
        )

        self.age = 0
Example #11
0
# Column of Data that is analysed.
var_chosen = 'value'
Data = ma_preprocess(Data[var_chosen], 4).rename(columns={0: var_chosen})

# Output columns, filled in later.
Data['Anomaly'] = 0.0
Data['Anomaly_Likelihood'] = 0.0

prec_param = 5
pooler_out = 2024
cell_col = 5

# Value encoder resolution: standard deviation of the first column divided
# by the precision parameter.
Res = Data.std()[0] / prec_param
RDSE = RandomDistributedScalarEncoder(resolution=Res)
# Encoders for the time of day and for the weekend.
TODE = DateEncoder(timeOfDay=(21, 1))
WENDE = DateEncoder(weekend=21)

# Each Encoder pairs a column with the names of the module-level encoder
# objects applied to it.
var_encoders = {Encoder('value', ['RDSE'])}
# Encoder('_index', ['TODE'])}

# Total encoding width: sum of the widths of every referenced encoder.
encoder_width = 0
for x in var_encoders:
    for y in x.encoders:
        # Resolve the encoder by name directly instead of building source
        # text for exec(); `s` is still assigned at module level as before.
        s = globals()[y].getWidth()
        encoder_width += s

SP = SpatialPooler(
    inputDimensions=encoder_width,
Example #12
0
from nupic.algorithms.sdr_classifier_factory import SDRClassifierFactory
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.temporal_memory import TemporalMemory
from nupic.encoders.adaptive_scalar import AdaptiveScalarEncoder
from nupic.encoders.date import DateEncoder
from nupic.encoders.random_distributed_scalar \
     import RandomDistributedScalarEncoder
from nupic.encoders.scalar import ScalarEncoder
from simhash_distributed_scalar import SimHashDistributedScalarEncoder
from stats import mae, mape, nll, rmse

# setup

# NOTE(review): presumably the column count of the pooler configured later in
# the file -- its use is not visible in this part of the script.
COL_WIDTH = 2048

# Timestamp encoders: one driven by the time of day, one by the weekend.
timeOfDayEncoder = DateEncoder(timeOfDay=(21, 1))
weekendEncoder = DateEncoder(weekend=21)
# Alternative encoders for the consumption value, left commented out; each
# uses the same n=400 / w=21 sizing as the active encoder below.
#consumeEncoder = RandomDistributedScalarEncoder(
#  n=400,
#  w=21,
#  resolution=0.4)   # best, 0.88 original
#consumeEncoder = ScalarEncoder(
#  n=400,
#  w=21,
#  minval=0,
#  maxval=100)
#consumeEncoder = AdaptiveScalarEncoder(
#  n=400,
#  w=21)
# Encoder currently in use for the consumption value.
consumeEncoder = SimHashDistributedScalarEncoder(n=400, w=21, resolution=0.25)
encodingWidth = (timeOfDayEncoder.getWidth() + weekendEncoder.getWidth() +
Example #13
0
def runHotgym(numRecords):
  """Run the encoders -> SP -> TM -> SDR classifier pipeline over the input CSV.

  Model parameters are read from the YAML file at ``_PARAMS_PATH`` and records
  from the CSV file at ``_INPUT_FILE_PATH``, whose first three rows are
  skipped.  Each record must hold a ``%m/%d/%y %H:%M`` timestamp followed by
  two numeric fields.  Every processed record is also written to a
  ``NuPICFileOutput`` created for ``_FILE_NAME``.

  :param numRecords: maximum number of data records to process.
  :return: list of ``[timestamp string, first value, one-step prediction,
      confidence in percent]`` rows, one per processed record.
  """
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
  enParams = modelParams["sensorParams"]["encoders"]
  spParams = modelParams["spParams"]
  tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])
  scalarEncoder2 = RandomDistributedScalarEncoder(
    enParams["consumption2"]["resolution"])

  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth()
                   + scalarEncoder2.getWidth())

  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    seed=spParams["seed"],
    wrapAround=True
  )

  tm = TemporalMemory(
    columnDimensions=(tmParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # The connected permanence is taken from the spatial pooler parameters.
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three rows that precede the data.  next(reader) works on both
    # Python 2 and Python 3, unlike reader.next().
    next(reader)
    next(reader)
    next(reader)

    output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)
    try:
      for count, record in enumerate(reader):

        if count >= numRecords:
          break

        # Convert the timestamp string into a Python datetime object.
        timestamp = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
        # Convert the two data value strings into floats.
        value = float(record[1])
        value2 = float(record[2])

        # To encode, we need to provide zero-filled numpy arrays for the
        # encoders to populate.
        timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
        weekendBits = numpy.zeros(weekendEncoder.getWidth())
        consumptionBits = numpy.zeros(scalarEncoder.getWidth())
        consumptionBits2 = numpy.zeros(scalarEncoder2.getWidth())

        # Now we call the encoders to create bit representations for each
        # value.
        timeOfDayEncoder.encodeIntoArray(timestamp, timeOfDayBits)
        weekendEncoder.encodeIntoArray(timestamp, weekendBits)
        scalarEncoder.encodeIntoArray(value, consumptionBits)
        scalarEncoder2.encodeIntoArray(value2, consumptionBits2)

        # Concatenate all these encodings into one large encoding for Spatial
        # Pooling.
        encoding = numpy.concatenate(
          [timeOfDayBits, weekendBits, consumptionBits, consumptionBits2]
        )

        # Create an array to represent active columns, all initially zero.
        # This will be populated by the compute method below. It must have
        # the same dimensions as the Spatial Pooler.
        activeColumns = numpy.zeros(spParams["columnCount"])

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        activeColumnIndices = numpy.nonzero(activeColumns)[0]

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumnIndices, learn=True)

        activeCells = tm.getActiveCells()

        # Get the bucket info for the first input value for classification;
        # only that value is classified and predicted.
        bucketIdx = scalarEncoder.getBucketIndices(value)[0]

        # Run classifier to translate active cells back to scalar value.
        classifierResult = classifier.compute(
          recordNum=count,
          patternNZ=activeCells,
          classification={
            "bucketIdx": bucketIdx,
            "actValue": value
          },
          learn=True,
          infer=True
        )

        # Pick the most likely prediction for 1 step out.
        oneStepConfidence, oneStep = sorted(
          zip(classifierResult[1], classifierResult["actualValues"]),
          reverse=True
        )[0]
        results.append([record[0], value, oneStep, oneStepConfidence * 100])
        output.write(record[0], value, oneStep, oneStepConfidence * 100)
    finally:
      # Close the output even when a record fails to parse or process.
      output.close()

  return results
Example #14
0
def HTM_AD(
        Data='Test',
        vars={'value': ['num']},
        prec_param=5,
        pooler_out=2024,  # Number of columns of the pooler output
        cell_col=5,  # HTM cells per column
        W=72,  # Window parameter
        W_prim=5,  # Local window for anomaly detection likelihood
        eps=1e-6,  # Guards against division by zero
        athreshold=0.95):
    """
    Perform HTM based anomaly detection on the time series provided.

    :param Data: DataFrame holding the series, or the string 'Test' to load
        the bundled sample CSV (resampled hourly).
    :param vars: Maps a column name to the list of encoder kinds applied to
        it. Possible kinds: num, tod, weekend. The default dict is only read,
        never modified.
    :param prec_param: Defines how much precision the number encoder has.
        The encoder resolution is the column's standard deviation divided by
        this value, so it depends on the variability of the data. A high
        precision might mean a high error at predicting the variable value
        in noisy variables.
    :param pooler_out: Number of columns of the pooler output
    :param cell_col: HTM cells per column
    :param W: Window parameter
    :param W_prim: Local window for anomaly detection likelihood
    :param eps: Lower bound for the window standard deviation, to avoid
        dividing by zero
    :param athreshold: Anomaly likelihood above which a row is flagged as
        anomalous
    :return: The Data + 3 columns, or ``{"error": ...}`` when an encoder kind
        in ``vars`` is not recognized.
        Anomaly: share of active columns that were not predicted by the HTM
            network on the previous step
        Anomaly_Likelihood: likelihood of the data being anomalous
        Anomaly_flag: classifies the data in anomalous vs non anomalous
    """

    # Check the type first: comparing a DataFrame with 'Test' yields a frame
    # of booleans, and using that in `if` raises "truth value is ambiguous".
    if isinstance(Data, str) and Data == 'Test':
        # No data supplied: load the benchmark dataset.
        Data = pd.read_csv('anomaly_API/Data/sample.csv',
                           parse_dates=True,
                           index_col='timestamp')
        Data = Data.resample('H').bfill().interpolate()

    TODE = DateEncoder(timeOfDay=(21, 1))
    WENDE = DateEncoder(weekend=21)

    # Encoders created in this call, by the name Encoder objects refer to.
    local_encoders = {"TODE": TODE, "WENDE": WENDE}
    module_scope = globals()

    var_encoders = set()
    for column in vars:
        for kind in vars[column]:
            if kind == 'num':
                rdse_name = "RDSE_" + column
                rdse = RandomDistributedScalarEncoder(
                    resolution=Data[column].std() / prec_param)
                # The per-column RDSE is published at module level under its
                # name. NOTE(review): presumably multiencode resolves encoders
                # by name there -- confirm against its implementation.
                module_scope[rdse_name] = rdse
                local_encoders[rdse_name] = rdse
                var_encoders.add(Encoder(column, [rdse_name]))
            elif kind == 'weekend':
                var_encoders.add(Encoder(column, ["WENDE"]))
            elif kind == 'tod':
                var_encoders.add(Encoder(column, ["TODE"]))
            else:
                return {"error": "Variable encoder type is not recognized "}

    # Total encoding width: sum of the widths of every referenced encoder.
    # A module-level object with the same name takes precedence over the
    # local TODE/WENDE.
    encoder_width = 0
    for var_encoder in var_encoders:
        for enc_name in var_encoder.encoders:
            if enc_name in module_scope:
                target = module_scope[enc_name]
            else:
                target = local_encoders[enc_name]
            encoder_width += target.getWidth()

    SP = SpatialPooler(
        inputDimensions=encoder_width,
        columnDimensions=pooler_out,
        potentialPct=0.8,
        globalInhibition=True,
        numActiveColumnsPerInhArea=pooler_out //
        50,  # Gets 2% of the total area
        boostStrength=1.0,
        wrapAround=False)
    TM = TemporalMemory(columnDimensions=(pooler_out, ),
                        cellsPerColumn=cell_col)

    # Appended last so the loops below can address them as columns -2 / -1.
    Data['Anomaly'] = 0.0
    Data['Anomaly_Likelihood'] = 0.0

    # Train Spatial Pooler
    print("Spatial pooler learning")

    start = time.time()

    active_columns = np.zeros(pooler_out)

    for row in range(len(Data)):
        encoder = multiencode(var_encoders, Data, row)
        SP.compute(encoder, True, active_columns)

    end = time.time()
    print(end - start)

    # Temporal pooler: the spatial pooler no longer learns in this pass.
    print("Temporal pooler learning")

    start = time.time()

    A_score = np.zeros(len(Data))
    prev_pred_col = None
    for row in range(len(Data)):
        encoder = multiencode(var_encoders, Data, row)
        SP.compute(encoder, False, active_columns)
        col_index = active_columns.nonzero()[0]
        TM.compute(col_index, learn=True)
        if row > 0:
            # Raw anomaly score: share of the active columns that were not
            # predicted on the previous step.
            inter_l = len(set(col_index).intersection(prev_pred_col))
            active_l = len(col_index)
            # float() keeps this a true division under Python 2 as well.
            A_score[row] = 1 - (inter_l / float(active_l))
            Data.iat[row, -2] = A_score[row]
        # Columns owning at least one predictive cell, for the next step.
        prev_pred_col = list(
            set(cell // cell_col for cell in TM.getPredictiveCells()))

    end = time.time()
    print(end - start)

    AL_score = np.zeros(len(Data))
    # Computes the likelihood of the anomaly: the mean score of the short
    # window is compared with the long window, scaled by the long window's
    # standard deviation. Row 0 has no history and keeps a likelihood of 0.
    for row in range(1, len(Data)):
        W_vec = A_score[max(0, row - W):row]
        W_prim_vec = A_score[max(0, row - W_prim):row]
        AL_score[row] = 1 - 2 * norm.sf(
            abs(np.mean(W_vec) - np.mean(W_prim_vec)) /
            max(np.std(W_vec), eps))
        Data.iat[row, -1] = AL_score[row]

    Data['Anomaly_flag'] = athreshold < Data['Anomaly_Likelihood']

    return Data
def runHotgym(numRecords):
    with open(_PARAMS_PATH, "r") as f:
        modelParams = yaml.safe_load(f)["modelParams"]
        enParams = modelParams["sensorParams"]["encoders"]
        spParams = modelParams["spParams"]
        tmParams = modelParams["tmParams"]

    timeOfDayEncoder = DateEncoder(
        timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
    weekendEncoder = DateEncoder(
        weekend=enParams["timestamp_weekend"]["weekend"])
    CtEncoder = RandomDistributedScalarEncoder(enParams["Ct"]["resolution"])
    ZIP_10467Encoder = RandomDistributedScalarEncoder(
        enParams["ZIP_10467"]["resolution"])
    #  ZIP_10462Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10462"]["resolution"])
    #  ZIP_10475Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10475"]["resolution"])
    #  ZIP_10466Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10466"]["resolution"])
    #  ZIP_10469Encoder = RandomDistributedScalarEncoder(enParams["ZIP_10469"]["resolution"])
    #  DEPT_11Encoder = RandomDistributedScalarEncoder(enParams["DEPT_11"]["resolution"])
    #  DEPT_24Encoder = RandomDistributedScalarEncoder(enParams["DEPT_24"]["resolution"])
    #  DEPT_41Encoder = RandomDistributedScalarEncoder(enParams["DEPT_41"]["resolution"])
    #  DEPT_34Encoder = RandomDistributedScalarEncoder(enParams["DEPT_34"]["resolution"])
    #  DEPT_31Encoder = RandomDistributedScalarEncoder(enParams["DEPT_31"]["resolution"])
    #  DEPT_60Encoder = RandomDistributedScalarEncoder(enParams["DEPT_60"]["resolution"])
    #  AGE_0_9Encoder = RandomDistributedScalarEncoder(enParams["AGE_0_9"]["resolution"])
    #  AGE_10_19Encoder = RandomDistributedScalarEncoder(enParams["AGE_10_19"]["resolution"])
    #  AGE_20_29Encoder = RandomDistributedScalarEncoder(enParams["AGE_20_29"]["resolution"])
    #  AGE_30_39Encoder = RandomDistributedScalarEncoder(enParams["AGE_30_39"]["resolution"])
    #  AGE_40_49Encoder = RandomDistributedScalarEncoder(enParams["AGE_40_49"]["resolution"])
    #  AGE_50_59Encoder = RandomDistributedScalarEncoder(enParams["AGE_50_59"]["resolution"])
    #  AGE_60_69Encoder = RandomDistributedScalarEncoder(enParams["AGE_60_69"]["resolution"])
    #  AGE_70_79Encoder = RandomDistributedScalarEncoder(enParams["AGE_70_79"]["resolution"])
    #  AGE_80_89Encoder = RandomDistributedScalarEncoder(enParams["AGE_80_89"]["resolution"])
    #  AGE_90_99Encoder = RandomDistributedScalarEncoder(enParams["AGE_90_99"]["resolution"])
    #  DIST_1_7Encoder = RandomDistributedScalarEncoder(enParams["DIST_1_7"]["resolution"])
    #  DIST_8_14Encoder = RandomDistributedScalarEncoder(enParams["DIST_8_14"]["resolution"])
    #  DIST_15_21Encoder = RandomDistributedScalarEncoder(enParams["DIST_15_21"]["resolution"])
    #  DIST_22_28Encoder = RandomDistributedScalarEncoder(enParams["DIST_22_28"]["resolution"])
    #  DIST_29_35Encoder = RandomDistributedScalarEncoder(enParams["DIST_29_35"]["resolution"])
    #  DIST_36_42Encoder = RandomDistributedScalarEncoder(enParams["DIST_36_42"]["resolution"])
    #  DIST_43_49Encoder = RandomDistributedScalarEncoder(enParams["DIST_43_49"]["resolution"])
    #  DIST_50_56Encoder = RandomDistributedScalarEncoder(enParams["DIST_50_56"]["resolution"])
    #  DIST_57_63Encoder = RandomDistributedScalarEncoder(enParams["DIST_57_63"]["resolution"])
    #  DIST_64_70Encoder = RandomDistributedScalarEncoder(enParams["DIST_64_70"]["resolution"])

    encodingWidth = (timeOfDayEncoder.getWidth() + weekendEncoder.getWidth() +
                     CtEncoder.getWidth() * 2)

    sp = SpatialPooler(
        inputDimensions=(encodingWidth, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=spParams["globalInhibition"],
        localAreaDensity=spParams["localAreaDensity"],
        numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        seed=spParams["seed"],
        wrapAround=True)

    tm = TemporalMemory(
        columnDimensions=(tmParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        seed=tmParams["seed"])

    classifier = SDRClassifierFactory.create()
    results = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = reader.next()
        reader.next()
        reader.next()

        output = output_anomaly_generic_v1.NuPICFileOutput(_FILE_NAME)

        for count, record in enumerate(reader):

            if count >= numRecords: break

            # Convert data string into Python date object.
            dateString = datetime.datetime.strptime(record[0],
                                                    "%Y-%m-%d %H:%M:%S")
            # Convert data value string into float.
            Ct = float(record[1])
            ZIP_10467 = float(record[2])
            #      ZIP_10462 = float(record[3])
            #      ZIP_10475 = float(record[4])
            #      ZIP_10466 = float(record[5])
            #      ZIP_10469 = float(record[6])
            #      DEPT_11 = float(record[7])
            #      DEPT_24 = float(record[8])
            #      DEPT_41 = float(record[9])
            #      DEPT_34 = float(record[10])
            #      DEPT_31 = float(record[11])
            #      DEPT_60 = float(record[12])
            #      AGE_0_9 = float(record[13])
            #      AGE_10_19 = float(record[14])
            #      AGE_20_29 = float(record[15])
            #      AGE_30_39 = float(record[16])
            #      AGE_40_49 = float(record[17])
            #      AGE_50_59 = float(record[18])
            #      AGE_60_69 = float(record[19])
            #      AGE_70_79 = float(record[20])
            #      AGE_80_89 = float(record[21])
            #      AGE_90_99 = float(record[22])
            #      DIST_1_7 = float(record[23])
            #      DIST_8_14 = float(record[24])
            #      DIST_15_21 = float(record[25])
            #      DIST_22_28 = float(record[26])
            #      DIST_29_35 = float(record[27])
            #      DIST_36_42 = float(record[28])
            #      DIST_43_49 = float(record[29])
            #      DIST_50_56 = float(record[30])
            #      DIST_57_63 = float(record[31])
            #      DIST_64_70 = float(record[31])

            # To encode, we need to provide zero-filled numpy arrays for the encoders
            # to populate.
            timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
            weekendBits = numpy.zeros(weekendEncoder.getWidth())
            CtBits = numpy.zeros(CtEncoder.getWidth())
            ZIP_10467Bits = numpy.zeros(ZIP_10467Encoder.getWidth())
            #      ZIP_10462Bits = numpy.zeros(ZIP_10462Encoder.getWidth())
            #      ZIP_10475Bits = numpy.zeros(ZIP_10475Encoder.getWidth())
            #      ZIP_10466Bits = numpy.zeros(ZIP_10466Encoder.getWidth())
            #      ZIP_10469Bits = numpy.zeros(ZIP_10469Encoder.getWidth())
            #      DEPT_11Bits = numpy.zeros(DEPT_11Encoder.getWidth())
            #      DEPT_24Bits = numpy.zeros(DEPT_24Encoder.getWidth())
            #      DEPT_41Bits = numpy.zeros(DEPT_41Encoder.getWidth())
            #      DEPT_34Bits = numpy.zeros(DEPT_34Encoder.getWidth())
            #      DEPT_31Bits = numpy.zeros(DEPT_31Encoder.getWidth())
            #      DEPT_60Bits = numpy.zeros(DEPT_60Encoder.getWidth())
            #      AGE_0_9Bits = numpy.zeros(AGE_0_9Encoder.getWidth())
            #      AGE_10_19Bits = numpy.zeros(AGE_10_19Encoder.getWidth())
            #      AGE_20_29Bits = numpy.zeros(AGE_20_29Encoder.getWidth())
            #      AGE_30_39Bits = numpy.zeros(AGE_30_39Encoder.getWidth())
            #      AGE_40_49Bits = numpy.zeros(AGE_40_49Encoder.getWidth())
            #      AGE_50_59Bits = numpy.zeros(AGE_50_59Encoder.getWidth())
            #      AGE_60_69Bits = numpy.zeros(AGE_60_69Encoder.getWidth())
            #      AGE_70_79Bits = numpy.zeros(AGE_70_79Encoder.getWidth())
            #      AGE_80_89Bits = numpy.zeros(AGE_80_89Encoder.getWidth())
            #      AGE_90_99Bits = numpy.zeros(AGE_90_99Encoder.getWidth())
            #      DIST_1_7Bits = numpy.zeros(DIST_1_7Encoder.getWidth())
            #      DIST_8_14Bits = numpy.zeros(DIST_8_14Encoder.getWidth())
            #      DIST_15_21Bits = numpy.zeros(DIST_15_21Encoder.getWidth())
            #      DIST_22_28Bits = numpy.zeros(DIST_22_28Encoder.getWidth())
            #      DIST_29_35Bits = numpy.zeros(DIST_29_35Encoder.getWidth())
            #      DIST_36_42Bits = numpy.zeros(DIST_36_42Encoder.getWidth())
            #      DIST_43_49Bits = numpy.zeros(DIST_43_49Encoder.getWidth())
            #      DIST_50_56Bits = numpy.zeros(DIST_50_56Encoder.getWidth())
            #      DIST_57_63Bits = numpy.zeros(DIST_57_63Encoder.getWidth())
            #      DIST_64_70Bits = numpy.zeros(DIST_64_70Encoder.getWidth())

            # Now we call the encoders to create bit representations for each value.
            timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
            weekendEncoder.encodeIntoArray(dateString, weekendBits)
            CtEncoder.encodeIntoArray(Ct, CtBits)
            ZIP_10467Encoder.encodeIntoArray(ZIP_10467, ZIP_10467Bits)
            #      ZIP_10462Encoder.encodeIntoArray(ZIP_10462, ZIP_10462Bits)
            #      ZIP_10475Encoder.encodeIntoArray(ZIP_10475, ZIP_10475Bits)
            #      ZIP_10466Encoder.encodeIntoArray(ZIP_10466, ZIP_10466Bits)
            #      ZIP_10469Encoder.encodeIntoArray(ZIP_10469, ZIP_10469Bits)
            #      DEPT_11Encoder.encodeIntoArray(DEPT_11, DEPT_11Bits)
            #      DEPT_24Encoder.encodeIntoArray(DEPT_24, DEPT_24Bits)
            #      DEPT_41Encoder.encodeIntoArray(DEPT_41, DEPT_41Bits)
            #      DEPT_34Encoder.encodeIntoArray(DEPT_34, DEPT_34Bits)
            #      DEPT_31Encoder.encodeIntoArray(DEPT_31, DEPT_31Bits)
            #      DEPT_60Encoder.encodeIntoArray(DEPT_60, DEPT_60Bits)
            #      AGE_0_9Encoder.encodeIntoArray(AGE_0_9, AGE_0_9Bits)
            #      AGE_10_19Encoder.encodeIntoArray(AGE_10_19, AGE_10_19Bits)
            #      AGE_20_29Encoder.encodeIntoArray(AGE_20_29, AGE_20_29Bits)
            #      AGE_30_39Encoder.encodeIntoArray(AGE_30_39, AGE_30_39Bits)
            #      AGE_40_49Encoder.encodeIntoArray(AGE_40_49, AGE_40_49Bits)
            #      AGE_50_59Encoder.encodeIntoArray(AGE_50_59, AGE_50_59Bits)
            #      AGE_60_69Encoder.encodeIntoArray(AGE_60_69, AGE_60_69Bits)
            #      AGE_70_79Encoder.encodeIntoArray(AGE_70_79, AGE_70_79Bits)
            #      AGE_80_89Encoder.encodeIntoArray(AGE_80_89, AGE_80_89Bits)
            #      AGE_90_99Encoder.encodeIntoArray(AGE_90_99, AGE_90_99Bits)
            #      DIST_1_7Encoder.encodeIntoArray(DIST_1_7, DIST_1_7Bits)
            #      DIST_8_14Encoder.encodeIntoArray(DIST_8_14, DIST_8_14Bits)
            #      DIST_15_21Encoder.encodeIntoArray(DIST_15_21, DIST_15_21Bits)
            #      DIST_22_28Encoder.encodeIntoArray(DIST_22_28, DIST_22_28Bits)
            #      DIST_29_35Encoder.encodeIntoArray(DIST_29_35, DIST_29_35Bits)
            #      DIST_36_42Encoder.encodeIntoArray(DIST_36_42, DIST_36_42Bits)
            #      DIST_43_49Encoder.encodeIntoArray(DIST_43_49, DIST_43_49Bits)
            #      DIST_50_56Encoder.encodeIntoArray(DIST_50_56, DIST_50_56Bits)
            #      DIST_57_63Encoder.encodeIntoArray(DIST_57_63, DIST_57_63Bits)
            #      DIST_64_70Encoder.encodeIntoArray(DIST_64_70, DIST_64_70Bits)
            # Concatenate all these encodings into one large encoding for Spatial
            # Pooling.
            encoding = numpy.concatenate(
                [timeOfDayBits, weekendBits, CtBits, ZIP_10467Bits])
            #      encoding = numpy.concatenate(
            #        [timeOfDayBits, weekendBits, CtBits,
            #         ZIP_10467Bits, ZIP_10462Bits, ZIP_10475Bits, ZIP_10466Bits, ZIP_10469Bits,
            #         DEPT_11Bits, DEPT_24Bits, DEPT_41Bits, DEPT_34Bits, DEPT_31Bits,
            #         DEPT_60Bits, AGE_0_9Bits, AGE_10_19Bits, AGE_20_29Bits, AGE_30_39Bits,
            #         AGE_40_49Bits, AGE_50_59Bits, AGE_60_69Bits, AGE_70_79Bits, AGE_80_89Bits,
            #         AGE_90_99Bits, DIST_1_7Bits, DIST_8_14Bits, DIST_15_21Bits, DIST_22_28Bits,
            #         DIST_29_35Bits, DIST_36_42Bits, DIST_43_49Bits, DIST_50_56Bits, DIST_57_63Bits,
            #         DIST_64_70Bits])

            # Create an array to represent active columns, all initially zero. This
            # will be populated by the compute method below. It must have the same
            # dimensions as the Spatial Pooler.
            activeColumns = numpy.zeros(spParams["columnCount"])

            # Execute Spatial Pooling algorithm over input space.
            sp.compute(encoding, True, activeColumns)
            activeColumnIndices = numpy.nonzero(activeColumns)[0]

            # Execute Temporal Memory algorithm over active mini-columns.
            tm.compute(activeColumnIndices, learn=True)

            activeCells = tm.getActiveCells()

            # Get the bucket info for this input value for classification.
            bucketIdx = CtEncoder.getBucketIndices(Ct)[0]

            # Run classifier to translate active cells back to scalar value.
            classifierResult = classifier.compute(recordNum=count,
                                                  patternNZ=activeCells,
                                                  classification={
                                                      "bucketIdx": bucketIdx,
                                                      "actValue": Ct
                                                  },
                                                  learn=True,
                                                  infer=True)

            # Print the best prediction for 1 step out.
            oneStepConfidence, oneStep = sorted(zip(
                classifierResult[1], classifierResult["actualValues"]),
                                                reverse=True)[0]
            # print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
            #      results.append([oneStep, oneStepConfidence * 100, None, None])
            results.append([record[0], Ct, oneStep, oneStepConfidence * 100])
            output.write(record[0], Ct, oneStep, oneStepConfidence * 100)

        output.close()
        return results
Example #16
0
def runHotgym(numRecords):
  """Run an encoder -> Spatial Pooler -> Temporal Memory -> classifier loop.

  Model parameters are loaded from the YAML file at ``_PARAMS_PATH`` and the
  input rows are read from the CSV file at ``_INPUT_FILE_PATH``. The first
  three rows of the CSV are skipped. Every remaining row must hold a
  timestamp in column 0 (format ``%m/%d/%y %H:%M``) and a numeric value in
  column 1. The best one-step prediction is printed for each processed row.

  :param numRecords: maximum number of data rows to process.
  :returns: list with one ``[prediction, confidencePercent, None, None]``
    entry per processed row, where ``prediction`` is the classifier's most
    probable value for step 1 and ``confidencePercent`` is its probability
    multiplied by 100.
  """
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)["modelParams"]
    enParams = modelParams["sensorParams"]["encoders"]
    spParams = modelParams["spParams"]
    tmParams = modelParams["tmParams"]

  timeOfDayEncoder = DateEncoder(
    timeOfDay=enParams["timestamp_timeOfDay"]["timeOfDay"])
  weekendEncoder = DateEncoder(
    weekend=enParams["timestamp_weekend"]["weekend"])
  scalarEncoder = RandomDistributedScalarEncoder(
    enParams["consumption"]["resolution"])

  # The SP input is the concatenation of the three encodings built below.
  encodingWidth = (timeOfDayEncoder.getWidth()
                   + weekendEncoder.getWidth()
                   + scalarEncoder.getWidth())

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(spParams["columnCount"]),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=spParams["potentialPct"],
    # This means that the input space has no topology.
    globalInhibition=spParams["globalInhibition"],
    localAreaDensity=spParams["localAreaDensity"],
    # Roughly 2%, giving that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195)
    numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
    # How quickly synapses grow and degrade.
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=spParams["boostStrength"],
    # Random number generator seed.
    seed=spParams["seed"],
    # TODO: is this useful?
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(tmParams["columnCount"],),
    # How many cells in each mini-column.
    cellsPerColumn=tmParams["cellsPerColumn"],
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    # TODO: This comes from the SP params, is this normal
    connectedPermanence=spParams["synPermConnected"],
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=tmParams["minThreshold"],
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
    seed=tmParams["seed"]
  )

  classifier = SDRClassifierFactory.create()
  results = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    # Skip the three leading rows (presumably header rows -- confirm against
    # the input file). The next() builtin is used instead of the Python-2-only
    # reader.next() method so the loop works on either interpreter.
    for _ in range(3):
      next(reader)

    for count, record in enumerate(reader):

      if count >= numRecords:
        break

      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(spParams["columnCount"])

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out: pair each probability with
      # its value and take the pair with the highest probability.
      oneStepConfidence, oneStep = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(oneStep, oneStepConfidence * 100))
      results.append([oneStep, oneStepConfidence * 100, None, None])

    return results
Example #17
0
def runHotgym():
  """Run an encoder -> Spatial Pooler -> Temporal Memory -> classifier loop.

  All model parameters are hard-coded. Input rows are read from the CSV file
  at ``_INPUT_FILE_PATH``; its first three rows are skipped. Every remaining
  row must hold a timestamp in column 0 (format ``%m/%d/%y %H:%M``) and a
  numeric value in column 1. For each row the most probable one-step
  prediction and its probability (as a percentage) are printed. Nothing is
  returned.
  """
  # Number of mini-columns. The SP, the TM and the SP output buffer must all
  # agree on this value, so it is defined once.
  columnCount = 2048

  timeOfDayEncoder = DateEncoder(timeOfDay=(21,1))
  weekendEncoder = DateEncoder(weekend=21)
  scalarEncoder = RandomDistributedScalarEncoder(0.88)

  # The SP input is the concatenation of the three encodings built below.
  encodingWidth = timeOfDayEncoder.getWidth() \
    + weekendEncoder.getWidth() \
    + scalarEncoder.getWidth()

  sp = SpatialPooler(
    # How large the input encoding will be.
    inputDimensions=(encodingWidth),
    # How many mini-columns will be in the Spatial Pooler.
    columnDimensions=(columnCount),
    # What percent of the column's receptive field is available for potential
    # synapses?
    potentialPct=0.85,
    # This means that the input space has no topology.
    globalInhibition=True,
    localAreaDensity=-1.0,
    # Roughly 2%, giving that there is only one inhibition area because we have
    # turned on globalInhibition (40 / 2048 = 0.0195)
    numActiveColumnsPerInhArea=40.0,
    # How quickly synapses grow and degrade.
    synPermInactiveDec=0.005,
    synPermActiveInc=0.04,
    synPermConnected=0.1,
    # boostStrength controls the strength of boosting. Boosting encourages
    # efficient usage of SP columns.
    boostStrength=3.0,
    # Random number generator seed.
    seed=1956,
    # Determines if inputs at the beginning and end of an input dimension should
    # be considered neighbors when mapping columns to inputs.
    wrapAround=False
  )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(columnCount, ),
    # How many cells in each mini-column.
    cellsPerColumn=32,
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=16,
    initialPermanence=0.21,
    connectedPermanence=0.5,
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=12,
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=20,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=128,
    maxSynapsesPerSegment=32,
    seed=1960
  )

  classifier = SDRClassifierFactory.create()

  with open(_INPUT_FILE_PATH) as fin:
    reader = csv.reader(fin)
    # Skip the three leading rows (presumably header rows -- confirm against
    # the input file). The next() builtin is used instead of the Python-2-only
    # reader.next() method so the loop works on either interpreter.
    for _ in range(3):
      next(reader)

    for count, record in enumerate(reader):
      # Convert data string into Python date object.
      dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
      # Convert data value string into float.
      consumption = float(record[1])

      # To encode, we need to provide zero-filled numpy arrays for the encoders
      # to populate.
      timeOfDayBits = numpy.zeros(timeOfDayEncoder.getWidth())
      weekendBits = numpy.zeros(weekendEncoder.getWidth())
      consumptionBits = numpy.zeros(scalarEncoder.getWidth())

      # Now we call the encoders to create bit representations for each value.
      timeOfDayEncoder.encodeIntoArray(dateString, timeOfDayBits)
      weekendEncoder.encodeIntoArray(dateString, weekendBits)
      scalarEncoder.encodeIntoArray(consumption, consumptionBits)

      # Concatenate all these encodings into one large encoding for Spatial
      # Pooling.
      encoding = numpy.concatenate(
        [timeOfDayBits, weekendBits, consumptionBits]
      )

      # Create an array to represent active columns, all initially zero. This
      # will be populated by the compute method below. It must have the same
      # dimensions as the Spatial Pooler.
      activeColumns = numpy.zeros(columnCount)

      # Execute Spatial Pooling algorithm over input space.
      sp.compute(encoding, True, activeColumns)
      activeColumnIndices = numpy.nonzero(activeColumns)[0]

      # Execute Temporal Memory algorithm over active mini-columns.
      tm.compute(activeColumnIndices, learn=True)

      activeCells = tm.getActiveCells()

      # Get the bucket info for this input value for classification.
      bucketIdx = scalarEncoder.getBucketIndices(consumption)[0]

      # Run classifier to translate active cells back to scalar value.
      classifierResult = classifier.compute(
        recordNum=count,
        patternNZ=activeCells,
        classification={
          "bucketIdx": bucketIdx,
          "actValue": consumption
        },
        learn=True,
        infer=True
      )

      # Print the best prediction for 1 step out: pair each probability with
      # its value and take the pair with the highest probability.
      probability, value = sorted(
        zip(classifierResult[1], classifierResult["actualValues"]),
        reverse=True
      )[0]
      print("1-step: {:16} ({:4.4}%)".format(value, probability * 100))
Example #18
0
def go():
    """Feed the rec-center CSV through an encoder -> SP -> TM pipeline.

    Reads ``data/rec-center-hourly.csv`` (relative to this file), skips its
    first three rows, and treats every remaining row as a
    ``(timestamp, consumption)`` pair. Each pair is encoded, run through the
    Spatial Pooler and Temporal Memory, and the resulting timestep is handed
    to a ``sanity.SPTMInstance``. Before each row is processed the function
    calls ``waitForUserContinue()`` on that instance. Nothing is returned.
    """
    valueEncoder = RandomDistributedScalarEncoder(resolution=0.88, seed=42)
    timestampEncoder = DateEncoder(timeOfDay=(
        21,
        9.49,
    ))

    # The SP input is the timestamp encoding followed by the value encoding.
    inputWidth = timestampEncoder.getWidth() + valueEncoder.getWidth()

    sp = SpatialPooler(
        **{
            "globalInhibition": True,
            "columnDimensions": [2048],
            "inputDimensions": [inputWidth],
            "potentialRadius": inputWidth,
            "numActiveColumnsPerInhArea": 40,
            "seed": 1956,
            "potentialPct": 0.8,
            "boostStrength": 0.0,
            "synPermActiveInc": 0.003,
            "synPermConnected": 0.2,
            "synPermInactiveDec": 0.0005,
        })

    tm = TemporalMemory(
        **{
            "activationThreshold": 20,
            "cellsPerColumn": 32,
            "columnDimensions": (2048, ),
            "initialPermanence": 0.24,
            "maxSegmentsPerCell": 128,
            "maxSynapsesPerSegment": 128,
            "minThreshold": 13,
            "maxNewSynapseCount": 31,
            "permanenceDecrement": 0.008,
            "permanenceIncrement": 0.04,
            "seed": 1961,
        })

    inputPath = os.path.join(os.path.dirname(__file__),
                             "data/rec-center-hourly.csv")
    # The context manager guarantees the file is closed even if processing a
    # row raises; the original left the handle open. The "rb" mode is kept
    # unchanged from the original code.
    with open(inputPath, "rb") as inputFile:
        csvReader = csv.reader(inputFile)
        # Skip the three leading rows (presumably header rows -- confirm
        # against the data file). next() works on both Python 2 and 3
        # iterators, unlike the .next() method.
        for _ in range(3):
            next(csvReader)

        # Output buffers, allocated once and overwritten on every row.
        encodedValue = np.zeros(valueEncoder.getWidth(), dtype=np.uint32)
        encodedTimestamp = np.zeros(timestampEncoder.getWidth(),
                                    dtype=np.uint32)
        spOutput = np.zeros(2048, dtype=np.float32)

        sanityInstance = sanity.SPTMInstance(sp, tm)

        for timestampStr, consumptionStr in csvReader:

            sanityInstance.waitForUserContinue()

            timestamp = datetime.datetime.strptime(timestampStr,
                                                   "%m/%d/%y %H:%M")
            consumption = float(consumptionStr)

            timestampEncoder.encodeIntoArray(timestamp, encodedTimestamp)
            valueEncoder.encodeIntoArray(consumption, encodedValue)

            sensoryInput = np.concatenate((
                encodedTimestamp,
                encodedValue,
            ))
            sp.compute(sensoryInput, True, spOutput)

            activeColumns = np.flatnonzero(spOutput)
            # Read the predictive cells BEFORE tm.compute() so they reflect
            # the TM state left by the previous row.
            predictedCells = tm.getPredictiveCells()
            tm.compute(activeColumns)

            activeInputBits = np.flatnonzero(sensoryInput)
            displayText = {
                "timestamp": timestampStr,
                "consumption": consumptionStr
            }

            sanityInstance.appendTimestep(activeInputBits, activeColumns,
                                          predictedCells, displayText)
print "3 =   ", rdse.encode(3)
print "4 =   ", rdse.encode(4)
print "5 =   ", rdse.encode(5)
print
print "100 = ", rdse.encode(100)
print "100000 =", rdse.encode(1000)


import datetime
from nupic.encoders.date import DateEncoder

DateEncoder?


de = DateEncoder(season=5)

now = datetime.datetime.strptime("2014-05-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "now =       ", de.encode(now)
nextMonth = datetime.datetime.strptime("2014-06-02 13:08:58", "%Y-%m-%d %H:%M:%S")
print "next month =", de.encode(nextMonth)
xmas = datetime.datetime.strptime("2014-12-25 13:08:58", "%Y-%m-%d %H:%M:%S")
print "xmas =      ", de.encode(xmas)


from nupic.encoders.category import CategoryEncoder

# Build a CategoryEncoder over four labels and encode the first two of them.
categories = ("cat", "dog", "monkey", "slow loris")
encoder = CategoryEncoder(categoryList=categories, w=3, forced=True)
cat, dog = encoder.encode("cat"), encoder.encode("dog")
Example #20
0
    def initialize(self):
        """Create the encoders, Spatial Pooler and Temporal Memory.

        Reads ``self.inputMin`` and ``self.inputMax`` to size the value
        encoder. Sets ``self.valueEncoder``, ``self.encodedValue``,
        ``self.timestampEncoder``, ``self.encodedTimestamp``, ``self.sp``,
        ``self.spOutput`` and ``self.tm``. When ``self.useLikelihood`` is
        truthy it also sets ``self.anomalyLikelihood``, sized from
        ``self.probationaryPeriod``.
        """
        # The value encoder's resolution comes from the data range, padded
        # by 20% of that range on each side, so it is specific to the data
        # stream.
        rangePadding = abs(self.inputMax - self.inputMin) * 0.2
        minVal = self.inputMin - rangePadding
        if self.inputMin != self.inputMax:
            maxVal = self.inputMax + rangePadding
        else:
            # Min and max coincide: use a span of 1 above the minimum so the
            # resolution below is not computed from a zero-width range.
            maxVal = self.inputMin + 1
        numBuckets = 130.0
        resolution = max(0.001, (maxVal - minVal) / numBuckets)

        self.valueEncoder = RandomDistributedScalarEncoder(resolution,
                                                           seed=42)
        valueWidth = self.valueEncoder.getWidth()
        self.encodedValue = np.zeros(valueWidth, dtype=np.uint32)

        # Timestamp encoder: only the time-of-day component is enabled.
        self.timestampEncoder = DateEncoder(timeOfDay=(21, 9.49))
        timestampWidth = self.timestampEncoder.getWidth()
        self.encodedTimestamp = np.zeros(timestampWidth, dtype=np.uint32)

        # The SP consumes the timestamp encoding plus the value encoding.
        inputWidth = (self.timestampEncoder.getWidth() +
                      self.valueEncoder.getWidth())

        self.sp = SpatialPooler(
            globalInhibition=True,
            columnDimensions=[2048],
            inputDimensions=[inputWidth],
            potentialRadius=inputWidth,
            numActiveColumnsPerInhArea=40,
            seed=1956,
            potentialPct=0.8,
            maxBoost=1.0,
            synPermActiveInc=0.003,
            synPermConnected=0.2,
            synPermInactiveDec=0.0005,
        )
        # One slot per SP column.
        self.spOutput = np.zeros(2048, dtype=np.float32)

        self.tm = TemporalMemory(
            activationThreshold=20,
            cellsPerColumn=32,
            columnDimensions=(2048, ),
            initialPermanence=0.24,
            maxSegmentsPerCell=128,
            maxSynapsesPerSegment=128,
            minThreshold=13,
            maxNewSynapseCount=31,
            permanenceDecrement=0.008,
            permanenceIncrement=0.04,
            seed=1960,
        )

        if self.useLikelihood:
            # Half of the probationary period (rounded down) is passed as
            # the learning period; the remainder as the estimation samples.
            learningPeriod = math.floor(self.probationaryPeriod / 2.0)
            self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
                claLearningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=100)