Example #1
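A note on imports: these snippets are test methods and script fragments built on the htm.core Python bindings. The originals do not show their import blocks; as a reconstruction (an assumption, not part of the originals), the test snippets rely on roughly:

import pickle
import numpy
from htm.bindings.sdr import SDR
from htm.bindings.algorithms import Classifier, Predictor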
  def testComputeComplex(self):
    c   = Predictor([1], 1.0)
    inp = SDR(100)

    inp.sparse = [1, 5, 9]
    c.learn(recordNum=0, pattern=inp,
              classification=4,)

    inp.sparse = [0, 6, 9, 11]
    c.learn(recordNum=1, pattern=inp,
              classification=5,)

    inp.sparse = [6, 9]
    c.learn(recordNum=2, pattern=inp,
              classification=5,)

    inp.sparse = [1, 5, 9]
    c.learn(recordNum=3, pattern=inp,
              classification=4,)

    inp.sparse = [1, 5, 9]
    result = c.infer(pattern=inp)

    self.assertSetEqual(set(result.keys()), set([1]))
    self.assertEqual(len(result[1]), 6)
    self.assertAlmostEqual(result[1][0], 0.034234, places=5)
    self.assertAlmostEqual(result[1][1], 0.034234, places=5)
    self.assertAlmostEqual(result[1][2], 0.034234, places=5)
    self.assertAlmostEqual(result[1][3], 0.034234, places=5)
    self.assertAlmostEqual(result[1][4], 0.093058, places=5)
    self.assertAlmostEqual(result[1][5], 0.770004, places=5)
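A sanity check on the expected values: infer() returns a mapping from each configured step size (here only 1) to a probability distribution over buckets, and the six probabilities above do sum to roughly one (4 × 0.034234 + 0.093058 + 0.770004 ≈ 1.000), with most of the mass on bucket 5.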
Example #2
    def testComputeInferOrLearnOnly(self):
        c = Predictor([1], 1.0)
        inp = SDR(10)
        inp.randomize(.3)

        # learn only
        c.infer(recordNum=0,
                pattern=inp)  # Don't crash with not enough training data.
        c.learn(recordNum=0, pattern=inp, classification=4)
        c.infer(recordNum=1,
                pattern=inp)  # Don't crash with not enough training data.
        c.learn(recordNum=2, pattern=inp, classification=4)
        c.learn(recordNum=3, pattern=inp, classification=4)

        # infer only
        retval1 = c.infer(recordNum=5, pattern=inp)
        retval2 = c.infer(recordNum=6, pattern=inp)
        self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))
Example #3
  def testComputeInferOrLearnOnly(self):
    c = Predictor([1], 1.0)
    inp = SDR(10)
    inp.randomize( .3 )

    # learn only
    prediction = c.infer(pattern=inp)[1]
    self.assertTrue(prediction == []) # not enough training data -> []
    c.learn(recordNum=0, pattern=inp, classification=4)
    self.assertTrue(c.infer(pattern=inp)[1] == []) # not enough training data.
    c.learn(recordNum=2, pattern=inp, classification=4)
    c.learn(recordNum=3, pattern=inp, classification=4)
    self.assertTrue(c.infer(pattern=inp)[1] != []) # With enough training data, the prediction is non-empty.

    # infer only
    retval1 = c.infer(pattern=inp)
    retval2 = c.infer(pattern=inp)
    self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))
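Examples #2 and #3 appear to be two revisions of the same test: the older infer() signature took a recordNum argument, while the newer one (Example #3) takes only the pattern and returns an empty list for each step until the predictor has seen enough training data.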
Example #4
    def testExampleUsage(self):
        # Make a random SDR and associate it with a category.
        inputData = SDR(1000).randomize(0.02)
        categories = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
        clsr = Classifier()
        clsr.learn(inputData, categories['B'])
        assert (numpy.argmax(clsr.infer(inputData)) == categories['B'])

        # Estimate a scalar value.  The Classifier only accepts categories, so
        # put real valued inputs into bins (AKA buckets) by subtracting the
        # minimum value and dividing by a resolution.
        scalar = 567.8
        minimum = 500
        resolution = 10
        clsr.learn(inputData, int((scalar - minimum) / resolution))
        assert (numpy.argmax(clsr.infer(inputData)) * resolution +
                minimum == 560)
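        # Checking the arithmetic above: int((567.8 - 500) / 10) falls in
        # bucket 6, and decoding with 6 * 10 + 500 recovers 560, which is why
        # the assertion compares against 560 rather than 567.8 (values are
        # only recovered to within the bucket resolution).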

        # Predict 1 and 2 time steps into the future.

        # Make a sequence of 4 random SDRs, each SDR has 1000 bits and 2% sparsity.
        sequence = [SDR(1000).randomize(0.02) for i in range(4)]

        # Make category labels for the sequence.
        labels = [4, 5, 6, 7]

        # Make a Predictor and train it.
        pred = Predictor([1, 2])
        pred.learn(0, sequence[0], labels[0])
        pred.learn(1, sequence[1], labels[1])
        pred.learn(2, sequence[2], labels[2])
        pred.learn(3, sequence[3], labels[3])

        # Give the predictor partial information, and make predictions
        # about the future.
        pred.reset()
        A = pred.infer(0, sequence[0])
        assert (numpy.argmax(A[1]) == labels[1])
        assert (numpy.argmax(A[2]) == labels[2])

        B = pred.infer(1, sequence[1])
        assert (numpy.argmax(B[1]) == labels[2])
        assert (numpy.argmax(B[2]) == labels[3])
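Two details worth noting here: infer() returns a dict keyed by the step sizes passed to the Predictor constructor (1 and 2), and pred.reset() clears the predictor's stored sequence history, so the record numbering in the subsequent infer() calls starts again from 0.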
Example #5
    def testMultistepSingleValue(self):
        classifier = Predictor(steps=[1, 2])
        inp = SDR(10)
        inp.randomize(.2)

        for recordNum in range(10):
            classifier.learn(recordNum, inp, 0)

        retval = classifier.infer(10, inp)

        # Should have a probability of 100% for that bucket.
        self.assertEqual(retval[1], [1.])
        self.assertEqual(retval[2], [1.])
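Because bucket 0 is the only category ever taught, the returned PDFs contain a single entry whose probability has converged to 1.0, for both the 1-step and the 2-step predictions.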
Example #6
    def testSingleValue0Steps(self):
        """Send same value 10 times and expect high likelihood for prediction
    using 0-step ahead prediction"""
        pred = Predictor(steps=[0], alpha=0.5)

        # Repeat enough times to learn the association before inferring.
        inp = SDR(10)
        inp.randomize(.2)
        for recordNum in range(10):
            pred.learn(recordNum, inp, 2)

        retval = pred.infer(10, inp)
        self.assertGreater(retval[0][2], 0.9)
Example #7
    def testMultistepSimple(self):
        classifier = Predictor(steps=[1, 2], alpha=10.0)
        inp = SDR(10)

        for i in range(100):
            inp.sparse = [i % 10]
            classifier.learn(recordNum=i, pattern=inp, classification=(i % 10))

        retval = classifier.infer(99, inp)

        self.assertGreater(retval[1][0], 0.99)
        for i in range(1, 10):
            self.assertLess(retval[1][i], 0.01)
        self.assertGreater(retval[2][1], 0.99)
        for i in [0] + list(range(2, 10)):
            self.assertLess(retval[2][i], 0.01)
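The large alpha=10.0 is what lets the distributions get this sharp after only 100 records: in htm.core's Classifier/Predictor, alpha is the learning rate used to adapt the internal weight matrix, so larger values track the data more aggressively.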
Example #8
    def testMultiStepPredictions(self):
        """ Test multi-step predictions
    We train the 0-step and the 1-step classifiers simultaneously on the
    data stream:
    (SDR1, bucketIdx0)
    (SDR2, bucketIdx1)
    (SDR1, bucketIdx0)
    (SDR2, bucketIdx1)
    ...

    We intend the 0-step classifier to learn the associations:
      SDR1    => bucketIdx 0
      SDR2    => bucketIdx 1

    and the 1-step classifier to learn the associations
      SDR1    => bucketIdx 1
      SDR2    => bucketIdx 0
    """

        c = Predictor([0, 1], 1.0)

        SDR1 = SDR(10)
        SDR1.sparse = [1, 3, 5]
        SDR2 = SDR(10)
        SDR2.sparse = [2, 4, 6]
        recordNum = 0
        for _ in range(100):
            c.learn(recordNum, pattern=SDR1, classification=0)
            recordNum += 1

            c.learn(recordNum, pattern=SDR2, classification=1)
            recordNum += 1

        result1 = c.infer(recordNum, SDR1)
        result2 = c.infer(recordNum, SDR2)

        self.assertAlmostEqual(result1[0][0], 1.0, places=1)
        self.assertAlmostEqual(result1[0][1], 0.0, places=1)
        self.assertAlmostEqual(result2[0][0], 0.0, places=1)
        self.assertAlmostEqual(result2[0][1], 1.0, places=1)
Example #9
  def testSerialization2(self):
    # This test verifies that a Predictor survives a pickle round trip.
    SDR1 = SDR(15);  SDR1.sparse = [1, 5, 9]
    SDR2 = SDR(15);  SDR2.sparse = [0, 6, 9, 11]
    SDR3 = SDR(15);  SDR3.sparse = [6, 9]
    SDR4 = SDR(15);  SDR4.sparse = [1, 5, 9]
    
    c1 = Predictor( steps=[1], alpha=1.0 )
    c1.learn(1, pattern=SDR1, classification=4)
    c1.learn(2, pattern=SDR2, classification=5)
    c1.learn(3, pattern=SDR3, classification=5)
    c1.learn(4, pattern=SDR4, classification=4)
    c1.learn(5, pattern=SDR4, classification=4)
    
    serialized = pickle.dumps(c1)
    c2 = pickle.loads(serialized)

    result1 = c1.infer(SDR1)
    result2 = c2.infer(SDR1)
    #print("  testSerialization2 result: %.6f, %.6f, %.6f, %.6f, %.6f, %.6f "%( result1[1][0], result1[1][1], result1[1][2], result1[1][3], result1[1][4], result1[1][5]));
    self.assertEqual(len(result1[1]), 6)
    self.assertEqual(len(result1[1]), len(result2[1]))
    for i in range(len(result1[1])):
      self.assertAlmostEqual(result1[1][i], result2[1][i], places=5)
Example #10
    def testMissingRecords(self):
        """ Test missing record support.

    Here, we intend the classifier to learn the associations:
      [1,3,5] => bucketIdx 1
      [2,4,6] => bucketIdx 2
      [7,8,9] => don't care

    If it doesn't pay attention to the recordNums in this test, it will learn the
    wrong associations.
    """

        c = Predictor(steps=[1], alpha=1.0)
        recordNum = 0
        inp = SDR(10)

        inp.sparse = [1, 3, 5]
        c.learn(recordNum=recordNum, pattern=inp, classification=0)
        recordNum += 1

        inp.sparse = [2, 4, 6]
        c.learn(recordNum=recordNum, pattern=inp, classification=1)
        recordNum += 1

        inp.sparse = [1, 3, 5]
        c.learn(recordNum=recordNum, pattern=inp, classification=2)
        recordNum += 1

        inp.sparse = [2, 4, 6]
        c.learn(recordNum=recordNum, pattern=inp, classification=1)
        recordNum += 1

        # -----------------------------------------------------------------------
        # At this point, we should have learned [1,3,5] => bucket 1
        #                                       [2,4,6] => bucket 2
        inp.sparse = [1, 3, 5]
        result = c.infer(recordNum=recordNum, pattern=inp)
        c.learn(recordNum=recordNum, pattern=inp, classification=2)
        recordNum += 1
        self.assertLess(result[1][0], 0.1)
        self.assertGreater(result[1][1], 0.9)
        self.assertLess(result[1][2], 0.1)

        inp.sparse = [2, 4, 6]
        result = c.infer(recordNum=recordNum, pattern=inp)
        c.learn(recordNum=recordNum, pattern=inp, classification=1)
        recordNum += 1
        self.assertLess(result[1][0], 0.1)
        self.assertLess(result[1][1], 0.1)
        self.assertGreater(result[1][2], 0.9)

        # -----------------------------------------------------------------------
        # Feed in records that skip and make sure they don't mess up what we
        #  learned
        # If we skip a record, the CLA should NOT learn that [2,4,6] from
        #  the previous learn associates with bucket 0
        recordNum += 1
        inp.sparse = [1, 3, 5]
        result = c.infer(recordNum=recordNum, pattern=inp)
        c.learn(recordNum=recordNum, pattern=inp, classification=0)
        recordNum += 1
        self.assertLess(result[1][0], 0.1)
        self.assertGreater(result[1][1], 0.9)
        self.assertLess(result[1][2], 0.1)

        # If we skip a record, the CLA should NOT learn that [1,3,5] from
        #  the previous learn associates with bucket 0
        recordNum += 1
        inp.sparse = [2, 4, 6]
        result = c.infer(recordNum=recordNum, pattern=inp)
        c.learn(recordNum=recordNum, pattern=inp, classification=0)
        recordNum += 1
        self.assertLess(result[1][0], 0.1)
        self.assertLess(result[1][1], 0.1)
        self.assertGreater(result[1][2], 0.9)

        # If we skip a record, the CLA should NOT learn that [2,4,6] from
        #  the previous learn associates with bucket 0
        recordNum += 1
        inp.sparse = [1, 3, 5]
        result = c.infer(recordNum=recordNum, pattern=inp)
        c.learn(recordNum=recordNum, pattern=inp, classification=0)
        recordNum += 1
        self.assertLess(result[1][0], 0.1)
        self.assertGreater(result[1][1], 0.9)
        self.assertLess(result[1][2], 0.1)
Example #11
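This longer example is a complete script (it closely resembles htm.core's hotgym demo). In addition to the imports above, it presumably relies on roughly:

import csv, datetime, math
import numpy as np
from htm.bindings.sdr import SDR, Metrics
from htm.encoders.rdse import RDSE, RDSE_Parameters
from htm.encoders.date import DateEncoder
from htm.bindings.algorithms import SpatialPooler, TemporalMemory, Predictor
from htm.algorithms.anomaly_likelihood import AnomalyLikelihood

plus a default_parameters dict and an _INPUT_FILE_PATH constant, neither of which is reproduced here.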
def main(parameters=default_parameters, argv=None, verbose=True):
  if verbose:
    import pprint
    print("Parameters:")
    pprint.pprint(parameters, indent=4)
    print("")

  # Read the input file.
  records = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = next(reader)
    next(reader)  # Skip the two metadata rows that follow the column headers.
    next(reader)
    for record in reader:
      records.append(record)

  # Make the Encoders.  These will convert input data into binary representations.
  dateEncoder = DateEncoder(timeOfDay= parameters["enc"]["time"]["timeOfDay"], 
                            weekend  = parameters["enc"]["time"]["weekend"]) 
  
  scalarEncoderParams            = RDSE_Parameters()
  scalarEncoderParams.size       = parameters["enc"]["value"]["size"]
  scalarEncoderParams.sparsity   = parameters["enc"]["value"]["sparsity"]
  scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
  scalarEncoder = RDSE( scalarEncoderParams )
  encodingWidth = (dateEncoder.size + scalarEncoder.size)
  enc_info = Metrics( [encodingWidth], 999999999 )

  # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
  spParams = parameters["sp"]
  sp = SpatialPooler(
    inputDimensions            = (encodingWidth,),
    columnDimensions           = (spParams["columnCount"],),
    potentialPct               = spParams["potentialPct"],
    potentialRadius            = encodingWidth,
    globalInhibition           = True,
    localAreaDensity           = spParams["localAreaDensity"],
    synPermInactiveDec         = spParams["synPermInactiveDec"],
    synPermActiveInc           = spParams["synPermActiveInc"],
    synPermConnected           = spParams["synPermConnected"],
    boostStrength              = spParams["boostStrength"],
    wrapAround                 = True
  )
  sp_info = Metrics( sp.getColumnDimensions(), 999999999 )

  tmParams = parameters["tm"]
  tm = TemporalMemory(
    columnDimensions          = (spParams["columnCount"],),
    cellsPerColumn            = tmParams["cellsPerColumn"],
    activationThreshold       = tmParams["activationThreshold"],
    initialPermanence         = tmParams["initialPerm"],
    connectedPermanence       = spParams["synPermConnected"],
    minThreshold              = tmParams["minThreshold"],
    maxNewSynapseCount        = tmParams["newSynapseCount"],
    permanenceIncrement       = tmParams["permanenceInc"],
    permanenceDecrement       = tmParams["permanenceDec"],
    predictedSegmentDecrement = 0.0,
    maxSegmentsPerCell        = tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment     = tmParams["maxSynapsesPerSegment"]
  )
  tm_info = Metrics( [tm.numberOfCells()], 999999999 )

  # Set up the anomaly likelihood; these settings are used in NAB.
  anParams = parameters["anomaly"]["likelihood"]
  probationaryPeriod = int(math.floor(float(anParams["probationaryPct"])*len(records)))
  learningPeriod     = int(math.floor(probationaryPeriod / 2.0))
  anomaly_history = AnomalyLikelihood(learningPeriod= learningPeriod,
                                      estimationSamples= probationaryPeriod - learningPeriod,
                                      reestimationPeriod= anParams["reestimationPeriod"])

  predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
  predictor_resolution = 1

  # Iterate through every datum in the dataset, record the inputs & outputs.
  inputs      = []
  anomaly     = []
  anomalyProb = []
  predictions = {1: [], 5: []}
  for count, record in enumerate(records):

    # Convert date string into Python date object.
    dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
    # Convert data value string into float.
    consumption = float(record[1])
    inputs.append( consumption )

    # Call the encoders to create bit representations for each value.  These are SDR objects.
    dateBits        = dateEncoder.encode(dateString)
    consumptionBits = scalarEncoder.encode(consumption)

    # Concatenate all these encodings into one large encoding for Spatial Pooling.
    encoding = SDR( encodingWidth ).concatenate([consumptionBits, dateBits])
    enc_info.addData( encoding )

    # Create an SDR to represent active columns.  This will be populated by the
    # compute method below.  It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR( sp.getColumnDimensions() )

    # Execute Spatial Pooling algorithm over input space.
    sp.compute(encoding, True, activeColumns)
    sp_info.addData( activeColumns )

    # Execute Temporal Memory algorithm over active mini-columns.
    tm.compute(activeColumns, learn=True)
    tm_info.addData( tm.getActiveCells().flatten() )

    # Predict what will happen, and then train the predictor based on what just happened.
    pdf = predictor.infer( count, tm.getActiveCells() )
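    # pdf maps each configured step size (1 and 5) to a probability
    # distribution over buckets; the list is empty until the predictor has
    # accumulated enough history (compare Examples #2 and #3).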
    for n in (1, 5):
      if pdf[n]:
        predictions[n].append( np.argmax( pdf[n] ) * predictor_resolution )
      else:
        predictions[n].append(float('nan'))
    predictor.learn( count, tm.getActiveCells(), int(consumption / predictor_resolution))

    anomalyLikelihood = anomaly_history.anomalyProbability( consumption, tm.anomaly )
    anomaly.append( tm.anomaly )
    anomalyProb.append( anomalyLikelihood )

  # Print information & statistics about the state of the HTM.
  print("Encoded Input", enc_info)
  print("")
  print("Spatial Pooler Mini-Columns", sp_info)
  print(str(sp))
  print("")
  print("Temporal Memory Cells", tm_info)
  print(str(tm))
  print("")

  # Shift the predictions so that they are aligned with the input they predict.
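  # For example (hypothetical values, n_steps = 2): [10.0, 11.0, 12.0, 13.0],
  # where entry t forecasts inputs[t + 2], becomes [nan, nan, 10.0, 11.0],
  # so that entry t lines up with inputs[t].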
  for n_steps, pred_list in predictions.items():
    for x in range(n_steps):
        pred_list.insert(0, float('nan'))
        pred_list.pop()

  # Calculate the predictive accuracy, Root-Mean-Squared
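  # i.e. error = sqrt( mean( (input - prediction)^2 ) ), skipping the NaN
  # entries for which no prediction was available.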
  accuracy         = {1: 0, 5: 0}
  accuracy_samples = {1: 0, 5: 0}
  for idx, inp in enumerate(inputs):
    for n in predictions: # For each [N]umber of time steps ahead which was predicted.
      val = predictions[n][ idx ]
      if not math.isnan(val):
        accuracy[n] += (inp - val) ** 2
        accuracy_samples[n] += 1
  for n in sorted(predictions):
    accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
    print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

  # Show info about the anomaly (mean & std)
  print("Anomaly Mean", np.mean(anomaly))
  print("Anomaly Std ", np.std(anomaly))

  # Plot the Predictions and Anomalies.
  if verbose:
    try:
      import matplotlib.pyplot as plt
    except ImportError:
      print("WARNING: failed to import matplotlib, plots cannot be shown.")
      return -accuracy[5]

    plt.subplot(2,1,1)
    plt.title("Predictions")
    plt.xlabel("Time")
    plt.ylabel("Power Consumption")
    plt.plot(np.arange(len(inputs)), inputs, 'red',
             np.arange(len(inputs)), predictions[1], 'blue',
             np.arange(len(inputs)), predictions[5], 'green',)
    plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step', '5 Step Prediction, Shifted 5 steps'))

    plt.subplot(2,1,2)
    plt.title("Anomaly Score")
    plt.xlabel("Time")
    plt.ylabel("Power Consumption")
    inputs = np.array(inputs) / max(inputs)
    plt.plot(np.arange(len(inputs)), inputs, 'red',
             np.arange(len(inputs)), anomaly, 'blue',)
    plt.legend(labels=('Input', 'Anomaly Score'))
    plt.show()

  return -accuracy[5]
Example #12 (fragment; the opening lines of this snippet are missing)
                        stdout=subprocess.PIPE)
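    # "cp" is presumably a subprocess.CompletedProcess returned by a call
    # whose opening lines were lost above; the slice drops the first five and
    # last three characters around the numeric temperature reading.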
    temp = float(cp.stdout[5:-3])
    inputs.append(temp)

    dateBits = dateEncoder.encode(dateObject)
    tempBits = scalarEncoder.encode(temp)

    encoding = SDR(encodingWidth).concatenate([tempBits, dateBits])

    activeColumns = SDR(sp.getColumnDimensions())

    sp.compute(encoding, True, activeColumns)

    tm.compute(activeColumns, learn=True)

    pdf = predictor.infer(tm.getActiveCells())
    for n in (1, 5):
        if pdf[n]:
            predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
        else:
            predictions[n].append(float('nan'))

    anomalyLikelihood = anomaly_history.anomalyProbability(temp, tm.anomaly)

    anomaly.append(tm.anomaly)
    anomalyProb.append(anomalyLikelihood)

    predictor.learn(count, tm.getActiveCells(),
                    int(temp / predictor_resolution))
    print(count, " ", dateObject, " ", temp)