Example #1
0
    def testStatistics(self):
        p = ScalarEncoderParameters()
        p.size       = 100
        p.activeBits = 10
        p.minimum    = 0
        p.maximum    = 20
        p.clipInput  = True
        enc = ScalarEncoder( p )
        del p
        out = SDR( enc.parameters.size )
        mtr = Metrics(out, 9999)

        # The activation frequency of bits near the endpoints of the range is a
        # little weird, because the bits at the very end are not used as often
        # as the ones in the middle of the range, unless clipInputs is enabled.
        # If clipInputs is enabled then the bits 1 radius from the end get used
        # twice as often as the should because they respond to inputs off
        # outside of the valid range as well as inputs inside of the range.
        for i in np.linspace(
                        enc.parameters.minimum - enc.parameters.radius / 2,
                        enc.parameters.maximum + enc.parameters.radius / 2,
                        100 + 10 ):
            enc.encode( i, out )
            # print( i, out.sparse )

        print(str(mtr))
        assert( mtr.sparsity.min() >  .95 * .10 )
        assert( mtr.sparsity.max() < 1.05 * .10 )
        assert( mtr.activationFrequency.min() >  .50 * .10 )
        assert( mtr.activationFrequency.max() < 1.75 * .10 )
        assert( mtr.overlap.min() > .85 )
Example #2
0
 def testAverageOverlap(self):
     """ Verify that nearby values have the correct amount of semantic
     similarity. Also measure sparsity & activation frequency. """
     P = RDSE_Parameters()
     P.size     = 2000
     P.sparsity = .08
     P.radius   = 12
     P.seed     = 42
     R = RDSE( P )
     A = SDR( R.parameters.size )
     num_samples = 10000
     M = Metrics( A, num_samples + 1 )
     for i in range( num_samples ):
         R.encode( i, A )
     print( M )
     assert(M.overlap.min()  > (1 - 1. / R.parameters.radius) - .04 )
     assert(M.overlap.max()  < (1 - 1. / R.parameters.radius) + .04 )
     assert(M.overlap.mean() > (1 - 1. / R.parameters.radius) - .001 )
     assert(M.overlap.mean() < (1 - 1. / R.parameters.radius) + .001 )
     assert(M.sparsity.min()  > R.parameters.sparsity - .01 )
     assert(M.sparsity.max()  < R.parameters.sparsity + .01 )
     assert(M.sparsity.mean() > R.parameters.sparsity - .005 )
     assert(M.sparsity.mean() < R.parameters.sparsity + .005 )
     assert(M.activationFrequency.min()  > R.parameters.sparsity - .05 )
     assert(M.activationFrequency.max()  < R.parameters.sparsity + .05 )
     assert(M.activationFrequency.mean() > R.parameters.sparsity - .005 )
     assert(M.activationFrequency.mean() < R.parameters.sparsity + .005 )
     assert(M.activationFrequency.entropy() > .99 )
Example #3
0
 def testRandomOverlap(self):
     """ Verify that distant values have little to no semantic similarity.
     Also measure sparsity & activation frequency. """
     P = RDSE_Parameters()
     P.size     = 2000
     P.sparsity = .08
     P.radius   = 12
     P.seed     = 42
     R = RDSE( P )
     num_samples = 1000
     A = SDR( R.parameters.size )
     M = Metrics( A, num_samples + 1 )
     for i in range( num_samples ):
         X = i * R.parameters.radius
         R.encode( X, A )
     print( M )
     assert(M.overlap.max()  < .15 )
     assert(M.overlap.mean() < .10 )
     assert(M.sparsity.min()  > R.parameters.sparsity - .01 )
     assert(M.sparsity.max()  < R.parameters.sparsity + .01 )
     assert(M.sparsity.mean() > R.parameters.sparsity - .005 )
     assert(M.sparsity.mean() < R.parameters.sparsity + .005 )
     assert(M.activationFrequency.min()  > R.parameters.sparsity - .05 )
     assert(M.activationFrequency.max()  < R.parameters.sparsity + .05 )
     assert(M.activationFrequency.mean() > R.parameters.sparsity - .005 )
     assert(M.activationFrequency.mean() < R.parameters.sparsity + .005 )
     assert(M.activationFrequency.entropy() > .99 )
Example #4
0
def main(parameters=default_parameters, argv=None, verbose=True):

    # Load data.
    train_labels, train_images, test_labels, test_images = load_ds(
        'mnist_784', 10000, shape=[28, 28])  # HTM: ~95.6%
    #train_labels, train_images, test_labels, test_images = load_ds('Fashion-MNIST', 10000, shape=[28,28]) # HTM baseline: ~83%

    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)

    # Setup the AI.
    enc = SDR(train_images[0].shape)
    sp = SpatialPooler(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        seed=
        0,  # this is important, 0="random" seed which changes on each invocation
        spVerbosity=99,
        wrapAround=False)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    # Training Loop
    for i in range(len(train_images)):
        img, lbl = training_data[i]
        encode(img, enc)
        sp.compute(enc, True, columns)
        sdrc.learn(
            columns, lbl
        )  #TODO SDRClassifier could accept string as a label, currently must be int

    print(str(sp))
    print(str(columns_stats))

    # Testing Loop
    score = 0
    for img, lbl in test_data:
        encode(img, enc)
        sp.compute(enc, False, columns)
        if lbl == np.argmax(sdrc.infer(columns)):
            score += 1
    score = score / len(test_data)

    print('Score:', 100 * score, '%')
    return score
Example #5
0
    def testDayOfWeek(self):
        """ Creating date encoder instance. """
        # 1 bit for days in a week (x7 days -> 7 bits), no other fields encoded
        p = DateEncoderParameters()
        p.dayOfWeek_width = 1
        p.verbose = False
        enc = DateEncoder(p)
        # In the middle of fall, Thursday, not a weekend, afternoon - 4th Nov,
        # 2010, 14:55
        d = datetime.datetime(2010, 11, 4, 14, 55)
        bits = enc.encode(d)

        # Week is MTWTFSS,
        # Monday = 0 (for python datetime.datetime.timetuple())
        dayOfWeekExpected = [0, 0, 0, 1, 0, 0, 0]  #Thu

        expected = dayOfWeekExpected
        self.assertEqual(bits.size, 7)
        self.assertEqual(expected, bits.dense.tolist())

        # check a day is encoded consistently during most of its hours.
        p = DateEncoderParameters()
        p.dayOfWeek_width = 40
        p.verbose = False
        enc = DateEncoder(p)
        dMorn = datetime.datetime(2010, 11, 4, 8, 00)  # 8 AM to
        dEve = datetime.datetime(2010, 11, 4, 8 + 12, 00)  # 8 PM

        bits1 = enc.encode(dMorn)
        bits2 = enc.encode(dEve)
        assert (bits1.getOverlap(bits2) > 40 * .25)

        # Check the long term statistics of the encoder.
        p = DateEncoderParameters()
        p.dayOfWeek_width = 300
        p.verbose = False
        enc = DateEncoder(p)
        sdr = SDR(enc.dimensions)
        test_period = 1000
        metrics = Metrics(sdr, test_period)
        now = datetime.datetime.now()
        inc = datetime.timedelta(hours=1)
        for i in range(test_period):
            enc.encode(now, sdr)
            now += inc

        #print( metrics )

        assert (metrics.sparsity.min() >= .05)
        assert (metrics.sparsity.max() <= .20)
        assert (metrics.activationFrequency.min() >= .05)
        assert (metrics.activationFrequency.max() <= .20)
Example #6
0
    def testStatistics(self):
        gc = GridCellEncoder(size=200,
                             sparsity=.25,
                             periods=[6, 8.5, 12, 17, 24],
                             seed=42)
        sdr = SDR(gc.dimensions)
        M = Metrics(sdr, 999999)
        for x in range(1000):
            gc.encode([-x, 0], sdr)
        print(M)
        assert (M.sparsity.min() > .25 - .02)
        assert (M.sparsity.max() < .25 + .02)
        assert (M.activationFrequency.min() > .25 - .05)
        assert (M.activationFrequency.max() < .25 + .05)

        # These are approximate...
        assert (M.overlap.min() > .5)
        assert (M.overlap.max() < .9)
        assert (M.overlap.mean() > .7)
        assert (M.overlap.mean() < .8)
Example #7
0
    def testPeriodic(self):
        p = ScalarEncoderParameters()
        p.size       = 100
        p.activeBits = 10
        p.minimum    = 0
        p.maximum    = 20
        p.periodic   = True
        enc = ScalarEncoder( p )
        out = SDR( enc.parameters.size )
        mtr = Metrics(out, 9999)

        for i in range(201 * 10 + 1):
            x = (i % 201) / 10.
            enc.encode( x, out )
            # print( x, out.sparse )

        print(str(mtr))
        assert( mtr.sparsity.min() >  .95 * .10 )
        assert( mtr.sparsity.max() < 1.05 * .10 )
        assert( mtr.activationFrequency.min() >  .9 * .10 )
        assert( mtr.activationFrequency.max() < 1.1 * .10 )
        assert( mtr.overlap.min() > .85 )
Example #8
0
def SystemSetup(parameters, verbose=True):
    global agent, sensorEncoder, env, sensorLayer_sp, sensorLayer_SDR_columns
    global gridCellEncoder, locationlayer_SDR_cells
    global sensorLayer_tm

    if verbose:
        import pprint

        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # create environment and the agent
    env = htm2d.environment.TwoDimensionalEnvironment(20, 20)
    agent = htm2d.agent.Agent()

    # load object from yml file
    with open(os.path.join(_OBJECTS_DIR, OBJECT_FILENAME), "r") as stream:
        try:
            env.load_object(stream)
        except yaml.YAMLError as exc:
            print(exc)

    # SENSOR LAYER --------------------------------------------------------------
    # setup sensor encoder
    sensorEncoderParams = RDSE_Parameters()
    sensorEncoderParams.category = True
    sensorEncoderParams.size = parameters["enc"]["size"]
    sensorEncoderParams.sparsity = parameters["enc"]["sparsity"]
    sensorEncoderParams.seed = parameters["enc"]["seed"]
    sensorEncoder = RDSE(sensorEncoderParams)

    # Create SpatialPooler
    spParams = parameters["sensorLayer_sp"]
    sensorLayer_sp = SpatialPooler(
        inputDimensions=(sensorEncoder.size, ),
        columnDimensions=(spParams["columnCount"], ),
        potentialPct=spParams["potentialPct"],
        potentialRadius=sensorEncoder.size,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True,
    )
    sp_info = Metrics(sensorLayer_sp.getColumnDimensions(), 999999999)

    # Create an SDR to represent active columns, This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    sensorLayer_SDR_columns = SDR(spParams["columnCount"])

    # LOCATION LAYER ------------------------------------------------------------
    # Grid cell modules
    locParams = parameters["locationLayer"]

    gridCellEncoder = GridCellEncoder(
        size=locParams["cellCount"],
        sparsity=locParams["sparsity"],
        periods=locParams["periods"],
        seed=locParams["seed"],
    )

    locationlayer_SDR_cells = SDR(gridCellEncoder.dimensions)

    tmParams = parameters["sensorLayer_tm"]
    sensorLayer_tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        externalPredictiveInputs=locParams["cellCount"],
    )
    tm_info = Metrics([sensorLayer_tm.numberOfCells()], 999999999)
Example #9
0
def building_htm(len_data):
    global enc_info
    global sp_info
    global tm_info
    global anomaly_history
    global predictor
    global predictor_resolution
    global tm
    global sp
    global scalarEncoder
    global encodingWidth
    global dateEncoder

    # Initial message
    print("Building HTM for predicting trends...")

    # Default parameters in HTM
    default_parameters = {
        # There are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay)
        'enc': {
            "value": {
                'resolution': 0.88,
                'size': 700,
                'sparsity': 0.02
            },
            "time": {
                'timeOfDay': (30, 1)
            }  #, 'weekend': 21}
        },
        'predictor': {
            'sdrc_alpha': 0.1
        },
        'sp': {
            'boostStrength': 3.0,
            'columnCount': 1638,
            'localAreaDensity': 0.04395604395604396,
            'potentialPct': 0.85,
            'synPermActiveInc': 0.04,
            'synPermConnected': 0.13999999999999999,
            'synPermInactiveDec': 0.006
        },
        'tm': {
            'activationThreshold': 17,
            'cellsPerColumn': 13,
            'initialPerm': 0.21,
            'maxSegmentsPerCell': 128,
            'maxSynapsesPerSegment': 64,
            'minThreshold': 10,
            'newSynapseCount': 32,
            'permanenceDec': 0.1,
            'permanenceInc': 0.1
        },
        'anomaly': {
            'likelihood': {
                'probationaryPct': 0.1,
                'reestimationPeriod': 100
            }
        }
    }

    # Make the encoder
    print("- Make the encoder")
    dateEncoder = DateEncoder(
        timeOfDay=default_parameters["enc"]["time"]["timeOfDay"])
    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = default_parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = default_parameters["enc"]["value"][
        "sparsity"]
    scalarEncoderParams.resolution = default_parameters["enc"]["value"][
        "resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the SP
    print("- Make the SP")
    spParams = default_parameters["sp"]
    sp = SpatialPooler(inputDimensions=(encodingWidth, ),
                       columnDimensions=(spParams["columnCount"], ),
                       potentialPct=spParams["potentialPct"],
                       potentialRadius=encodingWidth,
                       globalInhibition=True,
                       localAreaDensity=spParams["localAreaDensity"],
                       synPermInactiveDec=spParams["synPermInactiveDec"],
                       synPermActiveInc=spParams["synPermActiveInc"],
                       synPermConnected=spParams["synPermConnected"],
                       boostStrength=spParams["boostStrength"],
                       wrapAround=True)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    # Temporal Memory Parameters
    print("- Make the TM")
    tmParams = default_parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"], ),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Setup Likelihood
    print("- Make Anomaly Score/Likelihood")
    anParams = default_parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(
        math.floor(float(anParams["probationaryPct"]) * len_data))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Make predictor
    print("- Make the predictor")
    predictor = Predictor(steps=[1, 5],
                          alpha=default_parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # End message
    print("- Finish the building of HTM!")
    def initialize(self, input_min=0, input_max=0):
        # setup spatial anomaly
        if self.useSpatialAnomaly:
            self.spatial_tolerance = self.parameters["spatial_tolerance"]

        ## setup Enc, SP, TM
        # Make the Encoders.  These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(timeOfDay=self.parameters["enc"]["time"]["timeOfDay"])

        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = self.parameters["enc"]["value"]["size"]
        scalarEncoderParams.activeBits = self.parameters["enc"]["value"]["activeBits"]
        scalarEncoderParams.resolution = max(0.001, (input_max - input_min) / 130)
        scalarEncoderParams.seed = self.parameters["enc"]["value"]["seed"]

        self.encValue = RDSE(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = self.parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnDimensions"],),
            potentialRadius=encodingWidth,
            potentialPct=spParams["potentialPct"],
            globalInhibition=spParams["globalInhibition"],
            localAreaDensity=spParams["localAreaDensity"],
            numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=spParams["wrapAround"],
            minPctOverlapDutyCycle=spParams["minPctOverlapDutyCycle"],
            dutyCyclePeriod=spParams["dutyCyclePeriod"],
            seed=spParams["seed"],
        )
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = self.parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnDimensions"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPermanence"],
            connectedPermanence=tmParams["connectedPermanence"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["maxNewSynapseCount"],
            permanenceIncrement=tmParams["permanenceIncrement"],
            permanenceDecrement=tmParams["permanenceDecrement"],
            predictedSegmentDecrement=tmParams["predictedSegmentDecrement"],
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
            seed=tmParams["seed"]
        )
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        anParams = self.parameters["anomaly"]["likelihood"]
        self.learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=self.learningPeriod,
            estimationSamples=self.probationaryPeriod - self.learningPeriod,
            reestimationPeriod=anParams["reestimationPeriod"])

        self.kernel = self._gauss_kernel(self.historic_raw_anomaly_scores.maxlen,
                                         self.historic_raw_anomaly_scores.maxlen)
Example #11
0
  def initialize(self):
    # toggle parameters here
    if self.use_optimization:
      parameters = get_params('params.json')
    else:
      parameters = parameters_numenta_comparable

    # setup spatial anomaly
    if self.useSpatialAnomaly:
      # Keep track of value range for spatial anomaly detection
      self.minVal = None
      self.maxVal = None

    ## setup Enc, SP, TM, Likelihood
    # Make the Encoders.  These will convert input data into binary representations.
    self.encTimestamp = DateEncoder(timeOfDay= parameters["enc"]["time"]["timeOfDay"],
                                    weekend  = parameters["enc"]["time"]["weekend"])

    scalarEncoderParams            = RDSE_Parameters()
    scalarEncoderParams.size       = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity   = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]

    self.encValue = RDSE( scalarEncoderParams )
    encodingWidth = (self.encTimestamp.size + self.encValue.size)
    self.enc_info = Metrics( [encodingWidth], 999999999 )

    # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
    # SpatialPooler
    spParams = parameters["sp"]
    self.sp = SpatialPooler(
      inputDimensions            = (encodingWidth,),
      columnDimensions           = (spParams["columnCount"],),
      potentialPct               = spParams["potentialPct"],
      potentialRadius            = encodingWidth,
      globalInhibition           = True,
      localAreaDensity           = spParams["localAreaDensity"],
      synPermInactiveDec         = spParams["synPermInactiveDec"],
      synPermActiveInc           = spParams["synPermActiveInc"],
      synPermConnected           = spParams["synPermConnected"],
      boostStrength              = spParams["boostStrength"],
      wrapAround                 = True
    )
    self.sp_info = Metrics( self.sp.getColumnDimensions(), 999999999 )

    # TemporalMemory
    tmParams = parameters["tm"]
    self.tm = TemporalMemory(
      columnDimensions          = (spParams["columnCount"],),
      cellsPerColumn            = tmParams["cellsPerColumn"],
      activationThreshold       = tmParams["activationThreshold"],
      initialPermanence         = tmParams["initialPerm"],
      connectedPermanence       = spParams["synPermConnected"],
      minThreshold              = tmParams["minThreshold"],
      maxNewSynapseCount        = tmParams["newSynapseCount"],
      permanenceIncrement       = tmParams["permanenceInc"],
      permanenceDecrement       = tmParams["permanenceDec"],
      predictedSegmentDecrement = 0.0,
      maxSegmentsPerCell        = tmParams["maxSegmentsPerCell"],
      maxSynapsesPerSegment     = tmParams["maxSynapsesPerSegment"]
    )
    self.tm_info = Metrics( [self.tm.numberOfCells()], 999999999 )

    # setup likelihood, these settings are used in NAB
    if self.useLikelihood:
      anParams = parameters["anomaly"]["likelihood"]
      learningPeriod     = int(math.floor(self.probationaryPeriod / 2.0))
      self.anomalyLikelihood = AnomalyLikelihood(
                                 learningPeriod= learningPeriod,
                                 estimationSamples= self.probationaryPeriod - learningPeriod,
                                 reestimationPeriod= anParams["reestimationPeriod"])
    # Predictor
    # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
    # predictor_resolution = 1

    # initialize pandaBaker
    if PANDA_VIS_BAKE_DATA:
      self.BuildPandaSystem(self.sp, self.tm, parameters["enc"]["value"]["size"], self.encTimestamp.size)
Example #12
0
        else:
            if not similar:
                similar = {"doc": document, "bits": current}
            else:
                if (distance(current, reference["bits"]) < distance(
                        similar["bits"], reference["bits"])):
                    similar = {"doc": document, "bits": current}

            if not unsimilar:
                unsimilar = {"doc": document, "bits": current}
            else:
                if (distance(current, reference["bits"]) > distance(
                        unsimilar["bits"], reference["bits"])):
                    unsimilar = {"doc": document, "bits": current}

    report = Metrics([encoder.size], len(sdrs) + 1)
    for sdr in sdrs:
        report.addData(sdr)

    print("Statistics:")
    print("\tEncoded %d Document inputs." % len(sdrs))
    print("\tOutput: " + str(report))

    print("Similarity:")
    print("\tReference:\n\t\t" + str(reference["doc"]))
    print("\tMOST Similar (Distance = " +
          str(distance(similar["bits"], reference["bits"])) + "):")
    print("\t\t" + str(similar["doc"]))
    print("\tLEAST Similar (Distance = " +
          str(distance(unsimilar["bits"], reference["bits"])) + "):")
    print("\t\t" + str(unsimilar["doc"]))
Example #13
0
def main(parameters=default_parameters, argv=None, verbose=True):
  if verbose:
    import pprint
    print("Parameters:")
    pprint.pprint(parameters, indent=4)
    print("")

  # Read the input file.
  records = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = next(reader)
    next(reader)
    next(reader)
    for record in reader:
      records.append(record)

  # Make the Encoders.  These will convert input data into binary representations.
  dateEncoder = DateEncoder(timeOfDay= parameters["enc"]["time"]["timeOfDay"], 
                            weekend  = parameters["enc"]["time"]["weekend"]) 
  
  scalarEncoderParams            = RDSE_Parameters()
  scalarEncoderParams.size       = parameters["enc"]["value"]["size"]
  scalarEncoderParams.sparsity   = parameters["enc"]["value"]["sparsity"]
  scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
  scalarEncoder = RDSE( scalarEncoderParams )
  encodingWidth = (dateEncoder.size + scalarEncoder.size)
  enc_info = Metrics( [encodingWidth], 999999999 )

  # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
  spParams = parameters["sp"]
  sp = SpatialPooler(
    inputDimensions            = (encodingWidth,),
    columnDimensions           = (spParams["columnCount"],),
    potentialPct               = spParams["potentialPct"],
    potentialRadius            = encodingWidth,
    globalInhibition           = True,
    localAreaDensity           = spParams["localAreaDensity"],
    synPermInactiveDec         = spParams["synPermInactiveDec"],
    synPermActiveInc           = spParams["synPermActiveInc"],
    synPermConnected           = spParams["synPermConnected"],
    boostStrength              = spParams["boostStrength"],
    wrapAround                 = True
  )
  sp_info = Metrics( sp.getColumnDimensions(), 999999999 )

  tmParams = parameters["tm"]
  tm = TemporalMemory(
    columnDimensions          = (spParams["columnCount"],),
    cellsPerColumn            = tmParams["cellsPerColumn"],
    activationThreshold       = tmParams["activationThreshold"],
    initialPermanence         = tmParams["initialPerm"],
    connectedPermanence       = spParams["synPermConnected"],
    minThreshold              = tmParams["minThreshold"],
    maxNewSynapseCount        = tmParams["newSynapseCount"],
    permanenceIncrement       = tmParams["permanenceInc"],
    permanenceDecrement       = tmParams["permanenceDec"],
    predictedSegmentDecrement = 0.0,
    maxSegmentsPerCell        = tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment     = tmParams["maxSynapsesPerSegment"]
  )
  tm_info = Metrics( [tm.numberOfCells()], 999999999 )

  # setup likelihood, these settings are used in NAB
  anParams = parameters["anomaly"]["likelihood"]
  probationaryPeriod = int(math.floor(float(anParams["probationaryPct"])*len(records)))
  learningPeriod     = int(math.floor(probationaryPeriod / 2.0))
  anomaly_history = AnomalyLikelihood(learningPeriod= learningPeriod,
                                      estimationSamples= probationaryPeriod - learningPeriod,
                                      reestimationPeriod= anParams["reestimationPeriod"])

  predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
  predictor_resolution = 1

  # Iterate through every datum in the dataset, record the inputs & outputs.
  inputs      = []
  anomaly     = []
  anomalyProb = []
  predictions = {1: [], 5: []}
  for count, record in enumerate(records):

    # Convert date string into Python date object.
    dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
    # Convert data value string into float.
    consumption = float(record[1])
    inputs.append( consumption )

    # Call the encoders to create bit representations for each value.  These are SDR objects.
    dateBits        = dateEncoder.encode(dateString)
    consumptionBits = scalarEncoder.encode(consumption)

    # Concatenate all these encodings into one large encoding for Spatial Pooling.
    encoding = SDR( encodingWidth ).concatenate([consumptionBits, dateBits])
    enc_info.addData( encoding )

    # Create an SDR to represent active columns, This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR( sp.getColumnDimensions() )

    # Execute Spatial Pooling algorithm over input space.
    sp.compute(encoding, True, activeColumns)
    sp_info.addData( activeColumns )

    # Execute Temporal Memory algorithm over active mini-columns.
    tm.compute(activeColumns, learn=True)
    tm_info.addData( tm.getActiveCells().flatten() )

    # Predict what will happen, and then train the predictor based on what just happened.
    pdf = predictor.infer( count, tm.getActiveCells() )
    for n in (1, 5):
      if pdf[n]:
        predictions[n].append( np.argmax( pdf[n] ) * predictor_resolution )
      else:
        predictions[n].append(float('nan'))
    predictor.learn( count, tm.getActiveCells(), int(consumption / predictor_resolution))

    anomalyLikelihood = anomaly_history.anomalyProbability( consumption, tm.anomaly )
    anomaly.append( tm.anomaly )
    anomalyProb.append( anomalyLikelihood )

  # Print information & statistics about the state of the HTM.
  print("Encoded Input", enc_info)
  print("")
  print("Spatial Pooler Mini-Columns", sp_info)
  print(str(sp))
  print("")
  print("Temporal Memory Cells", tm_info)
  print(str(tm))
  print("")

  # Shift the predictions so that they are aligned with the input they predict.
  for n_steps, pred_list in predictions.items():
    for x in range(n_steps):
        pred_list.insert(0, float('nan'))
        pred_list.pop()

  # Calculate the predictive accuracy, Root-Mean-Squared
  accuracy         = {1: 0, 5: 0}
  accuracy_samples = {1: 0, 5: 0}
  for idx, inp in enumerate(inputs):
    for n in predictions: # For each [N]umber of time steps ahead which was predicted.
      val = predictions[n][ idx ]
      if not math.isnan(val):
        accuracy[n] += (inp - val) ** 2
        accuracy_samples[n] += 1
  for n in sorted(predictions):
    accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
    print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

  # Show info about the anomaly (mean & std)
  print("Anomaly Mean", np.mean(anomaly))
  print("Anomaly Std ", np.std(anomaly))

  # Plot the Predictions and Anomalies.
  if verbose:
    try:
      import matplotlib.pyplot as plt
    except:
      print("WARNING: failed to import matplotlib, plots cannot be shown.")
      return -accuracy[5]

    plt.subplot(2,1,1)
    plt.title("Predictions")
    plt.xlabel("Time")
    plt.ylabel("Power Consumption")
    plt.plot(np.arange(len(inputs)), inputs, 'red',
             np.arange(len(inputs)), predictions[1], 'blue',
             np.arange(len(inputs)), predictions[5], 'green',)
    plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step', '5 Step Prediction, Shifted 5 steps'))

    plt.subplot(2,1,2)
    plt.title("Anomaly Score")
    plt.xlabel("Time")
    plt.ylabel("Power Consumption")
    inputs = np.array(inputs) / max(inputs)
    plt.plot(np.arange(len(inputs)), inputs, 'red',
             np.arange(len(inputs)), anomaly, 'blue',)
    plt.legend(labels=('Input', 'Anomaly Score'))
    plt.show()

  return -accuracy[5]
Example #14
0
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir',
                        type=str,
                        default=os.path.join(os.path.dirname(__file__), '..',
                                             '..', '..', 'build', 'ThirdParty',
                                             'mnist_data', 'mnist-src'))
    args = parser.parse_args(args=argv)

    # Load data.
    train_labels, train_images, test_labels, test_images = load_mnist(
        args.data_dir)
    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)

    # Setup the AI.
    enc = SDR((train_images[0].shape))
    sp = SpatialPooler(
        inputDimensions=enc.dimensions,
        columnDimensions=parameters['columnDimensions'],
        potentialRadius=parameters['potentialRadius'],
        potentialPct=parameters['potentialPct'],
        globalInhibition=True,
        localAreaDensity=parameters['localAreaDensity'],
        stimulusThreshold=int(round(parameters['stimulusThreshold'])),
        synPermInactiveDec=parameters['synPermInactiveDec'],
        synPermActiveInc=parameters['synPermActiveInc'],
        synPermConnected=parameters['synPermConnected'],
        minPctOverlapDutyCycle=parameters['minPctOverlapDutyCycle'],
        dutyCyclePeriod=int(round(parameters['dutyCyclePeriod'])),
        boostStrength=parameters['boostStrength'],
        seed=0,
        spVerbosity=99,
        wrapAround=False)
    columns = SDR(sp.getColumnDimensions())
    columns_stats = Metrics(columns, 99999999)
    sdrc = Classifier()

    # Training Loop
    for i in range(len(train_images)):
        img, lbl = random.choice(training_data)
        enc.dense = img >= np.mean(img)  # Convert greyscale image to binary.
        sp.compute(enc, True, columns)
        sdrc.learn(columns, lbl)

    print(str(sp))
    print(str(columns_stats))

    # Testing Loop
    score = 0
    for img, lbl in test_data:
        enc.dense = img >= np.mean(img)  # Convert greyscale image to binary.
        sp.compute(enc, False, columns)
        if lbl == np.argmax(sdrc.infer(columns)):
            score += 1
    score = score / len(test_data)

    print('Score:', 100 * score, '%')
    return score
Example #15
0
                        nargs='+',
                        default=[6 * (2**.5)**i for i in range(5)],
                        help='')

    args = parser.parse_args()
    print('Module Periods', args.periods)

    gc = GridCellEncoder(
        size=100,
        sparsity=args.sparsity,
        periods=args.periods,
    )

    gc_sdr = SDR(gc.dimensions)

    gc_statistics = Metrics(gc_sdr, args.arena_size**2)

    assert (args.arena_size >= 10)
    rf = np.empty((gc.size, args.arena_size, args.arena_size))
    for x in range(args.arena_size):
        for y in range(args.arena_size):
            gc.encode([x, y], gc_sdr)
            rf[:, x, y] = gc_sdr.dense.ravel()

    print(gc_statistics)

    rows = 4
    cols = 5
    n_subplots = rows * cols
    assert (gc.size > n_subplots)
    samples = np.linspace(0, gc.size - 1, n_subplots, dtype=np.int)
Example #16
0
    def testStatistics(self):
        # 100 random simple English words run mass encoding stats against
        testCorpus = [
            "find", "any", "new", "work", "part", "take", "get", "place",
            "made", "live", "where", "after", "back", "little", "only",
            "round", "man", "year", "came", "show", "every", "good", "me",
            "give", "our", "under", "name", "very", "through", "just", "form",
            "sentence", "great", "think", "say", "help", "low", "line",
            "differ", "turn", "cause", "much", "mean", "before", "move",
            "right", "boy", "old", "too", "same", "tell", "does", "set",
            "three", "want", "air", "well", "also", "play", "small", "end",
            "put", "home", "read", "hand", "port", "large", "spell", "add",
            "even", "land", "here", "must", "big", "high", "such", "follow",
            "act", "why", "ask", "men", "change", "went", "light", "kind",
            "off", "need", "house", "picture", "try", "us", "again", "animal",
            "point", "mother", "world", "near", "build", "self", "earth"]
        num_samples = 1000  # number of documents to run
        num_tokens = 10     # tokens per document

        # Case 1 = tokenSimilarity OFF
        params1 = SimHashDocumentEncoderParameters()
        params1.size = 400
        params1.sparsity = 0.33
        params1.tokenSimilarity = False
        encoder1 = SimHashDocumentEncoder(params1)

        # Case 2 = tokenSimilarity ON
        params2 = params1
        params2.tokenSimilarity = True
        encoder2 = SimHashDocumentEncoder(params2)

        sdrs1 = []
        sdrs2 = []
        for _ in range(num_samples):
            document = []
            for _ in range(num_tokens - 1):
                token = testCorpus[random.randint(0, len(testCorpus) - 1)]
                document.append(token)
            sdrs1.append(encoder1.encode(document))
            sdrs2.append(encoder2.encode(document))

        report1 = Metrics([encoder1.size], len(sdrs1) + 1)
        report2 = Metrics([encoder2.size], len(sdrs2) + 1)

        for sdr in sdrs1:
            report1.addData(sdr)
        for sdr in sdrs2:
            report2.addData(sdr)

        # Assertions for Case 1 = tokenSimilarity OFF
        assert(report1.activationFrequency.entropy() > 0.87)
        assert(report1.activationFrequency.min() > 0.01)
        assert(report1.activationFrequency.max() < 0.99)
        assert(report1.activationFrequency.mean() > params1.sparsity - 0.005)
        assert(report1.activationFrequency.mean() < params1.sparsity + 0.005)
        assert(report1.overlap.min() > 0.21)
        assert(report1.overlap.max() > 0.53)
        assert(report1.overlap.mean() > 0.38)
        assert(report1.sparsity.min() > params1.sparsity - 0.01)
        assert(report1.sparsity.max() < params1.sparsity + 0.01)
        assert(report1.sparsity.mean() > params1.sparsity - 0.005)
        assert(report1.sparsity.mean() < params1.sparsity + 0.005)

        # Assertions for Case 2 = tokenSimilarity ON
        assert(report2.activationFrequency.entropy() > 0.59)
        assert(report2.activationFrequency.min() >= 0)
        assert(report2.activationFrequency.max() <= 1)
        assert(report2.activationFrequency.mean() > params2.sparsity - 0.005)
        assert(report2.activationFrequency.mean() < params2.sparsity + 0.005)
        assert(report2.overlap.min() > 0.38)
        assert(report2.overlap.max() > 0.78)
        assert(report2.overlap.mean() > 0.61)
        assert(report2.sparsity.min() > params2.sparsity - 0.01)
        assert(report2.sparsity.max() < params2.sparsity + 0.01)
        assert(report2.sparsity.mean() > params2.sparsity - 0.005)
        assert(report2.sparsity.mean() < params2.sparsity + 0.005)
Example #17
0
        exit()

    #
    # Run the encoder and measure some statistics about its output.
    #
    if args.category:
        n_samples = int(args.maximum - args.minimum + 1)
    else:
        n_samples = (args.maximum - args.minimum) / enc.parameters.resolution
        oversample = 2  # Use more samples than needed to avoid aliasing & artifacts.
        n_samples = int(round(oversample * n_samples))
    sdrs = []
    for i in np.linspace(args.minimum, args.maximum, n_samples):
        sdrs.append(enc.encode(i))

    M = Metrics([enc.size], len(sdrs) + 1)
    for s in sdrs:
        M.addData(s)
    print("Statistics:")
    print("Encoded %d inputs." % len(sdrs))
    print("Output " + str(M))

    #
    # Plot the Receptive Field of each bit in the encoder.
    #
    import matplotlib.pyplot as plt
    if 'matplotlib.pyplot' in modules:
        rf = np.zeros([enc.size, len(sdrs)], dtype=np.uint8)
        for i in range(len(sdrs)):
            rf[:, i] = sdrs[i].dense
        plt.imshow(rf, interpolation='nearest')
Example #18
0
    def initialize(self):
        # toggle parameters here
        # parameters = default_parameters
        parameters = parameters_numenta_comparable

        ## setup Enc, SP, TM, Likelihood
        # Make the Encoders.  These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(
            timeOfDay=parameters["enc"]["time"]["timeOfDay"],
            weekend=parameters["enc"]["time"]["weekend"],
            season=parameters["enc"]["time"]["season"],
            dayOfWeek=parameters["enc"]["time"]["dayOfWeek"])

        scalarEncoderParams = EncParameters()
        scalarEncoderParams.size = parameters["enc"]["value"]["size"]
        scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
        scalarEncoderParams.resolution = parameters["enc"]["value"][
            "resolution"]

        self.encValue = Encoder(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth, ),
            columnDimensions=(spParams["columnCount"], ),
            potentialPct=spParams["potentialPct"],
            potentialRadius=spParams["potentialRadius"],
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True)
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnCount"], ),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPerm"],
            connectedPermanence=spParams["synPermConnected"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["newSynapseCount"],
            permanenceIncrement=tmParams["permanenceInc"],
            permanenceDecrement=tmParams["permanenceDec"],
            predictedSegmentDecrement=0.0,
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # setup likelihood, these settings are used in NAB
        if self.useLikelihood:
            anParams = parameters["anomaly"]["likelihood"]
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = AnomalyLikelihood(
                learningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=anParams["reestimationPeriod"])