def testRemoveUnlikelyPredictionsSingleValues(self):
    """Check ``_removeUnlikelyPredictions`` on single-entry dicts.

    A lone entry is expected to come back unchanged for both a value of 0.1
    and a value of 0.001 when the remaining arguments are 0.01 and 3.
    """
    # NOTE(review): 0.01 and 3 are presumably the likelihood threshold and
    # the maximum number of predictions -- confirm against the
    # implementation.
    cases = (
        # (input likelihoods, expected output)
        ({1: 0.1}, {1: 0.1}),
        ({1: 0.001}, {1: 0.001}),
    )
    for likelihoods, expected in cases:
        filtered = HTMPredictionModel._removeUnlikelyPredictions(
            likelihoods, 0.01, 3)
        self.assertDictEqual(filtered, expected)
def testRemoveUnlikelyPredictionsComplex(self):
    """Check ``_removeUnlikelyPredictions`` on mixed inputs.

    The inputs have four or five entries, some with values below 0.01. In
    every case exactly three entries are expected back, none of them one of
    the sub-0.01 entries.
    """
    # NOTE(review): 0.01 and 3 are presumably the likelihood threshold and
    # the maximum number of predictions -- confirm against the
    # implementation.
    cases = (
        # (input likelihoods, expected output)
        ({1: 0.1, 2: 0.2, 3: 0.3, 4: 0.004},
         {1: 0.1, 2: 0.2, 3: 0.3}),
        ({1: 0.1, 2: 0.2, 3: 0.3, 4: 0.4, 5: 0.005},
         {2: 0.2, 3: 0.3, 4: 0.4}),
        ({1: 0.1, 2: 0.2, 3: 0.3, 4: 0.004, 5: 0.005},
         {1: 0.1, 2: 0.2, 3: 0.3}),
    )
    for likelihoods, expected in cases:
        filtered = HTMPredictionModel._removeUnlikelyPredictions(
            likelihoods, 0.01, 3)
        self.assertDictEqual(filtered, expected)
def testRemoveUnlikelyPredictionsMaxPredictions(self):
    """Check how many entries ``_removeUnlikelyPredictions`` keeps.

    With three entries above 0.01 the dict is expected back unchanged. With
    four such entries the one with the smallest value (0.1) is expected to
    be dropped.
    """
    # NOTE(review): the final argument (3) is presumably the maximum number
    # of predictions to keep -- confirm against the implementation.
    cases = (
        # (input likelihoods, expected output)
        ({1: 0.1, 2: 0.2, 3: 0.3},
         {1: 0.1, 2: 0.2, 3: 0.3}),
        ({1: 0.1, 2: 0.2, 3: 0.3, 4: 0.4},
         {2: 0.2, 3: 0.3, 4: 0.4}),
    )
    for likelihoods, expected in cases:
        filtered = HTMPredictionModel._removeUnlikelyPredictions(
            likelihoods, 0.01, 3)
        self.assertDictEqual(filtered, expected)
def testRemoveUnlikelyPredictionsLikelihoodThresholds(self):
    """Check ``_removeUnlikelyPredictions`` around the 0.01 argument.

    Expected behaviour pinned by the cases below:

    * an entry below 0.01 is dropped when another entry is above it;
    * when every entry is below 0.01, only the entry with the largest value
      is returned, regardless of key order.
    """
    # NOTE(review): 0.01 is presumably the minimum likelihood threshold --
    # confirm against the implementation.
    cases = (
        # (input likelihoods, expected output)
        ({1: 0.1, 2: 0.001}, {1: 0.1}),
        ({1: 0.001, 2: 0.002}, {2: 0.002}),
        ({1: 0.002, 2: 0.001}, {1: 0.002}),
    )
    for likelihoods, expected in cases:
        filtered = HTMPredictionModel._removeUnlikelyPredictions(
            likelihoods, 0.01, 3)
        self.assertDictEqual(filtered, expected)
def testPredictedFieldAndInferenceEnabledAreSaved(self):
    """Verify that inference settings survive serialization.

    A model with inference enabled for the 'consumption' field is written to
    an ``HTMPredictionModelProto`` and read back. The restored model must
    report inference as enabled with the same predicted field, and must be
    runnable without another ``enableInference`` call. The same round trip
    is then repeated with inference disabled.
    """

    def roundTrip(model):
        # Serialize into a fresh builder, then deserialize from a reader
        # constructed out of the builder's bytes.
        builderProto = HTMPredictionModelProto.new_message()
        model.write(builderProto)
        readerProto = HTMPredictionModelProto.from_bytes(
            builderProto.to_bytes())
        return HTMPredictionModel.read(readerProto)

    headers = ['timestamp', 'consumption']

    m1 = ModelFactory.create(PY_MODEL_PARAMS)
    m1.enableInference({'predictedField': 'consumption'})
    self.assertTrue(m1.isInferenceEnabled())
    self.assertEqual(m1.getInferenceArgs().get('predictedField'),
                     'consumption')

    # Pass both bounds explicitly: uniform(100) would bind 100 to `low` and
    # keep the default high=1.0, which numpy documents as undefined
    # behaviour (high < low).
    record = [datetime.datetime(2013, 12, 12),
              numpy.random.uniform(0, 100)]
    m1.run(dict(zip(headers, record)))

    m2 = roundTrip(m1)
    self.assertTrue(m2.isInferenceEnabled())
    self.assertEqual(m2.getInferenceArgs().get('predictedField'),
                     'consumption')

    # Running the deserialized m2 without a redundant enableInference call
    # should work.
    record = [datetime.datetime(2013, 12, 14),
              numpy.random.uniform(0, 100)]
    m2.run(dict(zip(headers, record)))

    # Check that disabled inference is saved, too (since the constructor
    # defaults to enabled at the time of this writing).
    m1.disableInference()
    self.assertFalse(m1.isInferenceEnabled())
    m3 = roundTrip(m1)
    self.assertFalse(m3.isInferenceEnabled())
def testPredictedFieldAndInferenceEnabledAreSaved(self):
    """Verify that inference settings survive serialization.

    A model with inference enabled for the 'consumption' field is written to
    an ``HTMPredictionModelProto`` and read back. The restored model must
    report inference as enabled with the same predicted field, and must be
    runnable without another ``enableInference`` call. The same round trip
    is then repeated with inference disabled.
    """

    def roundTrip(model):
        # Serialize into a fresh builder, then deserialize from a reader
        # constructed out of the builder's bytes.
        builderProto = HTMPredictionModelProto.new_message()
        model.write(builderProto)
        readerProto = HTMPredictionModelProto.from_bytes(
            builderProto.to_bytes())
        return HTMPredictionModel.read(readerProto)

    headers = ['timestamp', 'consumption']

    m1 = ModelFactory.create(PY_MODEL_PARAMS)
    m1.enableInference({'predictedField': 'consumption'})
    self.assertTrue(m1.isInferenceEnabled())
    self.assertEqual(m1.getInferenceArgs().get('predictedField'),
                     'consumption')

    # Pass both bounds explicitly: uniform(100) would bind 100 to `low` and
    # keep the default high=1.0, which numpy documents as undefined
    # behaviour (high < low).
    record = [datetime.datetime(2013, 12, 12),
              numpy.random.uniform(0, 100)]
    m1.run(dict(zip(headers, record)))

    m2 = roundTrip(m1)
    self.assertTrue(m2.isInferenceEnabled())
    self.assertEqual(m2.getInferenceArgs().get('predictedField'),
                     'consumption')

    # Running the deserialized m2 without a redundant enableInference call
    # should work.
    record = [datetime.datetime(2013, 12, 14),
              numpy.random.uniform(0, 100)]
    m2.run(dict(zip(headers, record)))

    # Check that disabled inference is saved, too (since the constructor
    # defaults to enabled at the time of this writing).
    m1.disableInference()
    self.assertFalse(m1.isInferenceEnabled())
    m3 = roundTrip(m1)
    self.assertFalse(m3.isInferenceEnabled())
# Script fragment: configures and instantiates an HTMPredictionModel for the
# "nyc_taxi" parameter set, then looks up the encoders of its sensor region.
# `_options`, `classifierType`, `dataSet`, `DATA_DIR`,
# `getModelParamsFromName` and `printTPRegionParams` are defined outside this
# fragment.

# Read the run settings from the options object.
trainSP = bool(_options.trainSP)
boostStrength = _options.boostStrength

DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
predictedField = "passenger_count"

# Load the named parameter set and override the classifier steps, the
# classifier region name and the SP boost strength.
modelParams = getModelParamsFromName("nyc_taxi")
# The step count is stored as a string.
modelParams['modelParams']['clParams']['steps'] = str(_options.stepsAhead)
modelParams['modelParams']['clParams']['regionName'] = classifierType
modelParams['modelParams']['spParams']['boostStrength'] = boostStrength

print "Creating model from %s..." % dataSet

# Use a customized CLA model, built directly from the parameter dict.
model = HTMPredictionModel(**modelParams['modelParams'])
model.enableInference({"predictedField": predictedField})
model.enableLearning()
# Set the model's private learning flags directly: SP learning follows the
# trainSP option, TP learning is always switched on.
model._spLearningEnabled = bool(trainSP)
model._tpLearningEnabled = True

print model._spLearningEnabled
printTPRegionParams(model._getTPRegion())

# CSV path: DATA_DIR/<dataSet with spaces replaced by underscores>.csv
inputData = "%s/%s.csv" % (DATA_DIR, dataSet.replace(" ", "_"))

sensor = model._getSensorRegion()
encoderList = sensor.getSelf().encoder.getEncoderList()
# NOTE(review): the original indentation is lost in this view. Both
# assignments below are assumed to belong to the `if` body, and the fragment
# may be truncated here (e.g. a following `else` branch) -- confirm against
# the full file.
if sensor.getSelf().disabledEncoder is not None:
    classifier_encoder = sensor.getSelf().disabledEncoder.getEncoderList()
    # Keep only the first encoder of the disabled-encoder list.
    classifier_encoder = classifier_encoder[0]
def testRemoveUnlikelyPredictionsEmpty(self):
    """Check that an empty likelihood dict is returned as an empty dict."""
    noLikelihoods = {}
    filtered = HTMPredictionModel._removeUnlikelyPredictions(
        noLikelihoods, 0.01, 3)
    self.assertDictEqual(filtered, {})
def _runModelSerializationDeserializationChecks(self, modelParams):
    """Round-trip a model through its proto and compare it with the original.

    Creates a model from ``modelParams``, runs one record through it, then
    writes it to an ``HTMPredictionModelProto`` and reads it back. The same
    second record is run through the original and the restored model, and
    their results and regions are asserted to be equal.

    :param modelParams: parameters passed to ``ModelFactory.create``; the
        test reads ``modelParams['modelParams']['inferenceType']`` from it.
    """
    headers = ['timestamp', 'consumption']

    m1 = ModelFactory.create(modelParams)
    m1.enableInference({'predictedField': 'consumption'})

    # Pass both bounds explicitly: uniform(100) would bind 100 to `low` and
    # keep the default high=1.0, which numpy documents as undefined
    # behaviour (high < low).
    record = [datetime.datetime(2013, 12, 12),
              numpy.random.uniform(0, 100)]
    modelInput = dict(zip(headers, record))
    m1.run(modelInput)

    # Serialize
    builderProto = HTMPredictionModelProto.new_message()
    m1.write(builderProto)

    # Construct HTMPredictionModelProto reader from populated builder
    readerProto = HTMPredictionModelProto.from_bytes(
        builderProto.to_bytes())

    # Deserialize
    m2 = HTMPredictionModel.read(readerProto)

    self.assertEqual(m1.getInferenceType(),
                     modelParams['modelParams']['inferenceType'])
    self.assertEqual(m1.getInferenceType(), m2.getInferenceType())

    # TODO NUP-2463: remove this work-around.
    # Work around a serialization bug that doesn't save the enabled
    # predicted field.
    m2.enableInference({'predictedField': 'consumption'})

    # Run computes on m1 & m2 and compare results
    record = [datetime.datetime(2013, 12, 14),
              numpy.random.uniform(0, 100)]
    modelInput = dict(zip(headers, record))
    # Use deepcopy to guarantee no input side-effect between calls
    r1 = m1.run(copy.deepcopy(modelInput))
    r2 = m2.run(copy.deepcopy(modelInput))

    # Compare results
    self.assertEqual(r2.predictionNumber, r1.predictionNumber)
    self.assertEqual(r2.rawInput, r1.rawInput)

    self.assertEqual(r2.sensorInput.dataRow, r1.sensorInput.dataRow)
    self.assertEqual(r2.sensorInput.dataDict, r1.sensorInput.dataDict)
    numpy.testing.assert_array_equal(r2.sensorInput.dataEncodings,
                                     r1.sensorInput.dataEncodings)
    self.assertEqual(r2.sensorInput.sequenceReset,
                     r1.sensorInput.sequenceReset)
    self.assertEqual(r2.sensorInput.category, r1.sensorInput.category)

    self.assertEqual(r2.inferences, r1.inferences)
    self.assertEqual(r2.metrics, r1.metrics)
    self.assertEqual(r2.predictedFieldIdx, r1.predictedFieldIdx)
    self.assertEqual(r2.predictedFieldName, r1.predictedFieldName)

    numpy.testing.assert_array_equal(r2.classifierInput.dataRow,
                                     r1.classifierInput.dataRow)
    self.assertEqual(r2.classifierInput.bucketIndex,
                     r1.classifierInput.bucketIndex)

    # Compare regions
    self.assertIsNotNone(m2._getSensorRegion())
    self.assertEqual(m2._getSensorRegion(), m1._getSensorRegion())

    self.assertIsNotNone(m2._getClassifierRegion())
    self.assertEqual(m2._getClassifierRegion(), m1._getClassifierRegion())

    # TODO NUP-2356: Uncomment after issue is resolved.
    #self.assertIsNotNone(m2._getTPRegion())
    self.assertEqual(m2._getTPRegion(), m1._getTPRegion())

    self.assertIsNotNone(m2._getSPRegion())
    self.assertEqual(m2._getSPRegion(), m1._getSPRegion())
def _runModelSerializationDeserializationChecks(self, modelParams):
    """Round-trip a model through its proto and compare it with the original.

    Creates a model from ``modelParams``, runs one record through it, then
    writes it to an ``HTMPredictionModelProto`` and reads it back. The same
    second record is run through the original and the restored model, and
    their results and regions are asserted to be equal.

    :param modelParams: parameters passed to ``ModelFactory.create``; the
        test reads ``modelParams['modelParams']['inferenceType']`` from it.
    """
    headers = ['timestamp', 'consumption']

    m1 = ModelFactory.create(modelParams)
    m1.enableInference({'predictedField': 'consumption'})

    # Pass both bounds explicitly: uniform(100) would bind 100 to `low` and
    # keep the default high=1.0, which numpy documents as undefined
    # behaviour (high < low).
    record = [datetime.datetime(2013, 12, 12),
              numpy.random.uniform(0, 100)]
    modelInput = dict(zip(headers, record))
    m1.run(modelInput)

    # Serialize
    builderProto = HTMPredictionModelProto.new_message()
    m1.write(builderProto)

    # Construct HTMPredictionModelProto reader from populated builder
    readerProto = HTMPredictionModelProto.from_bytes(
        builderProto.to_bytes())

    # Deserialize
    m2 = HTMPredictionModel.read(readerProto)

    self.assertEqual(m1.getInferenceType(),
                     modelParams['modelParams']['inferenceType'])
    self.assertEqual(m1.getInferenceType(), m2.getInferenceType())

    # Run computes on m1 & m2 and compare results
    record = [datetime.datetime(2013, 12, 14),
              numpy.random.uniform(0, 100)]
    modelInput = dict(zip(headers, record))
    # Use deepcopy to guarantee no input side-effect between calls
    r1 = m1.run(copy.deepcopy(modelInput))
    r2 = m2.run(copy.deepcopy(modelInput))

    # Compare results
    self.assertEqual(r2.predictionNumber, r1.predictionNumber)
    self.assertEqual(r2.rawInput, r1.rawInput)

    self.assertEqual(r2.sensorInput.dataRow, r1.sensorInput.dataRow)
    self.assertEqual(r2.sensorInput.dataDict, r1.sensorInput.dataDict)
    numpy.testing.assert_array_equal(r2.sensorInput.dataEncodings,
                                     r1.sensorInput.dataEncodings)
    self.assertEqual(r2.sensorInput.sequenceReset,
                     r1.sensorInput.sequenceReset)
    self.assertEqual(r2.sensorInput.category, r1.sensorInput.category)

    self.assertEqual(r2.inferences, r1.inferences)
    self.assertEqual(r2.metrics, r1.metrics)
    self.assertEqual(r2.predictedFieldIdx, r1.predictedFieldIdx)
    self.assertEqual(r2.predictedFieldName, r1.predictedFieldName)

    numpy.testing.assert_array_equal(r2.classifierInput.dataRow,
                                     r1.classifierInput.dataRow)
    self.assertEqual(r2.classifierInput.bucketIndex,
                     r1.classifierInput.bucketIndex)

    # Compare regions
    self.assertIsNotNone(m2._getSensorRegion())
    self.assertEqual(m2._getSensorRegion(), m1._getSensorRegion())

    self.assertIsNotNone(m2._getClassifierRegion())
    self.assertEqual(m2._getClassifierRegion(), m1._getClassifierRegion())

    self.assertIsNotNone(m2._getTPRegion())
    self.assertEqual(m2._getTPRegion(), m1._getTPRegion())

    self.assertIsNotNone(m2._getSPRegion())
    self.assertEqual(m2._getSPRegion(), m1._getSPRegion())