def __init__(self, use_saved_model, checkpoint_path, likelihood_path):
  self.use_saved_model = use_saved_model
  if use_saved_model:
    self.model = ModelFactory.loadFromCheckpoint(checkpoint_path)
    # Note: only one predicted field is active at a time; the second
    # enableInference() call replaces the first.
    self.model.enableInference({'predictedField': 'cpu'})
    self.model.enableInference({'predictedField': 'memory'})
    with open(likelihood_path, "rb") as f:
      self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood.readFromFile(f)
  else:
    self.model = ModelFactory.create(model_params.MODEL_PARAMS)
    self.model.enableInference({'predictedField': 'cpu'})
    self.model.enableInference({'predictedField': 'memory'})
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
def __init__(self, modelConfig, inferenceArgs, metricSpecs, sourceSpec,
             sinkSpec=None):
  """Initialization.

  Args:
    modelConfig: The model config dict.
    inferenceArgs: Dict of inference arguments passed to enableInference().
    metricSpecs: A sequence of MetricSpec instances.
    sourceSpec: Path to the source CSV file.
    sinkSpec: Path to the sink CSV file.
  """
  self.model = ModelFactory.create(modelConfig)
  self.model.enableInference(inferenceArgs)
  self.metricsManager = MetricsManager(metricSpecs,
                                       self.model.getFieldInfo(),
                                       self.model.getInferenceType())
  self.sink = None
  if sinkSpec is not None:
    # TODO: make this work - sinkSpec not yet supported.
    raise NotImplementedError('The sinkSpec is not yet implemented.')
    #self.sink = BasicPredictionLogger(
    #  self.model.getFieldInfo(), sinkSpec, 'myOutput',
    #  self.model.getInferenceType())
    #self.sink.setLoggedMetrics(
    #  self.metricsManager.getMetricLabels())
  self.datasetReader = BasicDatasetReader(sourceSpec)
def initialize(self):
  # Get config params, setting the RDSE resolution
  rangePadding = abs(self.inputMax - self.inputMin) * 0.2
  modelParams = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.inputMin - rangePadding,
    maxVal=self.inputMax + rangePadding,
    minResolution=0.001,
    tmImplementation="cpp")["modelConfig"]
  self._setupEncoderParams(
    modelParams["modelParams"]["sensorParams"]["encoders"])

  self.model = ModelFactory.create(modelParams)
  self.model.enableInference({"predictedField": "value"})

  if self.useLikelihood:
    # Initialize the anomaly likelihood object
    numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
    self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
      learningPeriod=numentaLearningPeriod,
      estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
      reestimationPeriod=100)
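# A minimal usage sketch (an assumption, not from the source): once
# initialize() above has run, a NAB-style detector typically scores each
# record like this. The handleRecord name and the inputRow fields are
# illustrative; only model.run() and anomalyProbability() are real NuPIC APIs.
def handleRecord(self, inputRow):
  result = self.model.run({"timestamp": inputRow["timestamp"],
                           "value": inputRow["value"]})
  rawScore = result.inferences["anomalyScore"]
  if self.useLikelihood:
    # Convert the raw anomaly score into a likelihood-based score.
    return self.anomalyLikelihood.anomalyProbability(
      inputRow["value"], rawScore, inputRow["timestamp"])
  return rawScore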
def run_cpu_experiment():
  cur_dir = os.getcwd()
  input_file = os.path.join(cur_dir, 'cpu/cpu.csv')
  cpu_generate_data.run(input_file)
  model_params = swarm_over_data(SWARM_CONFIG)
  model = ModelFactory.create(model_params)
  model.enableInference({"predictedField": "cpu"})
  # To load with no swarming:
  #model = ModelFactory.create(model_params)
  if PLOT:
    output = NuPICPlotOutput("cpu/final_cpu_output")
  else:
    output = NuPICFileOutput("cpu/final_cpu_output")
  with open(input_file, "rb") as cpu_input:
    csv_reader = csv.reader(cpu_input)
    # skip header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()
    # the real data
    sumOfUtilityFitness = 0.0
    sumOfWeight = 0.0
    for row in csv_reader:
      timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
      cpu = float(row[1])
      result = model.run({"cpu": cpu})
      prediction = result.inferences["multiStepBestPredictions"][1]
      anomalyScore = result.inferences['anomalyScore']
      anomalyLikelihood = anomalyLikelihoodHelper.anomalyProbability(
        cpu, anomalyScore, timestamp)
      uc = (anomalyLikelihood * cpu + prediction * anomalyLikelihood) / \
           (anomalyScore + anomalyLikelihood)
      sumOfUtilityFitness += float(cpu) * float(anomalyLikelihood)
      sumOfWeight += float(anomalyLikelihood)
      output.write(timestamp, cpu, prediction, anomalyScore)
  output.close()
  print 'sumOfWeight: ', sumOfWeight, 'sumOfUtilityFitness: ', sumOfUtilityFitness
  result_output = 'cpu/final_cpu_output_out.csv'
  utilityOfCpu = 0.0
  with open(result_output, "rb") as result_input:
    csv_reader = csv.reader(result_input)
    # skip header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()
    for row in csv_reader:
      anomalyLikelihood = float(row[3])
      utilityOfCpu += (anomalyLikelihood * sumOfUtilityFitness) / sumOfWeight
  print 'utilityOfCpu: ', utilityOfCpu
  move_model()
def POST(self, name):
  """
  /models/{name}
  schema:
  {
    "modelParams": dict containing model parameters
    "predictedFieldName": str
  }
  returns: {"success": name}
  """
  global g_models
  data = json.loads(web.data())
  modelParams = data["modelParams"]
  predictedFieldName = data["predictedFieldName"]
  if name in g_models:
    raise web.badrequest("Model with name <%s> already exists" % name)
  model = ModelFactory.create(modelParams)
  model.enableInference({'predictedField': predictedFieldName})
  g_models[name] = model
  return json.dumps({"success": name})
def test_hotgym_anomalyScore_stays_below_50_perc_after_110_rows(self):
  """Tests that the hotgym anomalyScore stays below 50% after feeding in
  110 rows of data."""
  model = ModelFactory.create(rec_center_hourly_model_params.MODEL_PARAMS)
  model.enableInference({"predictedField": "kw_energy_consumption"})
  inputFile = open(CSV_DATA, "rb")
  csvReader = csv.reader(inputFile)
  # skip header rows
  csvReader.next()
  csvReader.next()
  csvReader.next()
  rowCount = 0
  for row in csvReader:
    rowCount += 1
    timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
    consumption = float(row[1])
    result = model.run({
      "timestamp": timestamp,
      "kw_energy_consumption": consumption
    })
    anomalyScore = result.inferences["anomalyScore"]
    print "row %i: %f" % (rowCount, anomalyScore)
    if rowCount >= START_AT_ROW:
      self.assertGreater(
        ANOMALY_THRESHOLD, anomalyScore,
        "Anomaly score exceeded threshold of %f after %i rows of data." %
        (ANOMALY_THRESHOLD, rowCount))
      break
  inputFile.close()
def getData():
  model = ModelFactory.create(MODEL_PARAMS)
  model.enableInference({"predictedField": "heartrate"})
  contentType = request.headers.get('Content-Type')
  if ('application/json' not in contentType and
      'application/xml' not in contentType):
    splitted = (request.data).split("\r\n")
    print "model created"
    print "***********************************************"
    for i in splitted:
      splitted = i.strip().split(",")
      print i
      print splitted
      timestamp = str(splitted[0])
      rate = float(splitted[1])
      result = model.run({"timestamp": timestamp, "heartrate": rate})
      result1 = {
        'prediction': result.inferences["multiStepBestPredictions"][1],
        'anomalyScore': result.inferences["anomalyScore"]
      }
      print result1
  else:
    print "Not Supported Type"
  return json.dumps({"status": True})
def POST(self, name):
  """
  /models/{name}
  schema:
  {
    "modelParams": dict containing model parameters
    "predictedFieldName": str
  }
  returns: {"success": name}
  """
  global g_models
  data = json.loads(web.data())
  modelParams = data["modelParams"]
  predictedFieldName = data["predictedFieldName"]
  if name in g_models:
    raise web.badrequest("Model with name <%s> already exists" % name)
  model = ModelFactory.create(modelParams)
  model.enableInference({'predictedField': predictedFieldName})
  g_models[name] = model
  return json.dumps({"success": name})
def __init__(self, numPredictions, resultsDir):
  random.seed(43)
  self.numPredictions = numPredictions

  if not os.path.exists(resultsDir):
    os.makedirs(resultsDir)
  self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')

  self.model = ModelFactory.create(MODEL_PARAMS)
  self.model.enableInference({"predictedField": "element"})
  self.shifter = InferenceShifter()
  self.mapping = getEncoderMapping(self.model)

  self.correct = []
  self.numPredictedActiveCells = []
  self.numPredictedInactiveCells = []
  self.numUnpredictedActiveColumns = []

  self.iteration = 0
  self.perturbed = False
  self.randoms = []
  self.verbosity = 1

  self.dataset = HighOrderDataset(numPredictions=self.numPredictions)
  self.sequences = []
  self.currentSequence = []
  self.replenish_sequence()
def run_sine_experiment():
  input_file = "netio.csv"
  generate_data.run(input_file)
  model_params = swarm_over_data()
  if PLOT:
    output = NuPICPlotOutput("netio_output", show_anomaly_score=True)
  else:
    output = NuPICFileOutput("netio_output", show_anomaly_score=True)
  model = ModelFactory.create(model_params)
  model.enableInference({"predictedField": "bytes_sent"})
  with open(input_file, "rb") as netio_input:
    csv_reader = csv.reader(netio_input)
    # skip header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()
    # the real data
    for row in csv_reader:
      timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
      bytes_sent = float(row[1])
      #netio = float(row[3])
      result = model.run({"bytes_sent": bytes_sent})
      output.write(timestamp, bytes_sent, result, prediction_step=1)
  output.close()
def __init__(self, predictStep, enablePredict, maxValue, minValue,
             minResolution):
  # Initialize the parameters and data variables.
  self.predictStep = predictStep
  self.enablePredict = enablePredict
  self.metricData = xrange(int(minValue), int(maxValue),
                           int((maxValue - minValue) / minResolution))
  self.maxValue = maxValue
  self.minValue = minValue
  self.minResolution = minResolution
  self.timestamp = None
  self.actualValue = None
  self.predictValue = None
  self.anomalyScore = None
  self.modelResult = None
  self.output = None

  # Get the model parameters.
  self.parameters = getScalarMetricWithTimeOfDayAnomalyParams(
    self.metricData, self.minValue, self.maxValue, self.minResolution)
  # Make sure the result contains the predictions.
  self.parameters["modelConfig"]["modelParams"]["clEnable"] = self.enablePredict
  # Modify the number of prediction steps.
  self.parameters["modelConfig"]["modelParams"]["clParams"]["steps"] = \
    self.predictStep

  # Create the model.
  self.model = ModelFactory.create(self.parameters["modelConfig"])
  self.model.enableInference(self.parameters["inferenceArgs"])
def initialize(self):
  # Get config params, setting the RDSE resolution
  rangePadding = abs(self.inputMax - self.inputMin) * 0.2
  modelParams = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.inputMin - rangePadding,
    maxVal=self.inputMax + rangePadding,
    minResolution=0.001,
    tmImplementation="tm_cpp")["modelConfig"]
  self._setupEncoderParams(
    modelParams["modelParams"]["sensorParams"]["encoders"])

  self.model = ModelFactory.create(modelParams)
  self.model.enableInference({"predictedField": "value"})

  # Initialize the anomaly likelihood object
  numentaLearningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
  self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
    learningPeriod=numentaLearningPeriod,
    estimationSamples=self.probationaryPeriod - numentaLearningPeriod,
    reestimationPeriod=100)
def testModelParams(self):
  """
  Test that getScalarMetricWithTimeOfDayAnomalyParams returns a valid dict
  that can be instantiated as an HTMPredictionModel.
  """
  params = getScalarMetricWithTimeOfDayAnomalyParams([0],
                                                     minVal=23.42,
                                                     maxVal=23.420001)

  encodersDict = (
    params['modelConfig']['modelParams']['sensorParams']['encoders'])

  model = ModelFactory.create(modelConfig=params['modelConfig'])
  self.assertIsInstance(model, HTMPredictionModel,
                        "JSON returned cannot be used to create a model")

  # Ensure we have a time of day field
  self.assertIsNotNone(encodersDict['c0_timeOfDay'])

  # Ensure resolution doesn't get too low
  if encodersDict['c1']['type'] == 'RandomDistributedScalarEncoder':
    self.assertGreaterEqual(encodersDict['c1']['resolution'], 0.001,
                            "Resolution is too low")

  # Ensure tm_cpp returns correct json file
  params = getScalarMetricWithTimeOfDayAnomalyParams(
    [0], tmImplementation="tm_cpp")
  self.assertEqual(
    params['modelConfig']['modelParams']['tmParams']['temporalImp'],
    "tm_cpp", "Incorrect json for tm_cpp tmImplementation")

  # Ensure incorrect tmImplementation throws exception
  with self.assertRaises(ValueError):
    getScalarMetricWithTimeOfDayAnomalyParams([0], tmImplementation="")
def run_mem_experiment():
  input_file = "cpu.csv"
  generate_data.run(input_file)
  model_params = swarm_over_data(SWARM_CONFIG)
  if PLOT:
    output = NuPICPlotOutput("final_mem_output")
  else:
    output = NuPICFileOutput("final_mem_output")
  model = ModelFactory.create(model_params)
  model.enableInference({"predictedField": "mem"})
  with open(input_file, "rb") as sine_input:
    csv_reader = csv.reader(sine_input)
    # skip header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()
    # the real data
    for row in csv_reader:
      timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
      mem = float(row[1])
      result = model.run({"mem": mem})
      prediction = result.inferences["multiStepBestPredictions"][1]
      anomalyScore = result.inferences['anomalyScore']
      output.write(timestamp, mem, prediction, anomalyScore)
  output.close()
def createModel(InputName):
  """
  Given an input name, create a CLA Model.
  Automatically enables inference for the predicted field.

  :param InputName: Name of the input CSV, used to look up model params
  :return: OPF Model object
  """
  CsvCol, CsvDataTypes, CsvData, csvMin, csvMax, csvStd = getNewParams(InputName)
  # Try to find an already existing params file
  try:
    params = getModelParamsFromName(InputName)
    steps = int(params["modelConfig"]["modelParams"]["clParams"]["steps"])
    params["inferenceArgs"] = {'inputPredictedField': 'auto',
                               'predictionSteps': [steps],
                               'predictedField': CsvCol[1]}
    print 'swarm file found, using given values'
  except:
    print 'swarm file NOT found, using generic values'
    minResolution = 0.001
    tmImplementation = "cpp"
    # Load model parameters and update encoder params
    params = GenericParams(tmImplementation)
    _fixupRandomEncoderParams(params, CsvCol, CsvDataTypes, CsvData,
                              csvMin, csvMax, csvStd, minResolution)
    params["inferenceArgs"]["predictedField"] = CsvCol[1]
    params['modelConfig']['modelParams']['clEnable'] = True
  model = ModelFactory.create(modelConfig=params["modelConfig"])
  model.enableLearning()
  model.enableInference(params["inferenceArgs"])
  return model
def run_sine_experiment():
  input_file = "sine.csv"
  generate_data.run(input_file)
  model_params = swarm_over_data()
  if PLOT:
    output = NuPICPlotOutput("sine_output", show_anomaly_score=True)
  else:
    output = NuPICFileOutput("sine_output", show_anomaly_score=True)
  model = ModelFactory.create(model_params)
  model.enableInference({"predictedField": "sine"})
  with open(input_file, "rb") as sine_input:
    csv_reader = csv.reader(sine_input)
    # skip header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()
    # the real data
    for row in csv_reader:
      angle = float(row[0])
      sine_value = float(row[1])
      result = model.run({"sine": sine_value})
      output.write(angle, sine_value, result, prediction_step=1)
  output.close()
def get_cpu_observation():
  tstart = time.time()
  end = str(tstart + 30)
  start = str(tstart)
  model_cpu_model = ModelFactory.create(model_cpu.MODEL_PARAMS)
  model_cpu_model.enableInference({"predictedField": "cpu"})
  response = requests.get(
    'http://*****:*****@' + prometheus +
    ':9090/api/v1/query_range?query=sum(irate(node_cpu_seconds_total%7Bmode%3D%22idle%22%7D%5B30s%5D)%20*%20on(instance)%20group_left(node_name)%20node_meta%7Bnode_id%3D~%22.%2B%22%7D)%20*%20100%20%2F%20count(node_cpu_seconds_total%7Bmode%3D%22user%22%7D%20*%20on(instance)%20group_left(node_name)%20node_meta%7Bnode_id%3D~%22.%2B%22%7D)%20&start=' +
    start + '&end=' + end + '&step=30',
    timeout=5)
  results = response.json()
  cpuData = results['data']['result']
  result = 0
  prediction = 0
  anomalyScore = 0
  anomalyLikelihood = 0
  utility_cpu = 0
  cpu = 0
  js = None
  if len(cpuData) > 0:
    cpuValue = cpuData[0]['values']
    timestamp = datetime.datetime.fromtimestamp(
      float(cpuValue[0][0])).strftime('%m/%d/%y %H:%M')
    cpu = 100 - float(cpuValue[0][1])
    result = model_cpu_model.run({"cpu": cpu})
    prediction = result.inferences["multiStepBestPredictions"][1]
    anomalyScore = result.inferences['anomalyScore']
    anomalyLikelihood = anomalyLikelihoodHelper.anomalyProbability(
      cpu, anomalyScore, timestamp)
    utility_cpu = anomalyLikelihood * cpu
    data = {
      'cpu': float(cpu),
      'prediction': float(prediction),
      'anomalyScore': float(anomalyScore),
      'anomalyLikelihood': float(anomalyLikelihood),
      'utility_cpu': float(utility_cpu)
    }
    js = json.dumps(data)
  else:
    data = {
      'cpu': float(0),
      'prediction': float(0),
      'anomalyScore': float(0),
      'anomalyLikelihood': float(0),
      'utility_cpu': float(0)
    }
    js = json.dumps(data)
  resp = Response(js, status=200, mimetype='application/json')
  resp.headers['Link'] = 'http://nupicapi.8888'
  return resp
def swarmModel(basedir, column):
  filename = os.path.join(_FILE_PATH, "data", "swarm_params")
  model_params = permutations_runner.runWithJsonFile(
    filename,
    {'maxWorkers': 1, 'overwrite': True, 'verbosityCount': 3},
    column + "_swarm",
    basedir)
  model = ModelFactory.create(model_params)
  return model
def get_mem_observation():
  tstart = time.time()
  end = str(tstart + 30)
  start = str(tstart)
  model_mem_model = ModelFactory.create(model_mem.MODEL_PARAMS)
  model_mem_model.enableInference({"predictedField": "mem"})
  response = requests.get(
    'http://*****:*****@' + prometheus +
    ':9090/api/v1/query_range?query=sum((node_memory_MemAvailable_bytes%20%2F%20node_memory_MemTotal_bytes)%20*%20on(instance)%20group_left(node_name)%20node_meta%7Bnode_id%3D~%22.%2B%22%7D%20*%20100)%20%2F%20count(node_meta%20*%20on(instance)%20group_left(node_name)%20node_meta%7Bnode_id%3D~%22.%2B%22%7D)&start=' +
    start + '&end=' + end + '&step=30',
    timeout=5)
  results = response.json()
  memData = results['data']['result']
  result = 0
  prediction = 0
  anomalyScore = 0
  anomalyLikelihood = 0
  utility_mem = 0
  mem = 0
  js = None
  if len(memData) > 0:
    memValue = memData[0]['values']
    timestamp = datetime.datetime.fromtimestamp(
      float(memValue[0][0])).strftime('%m/%d/%y %H:%M')
    mem = 100 - float(memValue[0][1])
    result = model_mem_model.run({"mem": mem})
    prediction = result.inferences["multiStepBestPredictions"][1]
    anomalyScore = result.inferences['anomalyScore']
    anomalyLikelihood = anomalyLikelihoodHelper.anomalyProbability(
      mem, anomalyScore, timestamp)
    utility_mem = anomalyLikelihood * mem
    data = {
      'mem': float(mem),
      'prediction': float(prediction),
      'anomalyScore': float(anomalyScore),
      'anomalyLikelihood': float(anomalyLikelihood),
      'utility_mem': float(utility_mem)
    }
    js = json.dumps(data)
  else:
    data = {
      'mem': float(0.0),
      'prediction': float(0),
      'anomalyScore': float(0),
      'anomalyLikelihood': float(0),
      'utility_mem': float(0)
    }
    js = json.dumps(data)
  resp = Response(js, status=200, mimetype='application/json')
  resp.headers['Link'] = 'http://nupicapi.8888'
  return resp
def createModel(model_par):
  """
  Creates the HTM model.

  :param model_par: parameters for the model
  :return: OPF Model object
  """
  model = ModelFactory.create(model_par)
  model.enableInference({"predictedField": "value"})
  return model
def __init__(self):
  #self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(
  #  metricData=[0], tmImplementation="cpp")
  with open("model_params.json") as fp:
    self.model_params = json.load(fp)
  print self.model_params
  self.newmodel = ModelFactory.create(self.model_params)
  self.newmodel.enableLearning()
  self.newmodel.enableInference({"predictedField": "value"})
  self.DATE_FORMAT = "%d/%m/%Y %H:%M"
  self.anomalylikelihood = AnomalyLikelihood()
def createModel(modelParams):
  """
  Given a model params dictionary, create a CLA Model.
  Automatically enables inference for kw_energy_consumption.

  :param modelParams: Model params dict
  :return: OPF Model object
  """
  model = ModelFactory.create(modelParams)
  model.enableInference({"predictedField": "kw_energy_consumption"})
  return model
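# Hedged usage sketch for createModel() above: the params module name and the
# sample record values are assumptions borrowed from the sibling hotgym
# snippets, not guaranteed by this file.
model = createModel(rec_center_hourly_model_params.MODEL_PARAMS)
result = model.run({
  "timestamp": datetime.datetime.strptime("07/02/10 00:00", "%m/%d/%y %H:%M"),
  "kw_energy_consumption": 21.2,
})
print result.inferences["anomalyScore"]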
def createModel(modelParams):
  """
  Given a model params dictionary, create a CLA Model.
  Automatically enables inference for metric_value.

  :param modelParams: Model params dict
  :return: OPF Model object
  """
  model = ModelFactory.create(modelParams)
  model.enableInference({"predictedField": "metric_value"})
  return model
def get_disk_observation():
  tstart = time.time()
  end = str(tstart + 30)
  start = str(tstart)
  model_disk_model = ModelFactory.create(model_disk.MODEL_PARAMS)
  model_disk_model.enableInference({"predictedField": "disk"})
  response = requests.get(
    'http://*****:*****@' + prometheus +
    ':9090/api/v1/query?query=sum((node_disk_io_time_weighted_seconds_total))%20%2F%20avg(node_disk_io_time_weighted_seconds_total)%20*%20count(node_meta%20*%20on(instance)%20group_left(node_name)%20node_meta%7Bnode_id%3D~".%2B"%7D)&start=' +
    start + '&end=' + end + '&step=30',
    timeout=5)
  results = response.json()
  diskData = results['data']['result']
  result = 0
  prediction = 0
  anomalyScore = 0
  anomalyLikelihood = 0
  utility_disk = 0
  disk = 0
  js = None
  if len(diskData) > 0:
    diskValue = diskData[0]['value']
    timestamp = datetime.datetime.fromtimestamp(
      float(diskValue[0])).strftime('%m/%d/%y %H:%M')
    disk = 100 - float(diskValue[1])
    result = model_disk_model.run({"disk": disk})
    prediction = result.inferences["multiStepBestPredictions"][1]
    anomalyScore = result.inferences['anomalyScore']
    anomalyLikelihood = anomalyLikelihoodHelper.anomalyProbability(
      disk, anomalyScore, timestamp)
    utility_disk = anomalyLikelihood * disk
    data = {
      'disk': float(disk),
      'prediction': float(prediction),
      'anomalyScore': float(anomalyScore),
      'anomalyLikelihood': float(anomalyLikelihood),
      'utility_disk': float(utility_disk)
    }
    js = json.dumps(data)
  else:
    data = {
      'disk': float(0.0),
      'prediction': float(0),
      'anomalyScore': float(0),
      'anomalyLikelihood': float(0),
      'utility_disk': float(0)
    }
    js = json.dumps(data)
  resp = Response(js, status=200, mimetype='application/json')
  resp.headers['Link'] = 'http://nupicapi.8888'
  return resp
def run_sine_experiment():
  input_file = "cpu.csv"
  #generate_data.run(input_file)
  #model_params = swarm_over_data()
  attempt = 0
  if PLOT:
    output = NuPICPlotOutput("final_cpu_output")
  else:
    output = NuPICFileOutput("final_cpu_output")
  #model = ModelFactory.create(model_params)
  model = ModelFactory.create(model_params.MODEL_PARAMS)
  model.enableInference({"predictedField": "cpu"})
  adapter = 0
  for row in range(1, 300):
    s = time.strftime(DATE_FORMAT)
    timestamp = datetime.datetime.strptime(s, DATE_FORMAT)
    cpu = float(psutil.cpu_percent(interval=1))
    result = model.run({"cpu": cpu})
    prediction = result.inferences["multiStepBestPredictions"][1]
    anomalyScore = result.inferences['anomalyScore']
    anomalyLikelihood = anomalyLikelihoodHelper.anomalyProbability(
      cpu, anomalyScore, timestamp)
    print 'anomalyLikelihood: ', anomalyLikelihood
    if anomalyScore > 0.75:
      print "anomalyScore is high: ", 'anomalyScore: ', str(anomalyScore), \
        'anomalyLikelihood: ', anomalyLikelihood, " CPU@: ", cpu, \
        " steps: ", str(adapter)
      adapter = adapter + 20
      if adapter >= 300:
        run_adaptation_strategy(attempt, cpu, anomalyLikelihood)
        attempt += 1
        adapter = 0
        print "reset timer for new adaptation action"
      else:
        print "anomalyScore is high: ", 'anomalyScore: ', str(anomalyScore), \
          'anomalyLikelihood: ', anomalyLikelihood, " CPU@: ", cpu, \
          " steps: ", str(adapter)
        run_adaptation_strategy(attempt, cpu, anomalyLikelihood)
        attempt += 1
    #with open("/tmp/output.log", "w") as loutput:
    #  subprocess.call("docker service scale web=1", shell=True,
    #                  stdout=loutput, stderr=loutput)
    #output.write(timestamp, cpu, prediction, anomalyScore)
    try:
      plt.pause(SECONDS_PER_STEP)
    except:
      pass
def run_mem_experiment():
  cur_dir = os.getcwd()
  input_file = os.path.join(cur_dir, 'mem/mem.csv')
  #mem_generate_data.run(input_file)
  print 'input_file', input_file
  model_params = swarm_over_data()
  model = ModelFactory.create(model_params)
  model.enableInference({"predictedField": "mem"})
  out_file = 'mem/final_mem.csv'
  if PLOT:
    output = NuPICPlotOutput(out_file)
  else:
    output = NuPICFileOutput(out_file)
  with open(input_file, "rb") as sine_input:
    csv_reader = csv.reader(sine_input)
    # skip header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()
    # the real data
    sumOfUtilityFitness = 0.0
    sumOfWeight = 0.0
    for row in csv_reader:
      timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
      mem = float(row[1])
      result = model.run({"mem": mem})
      prediction = result.inferences["multiStepBestPredictions"][1]
      anomalyScore = result.inferences['anomalyScore']
      anomalyLikelihood = anomalyLikelihoodHelper.anomalyProbability(
        mem, anomalyScore, timestamp)
      sumOfUtilityFitness += float(mem) * float(anomalyLikelihood)
      sumOfWeight += float(anomalyLikelihood)
      output.write(timestamp, mem, prediction, anomalyScore)
  output.close()
  print 'sumOfWeight: ', sumOfWeight, 'sumOfUtilityFitness: ', sumOfUtilityFitness
  result_output = 'mem/final_mem_out.csv'
  utilityOfmem = 0.0
  with open(result_output, "rb") as result_input:
    csv_reader = csv.reader(result_input)
    # skip header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()
    for row in csv_reader:
      anomalyLikelihood = float(row[3])
      utilityOfmem += (anomalyLikelihood * sumOfUtilityFitness) / sumOfWeight
  print 'utilityOfmem: ', utilityOfmem
  move_model()
def initalizeModels():
  MODELS = []
  for index in range(len(MODEL_NAMES)):
    model = ModelFactory.create(getModelParamsFromName(MODEL_NAMES[index]))
    model.enableInference({"predictedField": MODEL_NAMES[index]})
    MODELS.append(model)
  anomaly = nupic_output.NuPICFileOutput(SYSTEM_NAME)
  ANOMALY_OBJ = anomaly.anomalyLikelihoodHelper
  return MODELS, ANOMALY_OBJ
def createModel(useTimeEncoders, scale, verbose):
  params = model_params.MODEL_PARAMS
  if useTimeEncoders:
    params = addTimeEncoders(params)
  if scale:
    params = setEncoderScale(params, scale)
  if verbose:
    print "Model parameters:"
    print params
  model = ModelFactory.create(params)
  model.enableInference({"predictedField": "vector"})
  return model
def __createModel(self, expDir):
  # -----------------------------------------------------------------------
  # Load the experiment's description.py module
  descriptionPyModule = helpers.loadExperimentDescriptionScriptFromDir(expDir)
  expIface = helpers.getExperimentDescriptionInterfaceFromModule(
    descriptionPyModule)

  # -----------------------------------------------------------------------
  # Construct the model instance
  modelDescription = expIface.getModelDescription()
  return ModelFactory.create(modelDescription)
def get_net_observation():
  tstart = time.time()
  end = str(tstart + 30)
  start = str(tstart)
  model_net = ModelFactory.create(net_params.MODEL_PARAMS)
  model_net.enableInference({"predictedField": "bytes_sent"})
  response = requests.get(
    'http://*****:*****@' + prometheus +
    ':9090/api/v1/query?query=sum(rate(container_network_receive_bytes_total%7Bcontainer_label_com_docker_swarm_node_id%3D~".%2B"%7D%5B30s%5D))%20by%20(container_label_com_docker_swarm_service_name)&start=' +
    start + '&end=' + end + '&step=30',
    timeout=5)
  results = response.json()
  diskData = results['data']['result']
  result = 0
  prediction = 0
  anomalyScore = 0
  anomalyLikelihood = 0
  utility_net = 0
  net = 0
  js = None
  if len(diskData) > 0:
    diskValue = diskData[0]['value']
    timestamp = datetime.datetime.fromtimestamp(
      float(diskValue[0])).strftime('%m/%d/%y %H:%M')
    net = float(diskValue[1])
    result = model_net.run({"bytes_sent": net})
    prediction = result.inferences["multiStepBestPredictions"][1]
    anomalyScore = result.inferences['anomalyScore']
    anomalyLikelihood = anomalyLikelihoodHelper.anomalyProbability(
      net, anomalyScore, timestamp)
    utility_net = anomalyLikelihood * net
    data = {
      'net': float(net),
      'prediction': float(prediction),
      'anomalyScore': float(anomalyScore),
      'anomalyLikelihood': float(anomalyLikelihood),
      'utility_net': float(utility_net)
    }
    js = json.dumps(data)
  else:
    data = {
      'net': float(0),
      'prediction': float(0),
      'anomalyScore': float(0),
      'anomalyLikelihood': float(0),
      'utility_net': float(0)
    }
    js = json.dumps(data)
  resp = Response(js, status=200, mimetype='application/json')
  resp.headers['Link'] = 'http://nupicapi.8888/net'
  return resp
def createPredictionModel():
  with open(MODEL_PARAMS_PATH, "r") as dataIn:
    modelParams = json.loads(dataIn.read())

  minInput = 0
  maxInput = 100

  # RDSE resolution: spread the input range across the encoder's buckets,
  # with a floor of 0.001.
  valueEncoderParams = \
    modelParams["modelParams"]["sensorParams"]["encoders"]["value"]
  numBuckets = float(valueEncoderParams.pop("numBuckets"))
  resolution = max(0.001, (maxInput - minInput) / numBuckets)
  valueEncoderParams["resolution"] = resolution

  model = ModelFactory.create(modelParams)
  model.enableInference({"predictedField": "value"})
  return model
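# Worked example of the resolution formula above (numbers are illustrative,
# not taken from the params file): with minInput=0, maxInput=100 and
# numBuckets=130, resolution = max(0.001, (100 - 0) / 130.0) ~= 0.769, so
# scalar values within ~0.77 of each other land in the same RDSE bucket.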
def main(*args, **kwargs):
  """POS Experiment main entry point."""
  (options, args) = parser.parse_args()
  verbosity = NLTKReader.WARN
  if options.verbose:
    verbosity = NLTKReader.DEBUG
  reader = NLTKReader(input='./resources/text',
                      cache_dir='./cache/text',
                      verbosity=verbosity)
  if options.text_info:
    reader.text_report()
  if options.list_texts:
    print 'Available texts:'
    for t in reader.available_texts():
      print '\t%s' % t
  if options.input_text:
    target_text = options.input_text
  else:
    target_text = None
  if target_text is not None:
    if options.pos_report:
      print 'Parts of Speech found in %s:' % target_text
      for pos in reader.get_parts_of_speech(target_text):
        tag_description = reader.describe_tag(pos)
        print '\t%6s %s (%s)' % (pos, tag_description[0], tag_description[1])
    else:
      output_dir = options.output_dir
      model = ModelFactory.create(run_pos_model_params.MODEL_PARAMS)
      model.enableInference({'predictedField': 'pos'})
      if output_dir:
        if not os.path.exists(output_dir):
          os.mkdir(output_dir)
        output_file_path = os.path.join(output_dir, 'pos_out_' + target_text)
        # Clear the output file with a header.
        with open(output_file_path, 'w') as output_file:
          output_file.write('%10s%10s%20s\n' %
                            ('input', 'pos', 'predicted_pos'))
        # Append each result to output file.
        with open(output_file_path, 'a') as output_file:
          run_pos_experiment(model, reader, target_text, output_file)
      else:
        run_pos_experiment(model, reader, target_text)
def createPredictionModel(dataFrame):
  with open(MODEL_PARAMS_PATH, "r") as dataIn:
    modelParams = json.loads(dataIn.read())

  minInput, maxInput = getMinMax(dataFrame)

  # RDSE - resolution calculation
  valueEncoderParams = \
    modelParams["modelParams"]["sensorParams"]["encoders"]["value"]
  numBuckets = float(valueEncoderParams.pop("numBuckets"))
  resolution = max(0.001, (maxInput - minInput) / numBuckets)
  valueEncoderParams["resolution"] = resolution

  model = ModelFactory.create(modelParams)
  model.enableInference({"predictedField": "value"})
  return model
def __init__(self, modelConfig, inferenceArgs, metricSpecs, sourceSpec,
             sinkSpec=None):
  self.model = ModelFactory.create(modelConfig)
  self.model.enableInference(inferenceArgs)
  self.metricsManager = MetricsManager(metricSpecs,
                                       self.model.getFieldInfo(),
                                       self.model.getInferenceType())
  self.sink = None
  if sinkSpec is not None:
    # TODO: make this work - sinkSpec not yet supported.
    raise NotImplementedError('The sinkSpec is not yet implemented.')
    #self.sink = BasicPredictionLogger(
    #  self.model.getFieldInfo(), sinkSpec, 'myOutput',
    #  self.model.getInferenceType())
    #self.sink.setLoggedMetrics(
    #  self.metricsManager.getMetricLabels())
  self.datasetReader = BasicDatasetReader(sourceSpec)
def test_rule30_prediction_is_perfect_after_600_iterations(self):
  """
  Generates the Rule 30 elementary cellular automaton and passes it through
  NuPIC. Asserts that predictions are perfect after X rows of data.
  """
  iterations = 600
  model = ModelFactory.create(rule_30_model_params.MODEL_PARAMS)
  model.enableInference({"predictedField": PREDICTED_FIELD})
  prediction_history = deque(maxlen=500)
  counter = [0]
  last_prediction = [None]

  def stream_handler(row, _):
    counter[0] += 1
    input_row = {}
    for index, field in enumerate(row):
      input_row["bit_%i" % index] = str(field)
    prediction = last_prediction[0]
    predicted_index = int(PREDICTED_FIELD.split("_").pop())
    value = str(row[predicted_index])
    correct = (value == prediction)
    count = counter[0]
    if correct:
      prediction_history.append(1.0)
    else:
      prediction_history.append(0.0)
    correctness = reduce(lambda x, y: x + y,
                         prediction_history) / len(prediction_history)
    if count == iterations:
      self.assertEqual(
        1.0, correctness,
        "Predictions should be 100 percent correct after reaching %i "
        "iterations." % iterations)
    result = model.run(input_row)
    prediction = result.inferences["multiStepBestPredictions"][1]
    last_prediction[0] = prediction

  automaton = automatatron.Engine(RULE_NUMBER)
  automaton.run(handler=stream_handler, width=21, iterations=iterations)
def runCPU():
  """Poll CPU usage, make predictions, and plot the results. Runs forever."""
  # Create the model for predicting CPU usage.
  model = ModelFactory.create(model_params.MODEL_PARAMS)
  model.enableInference({'predictedField': 'cpu'})
  # The shifter will align prediction and actual values.
  shifter = InferenceShifter()
  # Keep the last WINDOW predicted and actual values for plotting.
  actHistory = deque([0.0] * WINDOW, maxlen=60)
  predHistory = deque([0.0] * WINDOW, maxlen=60)

  # Initialize the plot lines that we will update with each new record.
  actline, = plt.plot(range(WINDOW), actHistory)
  predline, = plt.plot(range(WINDOW), predHistory)
  # Set the y-axis range.
  actline.axes.set_ylim(0, 100)
  predline.axes.set_ylim(0, 100)

  while True:
    s = time.time()

    # Get the CPU usage.
    cpu = psutil.cpu_percent()

    # Run the input through the model and shift the resulting prediction.
    modelInput = {'cpu': cpu}
    result = shifter.shift(model.run(modelInput))

    # Update the trailing predicted and actual value deques.
    inference = result.inferences['multiStepBestPredictions'][5]
    if inference is not None:
      actHistory.append(result.rawInput['cpu'])
      predHistory.append(inference)

    # Redraw the chart with the new data.
    actline.set_ydata(actHistory)  # update the data
    predline.set_ydata(predHistory)  # update the data
    plt.draw()
    plt.legend(('actual', 'predicted'))

    # Make sure we wait a total of 2 seconds per iteration.
    try:
      plt.pause(SECONDS_PER_STEP)
    except:
      pass
def testPredictedFieldAndInferenceEnabledAreSaved(self):
  m1 = ModelFactory.create(PY_MODEL_PARAMS)
  m1.enableInference({'predictedField': 'consumption'})
  self.assertTrue(m1.isInferenceEnabled())
  self.assertEqual(m1.getInferenceArgs().get('predictedField'), 'consumption')

  headers = ['timestamp', 'consumption']
  record = [datetime.datetime(2013, 12, 12), numpy.random.uniform(100)]
  modelInput = dict(zip(headers, record))
  m1.run(modelInput)

  # Serialize
  builderProto = HTMPredictionModelProto.new_message()
  m1.write(builderProto)

  # Construct HTMPredictionModelProto reader from populated builder
  readerProto = HTMPredictionModelProto.from_bytes(builderProto.to_bytes())

  # Deserialize
  m2 = HTMPredictionModel.read(readerProto)
  self.assertTrue(m2.isInferenceEnabled())
  self.assertEqual(m2.getInferenceArgs().get('predictedField'), 'consumption')

  # Running the deserialized m2 without a redundant enableInference call
  # should work
  record = [datetime.datetime(2013, 12, 14), numpy.random.uniform(100)]
  modelInput = dict(zip(headers, record))
  m2.run(modelInput)

  # Check that disabled inference is saved, too (since the constructor
  # defaults to enabled at the time of this writing)
  m1.disableInference()
  self.assertFalse(m1.isInferenceEnabled())
  builderProto = HTMPredictionModelProto.new_message()
  m1.write(builderProto)
  readerProto = HTMPredictionModelProto.from_bytes(builderProto.to_bytes())
  m3 = HTMPredictionModel.read(readerProto)
  self.assertFalse(m3.isInferenceEnabled())
def createModel(modelParams):
  model = ModelFactory.create(modelParams)
  model.enableInference({"predictedField": "kw_energy_consumption"})
  return model
def create_model(model_params, bin):
  model = ModelFactory.create(model_params)
  model.enableInference({"predictedField": bin})
  return model
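# Hedged usage sketch for create_model() above: because the predicted field
# name is a parameter (`bin`), one model can be built per field. MODEL_PARAMS
# and the bin names here are illustrative assumptions.
models = {}
for bin_name in ["bin_0", "bin_1"]:
  models[bin_name] = create_model(MODEL_PARAMS, bin_name)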
def testHelloWorldPrediction(self):
  text = 'hello world.'
  categories = list("abcdefghijklmnopqrstuvwxyz 1234567890.")
  colsPerChar = 11
  numColumns = (len(categories) + 1) * colsPerChar

  MODEL_PARAMS = {
    "model": "HTMPrediction",
    "version": 1,
    "predictAheadTime": None,
    "modelParams": {
      "inferenceType": "TemporalMultiStep",
      "sensorParams": {
        "verbosity": 0,
        "encoders": {
          "token": {
            "fieldname": u"token",
            "name": u"token",
            "type": "CategoryEncoder",
            "categoryList": categories,
            "w": colsPerChar,
            "forced": True,
          }
        },
        "sensorAutoReset": None,
      },
      "spEnable": False,
      "spParams": {
        "spVerbosity": 0,
        "globalInhibition": 1,
        "columnCount": 2048,
        "inputWidth": 0,
        "numActiveColumnsPerInhArea": 40,
        "seed": 1956,
        "columnDimensions": 0.5,
        "synPermConnected": 0.1,
        "synPermActiveInc": 0.1,
        "synPermInactiveDec": 0.01,
        "boostStrength": 0.0,
      },
      "tmEnable": True,
      "tmParams": {
        "verbosity": 0,
        "columnCount": numColumns,
        "cellsPerColumn": 16,
        "inputWidth": numColumns,
        "seed": 1960,
        "temporalImp": "tm_cpp",
        "newSynapseCount": 6,
        "maxSynapsesPerSegment": 11,
        "maxSegmentsPerCell": 32,
        "initialPerm": 0.21,
        "permanenceInc": 0.1,
        "permanenceDec": 0.05,
        "globalDecay": 0.0,
        "maxAge": 0,
        "minThreshold": 3,
        "activationThreshold": 5,
        "outputType": "normal",
      },
      "clParams": {
        "implementation": "py",
        "regionName": "SDRClassifierRegion",
        "verbosity": 0,
        "alpha": 0.1,
        "steps": "1",
      },
      "trainSPNetOnlyIfRequested": False,
    },
  }

  model = ModelFactory.create(MODEL_PARAMS)
  model.enableInference({"predictedField": "token"})
  model.enableLearning()

  # train
  prediction = None
  for rpt in xrange(20):
    for token in text:
      if prediction is not None:
        if rpt > 15:
          self.assertEqual(prediction, token)
      modelInput = {"token": token}
      result = model.run(modelInput)
      prediction = sorted(result.inferences["multiStepPredictions"][1].items(),
                          key=itemgetter(1),
                          reverse=True)[0][0]
    model.resetSequenceStates()
    prediction = None
def createModel():
  return ModelFactory.create(model_params.MODEL_PARAMS)
def createModel(modelParams):
  model = ModelFactory.create(modelParams)
  model.enableInference({"predictedField": predictedField})
  return model
def _runExperimentImpl(options, model=None):
  """Creates and runs the experiment

  Args:
    options: namedtuple ParseCommandLineOptionsResult
    model: For testing: may pass in an existing OPF Model instance
        to use instead of creating a new one.

  Returns: reference to OPFExperiment instance that was constructed (this
      is provided to aid with debugging) or None, if none was
      created.
  """
  json_helpers.validate(options.privateOptions,
                        schemaDict=g_parsedPrivateCommandLineOptionsSchema)

  # Load the experiment's description.py module
  experimentDir = options.experimentDir
  descriptionPyModule = helpers.loadExperimentDescriptionScriptFromDir(
    experimentDir)
  expIface = helpers.getExperimentDescriptionInterfaceFromModule(
    descriptionPyModule)

  # Handle "list checkpoints" request
  if options.privateOptions['listAvailableCheckpoints']:
    _printAvailableCheckpoints(experimentDir)
    return None

  # Load experiment tasks
  experimentTasks = expIface.getModelControl().get('tasks', [])

  # If the tasks list is empty, and this is a nupic environment description
  # file being run from the OPF, convert it to a simple OPF description file.
  if (len(experimentTasks) == 0 and
      expIface.getModelControl()['environment'] == OpfEnvironment.Nupic):
    expIface.convertNupicEnvToOPF()
    experimentTasks = expIface.getModelControl().get('tasks', [])

  # Ensures all the source locations are either absolute paths or relative to
  # the nupic.datafiles package_data location.
  expIface.normalizeStreamSources()

  # Extract option
  newSerialization = options.privateOptions['newSerialization']

  # Handle listTasks
  if options.privateOptions['listTasks']:
    print "Available tasks:"
    for label in [t['taskLabel'] for t in experimentTasks]:
      print "\t", label
    return None

  # Construct the experiment instance
  if options.privateOptions['runCheckpointName']:
    assert model is None
    checkpointName = options.privateOptions['runCheckpointName']
    model = ModelFactory.loadFromCheckpoint(
      savedModelDir=_getModelCheckpointDir(experimentDir, checkpointName),
      newSerialization=newSerialization)
  elif model is not None:
    print "Skipping creation of OPFExperiment instance: caller provided his own"
  else:
    modelDescription = expIface.getModelDescription()
    model = ModelFactory.create(modelDescription)

  # Handle "create model" request
  if options.privateOptions['createCheckpointName']:
    checkpointName = options.privateOptions['createCheckpointName']
    _saveModel(model=model,
               experimentDir=experimentDir,
               checkpointLabel=checkpointName,
               newSerialization=newSerialization)
    return model

  # Build the task list

  # Default task execution index list is in the natural list order of the tasks
  taskIndexList = range(len(experimentTasks))

  customTaskExecutionLabelsList = options.privateOptions['taskLabels']
  if customTaskExecutionLabelsList:
    taskLabelsList = [t['taskLabel'] for t in experimentTasks]
    taskLabelsSet = set(taskLabelsList)

    customTaskExecutionLabelsSet = set(customTaskExecutionLabelsList)

    assert customTaskExecutionLabelsSet.issubset(taskLabelsSet), \
      ("Some custom-provided task execution labels don't correspond "
       "to actual task labels: mismatched labels: %r; actual task "
       "labels: %r.") % (customTaskExecutionLabelsSet - taskLabelsSet,
                         customTaskExecutionLabelsList)

    taskIndexList = [taskLabelsList.index(label)
                     for label in customTaskExecutionLabelsList]

    print "#### Executing custom task list: %r" % [taskLabelsList[i]
                                                   for i in taskIndexList]

  # Run all experiment tasks
  for taskIndex in taskIndexList:
    task = experimentTasks[taskIndex]

    # Create a task runner and run it!
    taskRunner = _TaskRunner(model=model,
                             task=task,
                             cmdOptions=options)
    taskRunner.run()
    del taskRunner

    if options.privateOptions['checkpointModel']:
      _saveModel(model=model,
                 experimentDir=experimentDir,
                 checkpointLabel=task['taskLabel'],
                 newSerialization=newSerialization)

  return model
def resurrect_model(saved_model):
  return ModelFactory.loadFromCheckpoint(saved_model)
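# Hedged round-trip sketch: loadFromCheckpoint() expects a directory that was
# previously produced by the OPF Model.save() API. The params module and the
# checkpoint directory name below are illustrative assumptions.
model = ModelFactory.create(model_params.MODEL_PARAMS)
model.save("/tmp/cpu_model_checkpoint")
model = resurrect_model("/tmp/cpu_model_checkpoint")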
def run():
  params = model_params.MODEL_PARAMS
  model = ModelFactory.create(params)
  model.enableInference({"predictedField": "vector"})
  socket_cycle(model)
def main():
  # Create HTM prediction model and enable inference on the page field
  model = ModelFactory.create(MODEL_PARAMS)
  model.enableInference({"predictedField": "page"})

  # Use the model encoder to display the encoded SDRs the model will learn
  sdr_table = PrettyTable(field_names=["Page Category",
                                       "Encoded SDR (on bit indices)"],
                          sortby="Page Category")
  sdr_table.align = "l"

  encoder = model._getEncoder()
  sdrout = np.zeros(encoder.getWidth(), dtype=np.bool)
  for page in PAGE_CATEGORIES:
    encoder.encodeIntoArray({"page": page}, sdrout)
    sdr_table.add_row([page, sdrout.nonzero()[0]])

  print "The following table shows the encoded SDRs for every page " \
        "category in the dataset"
  print sdr_table

  # At this point our model is configured and ready to learn the user sessions
  # Extract the learning data from MSNBC archive and stream it to the model
  filename = os.path.join(os.path.dirname(__file__), "msnbc990928.zip")
  with zipfile.ZipFile(filename) as archive:
    with archive.open("msnbc990928.seq") as datafile:
      # Skip header lines (first 7 lines)
      for _ in xrange(7):
        next(datafile)

      print
      print "Start learning page sequences using the first {} user " \
            "sessions".format(LEARNING_RECORDS)
      model.enableLearning()
      for count in xrange(LEARNING_RECORDS):
        # Learn each user session as a single sequence
        session = readUserSession(datafile)
        model.resetSequenceStates()
        for page in session:
          model.run({"page": page})
        # Simple progress status
        sys.stdout.write("\rLearned {} Sessions".format(count + 1))
        sys.stdout.flush()
      print "\nFinished learning"
      model.disableLearning()

      # Use the new HTM model to predict the next user session.
      # The test data starts right after the learning data.
      print
      print "Start Inference using a new user session from the dataset"
      prediction_table = PrettyTable(field_names=["Page", "Prediction"],
                                     hrules=prettytable.ALL)
      prediction_table.align["Prediction"] = "l"

      # Infer one page of the sequence at a time
      model.resetSequenceStates()
      session = readUserSession(datafile)
      for page in session:
        result = model.run({"page": page})
        inferences = result.inferences["multiStepPredictions"][1]
        # Print predictions ordered by probabilities
        predicted = sorted(inferences.items(),
                           key=itemgetter(1),
                           reverse=True)
        prediction_table.add_row([page, zip(*predicted)[0]])

      print "User Session to Predict: ", session
      print prediction_table

      print
      print "Compute prediction accuracy by checking if the next page in " \
            "the sequence is within the predicted pages calculated by the " \
            "model:"
      accuracy = computeAccuracy(model, 100, 1)
      print " - Prediction Accuracy:", accuracy
      accuracy = computeAccuracy(model, 100, 3)
      print " - Accuracy Predicting Top 3 Pages:", accuracy
def testTemporalAnomalyModelFactory(self):
  """
  Simple test to assert that ModelFactory.create() with a given specific
  Temporal Anomaly configuration will return a model that can return
  inferences
  """
  modelConfig = {
    u'aggregationInfo': {u'days': 0, u'fields': [], u'hours': 0,
                         u'microseconds': 0, u'milliseconds': 0,
                         u'minutes': 0, u'months': 0, u'seconds': 0,
                         u'weeks': 0, u'years': 0},
    u'model': u'HTMPrediction',
    u'modelParams': {
      u'anomalyParams': {u'anomalyCacheRecords': None,
                         u'autoDetectThreshold': None,
                         u'autoDetectWaitRecords': 5030},
      u'clEnable': False,
      u'clParams': {u'alpha': 0.035828933612158,
                    u'verbosity': 0,
                    u'regionName': u'SDRClassifierRegion',
                    u'steps': u'1'},
      u'inferenceType': u'TemporalAnomaly',
      u'sensorParams': {
        u'encoders': {
          u'c0_dayOfWeek': None,
          u'c0_timeOfDay': {u'fieldname': u'c0',
                            u'name': u'c0',
                            u'timeOfDay': [21, 9.49122334747737],
                            u'type': u'DateEncoder'},
          u'c0_weekend': None,
          u'c1': {u'fieldname': u'c1',
                  u'name': u'c1',
                  u'resolution': 0.8771929824561403,
                  u'seed': 42,
                  u'type': u'RandomDistributedScalarEncoder'}},
        u'sensorAutoReset': None,
        u'verbosity': 0},
      u'spEnable': True,
      u'spParams': {u'potentialPct': 0.8,
                    u'columnCount': 2048,
                    u'globalInhibition': 1,
                    u'inputWidth': 0,
                    u'boostStrength': 0.0,
                    u'numActiveColumnsPerInhArea': 40,
                    u'seed': 1956,
                    u'spVerbosity': 0,
                    u'spatialImp': u'cpp',
                    u'synPermActiveInc': 0.0015,
                    u'synPermConnected': 0.1,
                    u'synPermInactiveDec': 0.0005},
      u'tmEnable': True,
      u'tmParams': {u'activationThreshold': 13,
                    u'cellsPerColumn': 32,
                    u'columnCount': 2048,
                    u'globalDecay': 0.0,
                    u'initialPerm': 0.21,
                    u'inputWidth': 2048,
                    u'maxAge': 0,
                    u'maxSegmentsPerCell': 128,
                    u'maxSynapsesPerSegment': 32,
                    u'minThreshold': 10,
                    u'newSynapseCount': 20,
                    u'outputType': u'normal',
                    u'pamLength': 3,
                    u'permanenceDec': 0.1,
                    u'permanenceInc': 0.1,
                    u'seed': 1960,
                    u'temporalImp': u'cpp',
                    u'verbosity': 0},
      u'trainSPNetOnlyIfRequested': False},
    u'predictAheadTime': None,
    u'version': 1}

  inferenceArgs = {u'inputPredictedField': u'auto',
                   u'predictedField': u'c1',
                   u'predictionSteps': [1]}

  data = [
    {'_category': [None], '_reset': 0, '_sequenceId': 0,
     '_timestamp': datetime.datetime(2013, 12, 5, 0, 0),
     '_timestampRecordIdx': None,
     u'c0': datetime.datetime(2013, 12, 5, 0, 0), u'c1': 5.0},
    {'_category': [None], '_reset': 0, '_sequenceId': 0,
     '_timestamp': datetime.datetime(2013, 12, 6, 0, 0),
     '_timestampRecordIdx': None,
     u'c0': datetime.datetime(2013, 12, 6, 0, 0), u'c1': 6.0},
    {'_category': [None], '_reset': 0, '_sequenceId': 0,
     '_timestamp': datetime.datetime(2013, 12, 7, 0, 0),
     '_timestampRecordIdx': None,
     u'c0': datetime.datetime(2013, 12, 7, 0, 0), u'c1': 7.0}]

  model = ModelFactory.create(modelConfig=modelConfig)
  model.enableLearning()
  model.enableInference(inferenceArgs)

  for row in data:
    result = model.run(row)
    self.assertIsInstance(result, ModelResult)
def createModel():
  _setRandomEncoderResolution()
  return ModelFactory.create(model_params.MODEL_PARAMS)
predictedField = "kw_energy_consumption" elif dataSet == "nyc_taxi" or dataSet == "nyc_taxi_perturb" or dataSet =="nyc_taxi_perturb_baseline": DATE_FORMAT = '%Y-%m-%d %H:%M:%S' predictedField = "passenger_count" else: raise RuntimeError("un recognized dataset") modelParams = getModelParamsFromName(dataSet) modelParams['modelParams']['clParams']['steps'] = str(_options.stepsAhead) modelParams['modelParams']['clParams']['regionName'] = classifierType print "Creating model from %s..." % dataSet # use customized CLA model model = ModelFactory.create(modelParams) model.enableInference({"predictedField": predictedField}) model.enableLearning() model._spLearningEnabled = True model._tpLearningEnabled = True printTPRegionParams(model._getTPRegion()) inputData = "%s/%s.csv" % (DATA_DIR, dataSet.replace(" ", "_")) sensor = model._getSensorRegion() encoderList = sensor.getSelf().encoder.getEncoderList() if sensor.getSelf().disabledEncoder is not None: classifier_encoder = sensor.getSelf().disabledEncoder.getEncoderList() classifier_encoder = classifier_encoder[0] else:
def createModel():
  with open(_PARAMS_PATH, "r") as f:
    modelParams = yaml.safe_load(f)
  return ModelFactory.create(modelParams)
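# Hedged usage sketch: the YAML file at _PARAMS_PATH is assumed to hold a full
# OPF model config; the "value" field name and sample record are illustrative,
# borrowed from the sibling snippets. Note the model still needs
# enableInference() before run().
model = createModel()
model.enableInference({"predictedField": "value"})
result = model.run({"value": 42.0})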
def _runModelSerializationDeserializationChecks(self, modelParams):
  m1 = ModelFactory.create(modelParams)
  m1.enableInference({'predictedField': 'consumption'})
  headers = ['timestamp', 'consumption']
  record = [datetime.datetime(2013, 12, 12), numpy.random.uniform(100)]
  modelInput = dict(zip(headers, record))
  m1.run(modelInput)

  # Serialize
  builderProto = HTMPredictionModelProto.new_message()
  m1.write(builderProto)

  # Construct HTMPredictionModelProto reader from populated builder
  readerProto = HTMPredictionModelProto.from_bytes(builderProto.to_bytes())

  # Deserialize
  m2 = HTMPredictionModel.read(readerProto)

  self.assertEqual(m1.getInferenceType(),
                   modelParams['modelParams']['inferenceType'])
  self.assertEqual(m1.getInferenceType(), m2.getInferenceType())

  # Run computes on m1 & m2 and compare results
  record = [datetime.datetime(2013, 12, 14), numpy.random.uniform(100)]
  modelInput = dict(zip(headers, record))

  # Use deepcopy to guarantee no input side-effect between calls
  r1 = m1.run(copy.deepcopy(modelInput))
  r2 = m2.run(copy.deepcopy(modelInput))

  # Compare results
  self.assertEqual(r2.predictionNumber, r1.predictionNumber)
  self.assertEqual(r2.rawInput, r1.rawInput)

  self.assertEqual(r2.sensorInput.dataRow, r1.sensorInput.dataRow)
  self.assertEqual(r2.sensorInput.dataDict, r1.sensorInput.dataDict)
  numpy.testing.assert_array_equal(r2.sensorInput.dataEncodings,
                                   r1.sensorInput.dataEncodings)
  self.assertEqual(r2.sensorInput.sequenceReset, r1.sensorInput.sequenceReset)
  self.assertEqual(r2.sensorInput.category, r1.sensorInput.category)

  self.assertEqual(r2.inferences, r1.inferences)
  self.assertEqual(r2.metrics, r1.metrics)
  self.assertEqual(r2.predictedFieldIdx, r1.predictedFieldIdx)
  self.assertEqual(r2.predictedFieldName, r1.predictedFieldName)

  numpy.testing.assert_array_equal(r2.classifierInput.dataRow,
                                   r1.classifierInput.dataRow)
  self.assertEqual(r2.classifierInput.bucketIndex,
                   r1.classifierInput.bucketIndex)

  # Compare regions
  self.assertIsNotNone(m2._getSensorRegion())
  self.assertEqual(m2._getSensorRegion(), m1._getSensorRegion())
  self.assertIsNotNone(m2._getClassifierRegion())
  self.assertEqual(m2._getClassifierRegion(), m1._getClassifierRegion())
  self.assertIsNotNone(m2._getTPRegion())
  self.assertEqual(m2._getTPRegion(), m1._getTPRegion())
  self.assertIsNotNone(m2._getSPRegion())
  self.assertEqual(m2._getSPRegion(), m1._getSPRegion())
def reset(self, params, repetition):
  random.seed(params['seed'])

  if params['dataset'] == 'simple':
    self.dataset = SimpleDataset()
  elif params['dataset'] == 'reber':
    self.dataset = ReberDataset(maxLength=params['max_length'])
  elif params['dataset'] == 'high-order':
    self.dataset = HighOrderDataset(
      numPredictions=params['num_predictions'],
      seed=params['seed'],
      smallAlphabet=params['use_small_alphabet'])
    print "Sequence dataset: "
    print " Symbol Number {}".format(self.dataset.numSymbols)
    for seq in self.dataset.sequences:
      print seq
  elif params['dataset'] == 'high-order-long':
    self.dataset = LongHighOrderDataset(params['sequence_length'],
                                        seed=params['seed'])
    print "Sequence dataset: "
    print " Symbol Number {}".format(self.dataset.numSymbols)
    for seq in self.dataset.sequences:
      print seq
  else:
    raise Exception("Dataset not found")

  self.randomStart = self.dataset.numSymbols + 1
  self.randomEnd = self.randomStart + 5000

  MODEL_PARAMS['modelParams']['sensorParams']['encoders']['element']\
    ['categoryList'] = range(self.randomEnd)

  # if not os.path.exists(resultsDir):
  #   os.makedirs(resultsDir)
  # self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')

  if params['verbosity'] > 0:
    print " initializing HTM model..."
    # print MODEL_PARAMS
  self.model = ModelFactory.create(MODEL_PARAMS)
  self.model.enableInference({"predictedField": "element"})
  # self.classifier = SDRClassifier(steps=[1], alpha=0.001)
  print "finish initializing HTM model "

  if params['kill_cell_percent'] > 0:
    # a hack to use faulty temporal memory instead
    self.model._getTPRegion().getSelf()._tfdr = MonitoredFaultyTPShim(
      numberOfCols=2048,
      cellsPerColumn=32,
      newSynapseCount=32,
      maxSynapsesPerSegment=128,
      maxSegmentsPerCell=128,
      initialPerm=0.21,
      connectedPerm=0.50,
      permanenceInc=0.10,
      permanenceDec=0.10,
      predictedSegmentDecrement=0.01,
      minThreshold=15,
      activationThreshold=15,
      seed=1960)

  self.mapping = getEncoderMapping(self.model, self.dataset.numSymbols)

  self.numPredictedActiveCells = []
  self.numPredictedInactiveCells = []
  self.numUnpredictedActiveColumns = []

  self.currentSequence = []
  self.targetPrediction = []
  self.replenish_sequence(params, iteration=0)

  self.resets = []
  self.randoms = []
  self.verbosity = 1
  self.sequenceCounter = 0