def run(numRecords):
  """Run the Bitcoin prediction example."""
  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set the predicted field on both the region instance and its parameters.
  network.regions["sensor"].getSelf().predictedField = "price"
  network.regions["sensor"].setParameter("predictedField", "price")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = _RUN_EPOCH  # Run the network, N iterations at a time.
  graph = Graph({
      'title': 'Bitcoin Prediction',
      'y_label': 'price',
      'y_lim': 'auto',
      'prediction_num': 2,
      'line_labels': ['1-step', '5-step']
  })
  for iteration in range(0, numRecords, N):
    if iteration % _RUN_INTERVAL == 0:
      network.run(N)
    price = network.regions["sensor"].getOutputData("sourceOut")[0]

    predictionResults = getPredictionResults(network, "classifier")
    oneStep = predictionResults[1]["predictedValue"]
    oneStepConfidence = predictionResults[1]["predictionConfidence"]
    fiveStep = predictionResults[5]["predictedValue"]
    fiveStepConfidence = predictionResults[5]["predictionConfidence"]

    result = (oneStep, oneStepConfidence * 100,
              fiveStep, fiveStepConfidence * 100)
    if iteration % _PRINT_INTERVAL == 0:
      print "iteration: {}".format(iteration)
      print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
    results.append(result)
    graph.write(price, [oneStep, fiveStep])

  graph.close()
  return results
def runDemo():
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = dataSource.getDataRowCount()
  print "Creating network"
  network = createNetwork(dataSource)
  outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_FILE_NAME)
  with open(outputPath, "w") as outputFile:
    writer = csv.writer(outputFile)
    print "Running network"
    print "Writing output to: %s" % outputPath
    runNetwork(network, numRecords, writer)
  print "Hierarchy demo finished"
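# runNetwork is called above but not defined in this snippet. The following
# is a minimal sketch (an assumption, not the original implementation): step
# the network one record at a time and log actual vs. 1-step-predicted values
# through the CSV writer. It relies on getPredictionResults, which is sketched
# below after runHotgym.
def runNetwork(network, numRecords, writer):
  sensorRegion = network.regions["sensor"]
  for i in xrange(numRecords):
    network.run(1)
    # Actual value as fed in by the sensor region.
    actual = sensorRegion.getOutputData("sourceOut")[0]
    # 1-step prediction from the classifier region.
    predictionResults = getPredictionResults(network, "classifier")
    predicted = predictionResults[1]["predictedValue"]
    writer.writerow([i, actual, predicted])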
def run(numRecords):
  """Run the sine prediction example."""
  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set the predicted field on both the region instance and its parameters.
  network.regions["sensor"].getSelf().predictedField = "sine"
  network.regions["sensor"].setParameter("predictedField", "sine")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = 1  # Run the network, N iterations at a time.
  output = nupic_output.NuPICPlotOutput("Sine", show_anomaly_score=True)
  for iteration in range(0, numRecords, N):
    network.run(N)
    sine = network.regions["sensor"].getOutputData("sourceOut")[0]

    predictionResults = getPredictionResults(network, "classifier")
    oneStep = predictionResults[1]["predictedValue"]
    oneStepConfidence = predictionResults[1]["predictionConfidence"]
    tenStep = predictionResults[10]["predictedValue"]
    tenStepConfidence = predictionResults[10]["predictionConfidence"]

    result = (oneStep, oneStepConfidence * 100,
              tenStep, tenStepConfidence * 100)
    print "1-step: {:16} ({:4.4}%)\t 10-step: {:16} ({:4.4}%)".format(*result)
    results.append(result)
    output.write(sine, oneStep, 0)

  output.close()
  return results
def runHotgym(numRecords):
  """Run the Hot Gym example."""
  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set the predicted field index. It must match the field's index in the
  # data source.
  predictedIdx = dataSource.getFieldNames().index("consumption")
  network.regions["sensor"].setParameter("predictedFieldIdx", predictedIdx)

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = 1  # Run the network, N iterations at a time.
  for iteration in range(0, numRecords, N):
    network.run(N)

    predictionResults = getPredictionResults(network, "classifier")
    oneStep = predictionResults[1]["predictedValue"]
    oneStepConfidence = predictionResults[1]["predictionConfidence"]
    fiveStep = predictionResults[5]["predictedValue"]
    fiveStepConfidence = predictionResults[5]["predictionConfidence"]

    result = (oneStep, oneStepConfidence * 100,
              fiveStep, fiveStepConfidence * 100)
    print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
    results.append(result)

  return results
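# The run functions above all call getPredictionResults, which isn't shown.
# This sketch is modeled on the NuPIC network-API example; the stepsList and
# maxCategoryCount attributes of the classifier region are assumptions based
# on that example and may differ for other classifier implementations.
def getPredictionResults(network, clRegionName):
  """Extract (predictedValue, predictionConfidence) per prediction step."""
  classifierRegion = network.regions[clRegionName]
  actualValues = classifierRegion.getOutputData("actualValues")
  probabilities = classifierRegion.getOutputData("probabilities")
  steps = classifierRegion.getSelf().stepsList
  N = classifierRegion.getSelf().maxCategoryCount
  results = {step: {} for step in steps}
  for i in range(len(steps)):
    # Probabilities for this prediction step only (N slots per step).
    stepProbabilities = probabilities[i * N:(i + 1) * N]
    mostLikelyCategoryIdx = stepProbabilities.argmax()
    results[steps[i]]["predictedValue"] = actualValues[mostLikelyCategoryIdx]
    results[steps[i]]["predictionConfidence"] = (
        stepProbabilities[mostLikelyCategoryIdx])
  return results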
class _BasicPredictionWriter(PredictionWriterIface):
  """This class defines the basic (file-based) implementation of
  PredictionWriterIface, whose instances are returned by
  BasicPredictionWriterFactory.
  """

  def __init__(self, experimentDir, label, inferenceType,
               fields, metricNames=None, checkpointSource=None):
    """Constructor

    experimentDir:    experiment directory path that contains description.py
    label:            A label string to incorporate into the filename.
    inferenceType:    A constant from opfutils.InferenceType for the
                      requested prediction writer
    fields:           a non-empty sequence of nupic.data.fieldmeta.FieldMetaInfo
                      representing fields that will be emitted to this
                      prediction writer
    metricNames:      OPTIONAL - A list of metric names that will be emitted
                      by this prediction writer
    checkpointSource: If not None, a File-like object containing the
                      previously-checkpointed predictions for setting the
                      initial contents of this PredictionOutputStream. Will
                      be copied before returning, if needed.
    """
    #assert len(fields) > 0

    self.__experimentDir = experimentDir

    # opfutils.InferenceType kind value
    self.__inferenceType = inferenceType

    # A tuple of nupic.data.fieldmeta.FieldMetaInfo
    self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
    self.__numInputFields = len(self.__inputFieldsMeta)
    self.__label = label
    if metricNames is not None:
      metricNames.sort()
    self.__metricNames = metricNames

    # Define our output field meta info
    self.__outputFieldsMeta = []

    # The list of inputs that we include in the prediction output
    self._rawInputNames = []

    # Output dataset
    self.__datasetPath = None
    self.__dataset = None

    # Save checkpoint data until we're ready to create the output dataset
    self.__checkpointCache = None
    if checkpointSource is not None:
      checkpointSource.seek(0)
      self.__checkpointCache = StringIO.StringIO()
      shutil.copyfileobj(checkpointSource, self.__checkpointCache)

    return


  def __openDatafile(self, modelResult):
    """Open the data file and write the header row"""

    # Write reset bit
    resetFieldMeta = FieldMetaInfo(name="reset",
                                   type=FieldMetaType.integer,
                                   special=FieldMetaSpecial.reset)
    self.__outputFieldsMeta.append(resetFieldMeta)

    # -----------------------------------------------------------------------
    # Write each of the raw inputs that go into the encoders
    rawInput = modelResult.rawInput
    rawFields = rawInput.keys()
    rawFields.sort()
    for field in rawFields:
      if field.startswith('_') or field == 'reset':
        continue
      value = rawInput[field]
      meta = FieldMetaInfo(name=field,
                           type=FieldMetaType.string,
                           special=FieldMetaSpecial.none)
      self.__outputFieldsMeta.append(meta)
      self._rawInputNames.append(field)

    # -----------------------------------------------------------------------
    # Handle each of the inference elements
    for inferenceElement, value in modelResult.inferences.iteritems():
      inferenceLabel = InferenceElement.getLabel(inferenceElement)

      # TODO: Right now we assume list inferences are associated with
      # the input field metadata
      if type(value) in (list, tuple):
        # Append input and prediction field meta-info
        self.__outputFieldsMeta.extend(self.__getListMetaInfo(inferenceElement))
      elif isinstance(value, dict):
        self.__outputFieldsMeta.extend(
            self.__getDictMetaInfo(inferenceElement, value))
      else:
        if InferenceElement.getInputElement(inferenceElement):
          self.__outputFieldsMeta.append(
              FieldMetaInfo(name=inferenceLabel + ".actual",
                            type=FieldMetaType.string,
                            special=''))
        self.__outputFieldsMeta.append(
            FieldMetaInfo(name=inferenceLabel,
                          type=FieldMetaType.string,
                          special=''))

    if self.__metricNames:
      for metricName in self.__metricNames:
        metricField = FieldMetaInfo(name=metricName,
                                    type=FieldMetaType.float,
                                    special=FieldMetaSpecial.none)
        self.__outputFieldsMeta.append(metricField)

    # Create the inference directory for our experiment
    inferenceDir = _FileUtils.createExperimentInferenceDir(self.__experimentDir)

    # Construct the prediction dataset file path
    filename = (self.__label + "." +
                opfutils.InferenceType.getLabel(self.__inferenceType) +
                ".predictionLog.csv")
    self.__datasetPath = os.path.join(inferenceDir, filename)

    # Create the output dataset
    print "OPENING OUTPUT FOR PREDICTION WRITER AT: %r" % self.__datasetPath
    print "Prediction field-meta: %r" % (
        [tuple(i) for i in self.__outputFieldsMeta],)
    self.__dataset = FileRecordStream(streamID=self.__datasetPath,
                                      write=True,
                                      fields=self.__outputFieldsMeta)

    # Copy data from checkpoint cache
    if self.__checkpointCache is not None:
      self.__checkpointCache.seek(0)
      reader = csv.reader(self.__checkpointCache, dialect='excel')

      # Skip header row
      try:
        header = reader.next()
      except StopIteration:
        print "Empty record checkpoint initializer for %r" % (
            self.__datasetPath,)
      else:
        assert tuple(self.__dataset.getFieldNames()) == tuple(header), \
          "dataset.getFieldNames(): %r; predictionCheckpointFieldNames: %r" % (
            tuple(self.__dataset.getFieldNames()), tuple(header))

      # Copy the rows from checkpoint
      numRowsCopied = 0
      while True:
        try:
          row = reader.next()
        except StopIteration:
          break

        #print "DEBUG: restoring row from checkpoint: %r" % (row,)
        self.__dataset.appendRecord(row)
        numRowsCopied += 1

      self.__dataset.flush()

      print "Restored %d rows from checkpoint for %r" % (
          numRowsCopied, self.__datasetPath)

      # Dispose of our checkpoint cache
      self.__checkpointCache.close()
      self.__checkpointCache = None

    return


  def setLoggedMetrics(self, metricNames):
    """Tell the writer which metrics should be written

    Parameters:
    -----------------------------------------------------------------------
    metricNames:      A list of metric labels to be written
    """
    if metricNames is None:
      self.__metricNames = set([])
    else:
      self.__metricNames = set(metricNames)


  def close(self):
    """[virtual method override] Closes the writer (e.g., close the
    underlying file)
    """
    if self.__dataset:
      self.__dataset.close()
    self.__dataset = None

    return


  def __getListMetaInfo(self, inferenceElement):
    """Get field metadata information for inferences that are of list type

    TODO: Right now we assume list inferences are associated with the input
    field metadata
    """
    fieldMetaInfo = []
    inferenceLabel = InferenceElement.getLabel(inferenceElement)

    for inputFieldMeta in self.__inputFieldsMeta:
      if InferenceElement.getInputElement(inferenceElement):
        outputFieldMeta = FieldMetaInfo(name=inputFieldMeta.name + ".actual",
                                        type=inputFieldMeta.type,
                                        special=inputFieldMeta.special)

      predictionField = FieldMetaInfo(name=inputFieldMeta.name + "." +
                                           inferenceLabel,
                                      type=inputFieldMeta.type,
                                      special=inputFieldMeta.special)

      fieldMetaInfo.append(outputFieldMeta)
      fieldMetaInfo.append(predictionField)

    return fieldMetaInfo


  def __getDictMetaInfo(self, inferenceElement, inferenceDict):
    """Get field metadata information for inferences that are of dict type"""
    fieldMetaInfo = []
    inferenceLabel = InferenceElement.getLabel(inferenceElement)

    if InferenceElement.getInputElement(inferenceElement):
      fieldMetaInfo.append(FieldMetaInfo(name=inferenceLabel + ".actual",
                                         type=FieldMetaType.string,
                                         special=''))

    keys = sorted(inferenceDict.keys())
    for key in keys:
      fieldMetaInfo.append(FieldMetaInfo(name=inferenceLabel + "." + str(key),
                                         type=FieldMetaType.string,
                                         special=''))

    return fieldMetaInfo


  def append(self, modelResult):
    """[virtual method override] Emits a single prediction as input versus
    predicted.

    modelResult:    An opfutils.ModelResult object that contains the model
                    input and output for the current timestep.
    """
    #print "DEBUG: _BasicPredictionWriter: writing modelResult: %r" % (modelResult,)

    # If there are no inferences, don't write anything
    inferences = modelResult.inferences
    hasInferences = False
    if inferences is not None:
      for value in inferences.itervalues():
        hasInferences = hasInferences or (value is not None)

    if not hasInferences:
      return

    if self.__dataset is None:
      self.__openDatafile(modelResult)

    inputData = modelResult.sensorInput

    sequenceReset = int(bool(inputData.sequenceReset))
    outputRow = [sequenceReset]

    # -----------------------------------------------------------------------
    # Write out the raw inputs
    rawInput = modelResult.rawInput
    for field in self._rawInputNames:
      outputRow.append(str(rawInput[field]))

    # -----------------------------------------------------------------------
    # Write out the inference element info
    for inferenceElement, outputVal in inferences.iteritems():
      inputElement = InferenceElement.getInputElement(inferenceElement)
      if inputElement:
        inputVal = getattr(inputData, inputElement)
      else:
        inputVal = None

      if type(outputVal) in (list, tuple):
        assert type(inputVal) in (list, tuple, type(None))

        for iv, ov in zip(inputVal, outputVal):
          # Write actual
          outputRow.append(str(iv))

          # Write inferred
          outputRow.append(str(ov))
      elif isinstance(outputVal, dict):
        if inputVal is not None:
          # If we have a predicted field, include only that in the actuals
          if modelResult.predictedFieldIdx is not None:
            outputRow.append(str(inputVal[modelResult.predictedFieldIdx]))
          else:
            outputRow.append(str(inputVal))
        for key in sorted(outputVal.keys()):
          outputRow.append(str(outputVal[key]))
      else:
        if inputVal is not None:
          outputRow.append(str(inputVal))
        outputRow.append(str(outputVal))

    metrics = modelResult.metrics
    for metricName in self.__metricNames:
      outputRow.append(metrics.get(metricName, 0.0))

    #print "DEBUG: _BasicPredictionWriter: writing outputRow: %r" % (outputRow,)

    self.__dataset.appendRecord(outputRow)
    self.__dataset.flush()

    return


  def checkpoint(self, checkpointSink, maxRows):
    """[virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent
    inference records.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data,
                        if any, will be stored.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """

    checkpointSink.truncate()

    if self.__dataset is None:
      if self.__checkpointCache is not None:
        self.__checkpointCache.seek(0)
        shutil.copyfileobj(self.__checkpointCache, checkpointSink)
        checkpointSink.flush()
        return
      else:
        # Nothing to checkpoint
        return

    self.__dataset.flush()
    totalDataRows = self.__dataset.getDataRowCount()

    if totalDataRows == 0:
      # Nothing to checkpoint
      return

    # Open reader of prediction file (suppress missingValues conversion)
    reader = FileRecordStream(self.__datasetPath, missingValues=[])

    # Create CSV writer for writing checkpoint rows
    writer = csv.writer(checkpointSink)

    # Write the header row to checkpoint sink -- just field names
    writer.writerow(reader.getFieldNames())

    # Determine number of rows to checkpoint
    numToWrite = min(maxRows, totalDataRows)

    # Skip initial rows to get to the rows that we actually need to checkpoint
    numRowsToSkip = totalDataRows - numToWrite
    for i in xrange(numRowsToSkip):
      reader.next()

    # Write the data rows to checkpoint sink
    numWritten = 0
    while True:
      row = reader.getNextRecord()
      if row is None:
        break

      row = [str(element) for element in row]

      #print "DEBUG: _BasicPredictionWriter: checkpointing row: %r" % (row,)

      writer.writerow(row)
      numWritten += 1

    assert numWritten == numToWrite, \
      "numWritten (%s) != numToWrite (%s)" % (numWritten, numToWrite)

    checkpointSink.flush()

    return
def testBasic(self):
  """Runs basic FileRecordStream tests."""
  filename = _getTempFileName()

  # Write a standard file
  fields = [('name', 'string', ''),
            ('timestamp', 'datetime', 'T'),
            ('integer', 'int', ''),
            ('real', 'float', ''),
            ('reset', 'int', 'R'),
            ('sid', 'string', 'S'),
            ('categoryField', 'int', 'C')]
  fieldNames = ['name', 'timestamp', 'integer', 'real', 'reset', 'sid',
                'categoryField']

  print 'Creating temp file:', filename

  s = FileRecordStream(streamID=filename, write=True, fields=fields)

  self.assertTrue(s.getDataRowCount() == 0)

  # Records
  records = (
      ['rec_1', datetime(day=1, month=3, year=2010), 5, 6.5, 1, 'seq-1', 10],
      ['rec_2', datetime(day=2, month=3, year=2010), 8, 7.5, 0, 'seq-1', 11],
      ['rec_3', datetime(day=3, month=3, year=2010), 12, 8.5, 0, 'seq-1', 12])

  self.assertTrue(s.getFields() == fields)
  self.assertTrue(s.getNextRecordIdx() == 0)

  print 'Writing records ...'
  for r in records:
    print list(r)
    s.appendRecord(list(r))

  self.assertTrue(s.getDataRowCount() == 3)

  recordsBatch = (
      ['rec_4', datetime(day=4, month=3, year=2010), 2, 9.5, 1, 'seq-1', 13],
      ['rec_5', datetime(day=5, month=3, year=2010), 6, 10.5, 0, 'seq-1', 14],
      ['rec_6', datetime(day=6, month=3, year=2010), 11, 11.5, 0, 'seq-1', 15])

  print 'Adding batch of records...'
  for rec in recordsBatch:
    print rec
  s.appendRecords(recordsBatch)

  self.assertTrue(s.getDataRowCount() == 6)

  s.close()

  # Read the standard file
  s = FileRecordStream(filename)
  self.assertTrue(s.getDataRowCount() == 6)
  self.assertTrue(s.getFieldNames() == fieldNames)

  # Note! this is the number of records read so far
  self.assertTrue(s.getNextRecordIdx() == 0)

  readStats = s.getStats()
  print 'Got stats:', readStats
  expectedStats = {
      'max': [None, None, 12, 11.5, 1, None, 15],
      'min': [None, None, 2, 6.5, 0, None, 10]
  }
  self.assertTrue(readStats == expectedStats)

  readRecords = []
  print 'Reading records ...'
  while True:
    r = s.getNextRecord()
    print r
    if r is None:
      break
    readRecords.append(r)

  allRecords = records + recordsBatch
  for r1, r2 in zip(allRecords, readRecords):
    print 'Expected:', r1
    print 'Read    :', r2
    self.assertTrue(r1 == r2)

  s.close()
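# _getTempFileName is a test helper that isn't shown above. A minimal sketch
# of what it presumably does (an assumption, not the original helper): create
# an empty temp CSV file and hand back its path for FileRecordStream to open.
import os
import tempfile

def _getTempFileName():
  """Create an empty temp CSV file and return its path."""
  handle, filename = tempfile.mkstemp('.csv', 'fileRecordStreamTest')
  os.close(handle)
  return filename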
def main():
  parser = argparse.ArgumentParser()

  parser.add_argument('--inputFile', '-d',
                      dest='inputFile',
                      type=str,
                      default=None,
                      help='Relative path to the input file.')

  parser.add_argument('--outputDir', '-o',
                      dest='outputDir',
                      type=str,
                      default='results/traces',
                      help='Relative path to the directory where the HTM '
                           'network traces will be saved.')

  parser.add_argument('--htmConfig', '-c',
                      dest='htmConfig',
                      type=str,
                      default='htm_network_config/6categories.json',
                      help='Relative path to the HTM network config JSON. '
                           'This option is ignored when the --model flag '
                           'is used.')

  parser.add_argument('--inputModel', '-im',
                      dest='inputModel',
                      type=str,
                      default=None,
                      help='Relative path of the serialized HTM model to be '
                           'loaded.')

  parser.add_argument('--outputModel', '-om',
                      dest='outputModel',
                      type=str,
                      default=None,
                      help='Relative path to serialize the HTM model.')

  parser.add_argument('--disableLearning', '-dl',
                      dest='disableLearning',
                      action='store_true',
                      default=False,
                      help='Use this flag to disable learning. If not '
                           'provided, then learning is enabled by default.')

  parser.add_argument('--batch', '-b',
                      dest='batchSize',
                      type=int,
                      default=1000,
                      help='Size of each batch being processed.')

  # Parse input options
  options = parser.parse_args()
  outputDir = options.outputDir
  networkConfigPath = options.htmConfig
  batchSize = options.batchSize

  # FIXME RES-464: until the serialization process is fixed, don't save the
  # model. Run each phase serially (train -> validation -> test).
  # TODO: Re-introduce these command line args when serialization is fixed.
  # inputFile = options.inputFile
  # inputModelPath = options.inputModel
  # outputModelPath = options.outputModel
  # learningMode = not options.disableLearning
  inputModelPath = None
  outputModelPath = None
  phases = ['train', 'val', 'test']
  inputDir = os.path.join('data', 'artificial')
  expName = 'binary_ampl=10.0_mean=0.0_noise=0.0'
  # 'body_acc_x_inertial_signals'
  network = None
  with open(networkConfigPath, 'r') as f:
    networkConfig = simplejson.load(f)

  for phase in phases:
    # Data source
    inputFile = os.path.join(inputDir, '%s_%s.csv' % (expName, phase))
    dataSource = FileRecordStream(streamID=inputFile)
    numRecords = dataSource.getDataRowCount()
    _LOGGER.debug('Number of records to be processed: %s' % numRecords)

    # Trace output info
    traceFileName = getTraceFileName(inputFile)
    traceFilePath = os.path.join(outputDir, '%s.csv' % traceFileName)
    if not os.path.exists(outputDir):
      os.makedirs(outputDir)

    # If there is no network yet, create one and train it.
    if not network:
      # Make sure that we create the network during the train phase.
      assert phase == 'train'
      learningMode = True
      network = createNetwork(dataSource, networkConfig, inputModelPath)
    else:
      learningMode = False
      regionName = networkConfig["sensorRegionConfig"]["regionName"]
      sensorRegion = network.regions[regionName].getSelf()
      sensorRegion.dataSource = dataSource
      if 'train' in sensorRegion.dataSource._filename:
        raise ValueError('Learning mode should not be disabled for the '
                         'train set.')

    _LOGGER.debug('Running network with inputFile=%s '
                  'and learningMode=%s' % (inputFile, learningMode))

    # FIXME RES-464 (end)

    run(network, numRecords, traceFilePath, networkConfig, outputModelPath,
        batchSize, learningMode)
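# getTraceFileName is used in main() above but not shown. A plausible sketch
# (an assumption, not the original helper): derive the trace name from the
# input file's basename, dropping the extension.
def getTraceFileName(filePath):
  return os.path.splitext(os.path.basename(filePath))[0]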