Beispiel #1
0
def run(numRecords):
  '''
  Run the Hot Gym example.
  '''

  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  network.regions["sensor"].getSelf().predictedField = "price"

  # Set predicted field
  network.regions["sensor"].setParameter("predictedField", "price")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = _RUN_EPOCH  # Run the network, N iterations at a time.
  graph = Graph({
    'title': 'Bitcoin Prediction',
    'y_label': 'price',
    'y_lim': 'auto',
    'prediction_num': 2,
    'line_labels': ['1-step', '5-step']
  })
  for iteration in range(0, numRecords, N):
    if iteration % _RUN_INTERVAL == 0:
      network.run(N)

      price = network.regions["sensor"].getOutputData("sourceOut")[0]

      predictionResults = getPredictionResults(network, "classifier")
      oneStep = predictionResults[1]["predictedValue"]
      oneStepConfidence = predictionResults[1]["predictionConfidence"]
      fiveStep = predictionResults[5]["predictedValue"]
      fiveStepConfidence = predictionResults[5]["predictionConfidence"]

      result = (oneStep, oneStepConfidence * 100,
                fiveStep, fiveStepConfidence * 100)
      
      if iteration % _PRINT_INTERVAL == 0:
        print "iteration: {}".format(iteration)
        print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
      
      results.append(result)

      graph.write(price, [oneStep, fiveStep])
  
  graph.close()

  return results
def runDemo():
    dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
    numRecords = dataSource.getDataRowCount()
    print "Creating network"
    network = createNetwork(dataSource)
    outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_FILE_NAME)
    with open(outputPath, "w") as outputFile:
        writer = csv.writer(outputFile)
        print "Running network"
        print "Writing output to: %s" % outputPath
        runNetwork(network, numRecords, writer)
    print "Hierarchy demo finished"
def runDemo():
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = dataSource.getDataRowCount()
  print "Creating network"
  network = createNetwork(dataSource)
  outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_FILE_NAME)
  with open(outputPath, "w") as outputFile:
    writer = csv.writer(outputFile)
    print "Running network"
    print "Writing output to: %s" % outputPath
    runNetwork(network, numRecords, writer)
  print "Hierarchy demo finished"
Beispiel #4
0
def run(numRecords):
    '''
  Run the Hot Gym example.
  '''

    # Create a data source for the network.
    dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
    numRecords = min(numRecords, dataSource.getDataRowCount())
    network = createNetwork(dataSource)

    network.regions["sensor"].getSelf().predictedField = "sine"

    # Set predicted field
    network.regions["sensor"].setParameter("predictedField", "sine")

    # Enable learning for all regions.
    network.regions["SP"].setParameter("learningMode", 1)
    network.regions["TM"].setParameter("learningMode", 1)
    network.regions["classifier"].setParameter("learningMode", 1)

    # Enable inference for all regions.
    network.regions["SP"].setParameter("inferenceMode", 1)
    network.regions["TM"].setParameter("inferenceMode", 1)
    network.regions["classifier"].setParameter("inferenceMode", 1)

    results = []
    N = 1  # Run the network, N iterations at a time.
    output = nupic_output.NuPICPlotOutput("Sine", show_anomaly_score=True)
    for iteration in range(0, numRecords, N):
        network.run(N)

        sine = network.regions["sensor"].getOutputData("sourceOut")[0]

        predictionResults = getPredictionResults(network, "classifier")
        oneStep = predictionResults[1]["predictedValue"]
        oneStepConfidence = predictionResults[1]["predictionConfidence"]
        fiveStep = predictionResults[10]["predictedValue"]
        fiveStepConfidence = predictionResults[10]["predictionConfidence"]

        result = (oneStep, oneStepConfidence * 100, fiveStep,
                  fiveStepConfidence * 100)
        print "1-step: {:16} ({:4.4}%)\t 10-step: {:16} ({:4.4}%)".format(
            *result)
        results.append(result)

        output.write(sine, oneStep, 0)

    output.close()

    return results
def runHotgym(numRecords):
    """Run the Hot Gym example."""

    # Create a data source for the network.
    dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
    numRecords = min(numRecords, dataSource.getDataRowCount())
    network = createNetwork(dataSource)

    # Set predicted field index. It needs to be the same index as the data source.
    predictedIdx = dataSource.getFieldNames().index("consumption")
    network.regions["sensor"].setParameter("predictedFieldIdx", predictedIdx)

    # Enable learning for all regions.
    network.regions["SP"].setParameter("learningMode", 1)
    network.regions["TM"].setParameter("learningMode", 1)
    network.regions["classifier"].setParameter("learningMode", 1)

    # Enable inference for all regions.
    network.regions["SP"].setParameter("inferenceMode", 1)
    network.regions["TM"].setParameter("inferenceMode", 1)
    network.regions["classifier"].setParameter("inferenceMode", 1)

    results = []
    N = 1  # Run the network, N iterations at a time.
    for iteration in range(0, numRecords, N):
        network.run(N)

        predictionResults = getPredictionResults(network, "classifier")
        oneStep = predictionResults[1]["predictedValue"]
        oneStepConfidence = predictionResults[1]["predictionConfidence"]
        fiveStep = predictionResults[5]["predictedValue"]
        fiveStepConfidence = predictionResults[5]["predictionConfidence"]

        result = (oneStep, oneStepConfidence * 100, fiveStep,
                  fiveStepConfidence * 100)
        print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(
            *result)
        results.append(result)

    return results
def runHotgym(numRecords):
  """Run the Hot Gym example."""

  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set predicted field index. It needs to be the same index as the data source.
  predictedIdx = dataSource.getFieldNames().index("consumption")
  network.regions["sensor"].setParameter("predictedFieldIdx", predictedIdx)

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = 1  # Run the network, N iterations at a time.
  for iteration in range(0, numRecords, N):
    network.run(N)

    predictionResults = getPredictionResults(network, "classifier")
    oneStep = predictionResults[1]["predictedValue"]
    oneStepConfidence = predictionResults[1]["predictionConfidence"]
    fiveStep = predictionResults[5]["predictedValue"]
    fiveStepConfidence = predictionResults[5]["predictionConfidence"]

    result = (oneStep, oneStepConfidence * 100,
              fiveStep, fiveStepConfidence * 100)
    print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
    results.append(result)

  return results
Beispiel #7
0
class _BasicPredictionWriter(PredictionWriterIface):
    """ This class defines the basic (file-based) implementation of
  PredictionWriterIface, whose instances are returned by
  BasicPredictionWriterFactory
  """
    def __init__(self,
                 experimentDir,
                 label,
                 inferenceType,
                 fields,
                 metricNames=None,
                 checkpointSource=None):
        """ Constructor

    experimentDir:
                  experiment directory path that contains description.py

    label:        A label string to incorporate into the filename.

    inferenceType:
                  A constant from opfutils.InferenceType for the
                  requested prediction writer

    fields:       a non-empty sequence of nupic.data.fieldmeta.FieldMetaInfo
                  representing fields that will be emitted to this prediction
                  writer

    metricNames:  OPTIONAL - A list of metric names that will be emitted by
                  this prediction writer

    checkpointSource:
                  If not None, a File-like object containing the
                  previously-checkpointed predictions for setting the initial
                  contents of this PredictionOutputStream.  Will be copied
                  before returning, if needed.
    """
        #assert len(fields) > 0

        self.__experimentDir = experimentDir

        # opfutils.InferenceType kind value
        self.__inferenceType = inferenceType

        # A tuple of nupic.data.fieldmeta.FieldMetaInfo
        self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
        self.__numInputFields = len(self.__inputFieldsMeta)
        self.__label = label
        # Sort in place so metric columns are always emitted in a stable
        # order.  NOTE(review): this mutates the caller's list — confirm
        # callers don't depend on the original ordering.
        if metricNames is not None:
            metricNames.sort()
        self.__metricNames = metricNames

        # Define our output field meta info
        self.__outputFieldsMeta = []

        # The list of inputs that we include in the prediction output
        self._rawInputNames = []

        # Output dataset; created lazily by __openDatafile() on the first
        # append(), so the header can be derived from the first ModelResult.
        self.__datasetPath = None
        self.__dataset = None

        # Save checkpoint data until we're ready to create the output dataset
        self.__checkpointCache = None
        if checkpointSource is not None:
            checkpointSource.seek(0)
            self.__checkpointCache = StringIO.StringIO()
            shutil.copyfileobj(checkpointSource, self.__checkpointCache)

        return

    ############################################################################
    def __openDatafile(self, modelResult):
        """Open the data file and write the header row.

        Called lazily from append() so the column layout (raw input fields,
        inference-element columns, and logged metric columns) can be derived
        from the first ModelResult.  Also replays any rows cached from a
        previous checkpoint into the new dataset.
        """

        # Write reset bit
        resetFieldMeta = FieldMetaInfo(name="reset",
                                       type=FieldMetaType.integer,
                                       special=FieldMetaSpecial.reset)

        self.__outputFieldsMeta.append(resetFieldMeta)

        # -----------------------------------------------------------------------
        # Write each of the raw inputs that go into the encoders
        # (sorted for a deterministic column order; "_"-prefixed and the
        # reset field are excluded).
        rawInput = modelResult.rawInput
        rawFields = rawInput.keys()
        rawFields.sort()
        for field in rawFields:
            if field.startswith('_') or field == 'reset':
                continue
            value = rawInput[field]
            meta = FieldMetaInfo(name=field,
                                 type=FieldMetaType.string,
                                 special=FieldMetaSpecial.none)
            self.__outputFieldsMeta.append(meta)
            self._rawInputNames.append(field)

        # -----------------------------------------------------------------------
        # Handle each of the inference elements
        for inferenceElement, value in modelResult.inferences.iteritems():
            inferenceLabel = InferenceElement.getLabel(inferenceElement)

            # TODO: Right now we assume list inferences are associated with
            # The input field metadata
            if type(value) in (list, tuple):
                # Append input and prediction field meta-info
                self.__outputFieldsMeta.extend(
                    self.__getListMetaInfo(inferenceElement))

            elif isinstance(value, dict):
                self.__outputFieldsMeta.extend(
                    self.__getDictMetaInfo(inferenceElement, value))
            else:
                # Scalar inference: emit an optional ".actual" column plus
                # the inference column itself.
                if InferenceElement.getInputElement(inferenceElement):
                    self.__outputFieldsMeta.append(
                        FieldMetaInfo(name=inferenceLabel + ".actual",
                                      type=FieldMetaType.string,
                                      special=''))
                self.__outputFieldsMeta.append(
                    FieldMetaInfo(name=inferenceLabel,
                                  type=FieldMetaType.string,
                                  special=''))

        # One float column per logged metric.
        if self.__metricNames:
            for metricName in self.__metricNames:
                metricField = FieldMetaInfo(name=metricName,
                                            type=FieldMetaType.float,
                                            special=FieldMetaSpecial.none)

                self.__outputFieldsMeta.append(metricField)

        # Create the inference directory for our experiment
        inferenceDir = _FileUtils.createExperimentInferenceDir(
            self.__experimentDir)

        # Construct the prediction dataset file path
        filename = (self.__label + "." +
                    opfutils.InferenceType.getLabel(self.__inferenceType) +
                    ".predictionLog.csv")
        self.__datasetPath = os.path.join(inferenceDir, filename)

        # Create the output dataset
        print "OPENING OUTPUT FOR PREDICTION WRITER AT: %r" % self.__datasetPath
        print "Prediction field-meta: %r" % (
            [tuple(i) for i in self.__outputFieldsMeta], )
        self.__dataset = FileRecordStream(streamID=self.__datasetPath,
                                          write=True,
                                          fields=self.__outputFieldsMeta)

        # Copy data from checkpoint cache
        if self.__checkpointCache is not None:
            self.__checkpointCache.seek(0)

            reader = csv.reader(self.__checkpointCache, dialect='excel')

            # Skip header row (reader.next() is the Python 2 iterator API)
            try:
                header = reader.next()
            except StopIteration:
                print "Empty record checkpoint initializer for %r" % (
                    self.__datasetPath, )
            else:
                # The checkpoint must have been written with the same layout.
                assert tuple(self.__dataset.getFieldNames()) == tuple(header), \
                  "dataset.getFieldNames(): %r; predictionCheckpointFieldNames: %r" % (
                  tuple(self.__dataset.getFieldNames()), tuple(header))

            # Copy the rows from checkpoint
            numRowsCopied = 0
            while True:
                try:
                    row = reader.next()
                except StopIteration:
                    break

                #print "DEBUG: restoring row from checkpoint: %r" % (row,)

                self.__dataset.appendRecord(row)
                numRowsCopied += 1

            self.__dataset.flush()

            print "Restored %d rows from checkpoint for %r" % (
                numRowsCopied, self.__datasetPath)

            # Dispose of our checkpoint cache
            self.__checkpointCache.close()
            self.__checkpointCache = None

        return

    ############################################################################
    def setLoggedMetrics(self, metricNames):
        """ Tell the writer which metrics should be written

    Parameters:
    -----------------------------------------------------------------------
    metricNames: iterable of metric labels to be written; None logs nothing
    """
        self.__metricNames = set() if metricNames is None else set(metricNames)

    ############################################################################
    def close(self):
        """ [virtual method override] Closes the writer (e.g., close the underlying
    file).  Safe to call when no dataset was ever opened.
    """
        dataset = self.__dataset
        if dataset:
            dataset.close()
        # Drop the reference so later calls are no-ops.
        self.__dataset = None

    ############################################################################
    def __getListMetaInfo(self, inferenceElement):
        """ Get field metadata information for inferences that are of list type
    TODO: Right now we assume list inferences are associated with the input field
    metadata

    Returns a list of FieldMetaInfo: for every input field, an optional
    "<field>.actual" column (only when the inference element has a
    corresponding input element) followed by a "<field>.<label>" prediction
    column.
    """
        fieldMetaInfo = []
        inferenceLabel = InferenceElement.getLabel(inferenceElement)

        # This depends only on inferenceElement, so evaluate it once.
        # Previously, when it was falsy, `outputFieldMeta` was never assigned
        # and the unconditional append below raised an UnboundLocalError.
        hasInputElement = bool(
            InferenceElement.getInputElement(inferenceElement))

        for inputFieldMeta in self.__inputFieldsMeta:
            if hasInputElement:
                # Column holding the actual (ground-truth) input value.
                fieldMetaInfo.append(
                    FieldMetaInfo(name=inputFieldMeta.name + ".actual",
                                  type=inputFieldMeta.type,
                                  special=inputFieldMeta.special))

            # Column holding the predicted value for this field.
            fieldMetaInfo.append(
                FieldMetaInfo(name=inputFieldMeta.name + "." + inferenceLabel,
                              type=inputFieldMeta.type,
                              special=inputFieldMeta.special))

        return fieldMetaInfo

    ############################################################################
    def __getDictMetaInfo(self, inferenceElement, inferenceDict):
        """Build output field metadata for a dict-valued inference element."""
        inferenceLabel = InferenceElement.getLabel(inferenceElement)
        fieldMetaInfo = []

        # Ground-truth column first, when this element mirrors an input.
        if InferenceElement.getInputElement(inferenceElement):
            fieldMetaInfo.append(
                FieldMetaInfo(name=inferenceLabel + ".actual",
                              type=FieldMetaType.string,
                              special=''))

        # One column per dict key, in sorted order for a stable header.
        fieldMetaInfo.extend(
            FieldMetaInfo(name=inferenceLabel + "." + str(key),
                          type=FieldMetaType.string,
                          special='')
            for key in sorted(inferenceDict.keys()))

        return fieldMetaInfo

    ############################################################################
    def append(self, modelResult):
        """ [virtual method override] Emits a single prediction as input versus
    predicted.

    modelResult:    An opfutils.ModelResult object that contains the model input
                    and output for the current timestep.
    """

        #print "DEBUG: _BasicPredictionWriter: writing modelResult: %r" % (modelResult,)

        # If there are no inferences, don't write anything
        inferences = modelResult.inferences
        hasInferences = False
        if inferences is not None:
            for value in inferences.itervalues():
                hasInferences = hasInferences or (value is not None)

        if not hasInferences:
            return

        # Lazily open the output file; the header layout is derived from
        # this first ModelResult.
        if self.__dataset is None:
            self.__openDatafile(modelResult)

        inputData = modelResult.sensorInput

        # First column is the reset bit, normalized to 0/1.
        sequenceReset = int(bool(inputData.sequenceReset))
        outputRow = [sequenceReset]

        # -----------------------------------------------------------------------
        # Write out the raw inputs (in the order captured by __openDatafile)
        rawInput = modelResult.rawInput
        for field in self._rawInputNames:
            outputRow.append(str(rawInput[field]))

        # -----------------------------------------------------------------------
        # Write out the inference element info
        for inferenceElement, outputVal in inferences.iteritems():
            inputElement = InferenceElement.getInputElement(inferenceElement)
            if inputElement:
                inputVal = getattr(inputData, inputElement)
            else:
                inputVal = None

            if type(outputVal) in (list, tuple):
                # NOTE(review): `None` is not a type, so this assert rejects
                # inputVal=None — `type(None)` was probably intended; confirm
                # before relying on it.
                assert type(inputVal) in (list, tuple, None)

                # Emit actual/inferred pairs, one per element.
                for iv, ov in zip(inputVal, outputVal):
                    # Write actual
                    outputRow.append(str(iv))

                    # Write inferred
                    outputRow.append(str(ov))
            elif isinstance(outputVal, dict):
                if inputVal is not None:
                    # If we have a predicted field, include only that in the actuals
                    if modelResult.predictedFieldIdx is not None:
                        outputRow.append(
                            str(inputVal[modelResult.predictedFieldIdx]))
                    else:
                        outputRow.append(str(inputVal))
                # Dict values are emitted in sorted-key order, matching the
                # header built by __getDictMetaInfo.
                for key in sorted(outputVal.keys()):
                    outputRow.append(str(outputVal[key]))
            else:
                if inputVal is not None:
                    outputRow.append(str(inputVal))
                outputRow.append(str(outputVal))

        # Metric columns; missing metrics default to 0.0.
        metrics = modelResult.metrics
        for metricName in self.__metricNames:
            outputRow.append(metrics.get(metricName, 0.0))

        #print "DEBUG: _BasicPredictionWriter: writing outputRow: %r" % (outputRow,)

        self.__dataset.appendRecord(outputRow)

        self.__dataset.flush()

        return

    def checkpoint(self, checkpointSink, maxRows):
        """ [virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent inference
    records.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data, if
                        any, will be stored.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """

        # Start from an empty sink.
        checkpointSink.truncate()

        if self.__dataset is None:
            # No dataset opened yet: re-emit the cached checkpoint, if any.
            if self.__checkpointCache is not None:
                self.__checkpointCache.seek(0)
                shutil.copyfileobj(self.__checkpointCache, checkpointSink)
                checkpointSink.flush()
                return
            else:
                # Nothing to checkpoint
                return

        self.__dataset.flush()
        totalDataRows = self.__dataset.getDataRowCount()

        if totalDataRows == 0:
            # Nothing to checkpoint
            return

        # Open reader of prediction file (suppress missingValues conversion)
        reader = FileRecordStream(self.__datasetPath, missingValues=[])

        # Create CSV writer for writing checkpoint rows
        writer = csv.writer(checkpointSink)

        # Write the header row to checkpoint sink -- just field names
        writer.writerow(reader.getFieldNames())

        # Determine number of rows to checkpoint
        numToWrite = min(maxRows, totalDataRows)

        # Skip initial rows to get to the rows that we actually need to checkpoint
        numRowsToSkip = totalDataRows - numToWrite
        for i in xrange(numRowsToSkip):
            reader.next()

        # Write the data rows to checkpoint sink
        numWritten = 0
        while True:
            row = reader.getNextRecord()
            if row is None:
                break

            row = [str(element) for element in row]

            #print "DEBUG: _BasicPredictionWriter: checkpointing row: %r" % (row,)

            writer.writerow(row)

            numWritten += 1

        # Sanity check: everything we intended to keep was written.
        assert numWritten == numToWrite, \
          "numWritten (%s) != numToWrite (%s)" % (numWritten, numToWrite)

        checkpointSink.flush()

        return
Beispiel #8
0
class _BasicPredictionWriter(PredictionWriterIface):
  """ This class defines the basic (file-based) implementation of
  PredictionWriterIface, whose instances are returned by
  BasicPredictionWriterFactory
  """
  def __init__(self, experimentDir, label, inferenceType,
               fields, metricNames=None, checkpointSource=None):
    """ Constructor

    experimentDir:
                  experiment directory path that contains description.py

    label:        A label string to incorporate into the filename.

    inferenceType:
                  A constant from opfutils.InferenceType for the
                  requested prediction writer

    fields:       a non-empty sequence of nupic.data.fieldmeta.FieldMetaInfo
                  representing fields that will be emitted to this prediction
                  writer

    metricNames:  OPTIONAL - A list of metric names that will be emitted by
                  this prediction writer

    checkpointSource:
                  If not None, a File-like object containing the
                  previously-checkpointed predictions for setting the initial
                  contents of this PredictionOutputStream.  Will be copied
                  before returning, if needed.
    """
    #assert len(fields) > 0

    self.__experimentDir = experimentDir

    # opfutils.InferenceType kind value
    self.__inferenceType = inferenceType

    # A tuple of nupic.data.fieldmeta.FieldMetaInfo
    self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
    self.__numInputFields = len(self.__inputFieldsMeta)
    self.__label = label
    # Sort in place so metric columns are always emitted in a stable order.
    # NOTE(review): this mutates the caller's list — confirm callers don't
    # depend on the original ordering.
    if metricNames is not None:
      metricNames.sort()
    self.__metricNames = metricNames

    # Define our output field meta info
    self.__outputFieldsMeta = []

    # The list of inputs that we include in the prediction output
    self._rawInputNames = []

    # Output dataset; created lazily by __openDatafile() on the first
    # append(), so the header can be derived from the first ModelResult.
    self.__datasetPath = None
    self.__dataset = None

    # Save checkpoint data until we're ready to create the output dataset
    self.__checkpointCache = None
    if checkpointSource is not None:
      checkpointSource.seek(0)
      self.__checkpointCache = StringIO.StringIO()
      shutil.copyfileobj(checkpointSource, self.__checkpointCache)

    return


  def __openDatafile(self, modelResult):
    """Open the data file and write the header row.

    Called lazily from append() so the column layout (raw input fields,
    inference-element columns, and logged metric columns) can be derived
    from the first ModelResult.  Also replays any rows cached from a
    previous checkpoint into the new dataset.
    """

    # Write reset bit
    resetFieldMeta = FieldMetaInfo(
      name="reset",
      type=FieldMetaType.integer,
      special = FieldMetaSpecial.reset)

    self.__outputFieldsMeta.append(resetFieldMeta)


    # -----------------------------------------------------------------------
    # Write each of the raw inputs that go into the encoders
    # (sorted for a deterministic column order; "_"-prefixed and the reset
    # field are excluded).
    rawInput = modelResult.rawInput
    rawFields = rawInput.keys()
    rawFields.sort()
    for field in rawFields:
      if field.startswith('_') or field == 'reset':
        continue
      value = rawInput[field]
      meta = FieldMetaInfo(name=field, type=FieldMetaType.string,
                           special=FieldMetaSpecial.none)
      self.__outputFieldsMeta.append(meta)
      self._rawInputNames.append(field)


    # -----------------------------------------------------------------------
    # Handle each of the inference elements
    for inferenceElement, value in modelResult.inferences.iteritems():
      inferenceLabel = InferenceElement.getLabel(inferenceElement)

      # TODO: Right now we assume list inferences are associated with
      # The input field metadata
      if type(value) in (list, tuple):
        # Append input and prediction field meta-info
        self.__outputFieldsMeta.extend(self.__getListMetaInfo(inferenceElement))

      elif isinstance(value, dict):
          self.__outputFieldsMeta.extend(self.__getDictMetaInfo(inferenceElement,
                                                                value))
      else:
        # Scalar inference: optional ".actual" column plus the inference
        # column itself.
        if InferenceElement.getInputElement(inferenceElement):
          self.__outputFieldsMeta.append(FieldMetaInfo(name=inferenceLabel+".actual",
                type=FieldMetaType.string, special = ''))
        self.__outputFieldsMeta.append(FieldMetaInfo(name=inferenceLabel,
                type=FieldMetaType.string, special = ''))

    # One float column per logged metric.
    if self.__metricNames:
      for metricName in self.__metricNames:
        metricField = FieldMetaInfo(
          name = metricName,
          type = FieldMetaType.float,
          special = FieldMetaSpecial.none)

        self.__outputFieldsMeta.append(metricField)

    # Create the inference directory for our experiment
    inferenceDir = _FileUtils.createExperimentInferenceDir(self.__experimentDir)

    # Construct the prediction dataset file path
    filename = (self.__label + "." +
               opfutils.InferenceType.getLabel(self.__inferenceType) +
               ".predictionLog.csv")
    self.__datasetPath = os.path.join(inferenceDir, filename)

    # Create the output dataset
    print "OPENING OUTPUT FOR PREDICTION WRITER AT: {0!r}".format(self.__datasetPath)
    print "Prediction field-meta: {0!r}".format([tuple(i) for i in self.__outputFieldsMeta])
    self.__dataset = FileRecordStream(streamID=self.__datasetPath, write=True,
                                     fields=self.__outputFieldsMeta)

    # Copy data from checkpoint cache
    if self.__checkpointCache is not None:
      self.__checkpointCache.seek(0)

      reader = csv.reader(self.__checkpointCache, dialect='excel')

      # Skip header row (reader.next() is the Python 2 iterator API)
      try:
        header = reader.next()
      except StopIteration:
        print "Empty record checkpoint initializer for {0!r}".format(self.__datasetPath)
      else:
        # The checkpoint must have been written with the same layout.
        assert tuple(self.__dataset.getFieldNames()) == tuple(header), \
          "dataset.getFieldNames(): {0!r}; predictionCheckpointFieldNames: {1!r}".format(
          tuple(self.__dataset.getFieldNames()), tuple(header))

      # Copy the rows from checkpoint
      numRowsCopied = 0
      while True:
        try:
          row = reader.next()
        except StopIteration:
          break

        #print "DEBUG: restoring row from checkpoint: %r" % (row,)

        self.__dataset.appendRecord(row)
        numRowsCopied += 1

      self.__dataset.flush()

      print "Restored {0:d} rows from checkpoint for {1!r}".format(
        numRowsCopied, self.__datasetPath)

      # Dispose of our checkpoint cache
      self.__checkpointCache.close()
      self.__checkpointCache = None

    return


  def setLoggedMetrics(self, metricNames):
    """ Tell the writer which metrics should be written

    Parameters:
    -----------------------------------------------------------------------
    metricNames: iterable of metric labels to be written; None logs nothing
    """
    self.__metricNames = set() if metricNames is None else set(metricNames)


  def close(self):
    """ [virtual method override] Closes the writer (e.g., close the underlying
    file).  Safe to call when no dataset was ever opened.
    """
    dataset = self.__dataset
    if dataset:
      dataset.close()
    # Drop the reference so later calls are no-ops.
    self.__dataset = None


  def __getListMetaInfo(self, inferenceElement):
    """ Get field metadata information for inferences that are of list type
    TODO: Right now we assume list inferences are associated with the input field
    metadata

    Returns, for every input field, an optional "<field>.actual" column
    (only when the inference element has a corresponding input element)
    followed by a "<field>.<label>" prediction column.
    """
    fieldMetaInfo = []
    inferenceLabel = InferenceElement.getLabel(inferenceElement)

    # This depends only on inferenceElement, so evaluate it once.
    # Previously, when it was falsy, `outputFieldMeta` was never assigned
    # and the unconditional append below raised an UnboundLocalError.
    hasInputElement = bool(InferenceElement.getInputElement(inferenceElement))

    for inputFieldMeta in self.__inputFieldsMeta:
      if hasInputElement:
        # Column holding the actual (ground-truth) input value.
        fieldMetaInfo.append(FieldMetaInfo(
          name=inputFieldMeta.name + ".actual",
          type=inputFieldMeta.type,
          special=inputFieldMeta.special
        ))

      # Column holding the predicted value for this field.
      fieldMetaInfo.append(FieldMetaInfo(
        name=inputFieldMeta.name + "." + inferenceLabel,
        type=inputFieldMeta.type,
        special=inputFieldMeta.special
      ))

    return fieldMetaInfo


  def __getDictMetaInfo(self, inferenceElement, inferenceDict):
    """Build the output column metadata for a dict-valued inference."""
    inferenceLabel = InferenceElement.getLabel(inferenceElement)

    fieldMetaInfo = []
    if InferenceElement.getInputElement(inferenceElement):
      # Ground-truth column precedes the per-key prediction columns.
      fieldMetaInfo.append(FieldMetaInfo(name=inferenceLabel + ".actual",
                                         type=FieldMetaType.string,
                                         special=''))

    # One column per dict key, in sorted order for a stable header.
    fieldMetaInfo += [FieldMetaInfo(name=inferenceLabel + "." + str(key),
                                    type=FieldMetaType.string,
                                    special='')
                      for key in sorted(inferenceDict.keys())]

    return fieldMetaInfo


  def append(self, modelResult):
    """ [virtual method override] Emits a single prediction as input versus
    predicted.

    modelResult:    An opfutils.ModelResult object that contains the model input
                    and output for the current timestep.
    """

    # If there are no (non-None) inferences, don't write anything
    inferences = modelResult.inferences
    hasInferences = False
    if inferences is not None:
      for value in inferences.itervalues():
        hasInferences = hasInferences or (value is not None)

    if not hasInferences:
      return

    # The dataset file is created lazily, on the first result that actually
    # carries inferences.
    if self.__dataset is None:
      self.__openDatafile(modelResult)

    inputData = modelResult.sensorInput

    # First column: the sequence-reset flag, normalized to 0/1
    sequenceReset = int(bool(inputData.sequenceReset))
    outputRow = [sequenceReset]


    # -----------------------------------------------------------------------
    # Write out the raw inputs
    rawInput = modelResult.rawInput
    for field in self._rawInputNames:
      outputRow.append(str(rawInput[field]))

    # -----------------------------------------------------------------------
    # Write out the inference element info
    for inferenceElement, outputVal in inferences.iteritems():
      # Fetch the actual (ground-truth) value when this inference element
      # maps back onto an input element
      inputElement = InferenceElement.getInputElement(inferenceElement)
      if inputElement:
        inputVal = getattr(inputData, inputElement)
      else:
        inputVal = None

      if type(outputVal) in (list, tuple):
        # List/tuple inferences are emitted as interleaved actual/inferred
        # pairs, one pair per element.
        # BUGFIX: the old check was `type(inputVal) in (list, tuple, None)`;
        # `None` is not a type, so that term could never match -- the
        # effective contract (inputVal must be a list or tuple) is now stated
        # explicitly, with a useful failure message.
        assert type(inputVal) in (list, tuple), (
          "Expected list/tuple actual value for list inference, got %r"
          % (inputVal,))

        for iv, ov in zip(inputVal, outputVal):
          # Write actual
          outputRow.append(str(iv))

          # Write inferred
          outputRow.append(str(ov))
      elif isinstance(outputVal, dict):
        if inputVal is not None:
          # If we have a predicted field, include only that in the actuals
          if modelResult.predictedFieldName is not None:
            outputRow.append(str(inputVal[modelResult.predictedFieldName]))
          else:
            outputRow.append(str(inputVal))
        # One column per dict key, in sorted order to match the header layout
        for key in sorted(outputVal.keys()):
          outputRow.append(str(outputVal[key]))
      else:
        # Scalar inference: optional actual value followed by the inference
        if inputVal is not None:
          outputRow.append(str(inputVal))
        outputRow.append(str(outputVal))

    # Append any requested metric values (defaulting to 0.0 when missing)
    metrics = modelResult.metrics
    for metricName in self.__metricNames:
      outputRow.append(metrics.get(metricName, 0.0))

    self.__dataset.appendRecord(outputRow)

    # Flush so the record is durable immediately
    self.__dataset.flush()

    return

  def checkpoint(self, checkpointSink, maxRows):
    """ [virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent inference
    records.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data, if
                        any, will be stored.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """

    # Start from a clean sink
    checkpointSink.truncate()

    if self.__dataset is None:
      if self.__checkpointCache is not None:
        # No live dataset, but a cached checkpoint exists from a previous
        # restore -- replay it verbatim into the sink.
        self.__checkpointCache.seek(0)
        shutil.copyfileobj(self.__checkpointCache, checkpointSink)
        checkpointSink.flush()
        return
      else:
        # Nothing to checkpoint
        return

    self.__dataset.flush()
    totalDataRows = self.__dataset.getDataRowCount()

    if totalDataRows == 0:
      # Nothing to checkpoint
      return

    # Open reader of prediction file (suppress missingValues conversion)
    reader = FileRecordStream(self.__datasetPath, missingValues=[])
    try:
      # Create CSV writer for writing checkpoint rows
      writer = csv.writer(checkpointSink)

      # Write the header row to checkpoint sink -- just field names
      writer.writerow(reader.getFieldNames())

      # Determine number of rows to checkpoint
      numToWrite = min(maxRows, totalDataRows)

      # Skip initial rows to get to the rows that we actually need to checkpoint
      numRowsToSkip = totalDataRows - numToWrite
      for i in xrange(numRowsToSkip):
        reader.next()

      # Write the data rows to checkpoint sink
      numWritten = 0
      while True:
        row = reader.getNextRecord()
        if row is None:
          break

        row = [str(element) for element in row]

        writer.writerow(row)

        numWritten += 1

      assert numWritten == numToWrite, \
        "numWritten ({0!s}) != numToWrite ({1!s})".format(numWritten, numToWrite)
    finally:
      # BUGFIX: the reader was previously never closed, leaking the underlying
      # file handle on every checkpoint.
      reader.close()

    checkpointSink.flush()

    return
  def testBasic(self):
    """Runs basic FileRecordStream tests."""
    filename = _getTempFileName()

    # Write a standard file
    fields = [('name', 'string', ''),
              ('timestamp', 'datetime', 'T'),
              ('integer', 'int', ''),
              ('real', 'float', ''),
              ('reset', 'int', 'R'),
              ('sid', 'string', 'S'),
              ('categoryField', 'int', 'C'),]
    fieldNames = ['name', 'timestamp', 'integer', 'real', 'reset', 'sid',
                  'categoryField']

    print 'Creating temp file:', filename

    s = FileRecordStream(streamID=filename, write=True, fields=fields)

    self.assertTrue(s.getDataRowCount() == 0)

    # Records
    records = (
      ['rec_1', datetime(day=1, month=3, year=2010), 5, 6.5, 1, 'seq-1', 10],
      ['rec_2', datetime(day=2, month=3, year=2010), 8, 7.5, 0, 'seq-1', 11],
      ['rec_3', datetime(day=3, month=3, year=2010), 12, 8.5, 0, 'seq-1', 12])

    self.assertTrue(s.getFields() == fields)
    self.assertTrue(s.getNextRecordIdx() == 0)

    print 'Writing records ...'
    for r in records:
      print list(r)
      s.appendRecord(list(r))

    self.assertTrue(s.getDataRowCount() == 3)

    recordsBatch = (
      ['rec_4', datetime(day=4, month=3, year=2010), 2, 9.5, 1, 'seq-1', 13],
      ['rec_5', datetime(day=5, month=3, year=2010), 6, 10.5, 0, 'seq-1', 14],
      ['rec_6', datetime(day=6, month=3, year=2010), 11, 11.5, 0, 'seq-1', 15])

    print 'Adding batch of records...'
    for rec in recordsBatch:
      print rec
    s.appendRecords(recordsBatch)
    self.assertTrue(s.getDataRowCount() == 6)

    s.close()

    # Read the standard file
    s = FileRecordStream(filename)
    self.assertTrue(s.getDataRowCount() == 6)
    self.assertTrue(s.getFieldNames() == fieldNames)

    # Note! this is the number of records read so far
    self.assertTrue(s.getNextRecordIdx() == 0)

    readStats = s.getStats()
    print 'Got stats:', readStats
    expectedStats = {
                     'max': [None, None, 12, 11.5, 1, None, 15],
                     'min': [None, None, 2, 6.5, 0, None, 10]
                    }
    self.assertTrue(readStats == expectedStats)

    readRecords = []
    print 'Reading records ...'
    while True:
      r = s.getNextRecord()
      print r
      if r is None:
        break

      readRecords.append(r)

    allRecords = records + recordsBatch
    for r1, r2 in zip(allRecords, readRecords):
      print 'Expected:', r1
      print 'Read    :', r2
      self.assertTrue(r1 == r2)

    s.close()
# Beispiel #10
# 0
def main():
  """Command-line entry point.

  Parses the CLI options, then runs the HTM network serially over the
  'train', 'val' and 'test' phases of the artificial dataset, writing one
  trace CSV per phase under --outputDir.
  """
  parser = argparse.ArgumentParser()

  # (flags, keyword arguments) for every supported command-line option
  argSpecs = [
    (('--inputFile', '-d'),
     dict(dest='inputFile', type=str, default=None,
          help='Relative path to the input file.')),
    (('--outputDir', '-o'),
     dict(dest='outputDir', type=str, default='results/traces',
          help='Relative path to the directory where the HTM '
               'network traces will be saved.')),
    (('--htmConfig', '-c'),
     dict(dest='htmConfig', type=str,
          default='htm_network_config/6categories.json',
          help='Relative path to the HTM network config JSON. '
               'This option is ignored when the --model flag '
               'is used.')),
    (('--inputModel', '-im'),
     dict(dest='inputModel', type=str, default=None,
          help='Relative path of the serialized HTM model to be '
               'loaded.')),
    (('--outputModel', '-om'),
     dict(dest='outputModel', type=str, default=None,
          help='Relative path to serialize the HTM model.')),
    (('--disableLearning', '-dl'),
     dict(dest='disableLearning', action='store_true', default=False,
          help='Use this flag to disable learning. If not '
               'provided, then learning is enabled by default.')),
    (('--batch', '-b'),
     dict(dest='batchSize', type=int, default=1000,
          help='Size of each batch being processed.')),
  ]
  for flags, kwargs in argSpecs:
    parser.add_argument(*flags, **kwargs)

  # Parse input options
  options = parser.parse_args()
  outputDir = options.outputDir
  networkConfigPath = options.htmConfig
  batchSize = options.batchSize

  # FIXME RES-464: until the serialization process is fixed, don't save the
  # model. Run serially each phase (train -> validation -> test)
  # TODO: Re-introduce these command line args when serialization is fixed.
  # inputFile = options.inputFile
  # inputModelPath = options.inputModel
  # outputModelPath = options.outputModel
  # learningMode = not options.disableLearning

  inputModelPath = None
  outputModelPath = None
  phases = ['train', 'val', 'test']
  inputDir = os.path.join('data', 'artificial')
  expName = 'binary_ampl=10.0_mean=0.0_noise=0.0'  # 'body_acc_x_inertial_signals'
  network = None

  with open(networkConfigPath, 'r') as f:
    networkConfig = simplejson.load(f)
    for phase in phases:

      # Data source for this phase
      inputFile = os.path.join(inputDir, '%s_%s.csv' % (expName, phase))
      dataSource = FileRecordStream(streamID=inputFile)
      numRecords = dataSource.getDataRowCount()
      _LOGGER.debug('Number of records to be processed: %s' % numRecords)

      # Where the trace CSV for this phase will be written
      traceFileName = getTraceFileName(inputFile)
      traceFilePath = os.path.join(outputDir, '%s.csv' % traceFileName)
      if not os.path.exists(outputDir):
        os.makedirs(outputDir)

      if not network:
        # First phase: build the network and train it
        assert phase == 'train'  # the network may only be created during train
        learningMode = True
        network = createNetwork(dataSource, networkConfig, inputModelPath)
      else:
        # Later phases: reuse the trained network with learning disabled
        learningMode = False
        regionName = networkConfig["sensorRegionConfig"]["regionName"]
        sensorRegion = network.regions[regionName].getSelf()
        sensorRegion.dataSource = dataSource
        if 'train' in sensorRegion.dataSource._filename:
          raise ValueError('Learning mode should not be disabled for the '
                           'train set.')

      _LOGGER.debug('Running network with inputFile=%s '
                    'and learningMode=%s' % (inputFile, learningMode))

      # FIXME RES-464 (end)

      run(network, numRecords, traceFilePath, networkConfig,
          outputModelPath, batchSize, learningMode)
# NOTE(review): this is a second, PEP8-formatted definition of main() that
# duplicates and shadows the 2-space-indented main() defined earlier in this
# module -- confirm which of the two is meant to survive.
def main():
    """Command-line entry point.

    Parses the CLI options, then runs the HTM network serially over the
    'train', 'val' and 'test' phases of the artificial dataset, writing one
    trace CSV per phase under --outputDir.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputFile',
                        '-d',
                        dest='inputFile',
                        type=str,
                        default=None,
                        help='Relative path to the input file.')

    parser.add_argument('--outputDir',
                        '-o',
                        dest='outputDir',
                        type=str,
                        default='results/traces',
                        help='Relative path to the directory where the HTM '
                        'network traces will be saved.')

    parser.add_argument('--htmConfig',
                        '-c',
                        dest='htmConfig',
                        type=str,
                        default='htm_network_config/6categories.json',
                        help='Relative path to the HTM network config JSON. '
                        'This option is ignored when the --model flag '
                        'is used.')

    parser.add_argument('--inputModel',
                        '-im',
                        dest='inputModel',
                        type=str,
                        default=None,
                        help='Relative path of the serialized HTM model to be '
                        'loaded.')

    parser.add_argument('--outputModel',
                        '-om',
                        dest='outputModel',
                        type=str,
                        default=None,
                        help='Relative path to serialize the HTM model.')

    parser.add_argument('--disableLearning',
                        '-dl',
                        dest='disableLearning',
                        action='store_true',
                        default=False,
                        help='Use this flag to disable learning. If not '
                        'provided, then learning is enabled by default.')

    parser.add_argument('--batch',
                        '-b',
                        dest='batchSize',
                        type=int,
                        default=1000,
                        help='Size of each batch being processed.')

    # Parse input options
    options = parser.parse_args()
    outputDir = options.outputDir
    networkConfigPath = options.htmConfig
    batchSize = options.batchSize

    # FIXME RES-464: until the serialization process is fixed, don't save the
    # model. Run serially each phase (train -> validation -> test)
    # TODO: Re-introduce these command line args when serialization is fixed.
    # inputFile = options.inputFile
    # inputModelPath = options.inputModel
    # outputModelPath = options.outputModel
    # learningMode = not options.disableLearning

    inputModelPath = None
    outputModelPath = None
    phases = ['train', 'val', 'test']
    inputDir = os.path.join('data', 'artificial')
    expName = 'binary_ampl=10.0_mean=0.0_noise=0.0'  # 'body_acc_x_inertial_signals'
    network = None
    with open(networkConfigPath, 'r') as f:
        networkConfig = simplejson.load(f)
        for phase in phases:

            # Data source for this phase
            inputFile = os.path.join(inputDir, '%s_%s.csv' % (expName, phase))
            dataSource = FileRecordStream(streamID=inputFile)
            numRecords = dataSource.getDataRowCount()
            _LOGGER.debug('Number of records to be processed: %s' % numRecords)

            # Trace output info: one CSV trace per phase under outputDir
            traceFileName = getTraceFileName(inputFile)
            traceFilePath = os.path.join(outputDir, '%s.csv' % traceFileName)
            if not os.path.exists(outputDir):
                os.makedirs(outputDir)

            # If there is no network yet, create one and train it.
            if not network:
                assert phase == 'train'  # network may only be created in train
                learningMode = True
                network = createNetwork(dataSource, networkConfig,
                                        inputModelPath)
            else:
                # Later phases reuse the trained network with learning off
                learningMode = False
                regionName = networkConfig["sensorRegionConfig"]["regionName"]
                sensorRegion = network.regions[regionName].getSelf()
                sensorRegion.dataSource = dataSource
                if 'train' in sensorRegion.dataSource._filename:
                    raise ValueError(
                        'Learning mode should not be disabled for the '
                        'train set.')

            _LOGGER.debug('Running network with inputFile=%s '
                          'and learningMode=%s' % (inputFile, learningMode))

            # FIXME RES-464 (end)

            run(network, numRecords, traceFilePath, networkConfig,
                outputModelPath, batchSize, learningMode)