Beispiel #1
0
class _BasicPredictionWriter(PredictionWriterIface):
    """ This class defines the basic (file-based) implementation of
  PredictionWriterIface, whose instances are returned by
  BasicPredictionWriterFactory
  """
    def __init__(self,
                 experimentDir,
                 label,
                 inferenceType,
                 fields,
                 metricNames=None,
                 checkpointSource=None):
        """ Constructor. Only records the configuration; the output file is
        created lazily by the first append() that has inferences to write.

        experimentDir:
                      experiment directory path that contains description.py

        label:        A label string to incorporate into the filename.

        inferenceType:
                      A constant from opfutils.InferenceType for the
                      requested prediction writer

        fields:       a sequence of nupic.data.fieldmeta.FieldMetaInfo
                      representing fields that will be emitted to this
                      prediction writer. A deep copy is stored.

        metricNames:  OPTIONAL - An iterable of metric names that will be
                      emitted by this prediction writer. A sorted copy is
                      stored; the caller's object is left untouched.

        checkpointSource:
                      If not None, a File-like object containing the
                      previously-checkpointed predictions for setting the
                      initial contents of this PredictionOutputStream. It is
                      rewound and copied into an in-memory buffer before
                      returning.
        """
        self.__experimentDir = experimentDir

        # opfutils.InferenceType kind value
        self.__inferenceType = inferenceType

        # A tuple of nupic.data.fieldmeta.FieldMetaInfo
        self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
        self.__numInputFields = len(self.__inputFieldsMeta)
        self.__label = label

        # Keep our own sorted copy: sorting in place would silently reorder
        # the caller's list, and would fail for non-list iterables.
        if metricNames is not None:
            metricNames = sorted(metricNames)
        self.__metricNames = metricNames

        # Output field meta info; filled in when the data file is opened
        self.__outputFieldsMeta = []

        # The list of inputs that we include in the prediction output
        self._rawInputNames = []

        # Output dataset
        self.__datasetPath = None
        self.__dataset = None

        # Save checkpoint data until we're ready to create the output dataset
        self.__checkpointCache = None
        if checkpointSource is not None:
            checkpointSource.seek(0)
            self.__checkpointCache = StringIO.StringIO()
            shutil.copyfileobj(checkpointSource, self.__checkpointCache)

    ############################################################################
    def __openDatafile(self, modelResult):
        """Create the prediction log file and write its header row.

        The output columns are derived from modelResult: the reset bit, the
        raw input fields (sorted by name, skipping names that start with '_'
        and the 'reset' field), the columns of each inference element, and
        finally the logged metrics. Rows held in the checkpoint cache, if
        any, are then copied into the new file and the cache is discarded.

        modelResult:    The model result whose rawInput and inferences
                        determine the output columns.
        """
        # Rebuild the column metadata from scratch. append() re-enters this
        # method whenever the dataset is None (e.g. after close()), and
        # extending the lists left over from a previous call would duplicate
        # every column.
        self.__outputFieldsMeta = []
        self._rawInputNames = []

        # Write reset bit
        self.__outputFieldsMeta.append(
            FieldMetaInfo(name="reset",
                          type=FieldMetaType.integer,
                          special=FieldMetaSpecial.reset))

        # -----------------------------------------------------------------------
        # Write each of the raw inputs that go into the encoders
        rawInput = modelResult.rawInput
        for field in sorted(rawInput.keys()):
            if field.startswith('_') or field == 'reset':
                continue
            self.__outputFieldsMeta.append(
                FieldMetaInfo(name=field,
                              type=FieldMetaType.string,
                              special=FieldMetaSpecial.none))
            self._rawInputNames.append(field)

        # -----------------------------------------------------------------------
        # Handle each of the inference elements
        for inferenceElement, value in modelResult.inferences.iteritems():
            inferenceLabel = InferenceElement.getLabel(inferenceElement)

            # TODO: Right now we assume list inferences are associated with
            # The input field metadata
            if type(value) in (list, tuple):
                # Append input and prediction field meta-info
                self.__outputFieldsMeta.extend(
                    self.__getListMetaInfo(inferenceElement))

            elif isinstance(value, dict):
                self.__outputFieldsMeta.extend(
                    self.__getDictMetaInfo(inferenceElement, value))

            else:
                # Scalar inference: optional ".actual" column, then the
                # inference itself
                if InferenceElement.getInputElement(inferenceElement):
                    self.__outputFieldsMeta.append(
                        FieldMetaInfo(name=inferenceLabel + ".actual",
                                      type=FieldMetaType.string,
                                      special=''))
                self.__outputFieldsMeta.append(
                    FieldMetaInfo(name=inferenceLabel,
                                  type=FieldMetaType.string,
                                  special=''))

        if self.__metricNames:
            for metricName in self.__metricNames:
                self.__outputFieldsMeta.append(
                    FieldMetaInfo(name=metricName,
                                  type=FieldMetaType.float,
                                  special=FieldMetaSpecial.none))

        # Create the inference directory for our experiment
        inferenceDir = _FileUtils.createExperimentInferenceDir(
            self.__experimentDir)

        # Construct the prediction dataset file path
        filename = (self.__label + "." +
                    opfutils.InferenceType.getLabel(self.__inferenceType) +
                    ".predictionLog.csv")
        self.__datasetPath = os.path.join(inferenceDir, filename)

        # Create the output dataset. print is called with one parenthesised
        # argument so the output is the same with or without print_function.
        print("OPENING OUTPUT FOR PREDICTION WRITER AT: %r" % self.__datasetPath)
        print("Prediction field-meta: %r" % (
            [tuple(i) for i in self.__outputFieldsMeta], ))
        self.__dataset = FileRecordStream(streamID=self.__datasetPath,
                                          write=True,
                                          fields=self.__outputFieldsMeta)

        # Copy data from checkpoint cache
        if self.__checkpointCache is not None:
            self.__checkpointCache.seek(0)

            reader = csv.reader(self.__checkpointCache, dialect='excel')

            # Consume the header row and check it matches our columns
            try:
                header = reader.next()
            except StopIteration:
                print("Empty record checkpoint initializer for %r" % (
                    self.__datasetPath, ))
            else:
                assert tuple(self.__dataset.getFieldNames()) == tuple(header), \
                  "dataset.getFieldNames(): %r; predictionCheckpointFieldNames: %r" % (
                  tuple(self.__dataset.getFieldNames()), tuple(header))

            # Copy the remaining rows from checkpoint
            numRowsCopied = 0
            for row in reader:
                self.__dataset.appendRecord(row)
                numRowsCopied += 1

            self.__dataset.flush()

            print("Restored %d rows from checkpoint for %r" % (
                numRowsCopied, self.__datasetPath))

            # Dispose of our checkpoint cache
            self.__checkpointCache.close()
            self.__checkpointCache = None

    ############################################################################
    def setLoggedMetrics(self, metricNames):
        """ Tell the writer which metrics should be written

        Parameters:
        -----------------------------------------------------------------------
        metricNames: A list of metric labels to be written; None selects no
                     metrics. The names are stored as a set.
        """
        self.__metricNames = set() if metricNames is None else set(metricNames)

    ############################################################################
    def close(self):
        """ [virtual method override] Closes the writer: closes the underlying
        dataset, if one is open, and forgets it.
        """
        dataset = self.__dataset
        if dataset:
            dataset.close()
        self.__dataset = None

    ############################################################################
    def __getListMetaInfo(self, inferenceElement):
        """ Get field metadata information for inferences that are of list type

        For every input field this yields a "<field>.<inferenceLabel>" column.
        When the inference element has an associated input element, that
        column is preceded by a "<field>.actual" column.

        TODO: Right now we assume list inferences are associated with the
        input field metadata

        inferenceElement:   The inference element to describe.

        Returns a list of FieldMetaInfo.
        """
        fieldMetaInfo = []
        inferenceLabel = InferenceElement.getLabel(inferenceElement)

        # Loop-invariant: whether there are actual values to pair with the
        # inferred ones
        hasInputElement = bool(
            InferenceElement.getInputElement(inferenceElement))

        for inputFieldMeta in self.__inputFieldsMeta:
            # Only emit the ".actual" column when there is an input element.
            # Appending it unconditionally referenced an unbound local when
            # there was none.
            if hasInputElement:
                fieldMetaInfo.append(
                    FieldMetaInfo(name=inputFieldMeta.name + ".actual",
                                  type=inputFieldMeta.type,
                                  special=inputFieldMeta.special))

            fieldMetaInfo.append(
                FieldMetaInfo(name=inputFieldMeta.name + "." + inferenceLabel,
                              type=inputFieldMeta.type,
                              special=inputFieldMeta.special))

        return fieldMetaInfo

    ############################################################################
    def __getDictMetaInfo(self, inferenceElement, inferenceDict):
        """Get field metadata information for inferences that are of dict type

        Yields an optional "<label>.actual" column (only when the inference
        element has an input element) followed by one "<label>.<key>" column
        per key of inferenceDict, in sorted key order.
        """
        inferenceLabel = InferenceElement.getLabel(inferenceElement)
        fieldMetaInfo = []

        if InferenceElement.getInputElement(inferenceElement):
            actualField = FieldMetaInfo(name=inferenceLabel + ".actual",
                                        type=FieldMetaType.string,
                                        special='')
            fieldMetaInfo.append(actualField)

        fieldMetaInfo.extend(
            FieldMetaInfo(name=inferenceLabel + "." + str(key),
                          type=FieldMetaType.string,
                          special='')
            for key in sorted(inferenceDict.keys()))

        return fieldMetaInfo

    ############################################################################
    def append(self, modelResult):
        """ [virtual method override] Emits a single prediction as input versus
        predicted.

        Nothing is written when modelResult has no inferences or all of them
        are None. The output file is opened on the first record that is
        actually written.

        modelResult:    An opfutils.ModelResult object that contains the model
                        input and output for the current timestep.
        """
        # If there are no inferences, don't write anything
        inferences = modelResult.inferences
        if inferences is None or not any(
                value is not None for value in inferences.itervalues()):
            return

        if self.__dataset is None:
            self.__openDatafile(modelResult)

        inputData = modelResult.sensorInput

        outputRow = [int(bool(inputData.sequenceReset))]

        # -----------------------------------------------------------------------
        # Write out the raw inputs
        rawInput = modelResult.rawInput
        outputRow.extend(str(rawInput[field]) for field in self._rawInputNames)

        # -----------------------------------------------------------------------
        # Write out the inference element info
        for inferenceElement, outputVal in inferences.iteritems():
            inputElement = InferenceElement.getInputElement(inferenceElement)
            if inputElement:
                inputVal = getattr(inputData, inputElement)
            else:
                inputVal = None

            if type(outputVal) in (list, tuple):
                if inputVal is None:
                    # No actual values to pair with: write the inferred
                    # values only. Previously this fell into zip(None, ...)
                    # and raised TypeError.
                    outputRow.extend(str(ov) for ov in outputVal)
                else:
                    assert type(inputVal) in (list, tuple)

                    for iv, ov in zip(inputVal, outputVal):
                        # Write actual
                        outputRow.append(str(iv))

                        # Write inferred
                        outputRow.append(str(ov))
            elif isinstance(outputVal, dict):
                if inputVal is not None:
                    # If we have a predicted field, include only that in the actuals
                    if modelResult.predictedFieldIdx is not None:
                        outputRow.append(
                            str(inputVal[modelResult.predictedFieldIdx]))
                    else:
                        outputRow.append(str(inputVal))
                for key in sorted(outputVal.keys()):
                    outputRow.append(str(outputVal[key]))
            else:
                if inputVal is not None:
                    outputRow.append(str(inputVal))
                outputRow.append(str(outputVal))

        # The metric names may still be None (constructor default), in which
        # case the header has no metric columns either.
        metrics = modelResult.metrics
        for metricName in (self.__metricNames or ()):
            outputRow.append(metrics.get(metricName, 0.0))

        self.__dataset.appendRecord(outputRow)

        self.__dataset.flush()

    def checkpoint(self, checkpointSink, maxRows):
        """ [virtual method override] Save a checkpoint of the prediction output
        stream. The checkpoint comprises up to maxRows of the most recent
        inference records.

        If the output file has not been opened yet, the cached checkpoint
        passed to the constructor (if any) is copied through unchanged.

        Parameters:
        ----------------------------------------------------------------------
        checkpointSink:     A File-like object where predictions checkpoint
                            data, if any, will be stored. truncate() is called
                            on it first.
                            NOTE(review): truncate() with no argument cuts at
                            the sink's current position -- confirm callers
                            pass a rewound sink.
        maxRows:            Maximum number of most recent inference rows
                            to checkpoint.
        """
        checkpointSink.truncate()

        if self.__dataset is None:
            # Nothing written yet: pass through the cached checkpoint, if any
            if self.__checkpointCache is not None:
                self.__checkpointCache.seek(0)
                shutil.copyfileobj(self.__checkpointCache, checkpointSink)
                checkpointSink.flush()
            return

        self.__dataset.flush()
        totalDataRows = self.__dataset.getDataRowCount()

        if totalDataRows == 0:
            # Nothing to checkpoint
            return

        # Open reader of prediction file (suppress missingValues conversion)
        reader = FileRecordStream(self.__datasetPath, missingValues=[])
        try:
            # Create CSV writer for writing checkpoint rows
            writer = csv.writer(checkpointSink)

            # Write the header row to checkpoint sink -- just field names
            writer.writerow(reader.getFieldNames())

            # Determine number of rows to checkpoint
            numToWrite = min(maxRows, totalDataRows)

            # Skip initial rows to get to the rows that we actually need to
            # checkpoint
            for _ in xrange(totalDataRows - numToWrite):
                reader.next()

            # Write the data rows to checkpoint sink
            numWritten = 0
            while True:
                row = reader.getNextRecord()
                if row is None:
                    break

                writer.writerow([str(element) for element in row])
                numWritten += 1

            assert numWritten == numToWrite, \
              "numWritten (%s) != numToWrite (%s)" % (numWritten, numToWrite)
        finally:
            # Always release the read handle on the prediction file; it was
            # previously left open.
            reader.close()

        checkpointSink.flush()
Beispiel #2
0
class _BasicPredictionWriter(PredictionWriterIface):
  """ This class defines the basic (file-based) implementation of
  PredictionWriterIface, whose instances are returned by
  BasicPredictionWriterFactory
  """
  def __init__(self, experimentDir, label, inferenceType,
               fields, metricNames=None, checkpointSource=None):
    """ Constructor. Only records the configuration; the output file is
    created lazily by the first append() that has inferences to write.

    experimentDir:
                  experiment directory path that contains description.py

    label:        A label string to incorporate into the filename.

    inferenceType:
                  A constant from opfutils.InferenceType for the
                  requested prediction writer

    fields:       a sequence of nupic.data.fieldmeta.FieldMetaInfo
                  representing fields that will be emitted to this prediction
                  writer. A deep copy is stored.

    metricNames:  OPTIONAL - An iterable of metric names that will be emitted
                  by this prediction writer. A sorted copy is stored; the
                  caller's object is left untouched.

    checkpointSource:
                  If not None, a File-like object containing the
                  previously-checkpointed predictions for setting the initial
                  contents of this PredictionOutputStream. It is rewound and
                  copied into an in-memory buffer before returning.
    """
    self.__experimentDir = experimentDir

    # opfutils.InferenceType kind value
    self.__inferenceType = inferenceType

    # A tuple of nupic.data.fieldmeta.FieldMetaInfo
    self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
    self.__numInputFields = len(self.__inputFieldsMeta)
    self.__label = label

    # Keep our own sorted copy: sorting in place would silently reorder the
    # caller's list, and would fail for non-list iterables.
    if metricNames is not None:
      metricNames = sorted(metricNames)
    self.__metricNames = metricNames

    # Output field meta info; filled in when the data file is opened
    self.__outputFieldsMeta = []

    # The list of inputs that we include in the prediction output
    self._rawInputNames = []

    # Output dataset
    self.__datasetPath = None
    self.__dataset = None

    # Save checkpoint data until we're ready to create the output dataset
    self.__checkpointCache = None
    if checkpointSource is not None:
      checkpointSource.seek(0)
      self.__checkpointCache = StringIO.StringIO()
      shutil.copyfileobj(checkpointSource, self.__checkpointCache)


  def __openDatafile(self, modelResult):
    """Create the prediction log file and write its header row.

    The output columns are derived from modelResult: the reset bit, the raw
    input fields (sorted by name, skipping names that start with '_' and the
    'reset' field), the columns of each inference element, and finally the
    logged metrics. Rows held in the checkpoint cache, if any, are then
    copied into the new file and the cache is discarded.

    modelResult:    The model result whose rawInput and inferences determine
                    the output columns.
    """
    # Rebuild the column metadata from scratch. append() re-enters this
    # method whenever the dataset is None (e.g. after close()), and extending
    # the lists left over from a previous call would duplicate every column.
    self.__outputFieldsMeta = []
    self._rawInputNames = []

    # Write reset bit
    self.__outputFieldsMeta.append(
      FieldMetaInfo(name="reset",
                    type=FieldMetaType.integer,
                    special=FieldMetaSpecial.reset))

    # -----------------------------------------------------------------------
    # Write each of the raw inputs that go into the encoders
    rawInput = modelResult.rawInput
    for field in sorted(rawInput.keys()):
      if field.startswith('_') or field == 'reset':
        continue
      self.__outputFieldsMeta.append(
        FieldMetaInfo(name=field, type=FieldMetaType.string,
                      special=FieldMetaSpecial.none))
      self._rawInputNames.append(field)

    # -----------------------------------------------------------------------
    # Handle each of the inference elements
    for inferenceElement, value in modelResult.inferences.iteritems():
      inferenceLabel = InferenceElement.getLabel(inferenceElement)

      # TODO: Right now we assume list inferences are associated with
      # The input field metadata
      if type(value) in (list, tuple):
        # Append input and prediction field meta-info
        self.__outputFieldsMeta.extend(self.__getListMetaInfo(inferenceElement))

      elif isinstance(value, dict):
        self.__outputFieldsMeta.extend(
          self.__getDictMetaInfo(inferenceElement, value))

      else:
        # Scalar inference: optional ".actual" column, then the inference
        # itself
        if InferenceElement.getInputElement(inferenceElement):
          self.__outputFieldsMeta.append(
            FieldMetaInfo(name=inferenceLabel+".actual",
                          type=FieldMetaType.string, special=''))
        self.__outputFieldsMeta.append(
          FieldMetaInfo(name=inferenceLabel,
                        type=FieldMetaType.string, special=''))

    if self.__metricNames:
      for metricName in self.__metricNames:
        self.__outputFieldsMeta.append(
          FieldMetaInfo(name=metricName,
                        type=FieldMetaType.float,
                        special=FieldMetaSpecial.none))

    # Create the inference directory for our experiment
    inferenceDir = _FileUtils.createExperimentInferenceDir(self.__experimentDir)

    # Construct the prediction dataset file path
    filename = (self.__label + "." +
               opfutils.InferenceType.getLabel(self.__inferenceType) +
               ".predictionLog.csv")
    self.__datasetPath = os.path.join(inferenceDir, filename)

    # Create the output dataset. print is called with one parenthesised
    # argument so the output is the same with or without print_function.
    print("OPENING OUTPUT FOR PREDICTION WRITER AT: {0!r}".format(self.__datasetPath))
    print("Prediction field-meta: {0!r}".format([tuple(i) for i in self.__outputFieldsMeta]))
    self.__dataset = FileRecordStream(streamID=self.__datasetPath, write=True,
                                     fields=self.__outputFieldsMeta)

    # Copy data from checkpoint cache
    if self.__checkpointCache is not None:
      self.__checkpointCache.seek(0)

      reader = csv.reader(self.__checkpointCache, dialect='excel')

      # Consume the header row and check it matches our columns
      try:
        header = reader.next()
      except StopIteration:
        print("Empty record checkpoint initializer for {0!r}".format(self.__datasetPath))
      else:
        assert tuple(self.__dataset.getFieldNames()) == tuple(header), \
          "dataset.getFieldNames(): {0!r}; predictionCheckpointFieldNames: {1!r}".format(
          tuple(self.__dataset.getFieldNames()), tuple(header))

      # Copy the remaining rows from checkpoint
      numRowsCopied = 0
      for row in reader:
        self.__dataset.appendRecord(row)
        numRowsCopied += 1

      self.__dataset.flush()

      print("Restored {0:d} rows from checkpoint for {1!r}".format(
        numRowsCopied, self.__datasetPath))

      # Dispose of our checkpoint cache
      self.__checkpointCache.close()
      self.__checkpointCache = None


  def setLoggedMetrics(self, metricNames):
    """ Tell the writer which metrics should be written

    Parameters:
    -----------------------------------------------------------------------
    metricNames: A list of metric labels to be written; None selects no
                 metrics. The names are stored as a set.
    """
    self.__metricNames = set() if metricNames is None else set(metricNames)


  def close(self):
    """ [virtual method override] Closes the writer: closes the underlying
    dataset, if one is open, and forgets it.
    """
    dataset = self.__dataset
    if dataset:
      dataset.close()
    self.__dataset = None


  def __getListMetaInfo(self, inferenceElement):
    """ Get field metadata information for inferences that are of list type

    For every input field this yields a "<field>.<inferenceLabel>" column.
    When the inference element has an associated input element, that column
    is preceded by a "<field>.actual" column.

    TODO: Right now we assume list inferences are associated with the input
    field metadata

    inferenceElement:   The inference element to describe.

    Returns a list of FieldMetaInfo.
    """
    fieldMetaInfo = []
    inferenceLabel = InferenceElement.getLabel(inferenceElement)

    # Loop-invariant: whether there are actual values to pair with the
    # inferred ones
    hasInputElement = bool(InferenceElement.getInputElement(inferenceElement))

    for inputFieldMeta in self.__inputFieldsMeta:
      # Only emit the ".actual" column when there is an input element.
      # Appending it unconditionally referenced an unbound local when there
      # was none.
      if hasInputElement:
        fieldMetaInfo.append(FieldMetaInfo(
          name=inputFieldMeta.name + ".actual",
          type=inputFieldMeta.type,
          special=inputFieldMeta.special
        ))

      fieldMetaInfo.append(FieldMetaInfo(
        name=inputFieldMeta.name + "." + inferenceLabel,
        type=inputFieldMeta.type,
        special=inputFieldMeta.special
      ))

    return fieldMetaInfo


  def __getDictMetaInfo(self, inferenceElement, inferenceDict):
    """Get field metadata information for inferences that are of dict type

    Yields an optional "<label>.actual" column (only when the inference
    element has an input element) followed by one "<label>.<key>" column per
    key of inferenceDict, in sorted key order.
    """
    inferenceLabel = InferenceElement.getLabel(inferenceElement)
    fieldMetaInfo = []

    if InferenceElement.getInputElement(inferenceElement):
      actualField = FieldMetaInfo(name=inferenceLabel+".actual",
                                  type=FieldMetaType.string,
                                  special='')
      fieldMetaInfo.append(actualField)

    fieldMetaInfo.extend(
      FieldMetaInfo(name=inferenceLabel+"."+str(key),
                    type=FieldMetaType.string,
                    special='')
      for key in sorted(inferenceDict.keys()))

    return fieldMetaInfo


  def append(self, modelResult):
    """ [virtual method override] Emits a single prediction as input versus
    predicted.

    Nothing is written when modelResult has no inferences or all of them are
    None. The output file is opened on the first record that is actually
    written.

    modelResult:    An opfutils.ModelResult object that contains the model input
                    and output for the current timestep.
    """
    # If there are no inferences, don't write anything
    inferences = modelResult.inferences
    if inferences is None or not any(
        value is not None for value in inferences.itervalues()):
      return

    if self.__dataset is None:
      self.__openDatafile(modelResult)

    inputData = modelResult.sensorInput

    outputRow = [int(bool(inputData.sequenceReset))]

    # -----------------------------------------------------------------------
    # Write out the raw inputs
    rawInput = modelResult.rawInput
    outputRow.extend(str(rawInput[field]) for field in self._rawInputNames)

    # -----------------------------------------------------------------------
    # Write out the inference element info
    for inferenceElement, outputVal in inferences.iteritems():
      inputElement = InferenceElement.getInputElement(inferenceElement)
      if inputElement:
        inputVal = getattr(inputData, inputElement)
      else:
        inputVal = None

      if type(outputVal) in (list, tuple):
        if inputVal is None:
          # No actual values to pair with: write the inferred values only.
          # Previously this fell into zip(None, ...) and raised TypeError.
          outputRow.extend(str(ov) for ov in outputVal)
        else:
          assert type(inputVal) in (list, tuple)

          for iv, ov in zip(inputVal, outputVal):
            # Write actual
            outputRow.append(str(iv))

            # Write inferred
            outputRow.append(str(ov))
      elif isinstance(outputVal, dict):
        if inputVal is not None:
          # If we have a predicted field, include only that in the actuals
          if modelResult.predictedFieldName is not None:
            outputRow.append(str(inputVal[modelResult.predictedFieldName]))
          else:
            outputRow.append(str(inputVal))
        for key in sorted(outputVal.keys()):
          outputRow.append(str(outputVal[key]))
      else:
        if inputVal is not None:
          outputRow.append(str(inputVal))
        outputRow.append(str(outputVal))

    # The metric names may still be None (constructor default), in which case
    # the header has no metric columns either.
    metrics = modelResult.metrics
    for metricName in (self.__metricNames or ()):
      outputRow.append(metrics.get(metricName, 0.0))

    self.__dataset.appendRecord(outputRow)

    self.__dataset.flush()

  def checkpoint(self, checkpointSink, maxRows):
    """ [virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent inference
    records.

    If the output file has not been opened yet, the cached checkpoint passed
    to the constructor (if any) is copied through unchanged.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data, if
                        any, will be stored. truncate() is called on it first.
                        NOTE(review): truncate() with no argument cuts at the
                        sink's current position -- confirm callers pass a
                        rewound sink.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """
    checkpointSink.truncate()

    if self.__dataset is None:
      # Nothing written yet: pass through the cached checkpoint, if any
      if self.__checkpointCache is not None:
        self.__checkpointCache.seek(0)
        shutil.copyfileobj(self.__checkpointCache, checkpointSink)
        checkpointSink.flush()
      return

    self.__dataset.flush()
    totalDataRows = self.__dataset.getDataRowCount()

    if totalDataRows == 0:
      # Nothing to checkpoint
      return

    # Open reader of prediction file (suppress missingValues conversion)
    reader = FileRecordStream(self.__datasetPath, missingValues=[])
    try:
      # Create CSV writer for writing checkpoint rows
      writer = csv.writer(checkpointSink)

      # Write the header row to checkpoint sink -- just field names
      writer.writerow(reader.getFieldNames())

      # Determine number of rows to checkpoint
      numToWrite = min(maxRows, totalDataRows)

      # Skip initial rows to get to the rows that we actually need to
      # checkpoint
      for _ in xrange(totalDataRows - numToWrite):
        reader.next()

      # Write the data rows to checkpoint sink
      numWritten = 0
      while True:
        row = reader.getNextRecord()
        if row is None:
          break

        writer.writerow([str(element) for element in row])
        numWritten += 1

      assert numWritten == numToWrite, \
        "numWritten ({0!s}) != numToWrite ({1!s})".format(numWritten, numToWrite)
    finally:
      # Always release the read handle on the prediction file; it was
      # previously left open.
      reader.close()

    checkpointSink.flush()