Beispiel #1
0
def run(numRecords):
  '''
  Run the Hot Gym example.
  '''

  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  network.regions["sensor"].getSelf().predictedField = "price"

  # Set predicted field
  network.regions["sensor"].setParameter("predictedField", "price")

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = _RUN_EPOCH  # Run the network, N iterations at a time.
  graph = Graph({
    'title': 'Bitcoin Prediction',
    'y_label': 'price',
    'y_lim': 'auto',
    'prediction_num': 2,
    'line_labels': ['1-step', '5-step']
  })
  for iteration in range(0, numRecords, N):
    if iteration % _RUN_INTERVAL == 0:
      network.run(N)

      price = network.regions["sensor"].getOutputData("sourceOut")[0]

      predictionResults = getPredictionResults(network, "classifier")
      oneStep = predictionResults[1]["predictedValue"]
      oneStepConfidence = predictionResults[1]["predictionConfidence"]
      fiveStep = predictionResults[5]["predictedValue"]
      fiveStepConfidence = predictionResults[5]["predictionConfidence"]

      result = (oneStep, oneStepConfidence * 100,
                fiveStep, fiveStepConfidence * 100)
      
      if iteration % _PRINT_INTERVAL == 0:
        print "iteration: {}".format(iteration)
        print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
      
      results.append(result)

      graph.write(price, [oneStep, fiveStep])
  
  graph.close()

  return results
def runDemo():
    dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
    numRecords = dataSource.getDataRowCount()
    print "Creating network"
    network = createNetwork(dataSource)
    outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_FILE_NAME)
    with open(outputPath, "w") as outputFile:
        writer = csv.writer(outputFile)
        print "Running network"
        print "Writing output to: %s" % outputPath
        runNetwork(network, numRecords, writer)
    print "Hierarchy demo finished"
def runDemo():
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = dataSource.getDataRowCount()
  print "Creating network"
  network = createNetwork(dataSource)
  outputPath = os.path.join(os.path.dirname(__file__), _OUTPUT_FILE_NAME)
  with open(outputPath, "w") as outputFile:
    writer = csv.writer(outputFile)
    print "Running network"
    print "Writing output to: %s" % outputPath
    runNetwork(network, numRecords, writer)
  print "Hierarchy demo finished"
Beispiel #4
0
def run(numRecords):
    '''
  Run the Hot Gym example.
  '''

    # Create a data source for the network.
    dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
    numRecords = min(numRecords, dataSource.getDataRowCount())
    network = createNetwork(dataSource)

    network.regions["sensor"].getSelf().predictedField = "sine"

    # Set predicted field
    network.regions["sensor"].setParameter("predictedField", "sine")

    # Enable learning for all regions.
    network.regions["SP"].setParameter("learningMode", 1)
    network.regions["TM"].setParameter("learningMode", 1)
    network.regions["classifier"].setParameter("learningMode", 1)

    # Enable inference for all regions.
    network.regions["SP"].setParameter("inferenceMode", 1)
    network.regions["TM"].setParameter("inferenceMode", 1)
    network.regions["classifier"].setParameter("inferenceMode", 1)

    results = []
    N = 1  # Run the network, N iterations at a time.
    output = nupic_output.NuPICPlotOutput("Sine", show_anomaly_score=True)
    for iteration in range(0, numRecords, N):
        network.run(N)

        sine = network.regions["sensor"].getOutputData("sourceOut")[0]

        predictionResults = getPredictionResults(network, "classifier")
        oneStep = predictionResults[1]["predictedValue"]
        oneStepConfidence = predictionResults[1]["predictionConfidence"]
        fiveStep = predictionResults[10]["predictedValue"]
        fiveStepConfidence = predictionResults[10]["predictionConfidence"]

        result = (oneStep, oneStepConfidence * 100, fiveStep,
                  fiveStepConfidence * 100)
        print "1-step: {:16} ({:4.4}%)\t 10-step: {:16} ({:4.4}%)".format(
            *result)
        results.append(result)

        output.write(sine, oneStep, 0)

    output.close()

    return results
def runHotgym(numRecords):
    """Run the Hot Gym example."""

    # Create a data source for the network.
    dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
    numRecords = min(numRecords, dataSource.getDataRowCount())
    network = createNetwork(dataSource)

    # Set predicted field index. It needs to be the same index as the data source.
    predictedIdx = dataSource.getFieldNames().index("consumption")
    network.regions["sensor"].setParameter("predictedFieldIdx", predictedIdx)

    # Enable learning for all regions.
    network.regions["SP"].setParameter("learningMode", 1)
    network.regions["TM"].setParameter("learningMode", 1)
    network.regions["classifier"].setParameter("learningMode", 1)

    # Enable inference for all regions.
    network.regions["SP"].setParameter("inferenceMode", 1)
    network.regions["TM"].setParameter("inferenceMode", 1)
    network.regions["classifier"].setParameter("inferenceMode", 1)

    results = []
    N = 1  # Run the network, N iterations at a time.
    for iteration in range(0, numRecords, N):
        network.run(N)

        predictionResults = getPredictionResults(network, "classifier")
        oneStep = predictionResults[1]["predictedValue"]
        oneStepConfidence = predictionResults[1]["predictionConfidence"]
        fiveStep = predictionResults[5]["predictedValue"]
        fiveStepConfidence = predictionResults[5]["predictionConfidence"]

        result = (oneStep, oneStepConfidence * 100, fiveStep,
                  fiveStepConfidence * 100)
        print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(
            *result)
        results.append(result)

    return results
def runHotgym(numRecords):
  """Run the Hot Gym example."""

  # Create a data source for the network.
  dataSource = FileRecordStream(streamID=_INPUT_FILE_PATH)
  numRecords = min(numRecords, dataSource.getDataRowCount())
  network = createNetwork(dataSource)

  # Set predicted field index. It needs to be the same index as the data source.
  predictedIdx = dataSource.getFieldNames().index("consumption")
  network.regions["sensor"].setParameter("predictedFieldIdx", predictedIdx)

  # Enable learning for all regions.
  network.regions["SP"].setParameter("learningMode", 1)
  network.regions["TM"].setParameter("learningMode", 1)
  network.regions["classifier"].setParameter("learningMode", 1)

  # Enable inference for all regions.
  network.regions["SP"].setParameter("inferenceMode", 1)
  network.regions["TM"].setParameter("inferenceMode", 1)
  network.regions["classifier"].setParameter("inferenceMode", 1)

  results = []
  N = 1  # Run the network, N iterations at a time.
  for iteration in range(0, numRecords, N):
    network.run(N)

    predictionResults = getPredictionResults(network, "classifier")
    oneStep = predictionResults[1]["predictedValue"]
    oneStepConfidence = predictionResults[1]["predictionConfidence"]
    fiveStep = predictionResults[5]["predictedValue"]
    fiveStepConfidence = predictionResults[5]["predictionConfidence"]

    result = (oneStep, oneStepConfidence * 100,
              fiveStep, fiveStepConfidence * 100)
    print "1-step: {:16} ({:4.4}%)\t 5-step: {:16} ({:4.4}%)".format(*result)
    results.append(result)

  return results
Beispiel #7
0
class _BasicPredictionWriter(PredictionWriterIface):
    """ This class defines the basic (file-based) implementation of
  PredictionWriterIface, whose instances are returned by
  BasicPredictionWriterFactory
  """
    def __init__(self,
                 experimentDir,
                 label,
                 inferenceType,
                 fields,
                 metricNames=None,
                 checkpointSource=None):
        """ Constructor

    experimentDir:
                  experiment directory path that contains description.py

    label:        A label string to incorporate into the filename.

    inferenceType:
                  A constant from opfutils.InferenceType for the
                  requested prediction writer

    fields:       a non-empty sequence of nupic.data.fieldmeta.FieldMetaInfo
                  representing fields that will be emitted to this prediction
                  writer

    metricNames:  OPTIONAL - A list of metric names that will be emitted by
                  this prediction writer

    checkpointSource:
                  If not None, a File-like object containing the
                  previously-checkpointed predictions for setting the initial
                  contents of this PredictionOutputStream.  Will be copied
                  before returning, if needed.
    """
        #assert len(fields) > 0

        self.__experimentDir = experimentDir

        # opfutils.InferenceType kind value
        self.__inferenceType = inferenceType

        # A tuple of nupic.data.fieldmeta.FieldMetaInfo
        self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
        self.__numInputFields = len(self.__inputFieldsMeta)
        self.__label = label
        # Sort in place so metric columns are always emitted in a stable
        # order.  NOTE(review): this mutates the caller's list — confirm
        # callers don't depend on the original ordering.
        if metricNames is not None:
            metricNames.sort()
        self.__metricNames = metricNames

        # Define our output field meta info
        self.__outputFieldsMeta = []

        # The list of inputs that we include in the prediction output
        self._rawInputNames = []

        # Output dataset; created lazily by __openDatafile() on the first
        # append(), so the header can be derived from the first ModelResult.
        self.__datasetPath = None
        self.__dataset = None

        # Save checkpoint data until we're ready to create the output dataset
        self.__checkpointCache = None
        if checkpointSource is not None:
            checkpointSource.seek(0)
            self.__checkpointCache = StringIO.StringIO()
            shutil.copyfileobj(checkpointSource, self.__checkpointCache)

        return

    ############################################################################
    def __openDatafile(self, modelResult):
        """Open the data file and write the header row.

        Called lazily from append() so the column layout (raw input fields,
        inference-element columns, and logged metric columns) can be derived
        from the first ModelResult.  Also replays any rows cached from a
        previous checkpoint into the new dataset.
        """

        # Write reset bit
        resetFieldMeta = FieldMetaInfo(name="reset",
                                       type=FieldMetaType.integer,
                                       special=FieldMetaSpecial.reset)

        self.__outputFieldsMeta.append(resetFieldMeta)

        # -----------------------------------------------------------------------
        # Write each of the raw inputs that go into the encoders
        # (sorted for a deterministic column order; "_"-prefixed and the
        # reset field are excluded).
        rawInput = modelResult.rawInput
        rawFields = rawInput.keys()
        rawFields.sort()
        for field in rawFields:
            if field.startswith('_') or field == 'reset':
                continue
            value = rawInput[field]
            meta = FieldMetaInfo(name=field,
                                 type=FieldMetaType.string,
                                 special=FieldMetaSpecial.none)
            self.__outputFieldsMeta.append(meta)
            self._rawInputNames.append(field)

        # -----------------------------------------------------------------------
        # Handle each of the inference elements
        for inferenceElement, value in modelResult.inferences.iteritems():
            inferenceLabel = InferenceElement.getLabel(inferenceElement)

            # TODO: Right now we assume list inferences are associated with
            # The input field metadata
            if type(value) in (list, tuple):
                # Append input and prediction field meta-info
                self.__outputFieldsMeta.extend(
                    self.__getListMetaInfo(inferenceElement))

            elif isinstance(value, dict):
                self.__outputFieldsMeta.extend(
                    self.__getDictMetaInfo(inferenceElement, value))
            else:
                # Scalar inference: emit an optional ".actual" column plus
                # the inference column itself.
                if InferenceElement.getInputElement(inferenceElement):
                    self.__outputFieldsMeta.append(
                        FieldMetaInfo(name=inferenceLabel + ".actual",
                                      type=FieldMetaType.string,
                                      special=''))
                self.__outputFieldsMeta.append(
                    FieldMetaInfo(name=inferenceLabel,
                                  type=FieldMetaType.string,
                                  special=''))

        # One float column per logged metric.
        if self.__metricNames:
            for metricName in self.__metricNames:
                metricField = FieldMetaInfo(name=metricName,
                                            type=FieldMetaType.float,
                                            special=FieldMetaSpecial.none)

                self.__outputFieldsMeta.append(metricField)

        # Create the inference directory for our experiment
        inferenceDir = _FileUtils.createExperimentInferenceDir(
            self.__experimentDir)

        # Construct the prediction dataset file path
        filename = (self.__label + "." +
                    opfutils.InferenceType.getLabel(self.__inferenceType) +
                    ".predictionLog.csv")
        self.__datasetPath = os.path.join(inferenceDir, filename)

        # Create the output dataset
        print "OPENING OUTPUT FOR PREDICTION WRITER AT: %r" % self.__datasetPath
        print "Prediction field-meta: %r" % (
            [tuple(i) for i in self.__outputFieldsMeta], )
        self.__dataset = FileRecordStream(streamID=self.__datasetPath,
                                          write=True,
                                          fields=self.__outputFieldsMeta)

        # Copy data from checkpoint cache
        if self.__checkpointCache is not None:
            self.__checkpointCache.seek(0)

            reader = csv.reader(self.__checkpointCache, dialect='excel')

            # Skip header row (reader.next() is the Python 2 iterator API)
            try:
                header = reader.next()
            except StopIteration:
                print "Empty record checkpoint initializer for %r" % (
                    self.__datasetPath, )
            else:
                # The checkpoint must have been written with the same layout.
                assert tuple(self.__dataset.getFieldNames()) == tuple(header), \
                  "dataset.getFieldNames(): %r; predictionCheckpointFieldNames: %r" % (
                  tuple(self.__dataset.getFieldNames()), tuple(header))

            # Copy the rows from checkpoint
            numRowsCopied = 0
            while True:
                try:
                    row = reader.next()
                except StopIteration:
                    break

                #print "DEBUG: restoring row from checkpoint: %r" % (row,)

                self.__dataset.appendRecord(row)
                numRowsCopied += 1

            self.__dataset.flush()

            print "Restored %d rows from checkpoint for %r" % (
                numRowsCopied, self.__datasetPath)

            # Dispose of our checkpoint cache
            self.__checkpointCache.close()
            self.__checkpointCache = None

        return

    ############################################################################
    def setLoggedMetrics(self, metricNames):
        """ Tell the writer which metrics should be written

    Parameters:
    -----------------------------------------------------------------------
    metricNames: iterable of metric labels to be written; None logs nothing
    """
        self.__metricNames = set() if metricNames is None else set(metricNames)

    ############################################################################
    def close(self):
        """ [virtual method override] Closes the writer (e.g., close the underlying
    file).  Safe to call when no dataset was ever opened.
    """
        dataset = self.__dataset
        if dataset:
            dataset.close()
        # Drop the reference so later calls are no-ops.
        self.__dataset = None

    ############################################################################
    def __getListMetaInfo(self, inferenceElement):
        """ Get field metadata information for inferences that are of list type
    TODO: Right now we assume list inferences are associated with the input field
    metadata

    Returns a list of FieldMetaInfo: for every input field, an optional
    "<field>.actual" column (only when the inference element has a
    corresponding input element) followed by a "<field>.<label>" prediction
    column.
    """
        fieldMetaInfo = []
        inferenceLabel = InferenceElement.getLabel(inferenceElement)

        # This depends only on inferenceElement, so evaluate it once.
        # Previously, when it was falsy, `outputFieldMeta` was never assigned
        # and the unconditional append below raised an UnboundLocalError.
        hasInputElement = bool(
            InferenceElement.getInputElement(inferenceElement))

        for inputFieldMeta in self.__inputFieldsMeta:
            if hasInputElement:
                # Column holding the actual (ground-truth) input value.
                fieldMetaInfo.append(
                    FieldMetaInfo(name=inputFieldMeta.name + ".actual",
                                  type=inputFieldMeta.type,
                                  special=inputFieldMeta.special))

            # Column holding the predicted value for this field.
            fieldMetaInfo.append(
                FieldMetaInfo(name=inputFieldMeta.name + "." + inferenceLabel,
                              type=inputFieldMeta.type,
                              special=inputFieldMeta.special))

        return fieldMetaInfo

    ############################################################################
    def __getDictMetaInfo(self, inferenceElement, inferenceDict):
        """Build output field metadata for a dict-valued inference element."""
        inferenceLabel = InferenceElement.getLabel(inferenceElement)
        fieldMetaInfo = []

        # Ground-truth column first, when this element mirrors an input.
        if InferenceElement.getInputElement(inferenceElement):
            fieldMetaInfo.append(
                FieldMetaInfo(name=inferenceLabel + ".actual",
                              type=FieldMetaType.string,
                              special=''))

        # One column per dict key, in sorted order for a stable header.
        fieldMetaInfo.extend(
            FieldMetaInfo(name=inferenceLabel + "." + str(key),
                          type=FieldMetaType.string,
                          special='')
            for key in sorted(inferenceDict.keys()))

        return fieldMetaInfo

    ############################################################################
    def append(self, modelResult):
        """ [virtual method override] Emits a single prediction as input versus
    predicted.

    modelResult:    An opfutils.ModelResult object that contains the model input
                    and output for the current timestep.
    """

        #print "DEBUG: _BasicPredictionWriter: writing modelResult: %r" % (modelResult,)

        # If there are no inferences, don't write anything
        inferences = modelResult.inferences
        hasInferences = False
        if inferences is not None:
            for value in inferences.itervalues():
                hasInferences = hasInferences or (value is not None)

        if not hasInferences:
            return

        # Lazily open the output file; the header layout is derived from
        # this first ModelResult.
        if self.__dataset is None:
            self.__openDatafile(modelResult)

        inputData = modelResult.sensorInput

        # First column is the reset bit, normalized to 0/1.
        sequenceReset = int(bool(inputData.sequenceReset))
        outputRow = [sequenceReset]

        # -----------------------------------------------------------------------
        # Write out the raw inputs (in the order captured by __openDatafile)
        rawInput = modelResult.rawInput
        for field in self._rawInputNames:
            outputRow.append(str(rawInput[field]))

        # -----------------------------------------------------------------------
        # Write out the inference element info
        for inferenceElement, outputVal in inferences.iteritems():
            inputElement = InferenceElement.getInputElement(inferenceElement)
            if inputElement:
                inputVal = getattr(inputData, inputElement)
            else:
                inputVal = None

            if type(outputVal) in (list, tuple):
                # NOTE(review): `None` is not a type, so this assert rejects
                # inputVal=None — `type(None)` was probably intended; confirm
                # before relying on it.
                assert type(inputVal) in (list, tuple, None)

                # Emit actual/inferred pairs, one per element.
                for iv, ov in zip(inputVal, outputVal):
                    # Write actual
                    outputRow.append(str(iv))

                    # Write inferred
                    outputRow.append(str(ov))
            elif isinstance(outputVal, dict):
                if inputVal is not None:
                    # If we have a predicted field, include only that in the actuals
                    if modelResult.predictedFieldIdx is not None:
                        outputRow.append(
                            str(inputVal[modelResult.predictedFieldIdx]))
                    else:
                        outputRow.append(str(inputVal))
                # Dict values are emitted in sorted-key order, matching the
                # header built by __getDictMetaInfo.
                for key in sorted(outputVal.keys()):
                    outputRow.append(str(outputVal[key]))
            else:
                if inputVal is not None:
                    outputRow.append(str(inputVal))
                outputRow.append(str(outputVal))

        # Metric columns; missing metrics default to 0.0.
        metrics = modelResult.metrics
        for metricName in self.__metricNames:
            outputRow.append(metrics.get(metricName, 0.0))

        #print "DEBUG: _BasicPredictionWriter: writing outputRow: %r" % (outputRow,)

        self.__dataset.appendRecord(outputRow)

        self.__dataset.flush()

        return

    def checkpoint(self, checkpointSink, maxRows):
        """ [virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent inference
    records.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data, if
                        any, will be stored.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """

        # Start from an empty sink.
        checkpointSink.truncate()

        if self.__dataset is None:
            # No dataset opened yet: re-emit the cached checkpoint, if any.
            if self.__checkpointCache is not None:
                self.__checkpointCache.seek(0)
                shutil.copyfileobj(self.__checkpointCache, checkpointSink)
                checkpointSink.flush()
                return
            else:
                # Nothing to checkpoint
                return

        self.__dataset.flush()
        totalDataRows = self.__dataset.getDataRowCount()

        if totalDataRows == 0:
            # Nothing to checkpoint
            return

        # Open reader of prediction file (suppress missingValues conversion)
        reader = FileRecordStream(self.__datasetPath, missingValues=[])

        # Create CSV writer for writing checkpoint rows
        writer = csv.writer(checkpointSink)

        # Write the header row to checkpoint sink -- just field names
        writer.writerow(reader.getFieldNames())

        # Determine number of rows to checkpoint
        numToWrite = min(maxRows, totalDataRows)

        # Skip initial rows to get to the rows that we actually need to checkpoint
        numRowsToSkip = totalDataRows - numToWrite
        for i in xrange(numRowsToSkip):
            reader.next()

        # Write the data rows to checkpoint sink
        numWritten = 0
        while True:
            row = reader.getNextRecord()
            if row is None:
                break

            row = [str(element) for element in row]

            #print "DEBUG: _BasicPredictionWriter: checkpointing row: %r" % (row,)

            writer.writerow(row)

            numWritten += 1

        # Sanity check: everything we intended to keep was written.
        assert numWritten == numToWrite, \
          "numWritten (%s) != numToWrite (%s)" % (numWritten, numToWrite)

        checkpointSink.flush()

        return
Beispiel #8
0
class _BasicPredictionWriter(PredictionWriterIface):
  """ This class defines the basic (file-based) implementation of
  PredictionWriterIface, whose instances are returned by
  BasicPredictionWriterFactory
  """
  def __init__(self, experimentDir, label, inferenceType,
               fields, metricNames=None, checkpointSource=None):
    """ Constructor

    experimentDir:
                  experiment directory path that contains description.py

    label:        A label string to incorporate into the filename.

    inferenceType:
                  A constant from opfutils.InferenceType for the
                  requested prediction writer

    fields:       a non-empty sequence of nupic.data.fieldmeta.FieldMetaInfo
                  representing fields that will be emitted to this prediction
                  writer

    metricNames:  OPTIONAL - A list of metric names that will be emitted by
                  this prediction writer

    checkpointSource:
                  If not None, a File-like object containing the
                  previously-checkpointed predictions for setting the initial
                  contents of this PredictionOutputStream.  Will be copied
                  before returning, if needed.
    """
    #assert len(fields) > 0

    self.__experimentDir = experimentDir

    # opfutils.InferenceType kind value
    self.__inferenceType = inferenceType

    # A tuple of nupic.data.fieldmeta.FieldMetaInfo
    self.__inputFieldsMeta = tuple(copy.deepcopy(fields))
    self.__numInputFields = len(self.__inputFieldsMeta)
    self.__label = label
    # Sort in place so metric columns are always emitted in a stable order.
    # NOTE(review): this mutates the caller's list — confirm callers don't
    # depend on the original ordering.
    if metricNames is not None:
      metricNames.sort()
    self.__metricNames = metricNames

    # Define our output field meta info
    self.__outputFieldsMeta = []

    # The list of inputs that we include in the prediction output
    self._rawInputNames = []

    # Output dataset; created lazily by __openDatafile() on the first
    # append(), so the header can be derived from the first ModelResult.
    self.__datasetPath = None
    self.__dataset = None

    # Save checkpoint data until we're ready to create the output dataset
    self.__checkpointCache = None
    if checkpointSource is not None:
      checkpointSource.seek(0)
      self.__checkpointCache = StringIO.StringIO()
      shutil.copyfileobj(checkpointSource, self.__checkpointCache)

    return


  def __openDatafile(self, modelResult):
    """Open the data file and write the header row.

    Called lazily from append() so the column layout (raw input fields,
    inference-element columns, and logged metric columns) can be derived
    from the first ModelResult.  Also replays any rows cached from a
    previous checkpoint into the new dataset.
    """

    # Write reset bit
    resetFieldMeta = FieldMetaInfo(
      name="reset",
      type=FieldMetaType.integer,
      special = FieldMetaSpecial.reset)

    self.__outputFieldsMeta.append(resetFieldMeta)


    # -----------------------------------------------------------------------
    # Write each of the raw inputs that go into the encoders
    # (sorted for a deterministic column order; "_"-prefixed and the reset
    # field are excluded).
    rawInput = modelResult.rawInput
    rawFields = rawInput.keys()
    rawFields.sort()
    for field in rawFields:
      if field.startswith('_') or field == 'reset':
        continue
      value = rawInput[field]
      meta = FieldMetaInfo(name=field, type=FieldMetaType.string,
                           special=FieldMetaSpecial.none)
      self.__outputFieldsMeta.append(meta)
      self._rawInputNames.append(field)


    # -----------------------------------------------------------------------
    # Handle each of the inference elements
    for inferenceElement, value in modelResult.inferences.iteritems():
      inferenceLabel = InferenceElement.getLabel(inferenceElement)

      # TODO: Right now we assume list inferences are associated with
      # The input field metadata
      if type(value) in (list, tuple):
        # Append input and prediction field meta-info
        self.__outputFieldsMeta.extend(self.__getListMetaInfo(inferenceElement))

      elif isinstance(value, dict):
          self.__outputFieldsMeta.extend(self.__getDictMetaInfo(inferenceElement,
                                                                value))
      else:
        # Scalar inference: optional ".actual" column plus the inference
        # column itself.
        if InferenceElement.getInputElement(inferenceElement):
          self.__outputFieldsMeta.append(FieldMetaInfo(name=inferenceLabel+".actual",
                type=FieldMetaType.string, special = ''))
        self.__outputFieldsMeta.append(FieldMetaInfo(name=inferenceLabel,
                type=FieldMetaType.string, special = ''))

    # One float column per logged metric.
    if self.__metricNames:
      for metricName in self.__metricNames:
        metricField = FieldMetaInfo(
          name = metricName,
          type = FieldMetaType.float,
          special = FieldMetaSpecial.none)

        self.__outputFieldsMeta.append(metricField)

    # Create the inference directory for our experiment
    inferenceDir = _FileUtils.createExperimentInferenceDir(self.__experimentDir)

    # Construct the prediction dataset file path
    filename = (self.__label + "." +
               opfutils.InferenceType.getLabel(self.__inferenceType) +
               ".predictionLog.csv")
    self.__datasetPath = os.path.join(inferenceDir, filename)

    # Create the output dataset
    print "OPENING OUTPUT FOR PREDICTION WRITER AT: {0!r}".format(self.__datasetPath)
    print "Prediction field-meta: {0!r}".format([tuple(i) for i in self.__outputFieldsMeta])
    self.__dataset = FileRecordStream(streamID=self.__datasetPath, write=True,
                                     fields=self.__outputFieldsMeta)

    # Copy data from checkpoint cache
    if self.__checkpointCache is not None:
      self.__checkpointCache.seek(0)

      reader = csv.reader(self.__checkpointCache, dialect='excel')

      # Skip header row (reader.next() is the Python 2 iterator API)
      try:
        header = reader.next()
      except StopIteration:
        print "Empty record checkpoint initializer for {0!r}".format(self.__datasetPath)
      else:
        # The checkpoint must have been written with the same layout.
        assert tuple(self.__dataset.getFieldNames()) == tuple(header), \
          "dataset.getFieldNames(): {0!r}; predictionCheckpointFieldNames: {1!r}".format(
          tuple(self.__dataset.getFieldNames()), tuple(header))

      # Copy the rows from checkpoint
      numRowsCopied = 0
      while True:
        try:
          row = reader.next()
        except StopIteration:
          break

        #print "DEBUG: restoring row from checkpoint: %r" % (row,)

        self.__dataset.appendRecord(row)
        numRowsCopied += 1

      self.__dataset.flush()

      print "Restored {0:d} rows from checkpoint for {1!r}".format(
        numRowsCopied, self.__datasetPath)

      # Dispose of our checkpoint cache
      self.__checkpointCache.close()
      self.__checkpointCache = None

    return


  def setLoggedMetrics(self, metricNames):
    """ Tell the writer which metrics should be written

    Parameters:
    -----------------------------------------------------------------------
    metricNames: iterable of metric labels to be written; None logs nothing
    """
    self.__metricNames = set() if metricNames is None else set(metricNames)


  def close(self):
    """ [virtual method override] Closes the writer (e.g., close the underlying
    file).  Safe to call when no dataset was ever opened.
    """
    dataset = self.__dataset
    if dataset:
      dataset.close()
    # Drop the reference so later calls are no-ops.
    self.__dataset = None


  def __getListMetaInfo(self, inferenceElement):
    """ Get field metadata information for inferences that are of list type
    TODO: Right now we assume list inferences are associated with the input field
    metadata

    Returns, for every input field, an optional "<field>.actual" column
    (only when the inference element has a corresponding input element)
    followed by a "<field>.<label>" prediction column.
    """
    fieldMetaInfo = []
    inferenceLabel = InferenceElement.getLabel(inferenceElement)

    # This depends only on inferenceElement, so evaluate it once.
    # Previously, when it was falsy, `outputFieldMeta` was never assigned
    # and the unconditional append below raised an UnboundLocalError.
    hasInputElement = bool(InferenceElement.getInputElement(inferenceElement))

    for inputFieldMeta in self.__inputFieldsMeta:
      if hasInputElement:
        # Column holding the actual (ground-truth) input value.
        fieldMetaInfo.append(FieldMetaInfo(
          name=inputFieldMeta.name + ".actual",
          type=inputFieldMeta.type,
          special=inputFieldMeta.special
        ))

      # Column holding the predicted value for this field.
      fieldMetaInfo.append(FieldMetaInfo(
        name=inputFieldMeta.name + "." + inferenceLabel,
        type=inputFieldMeta.type,
        special=inputFieldMeta.special
      ))

    return fieldMetaInfo


  def __getDictMetaInfo(self, inferenceElement, inferenceDict):
    """Build the output column metadata for a dict-valued inference."""
    inferenceLabel = InferenceElement.getLabel(inferenceElement)

    fieldMetaInfo = []
    if InferenceElement.getInputElement(inferenceElement):
      # Ground-truth column precedes the per-key prediction columns.
      fieldMetaInfo.append(FieldMetaInfo(name=inferenceLabel + ".actual",
                                         type=FieldMetaType.string,
                                         special=''))

    # One column per dict key, in sorted order for a stable header.
    fieldMetaInfo += [FieldMetaInfo(name=inferenceLabel + "." + str(key),
                                    type=FieldMetaType.string,
                                    special='')
                      for key in sorted(inferenceDict.keys())]

    return fieldMetaInfo


  def append(self, modelResult):
    """ [virtual method override] Emits a single prediction as input versus
    predicted.

    modelResult:    An opfutils.ModelResult object that contains the model input
                    and output for the current timestep.
    """

    # If there are no (non-None) inferences, don't write anything
    inferences = modelResult.inferences
    hasInferences = False
    if inferences is not None:
      for value in inferences.itervalues():
        hasInferences = hasInferences or (value is not None)

    if not hasInferences:
      return

    # The dataset file is created lazily, on the first result that actually
    # carries inferences.
    if self.__dataset is None:
      self.__openDatafile(modelResult)

    inputData = modelResult.sensorInput

    # First column: the sequence-reset flag, normalized to 0/1
    sequenceReset = int(bool(inputData.sequenceReset))
    outputRow = [sequenceReset]


    # -----------------------------------------------------------------------
    # Write out the raw inputs
    rawInput = modelResult.rawInput
    for field in self._rawInputNames:
      outputRow.append(str(rawInput[field]))

    # -----------------------------------------------------------------------
    # Write out the inference element info
    for inferenceElement, outputVal in inferences.iteritems():
      # Fetch the actual (ground-truth) value when this inference element
      # maps back onto an input element
      inputElement = InferenceElement.getInputElement(inferenceElement)
      if inputElement:
        inputVal = getattr(inputData, inputElement)
      else:
        inputVal = None

      if type(outputVal) in (list, tuple):
        # List/tuple inferences are emitted as interleaved actual/inferred
        # pairs, one pair per element.
        # BUGFIX: the old check was `type(inputVal) in (list, tuple, None)`;
        # `None` is not a type, so that term could never match -- the
        # effective contract (inputVal must be a list or tuple) is now stated
        # explicitly, with a useful failure message.
        assert type(inputVal) in (list, tuple), (
          "Expected list/tuple actual value for list inference, got %r"
          % (inputVal,))

        for iv, ov in zip(inputVal, outputVal):
          # Write actual
          outputRow.append(str(iv))

          # Write inferred
          outputRow.append(str(ov))
      elif isinstance(outputVal, dict):
        if inputVal is not None:
          # If we have a predicted field, include only that in the actuals
          if modelResult.predictedFieldName is not None:
            outputRow.append(str(inputVal[modelResult.predictedFieldName]))
          else:
            outputRow.append(str(inputVal))
        # One column per dict key, in sorted order to match the header layout
        for key in sorted(outputVal.keys()):
          outputRow.append(str(outputVal[key]))
      else:
        # Scalar inference: optional actual value followed by the inference
        if inputVal is not None:
          outputRow.append(str(inputVal))
        outputRow.append(str(outputVal))

    # Append any requested metric values (defaulting to 0.0 when missing)
    metrics = modelResult.metrics
    for metricName in self.__metricNames:
      outputRow.append(metrics.get(metricName, 0.0))

    self.__dataset.appendRecord(outputRow)

    # Flush so the record is durable immediately
    self.__dataset.flush()

    return

  def checkpoint(self, checkpointSink, maxRows):
    """ [virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent inference
    records.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data, if
                        any, will be stored.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """

    # Start from a clean sink
    checkpointSink.truncate()

    if self.__dataset is None:
      if self.__checkpointCache is not None:
        # No live dataset, but a cached checkpoint exists from a previous
        # restore -- replay it verbatim into the sink.
        self.__checkpointCache.seek(0)
        shutil.copyfileobj(self.__checkpointCache, checkpointSink)
        checkpointSink.flush()
        return
      else:
        # Nothing to checkpoint
        return

    self.__dataset.flush()
    totalDataRows = self.__dataset.getDataRowCount()

    if totalDataRows == 0:
      # Nothing to checkpoint
      return

    # Open reader of prediction file (suppress missingValues conversion)
    reader = FileRecordStream(self.__datasetPath, missingValues=[])
    try:
      # Create CSV writer for writing checkpoint rows
      writer = csv.writer(checkpointSink)

      # Write the header row to checkpoint sink -- just field names
      writer.writerow(reader.getFieldNames())

      # Determine number of rows to checkpoint
      numToWrite = min(maxRows, totalDataRows)

      # Skip initial rows to get to the rows that we actually need to checkpoint
      numRowsToSkip = totalDataRows - numToWrite
      for i in xrange(numRowsToSkip):
        reader.next()

      # Write the data rows to checkpoint sink
      numWritten = 0
      while True:
        row = reader.getNextRecord()
        if row is None:
          break

        row = [str(element) for element in row]

        writer.writerow(row)

        numWritten += 1

      assert numWritten == numToWrite, \
        "numWritten ({0!s}) != numToWrite ({1!s})".format(numWritten, numToWrite)
    finally:
      # BUGFIX: the reader was previously never closed, leaking the underlying
      # file handle on every checkpoint.
      reader.close()

    checkpointSink.flush()

    return
  def testBasic(self):
    """Runs basic FileRecordStream tests."""
    filename = _getTempFileName()

    # Write a standard file
    fields = [('name', 'string', ''),
              ('timestamp', 'datetime', 'T'),
              ('integer', 'int', ''),
              ('real', 'float', ''),
              ('reset', 'int', 'R'),
              ('sid', 'string', 'S'),
              ('categoryField', 'int', 'C'),]
    fieldNames = ['name', 'timestamp', 'integer', 'real', 'reset', 'sid',
                  'categoryField']

    print 'Creating temp file:', filename

    s = FileRecordStream(streamID=filename, write=True, fields=fields)

    self.assertTrue(s.getDataRowCount() == 0)

    # Records
    records = (
      ['rec_1', datetime(day=1, month=3, year=2010), 5, 6.5, 1, 'seq-1', 10],
      ['rec_2', datetime(day=2, month=3, year=2010), 8, 7.5, 0, 'seq-1', 11],
      ['rec_3', datetime(day=3, month=3, year=2010), 12, 8.5, 0, 'seq-1', 12])

    self.assertTrue(s.getFields() == fields)
    self.assertTrue(s.getNextRecordIdx() == 0)

    print 'Writing records ...'
    for r in records:
      print list(r)
      s.appendRecord(list(r))

    self.assertTrue(s.getDataRowCount() == 3)

    recordsBatch = (
      ['rec_4', datetime(day=4, month=3, year=2010), 2, 9.5, 1, 'seq-1', 13],
      ['rec_5', datetime(day=5, month=3, year=2010), 6, 10.5, 0, 'seq-1', 14],
      ['rec_6', datetime(day=6, month=3, year=2010), 11, 11.5, 0, 'seq-1', 15])

    print 'Adding batch of records...'
    for rec in recordsBatch:
      print rec
    s.appendRecords(recordsBatch)
    self.assertTrue(s.getDataRowCount() == 6)

    s.close()

    # Read the standard file
    s = FileRecordStream(filename)
    self.assertTrue(s.getDataRowCount() == 6)
    self.assertTrue(s.getFieldNames() == fieldNames)

    # Note! this is the number of records read so far
    self.assertTrue(s.getNextRecordIdx() == 0)

    readStats = s.getStats()
    print 'Got stats:', readStats
    expectedStats = {
                     'max': [None, None, 12, 11.5, 1, None, 15],
                     'min': [None, None, 2, 6.5, 0, None, 10]
                    }
    self.assertTrue(readStats == expectedStats)

    readRecords = []
    print 'Reading records ...'
    while True:
      r = s.getNextRecord()
      print r
      if r is None:
        break

      readRecords.append(r)

    allRecords = records + recordsBatch
    for r1, r2 in zip(allRecords, readRecords):
      print 'Expected:', r1
      print 'Read    :', r2
      self.assertTrue(r1 == r2)

    s.close()
# Beispiel #10
# 0
def main():
  """Command-line entry point.

  Parses the CLI options, then runs the HTM network serially over the
  'train', 'val' and 'test' phases of the artificial dataset, writing one
  trace CSV per phase under --outputDir.
  """
  parser = argparse.ArgumentParser()

  # (flags, keyword arguments) for every supported command-line option
  argSpecs = [
    (('--inputFile', '-d'),
     dict(dest='inputFile', type=str, default=None,
          help='Relative path to the input file.')),
    (('--outputDir', '-o'),
     dict(dest='outputDir', type=str, default='results/traces',
          help='Relative path to the directory where the HTM '
               'network traces will be saved.')),
    (('--htmConfig', '-c'),
     dict(dest='htmConfig', type=str,
          default='htm_network_config/6categories.json',
          help='Relative path to the HTM network config JSON. '
               'This option is ignored when the --model flag '
               'is used.')),
    (('--inputModel', '-im'),
     dict(dest='inputModel', type=str, default=None,
          help='Relative path of the serialized HTM model to be '
               'loaded.')),
    (('--outputModel', '-om'),
     dict(dest='outputModel', type=str, default=None,
          help='Relative path to serialize the HTM model.')),
    (('--disableLearning', '-dl'),
     dict(dest='disableLearning', action='store_true', default=False,
          help='Use this flag to disable learning. If not '
               'provided, then learning is enabled by default.')),
    (('--batch', '-b'),
     dict(dest='batchSize', type=int, default=1000,
          help='Size of each batch being processed.')),
  ]
  for flags, kwargs in argSpecs:
    parser.add_argument(*flags, **kwargs)

  # Parse input options
  options = parser.parse_args()
  outputDir = options.outputDir
  networkConfigPath = options.htmConfig
  batchSize = options.batchSize

  # FIXME RES-464: until the serialization process is fixed, don't save the
  # model. Run serially each phase (train -> validation -> test)
  # TODO: Re-introduce these command line args when serialization is fixed.
  # inputFile = options.inputFile
  # inputModelPath = options.inputModel
  # outputModelPath = options.outputModel
  # learningMode = not options.disableLearning

  inputModelPath = None
  outputModelPath = None
  phases = ['train', 'val', 'test']
  inputDir = os.path.join('data', 'artificial')
  expName = 'binary_ampl=10.0_mean=0.0_noise=0.0'  # 'body_acc_x_inertial_signals'
  network = None

  with open(networkConfigPath, 'r') as f:
    networkConfig = simplejson.load(f)
    for phase in phases:

      # Data source for this phase
      inputFile = os.path.join(inputDir, '%s_%s.csv' % (expName, phase))
      dataSource = FileRecordStream(streamID=inputFile)
      numRecords = dataSource.getDataRowCount()
      _LOGGER.debug('Number of records to be processed: %s' % numRecords)

      # Where the trace CSV for this phase will be written
      traceFileName = getTraceFileName(inputFile)
      traceFilePath = os.path.join(outputDir, '%s.csv' % traceFileName)
      if not os.path.exists(outputDir):
        os.makedirs(outputDir)

      if not network:
        # First phase: build the network and train it
        assert phase == 'train'  # the network may only be created during train
        learningMode = True
        network = createNetwork(dataSource, networkConfig, inputModelPath)
      else:
        # Later phases: reuse the trained network with learning disabled
        learningMode = False
        regionName = networkConfig["sensorRegionConfig"]["regionName"]
        sensorRegion = network.regions[regionName].getSelf()
        sensorRegion.dataSource = dataSource
        if 'train' in sensorRegion.dataSource._filename:
          raise ValueError('Learning mode should not be disabled for the '
                           'train set.')

      _LOGGER.debug('Running network with inputFile=%s '
                    'and learningMode=%s' % (inputFile, learningMode))

      # FIXME RES-464 (end)

      run(network, numRecords, traceFilePath, networkConfig,
          outputModelPath, batchSize, learningMode)
# NOTE(review): this is a second, PEP8-formatted definition of main() that
# duplicates and shadows the 2-space-indented main() defined earlier in this
# module -- confirm which of the two is meant to survive.
def main():
    """Command-line entry point.

    Parses the CLI options, then runs the HTM network serially over the
    'train', 'val' and 'test' phases of the artificial dataset, writing one
    trace CSV per phase under --outputDir.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputFile',
                        '-d',
                        dest='inputFile',
                        type=str,
                        default=None,
                        help='Relative path to the input file.')

    parser.add_argument('--outputDir',
                        '-o',
                        dest='outputDir',
                        type=str,
                        default='results/traces',
                        help='Relative path to the directory where the HTM '
                        'network traces will be saved.')

    parser.add_argument('--htmConfig',
                        '-c',
                        dest='htmConfig',
                        type=str,
                        default='htm_network_config/6categories.json',
                        help='Relative path to the HTM network config JSON. '
                        'This option is ignored when the --model flag '
                        'is used.')

    parser.add_argument('--inputModel',
                        '-im',
                        dest='inputModel',
                        type=str,
                        default=None,
                        help='Relative path of the serialized HTM model to be '
                        'loaded.')

    parser.add_argument('--outputModel',
                        '-om',
                        dest='outputModel',
                        type=str,
                        default=None,
                        help='Relative path to serialize the HTM model.')

    parser.add_argument('--disableLearning',
                        '-dl',
                        dest='disableLearning',
                        action='store_true',
                        default=False,
                        help='Use this flag to disable learning. If not '
                        'provided, then learning is enabled by default.')

    parser.add_argument('--batch',
                        '-b',
                        dest='batchSize',
                        type=int,
                        default=1000,
                        help='Size of each batch being processed.')

    # Parse input options
    options = parser.parse_args()
    outputDir = options.outputDir
    networkConfigPath = options.htmConfig
    batchSize = options.batchSize

    # FIXME RES-464: until the serialization process is fixed, don't save the
    # model. Run serially each phase (train -> validation -> test)
    # TODO: Re-introduce these command line args when serialization is fixed.
    # inputFile = options.inputFile
    # inputModelPath = options.inputModel
    # outputModelPath = options.outputModel
    # learningMode = not options.disableLearning

    inputModelPath = None
    outputModelPath = None
    phases = ['train', 'val', 'test']
    inputDir = os.path.join('data', 'artificial')
    expName = 'binary_ampl=10.0_mean=0.0_noise=0.0'  # 'body_acc_x_inertial_signals'
    network = None
    with open(networkConfigPath, 'r') as f:
        networkConfig = simplejson.load(f)
        for phase in phases:

            # Data source for this phase
            inputFile = os.path.join(inputDir, '%s_%s.csv' % (expName, phase))
            dataSource = FileRecordStream(streamID=inputFile)
            numRecords = dataSource.getDataRowCount()
            _LOGGER.debug('Number of records to be processed: %s' % numRecords)

            # Trace output info: one CSV trace per phase under outputDir
            traceFileName = getTraceFileName(inputFile)
            traceFilePath = os.path.join(outputDir, '%s.csv' % traceFileName)
            if not os.path.exists(outputDir):
                os.makedirs(outputDir)

            # If there is no network yet, create one and train it.
            if not network:
                assert phase == 'train'  # network may only be created in train
                learningMode = True
                network = createNetwork(dataSource, networkConfig,
                                        inputModelPath)
            else:
                # Later phases reuse the trained network with learning off
                learningMode = False
                regionName = networkConfig["sensorRegionConfig"]["regionName"]
                sensorRegion = network.regions[regionName].getSelf()
                sensorRegion.dataSource = dataSource
                if 'train' in sensorRegion.dataSource._filename:
                    raise ValueError(
                        'Learning mode should not be disabled for the '
                        'train set.')

            _LOGGER.debug('Running network with inputFile=%s '
                          'and learningMode=%s' % (inputFile, learningMode))

            # FIXME RES-464 (end)

            run(network, numRecords, traceFilePath, networkConfig,
                outputModelPath, batchSize, learningMode)