Example #1
  def _testSamePredictions(self, experiment, predSteps, checkpointAt,
                           predictionsFilename, additionalFields=None,
                           newSerialization=False):
    """ Test that we get the same predictions out from the following two
    scenarios:

    a_plus_b: Run the network for 'a' iterations followed by 'b' iterations
    a, followed by b: Run the network for 'a' iterations, save it, load it
                      back in, then run for 'b' iterations.

    Parameters:
    -----------------------------------------------------------------------
    experiment:   base directory of the experiment. This directory should
                    contain the following:
                        base.py
                        a_plus_b/description.py
                        a/description.py
                        b/description.py
                    The sub-directory description files should import the
                    base.py and only change the first and last record used
                    from the data file.
    predSteps:   Number of steps ahead predictions are for
    checkpointAt: Number of iterations that 'a' runs for.
                 IMPORTANT: This must match the number of records that
                 a/description.py runs for - it is NOT dynamically stuffed into
                 the a/description.py.
    predictionsFilename: The name of the predictions file that the OPF
                  generates for this experiment (for example
                  'DefaultTask.NontemporalMultiStep.predictionLog.csv')
    newSerialization: Whether to use new capnproto serialization.
    """

    # Get the 3 sub-experiment directories
    aPlusBExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "a_plus_b")
    aExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "a")
    bExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "b")

    # Run a+b
    args = self._createExperimentArgs(aPlusBExpDir,
                                      newSerialization=newSerialization)
    _aPlusBExp = runExperiment(args)

    # Run a, then copy the saved checkpoint into the b directory
    args = self._createExperimentArgs(aExpDir,
                                      newSerialization=newSerialization)
    _aExp = runExperiment(args)
    if os.path.exists(os.path.join(bExpDir, 'savedmodels')):
      shutil.rmtree(os.path.join(bExpDir, 'savedmodels'))
    shutil.copytree(src=os.path.join(aExpDir, 'savedmodels'),
                    dst=os.path.join(bExpDir, 'savedmodels'))

    args = self._createExperimentArgs(bExpDir,
                                      newSerialization=newSerialization,
                                      additionalArgs=['--load=DefaultTask'])
    _bExp = runExperiment(args)

    # Now, compare the predictions at the end of a+b to those in b.
    aPlusBPred = FileRecordStream(os.path.join(aPlusBExpDir, 'inference',
                                   predictionsFilename))
    bPred = FileRecordStream(os.path.join(bExpDir, 'inference',
                                   predictionsFilename))

    colNames = [x[0] for x in aPlusBPred.getFields()]
    actValueColIdx = colNames.index('multiStepPredictions.actual')
    predValueColIdx = colNames.index('multiStepPredictions.%d' % (predSteps))

    # Skip past the 'a' records in aPlusB
    for i in range(checkpointAt):
      aPlusBPred.next()

    # Now, read through the records that don't have predictions yet
    for i in range(predSteps):
      aPlusBPred.next()
      bPred.next()

    # Now, compare predictions in the two files
    rowIdx = checkpointAt + predSteps + 4 - 1
    epsilon = 0.0001
    while True:
      rowIdx += 1
      try:
        rowAPB = aPlusBPred.next()
        rowB = bPred.next()

        # Compare actuals
        self.assertEqual(rowAPB[actValueColIdx], rowB[actValueColIdx],
              "Mismatch in actual values: row %d of a+b has %s and row %d of "
              "b has %s" % (rowIdx, rowAPB[actValueColIdx], rowIdx-checkpointAt,
                            rowB[actValueColIdx]))

        # Compare predictions, within nearest epsilon
        predAPB = eval(rowAPB[predValueColIdx])
        predB = eval(rowB[predValueColIdx])

        # Sort with highest probabilities first
        predAPB = [(a, b) for b, a in predAPB.items()]
        predB = [(a, b) for b, a in predB.items()]
        predAPB.sort(reverse=True)
        predB.sort(reverse=True)

        if additionalFields is not None:
          for additionalField in additionalFields:
            fieldIdx = colNames.index(additionalField)
            self.assertEqual(rowAPB[fieldIdx], rowB[fieldIdx],
              "Mismatch in field \'%s\' values: row %d of a+b has value: (%s)\n"
              " and row %d of b has value: %s" % \
              (additionalField, rowIdx, rowAPB[fieldIdx],
                rowIdx-checkpointAt, rowB[fieldIdx]))

        self.assertEqual(len(predAPB), len(predB),
              "Mismatch in predicted values: row %d of a+b has %d predictions: "
              "\n  (%s) and row %d of b has %d predictions:\n  (%s)" % \
              (rowIdx, len(predAPB), predAPB, rowIdx-checkpointAt, len(predB),
               predB))

        for i in range(len(predAPB)):
          (aProb, aValue) = predAPB[i]
          (bProb, bValue) = predB[i]
          self.assertLess(abs(aValue-bValue), epsilon,
              "Mismatch in predicted values: row %d of a+b predicts value %s "
              "and row %d of b predicts %s" % (rowIdx, aValue,
                                               rowIdx-checkpointAt, bValue))
          self.assertLess(abs(aProb-bProb), epsilon,
              "Mismatch in probabilities: row %d of a+b predicts %s with "
              "probability %s and row %d of b predicts %s with probability %s" \
               % (rowIdx, aValue, aProb, rowIdx-checkpointAt, bValue, bProb))

      except StopIteration:
        break

    # clean up model checkpoint directories
    shutil.rmtree(getCheckpointParentDir(aExpDir))
    shutil.rmtree(getCheckpointParentDir(bExpDir))
    shutil.rmtree(getCheckpointParentDir(aPlusBExpDir))

    print "Predictions match!"
Example #2
  def testExperimentResults(self):
    """Run specific experiments and verify that they are producing the correct
    results.

    opfDir is the examples/opf directory in the install path
    and is used to find run_opf_experiment.py

    The testdir is the directory that contains the experiments we will be
    running. When running in the auto-build setup, this will be a temporary
    directory that has had this script, as well as the specific experiments
    we will be running, copied into it by the qa/autotest/prediction_results.py
    script.
    When running stand-alone from the command line, this will point to the
    examples/prediction directory in the install tree (same as predictionDir)

    """

    nupic_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "..", "..", "..", "..")

    opfDir = os.path.join(nupic_dir, "examples", "opf")

    testDir = opfDir

    # The testdir is the directory that contains the experiments we will be
    #  running. When running in the auto-build setup, this will be a temporary
    #  directory that has had this script, as well as the specific experiments
    #  we will be running, copied into it by the
    #  qa/autotest/prediction_results.py script.
    # When running stand-alone from the command line, we can simply point to the
    #  examples/prediction directory in the install tree.
    if not os.path.exists(os.path.join(testDir, "experiments/classification")):
      testDir = opfDir

    # Generate any dynamically generated datasets now
    command = ['python', os.path.join(testDir, 'experiments', 'classification',
                                       'makeDatasets.py')]
    retval = call(command)
    self.assertEqual(retval, 0)


    # Generate any dynamically generated datasets now
    command = ['python', os.path.join(testDir, 'experiments', 'multistep',
                                       'make_datasets.py')]
    retval = call(command)
    self.assertEqual(retval, 0)


    # Generate any dynamically generated datasets now
    command = ['python', os.path.join(testDir, 'experiments',
                                'spatial_classification', 'make_datasets.py')]
    retval = call(command)
    self.assertEqual(retval, 0)


    # Run from the test directory so that we can find our experiments
    os.chdir(testDir)

    runExperiment = os.path.join(nupic_dir, "scripts", "run_opf_experiment.py")

    # A list of experiments to run.  Valid attributes:
    #   experimentDir - Required, path to the experiment directory containing
    #                       description.py
    #   args          - optional. List of arguments for run_opf_experiment
    #   results       - A dictionary of expected results. The keys are tuples
    #                    containing (predictionLogFileName, columnName). The
    #                    value is a (min, max) expected value from the last row
    #                    in the prediction log.
    multistepTests = [
      # For this one, in theory the error for 1 step should be < 0.20
      { 'experimentDir': 'experiments/multistep/simple_0',
        'results': {
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.20),
        }
      },

      # For this one, in theory the error for 1 step should be < 0.50, but we
      #  get slightly higher because our sample size is smaller than ideal
      { 'experimentDir': 'experiments/multistep/simple_0_f2',
        'results': {
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 0.66),
        }
      },

      # For this one, in theory the error for 1 step should be < 0.20
      { 'experimentDir': 'experiments/multistep/simple_1',
        'results': {
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.20),
        }
      },

      # For this test, we haven't figured out the theoretical error, this
      #  error is determined empirically from actual results
      { 'experimentDir': 'experiments/multistep/simple_1_f2',
        'results': {
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 3.76),
        }
      },

      # For this one, in theory the error for 1 step should be < 0.20, but we
      #  get slightly higher because our sample size is smaller than ideal
      { 'experimentDir': 'experiments/multistep/simple_2',
        'results': {
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.31),
        }
      },

      # For this one, in theory the error for 1 step should be < 0.10 and for
      #  3 step < 0.30, but our actual results are better.
      { 'experimentDir': 'experiments/multistep/simple_3',
        'results': {
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.06),
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=3:window=200:field=field1"):
                    (0.0, 0.20),
        }
      },

      # For this test, we haven't figured out the theoretical error, this
      #  error is determined empirically from actual results
      { 'experimentDir': 'experiments/multistep/simple_3_f2',
        'results': {
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 0.6),
          ('DefaultTask.TemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='aae':steps=3:window=200:field=field2"):
                    (0.0, 1.8),
        }
      },

      # Test missing record support.
      # Should have 0 error by the end of the dataset
      { 'experimentDir': 'experiments/missing_record/simple_0',
        'results': {
          ('DefaultTask.NontemporalMultiStep.predictionLog.csv',
           "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=25:field=field1"):
                    (1.0, 1.0),
        }
      },

    ] # end of multistepTests

    classificationTests = [
      # ----------------------------------------------------------------------
      # Classification Experiments
      { 'experimentDir': 'experiments/classification/category_hub_TP_0',
        'results': {
            ('OnlineLearning.TemporalClassification.predictionLog.csv',
             'classification:avg_err:window=200'): (0.0, 0.020),
            }
      },

      { 'experimentDir': 'experiments/classification/category_TM_0',
        'results': {
            ('OnlineLearning.TemporalClassification.predictionLog.csv',
             'classification:avg_err:window=200'): (0.0, 0.045),

            ('OnlineLearning.TemporalClassification.predictionLog.csv',
             'classConfidences:neg_auc:computeEvery=10:window=200'): (-1.0, -0.98),
            }
      },

      { 'experimentDir': 'experiments/classification/category_TM_1',
        'results': {
            ('OnlineLearning.TemporalClassification.predictionLog.csv',
             'classification:avg_err:window=200'): (0.0, 0.005),
            }
      },

      { 'experimentDir': 'experiments/classification/scalar_TP_0',
        'results': {
            ('OnlineLearning.TemporalClassification.predictionLog.csv',
             'classification:avg_err:window=200'): (0.0, 0.155),

            ('OnlineLearning.TemporalClassification.predictionLog.csv',
             'classConfidences:neg_auc:computeEvery=10:window=200'): (-1.0, -0.900),
            }
      },

      { 'experimentDir': 'experiments/classification/scalar_TP_1',
        'results': {
            ('OnlineLearning.TemporalClassification.predictionLog.csv',
             'classification:avg_err:window=200'):  (0.0, 0.03),
            }
      },

    ] # End of classification tests
    
    spatialClassificationTests = [
      { 'experimentDir': 'experiments/spatial_classification/category_0',
        'results': {
            ('DefaultTask.NontemporalClassification.predictionLog.csv',
             "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=0:window=100:field=classification"): 
                    (0.0, 0.05),
            }

      },

      { 'experimentDir': 'experiments/spatial_classification/category_1',
        'results': {
            ('DefaultTask.NontemporalClassification.predictionLog.csv',
             "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=0:window=100:field=classification"): 
                    (0.0, 0.0),
            }
      },
      
      { 'experimentDir': 'experiments/spatial_classification/scalar_0',
        'results': {
            ('DefaultTask.NontemporalClassification.predictionLog.csv',
             "multiStepBestPredictions:multiStep:errorMetric='aae':steps=0:window=100:field=classification"): 
                    (0.0, 0.025),
            }
      },

      { 'experimentDir': 'experiments/spatial_classification/scalar_1',
        'results': {
            ('DefaultTask.NontemporalClassification.predictionLog.csv',
             "multiStepBestPredictions:multiStep:errorMetric='aae':steps=0:window=100:field=classification"): 
                    (-1e-10, 0.01),
            }
      },


    ]

    anomalyTests = [
      # ----------------------------------------------------------------------
      # Anomaly Experiments
      { 'experimentDir': 'experiments/anomaly/temporal/simple',
        'results': {
            ('DefaultTask.TemporalAnomaly.predictionLog.csv',
             'anomalyScore:passThruPrediction:window=1000:field=f'): (0.02,
                                                                      0.04),
          }
      },



    ] # End of anomaly tests

    tests = []
    tests += multistepTests
    tests += classificationTests
    tests += spatialClassificationTests
    tests += anomalyTests

    # Uncomment this to only run a specific experiment(s)
    #tests = tests[7:8]

    # This contains a list of tuples: (expDir, key, results)
    summaryOfResults = []
    startTime = time.time()

    testIdx = -1
    for test in tests:
      testIdx += 1
      expDirectory = test['experimentDir']

      # -------------------------------------------------------------------
      # Remove files/directories generated by previous tests:
      toDelete = []

      # Remove inference results
      path = os.path.join(expDirectory, "inference")
      toDelete.append(path)
      path = os.path.join(expDirectory, "savedmodels")
      toDelete.append(path)

      for path in toDelete:
        if not os.path.exists(path):
          continue
        print "Removing %s ..." % path
        if os.path.isfile(path):
          os.remove(path)
        else:
          shutil.rmtree(path)


      # ------------------------------------------------------------------------
      # Run the test.
      args = test.get('args', [])
      print "Running experiment %s ..." % (expDirectory)
      command = ['python', runExperiment, expDirectory] + args
      retVal = call(command)

      # If retVal is non-zero and this was not a negative test, or if retVal is
      # zero and this is a negative test, something went wrong.
      if retVal:
        print "Details of failed test: %s" % test
        print("TestIdx %d, OPF experiment '%s' failed with return code %i." %
              (testIdx, expDirectory, retVal))
      self.assertFalse(retVal)


      # -----------------------------------------------------------------------
      # Check the results
      for (key, expValues) in test['results'].items():
        (logFilename, colName) = key

        # Open the prediction log file
        logFile = FileRecordStream(os.path.join(expDirectory, 'inference',
                                                logFilename))
        colNames = [x[0] for x in logFile.getFields()]
        if colName not in colNames:
          print "TestIdx %d: %s not one of the columns in " \
            "prediction log file. Available column names are: %s" % (testIdx,
                    colName, colNames)
        self.assertTrue(colName in colNames)
        colIndex = colNames.index(colName)

        # Read till we get to the last line
        while True:
          try:
            row = logFile.next()
          except StopIteration:
            break
        result = row[colIndex]

        # Save summary of results
        summaryOfResults.append((expDirectory, colName, result))

        print "Actual result for %s, %s:" % (expDirectory, colName), result
        print "Expected range:", expValues
        failed = (expValues[0] is not None and result < expValues[0]) \
            or (expValues[1] is not None and result > expValues[1])
        if failed:
          print ("TestIdx %d: Experiment %s failed. \nThe actual result"
             " for %s (%s) was outside the allowed range of %s" % (testIdx,
              expDirectory, colName, result, expValues))
        else:
          print "  Within expected range."
        self.assertFalse(failed)


    # =======================================================================
    # Print summary of results:
    print
    print "Summary of results in all experiments run:"
    print "========================================="
    prevExpDir = None
    for (expDir, key, results) in summaryOfResults:
      if expDir != prevExpDir:
        print
        print expDir
        prevExpDir = expDir
      print "  %s: %s" % (key, results)

    print "\nElapsed time: %.1f seconds" % (time.time() - startTime)
Example #3
    def checkpoint(self, checkpointSink, maxRows):
        """ [virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent inference
    records.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data, if
                        any, will be stored.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """

        checkpointSink.truncate()

        if self.__dataset is None:
            if self.__checkpointCache is not None:
                self.__checkpointCache.seek(0)
                shutil.copyfileobj(self.__checkpointCache, checkpointSink)
                checkpointSink.flush()
                return
            else:
                # Nothing to checkpoint
                return

        self.__dataset.flush()
        totalDataRows = self.__dataset.getDataRowCount()

        if totalDataRows == 0:
            # Nothing to checkpoint
            return

        # Open reader of prediction file (suppress missingValues conversion)
        reader = FileRecordStream(self.__datasetPath, missingValues=[])

        # Create CSV writer for writing checkpoint rows
        writer = csv.writer(checkpointSink)

        # Write the header row to checkpoint sink -- just field names
        writer.writerow(reader.getFieldNames())

        # Determine number of rows to checkpoint
        numToWrite = min(maxRows, totalDataRows)

        # Skip initial rows to get to the rows that we actually need to checkpoint
        numRowsToSkip = totalDataRows - numToWrite
        for i in xrange(numRowsToSkip):
            reader.next()

        # Write the data rows to checkpoint sink
        numWritten = 0
        while True:
            row = reader.getNextRecord()
            if row is None:
                break

            row = [str(element) for element in row]

            #print "DEBUG: _BasicPredictionWriter: checkpointing row: %r" % (row,)

            writer.writerow(row)

            numWritten += 1

        assert numWritten == numToWrite, \
          "numWritten (%s) != numToWrite (%s)" % (numWritten, numToWrite)

        checkpointSink.flush()

        return
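
A minimal sketch of exercising checkpoint() with an in-memory sink. Here 'predictionWriter' stands in for an already-constructed instance of the prediction writer class this method belongs to; that instance is an assumption and its construction is not shown in the example.

# Sketch only: 'predictionWriter' is an assumed, initialized writer instance.
import csv
from StringIO import StringIO  # Python 2, matching the xrange/print usage above

sink = StringIO()
predictionWriter.checkpoint(checkpointSink=sink, maxRows=100)

# If the writer had rows to checkpoint, the sink now holds a header row followed
# by up to 100 of the most recent inference rows, readable with csv.reader.
sink.seek(0)
rows = list(csv.reader(sink))
if rows:  # empty when there was nothing to checkpoint
    header, data = rows[0], rows[1:]
    assert len(data) <= 100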
Example #4
  def checkpoint(self, checkpointSink, maxRows):
    """ [virtual method override] Save a checkpoint of the prediction output
    stream. The checkpoint comprises up to maxRows of the most recent inference
    records.

    Parameters:
    ----------------------------------------------------------------------
    checkpointSink:     A File-like object where predictions checkpoint data, if
                        any, will be stored.
    maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
    """

    checkpointSink.truncate()

    if self.__dataset is None:
      if self.__checkpointCache is not None:
        self.__checkpointCache.seek(0)
        shutil.copyfileobj(self.__checkpointCache, checkpointSink)
        checkpointSink.flush()
        return
      else:
        # Nothing to checkpoint
        return

    self.__dataset.flush()
    totalDataRows = self.__dataset.getDataRowCount()

    if totalDataRows == 0:
      # Nothing to checkpoint
      return

    # Open reader of prediction file (suppress missingValues conversion)
    reader = FileRecordStream(self.__datasetPath, missingValues=[])

    # Create CSV writer for writing checkpoint rows
    writer = csv.writer(checkpointSink)

    # Write the header row to checkpoint sink -- just field names
    writer.writerow(reader.getFieldNames())

    # Determine number of rows to checkpoint
    numToWrite = min(maxRows, totalDataRows)

    # Skip initial rows to get to the rows that we actually need to checkpoint
    numRowsToSkip = totalDataRows - numToWrite
    for i in xrange(numRowsToSkip):
      reader.next()

    # Write the data rows to checkpoint sink
    numWritten = 0
    while True:
      row = reader.getNextRecord()
      if row is None:
        break

      row = [str(element) for element in row]

      #print "DEBUG: _BasicPredictionWriter: checkpointing row: %r" % (row,)

      writer.writerow(row)

      numWritten += 1

    assert numWritten == numToWrite, \
      "numWritten ({0!s}) != numToWrite ({1!s})".format(numWritten, numToWrite)


    checkpointSink.flush()

    return
Example #5
    def testExperimentResults(self):
        """Run specific experiments and verify that they are producing the correct
    results.

    opfDir is the examples/opf directory in the install path
    and is used to find run_opf_experiment.py

    The testdir is the directory that contains the experiments we will be
    running. When running in the auto-build setup, this will be a temporary
    directory that has had this script, as well as the specific experiments
    we will be running, copied into it by the qa/autotest/prediction_results.py
    script.
    When running stand-alone from the command line, this will point to the
    examples/prediction directory in the install tree (same as predictionDir)

    """

        nupic_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "..", "..", "..", "..")

        opfDir = os.path.join(nupic_dir, "examples", "opf")

        testDir = opfDir

        # The testdir is the directory that contains the experiments we will be
        #  running. When running in the auto-build setup, this will be a temporary
        #  directory that has had this script, as well as the specific experiments
        #  we will be running, copied into it by the
        #  qa/autotest/prediction_results.py script.
        # When running stand-alone from the command line, we can simply point to the
        #  examples/prediction directory in the install tree.
        if not os.path.exists(
                os.path.join(testDir, "experiments/classification")):
            testDir = opfDir

        # Generate any dynamically generated datasets now
        command = [
            'python',
            os.path.join(testDir, 'experiments', 'classification',
                         'makeDatasets.py')
        ]
        retval = call(command)
        self.assertEqual(retval, 0)

        # Generate any dynamically generated datasets now
        command = [
            'python',
            os.path.join(testDir, 'experiments', 'multistep',
                         'make_datasets.py')
        ]
        retval = call(command)
        self.assertEqual(retval, 0)

        # Generate any dynamically generated datasets now
        command = [
            'python',
            os.path.join(testDir, 'experiments', 'spatial_classification',
                         'make_datasets.py')
        ]
        retval = call(command)
        self.assertEqual(retval, 0)

        # Run from the test directory so that we can find our experiments
        os.chdir(testDir)

        runExperiment = os.path.join(nupic_dir, "scripts",
                                     "run_opf_experiment.py")

        # A list of experiments to run.  Valid attributes:
        #   experimentDir - Required, path to the experiment directory containing
        #                       description.py
        #   args          - optional. List of arguments for run_opf_experiment
        #   results       - A dictionary of expected results. The keys are tuples
        #                    containing (predictionLogFileName, columnName). The
        #                    value is a (min, max) expected value from the last row
        #                    in the prediction log.
        multistepTests = [
            # For this one, in theory the error for 1 step should be < 0.20
            {
                'experimentDir': 'experiments/multistep/simple_0',
                'results': {
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.20),
                }
            },

            # For this one, in theory the error for 1 step should be < 0.50, but we
            #  get slightly higher because our sample size is smaller than ideal
            {
                'experimentDir': 'experiments/multistep/simple_0_f2',
                'results': {
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 0.66),
                }
            },

            # For this one, in theory the error for 1 step should be < 0.20
            {
                'experimentDir': 'experiments/multistep/simple_1',
                'results': {
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.20),
                }
            },

            # For this test, we haven't figured out the theoretical error, this
            #  error is determined empirically from actual results
            {
                'experimentDir': 'experiments/multistep/simple_1_f2',
                'results': {
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 3.76),
                }
            },

            # For this one, in theory the error for 1 step should be < 0.20, but we
            #  get slightly higher because our sample size is smaller than ideal
            {
                'experimentDir': 'experiments/multistep/simple_2',
                'results': {
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.31),
                }
            },

            # For this one, in theory the error for 1 step should be < 0.10 and for
            #  3 step < 0.30, but our actual results are better.
            {
                'experimentDir': 'experiments/multistep/simple_3',
                'results': {
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.06),
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=3:window=200:field=field1"):
                    (0.0, 0.20),
                }
            },

            # For this test, we haven't figured out the theoretical error, this
            #  error is determined empirically from actual results
            {
                'experimentDir': 'experiments/multistep/simple_3_f2',
                'results': {
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 0.6),
                    ('DefaultTask.TemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='aae':steps=3:window=200:field=field2"):
                    (0.0, 1.8),
                }
            },

            # Test missing record support.
            # Should have 0 error by the end of the dataset
            {
                'experimentDir': 'experiments/missing_record/simple_0',
                'results': {
                    ('DefaultTask.NontemporalMultiStep.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=25:field=field1"):
                    (1.0, 1.0),
                }
            },
        ]  # end of multistepTests

        classificationTests = [
            # ----------------------------------------------------------------------
            # Classification Experiments
            {
                'experimentDir':
                'experiments/classification/category_hub_TP_0',
                'results': {
                    ('OnlineLearning.TemporalClassification.predictionLog.csv', 'classification:avg_err:window=200'):
                    (0.0, 0.020),
                }
            },
            {
                'experimentDir': 'experiments/classification/category_TM_0',
                'results': {
                    ('OnlineLearning.TemporalClassification.predictionLog.csv', 'classification:avg_err:window=200'):
                    (0.0, 0.045),
                    ('OnlineLearning.TemporalClassification.predictionLog.csv', 'classConfidences:neg_auc:computeEvery=10:window=200'):
                    (-1.0, -0.98),
                }
            },
            {
                'experimentDir': 'experiments/classification/category_TM_1',
                'results': {
                    ('OnlineLearning.TemporalClassification.predictionLog.csv', 'classification:avg_err:window=200'):
                    (0.0, 0.005),
                }
            },
            {
                'experimentDir': 'experiments/classification/scalar_TP_0',
                'results': {
                    ('OnlineLearning.TemporalClassification.predictionLog.csv', 'classification:avg_err:window=200'):
                    (0.0, 0.155),
                    ('OnlineLearning.TemporalClassification.predictionLog.csv', 'classConfidences:neg_auc:computeEvery=10:window=200'):
                    (-1.0, -0.900),
                }
            },
            {
                'experimentDir': 'experiments/classification/scalar_TP_1',
                'results': {
                    ('OnlineLearning.TemporalClassification.predictionLog.csv', 'classification:avg_err:window=200'):
                    (0.0, 0.03),
                }
            },
        ]  # End of classification tests

        spatialClassificationTests = [
            {
                'experimentDir':
                'experiments/spatial_classification/category_0',
                'results': {
                    ('DefaultTask.NontemporalClassification.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=0:window=100:field=classification"):
                    (0.0, 0.05),
                }
            },
            {
                'experimentDir':
                'experiments/spatial_classification/category_1',
                'results': {
                    ('DefaultTask.NontemporalClassification.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=0:window=100:field=classification"):
                    (0.0, 0.0),
                }
            },
            {
                'experimentDir': 'experiments/spatial_classification/scalar_0',
                'results': {
                    ('DefaultTask.NontemporalClassification.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='aae':steps=0:window=100:field=classification"):
                    (0.0, 0.025),
                }
            },
            {
                'experimentDir': 'experiments/spatial_classification/scalar_1',
                'results': {
                    ('DefaultTask.NontemporalClassification.predictionLog.csv', "multiStepBestPredictions:multiStep:errorMetric='aae':steps=0:window=100:field=classification"):
                    (-1e-10, 0.01),
                }
            },
        ]

        anomalyTests = [
            # ----------------------------------------------------------------------
            # Anomaly Experiments
            {
                'experimentDir': 'experiments/anomaly/temporal/simple',
                'results': {
                    ('DefaultTask.TemporalAnomaly.predictionLog.csv', 'anomalyScore:passThruPrediction:window=1000:field=f'):
                    (0.02, 0.04),
                }
            },
        ]  # End of anomaly tests

        tests = []
        tests += multistepTests
        tests += classificationTests
        tests += spatialClassificationTests
        tests += anomalyTests

        # Uncomment this to only run a specific experiment(s)
        #tests = tests[7:8]

        # This contains a list of tuples: (expDir, key, results)
        summaryOfResults = []
        startTime = time.time()

        testIdx = -1
        for test in tests:
            testIdx += 1
            expDirectory = test['experimentDir']

            # -------------------------------------------------------------------
            # Remove files/directories generated by previous tests:
            toDelete = []

            # Remove inference results
            path = os.path.join(expDirectory, "inference")
            toDelete.append(path)
            path = os.path.join(expDirectory, "savedmodels")
            toDelete.append(path)

            for path in toDelete:
                if not os.path.exists(path):
                    continue
                print "Removing %s ..." % path
                if os.path.isfile(path):
                    os.remove(path)
                else:
                    shutil.rmtree(path)

            # ------------------------------------------------------------------------
            # Run the test.
            args = test.get('args', [])
            print "Running experiment %s ..." % (expDirectory)
            command = ['python', runExperiment, expDirectory] + args
            retVal = call(command)

            # If retVal is non-zero and this was not a negative test, or if retVal is
            # zero and this is a negative test, something went wrong.
            if retVal:
                print "Details of failed test: %s" % test
                print(
                    "TestIdx %d, OPF experiment '%s' failed with return code %i."
                    % (testIdx, expDirectory, retVal))
            self.assertFalse(retVal)

            # -----------------------------------------------------------------------
            # Check the results
            for (key, expValues) in test['results'].items():
                (logFilename, colName) = key

                # Open the prediction log file
                logFile = FileRecordStream(
                    os.path.join(expDirectory, 'inference', logFilename))
                colNames = [x[0] for x in logFile.getFields()]
                if colName not in colNames:
                    print "TestIdx %d: %s not one of the columns in " \
                      "prediction log file. Available column names are: %s" % (testIdx,
                              colName, colNames)
                self.assertTrue(colName in colNames)
                colIndex = colNames.index(colName)

                # Read till we get to the last line
                while True:
                    try:
                        row = logFile.next()
                    except StopIteration:
                        break
                result = row[colIndex]

                # Save summary of results
                summaryOfResults.append((expDirectory, colName, result))

                print "Actual result for %s, %s:" % (expDirectory,
                                                     colName), result
                print "Expected range:", expValues
                failed = (expValues[0] is not None and result < expValues[0]) \
                    or (expValues[1] is not None and result > expValues[1])
                if failed:
                    print(
                        "TestIdx %d: Experiment %s failed. \nThe actual result"
                        " for %s (%s) was outside the allowed range of %s" %
                        (testIdx, expDirectory, colName, result, expValues))
                else:
                    print "  Within expected range."
                self.assertFalse(failed)

        # =======================================================================
        # Print summary of results:
        print
        print "Summary of results in all experiments run:"
        print "========================================="
        prevExpDir = None
        for (expDir, key, results) in summaryOfResults:
            if expDir != prevExpDir:
                print
                print expDir
                prevExpDir = expDir
            print "  %s: %s" % (key, results)

        print "\nElapsed time: %.1f seconds" % (time.time() - startTime)
Example #6
    def _testSamePredictions(self,
                             experiment,
                             predSteps,
                             checkpointAt,
                             predictionsFilename,
                             additionalFields=None):
        """ Test that we get the same predictions out from the following two
    scenarios:

    a_plus_b: Run the network for 'a' iterations followed by 'b' iterations
    a, followed by b: Run the network for 'a' iterations, save it, load it
                      back in, then run for 'b' iterations.

    Parameters:
    -----------------------------------------------------------------------
    experiment:   base directory of the experiment. This directory should
                    contain the following:
                        base.py
                        a_plus_b/description.py
                        a/description.py
                        b/description.py
                    The sub-directory description files should import the
                    base.py and only change the first and last record used
                    from the data file.
    predSteps:   Number of steps ahead predictions are for
    checkpointAt: Number of iterations that 'a' runs for.
                 IMPORTANT: This must match the number of records that
                 a/description.py runs for - it is NOT dynamically stuffed into
                 the a/description.py.
    predictionsFilename: The name of the predictions file that the OPF
                  generates for this experiment (for example
                  'DefaultTask.NontemporalMultiStep.predictionLog.csv')
    """

        # Get the 3 sub-experiment directories
        aPlusBExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "a_plus_b")
        aExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "a")
        bExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "b")

        # Run a+b
        _aPlusBExp = runExperiment(args=[aPlusBExpDir])

        # Run a, then copy the saved checkpoint into the b directory
        _aExp = runExperiment(args=[aExpDir])
        if os.path.exists(os.path.join(bExpDir, 'savedmodels')):
            shutil.rmtree(os.path.join(bExpDir, 'savedmodels'))
        shutil.copytree(src=os.path.join(aExpDir, 'savedmodels'),
                        dst=os.path.join(bExpDir, 'savedmodels'))

        _bExp = runExperiment(args=[bExpDir, '--load=DefaultTask'])

        # Now, compare the predictions at the end of a+b to those in b.
        aPlusBPred = FileRecordStream(
            os.path.join(aPlusBExpDir, 'inference', predictionsFilename))
        bPred = FileRecordStream(
            os.path.join(bExpDir, 'inference', predictionsFilename))

        colNames = [x[0] for x in aPlusBPred.getFields()]
        actValueColIdx = colNames.index('multiStepPredictions.actual')
        predValueColIdx = colNames.index('multiStepPredictions.%d' %
                                         (predSteps))

        # Skip past the 'a' records in aPlusB
        for i in range(checkpointAt):
            aPlusBPred.next()

        # Now, read through the records that don't have predictions yet
        for i in range(predSteps):
            aPlusBPred.next()
            bPred.next()

        # Now, compare predictions in the two files
        rowIdx = checkpointAt + predSteps + 4 - 1
        epsilon = 0.0001
        while True:
            rowIdx += 1
            try:
                rowAPB = aPlusBPred.next()
                rowB = bPred.next()

                # Compare actuals
                self.assertEqual(
                    rowAPB[actValueColIdx], rowB[actValueColIdx],
                    "Mismatch in actual values: row %d of a+b has %s and row %d of "
                    "b has %s" % (rowIdx, rowAPB[actValueColIdx],
                                  rowIdx - checkpointAt, rowB[actValueColIdx]))

                # Compare predictions, within nearest epsilon
                predAPB = eval(rowAPB[predValueColIdx])
                predB = eval(rowB[predValueColIdx])

                # Sort with highest probabilities first
                predAPB = [(a, b) for b, a in predAPB.items()]
                predB = [(a, b) for b, a in predB.items()]
                predAPB.sort(reverse=True)
                predB.sort(reverse=True)

                if additionalFields is not None:
                    for additionalField in additionalFields:
                        fieldIdx = colNames.index(additionalField)
                        self.assertEqual(rowAPB[fieldIdx], rowB[fieldIdx],
                          "Mismatch in field \'%s\' values: row %d of a+b has value: (%s)\n"
                          " and row %d of b has value: %s" % \
                          (additionalField, rowIdx, rowAPB[fieldIdx],
                            rowIdx-checkpointAt, rowB[fieldIdx]))

                self.assertEqual(len(predAPB), len(predB),
                      "Mismatch in predicted values: row %d of a+b has %d predictions: "
                      "\n  (%s) and row %d of b has %d predictions:\n  (%s)" % \
                      (rowIdx, len(predAPB), predAPB, rowIdx-checkpointAt, len(predB),
                       predB))

                for i in range(len(predAPB)):
                    (aProb, aValue) = predAPB[i]
                    (bProb, bValue) = predB[i]
                    self.assertLess(
                        abs(aValue - bValue), epsilon,
                        "Mismatch in predicted values: row %d of a+b predicts value %s "
                        "and row %d of b predicts %s" %
                        (rowIdx, aValue, rowIdx - checkpointAt, bValue))
                    self.assertLess(abs(aProb-bProb), epsilon,
                        "Mismatch in probabilities: row %d of a+b predicts %s with "
                        "probability %s and row %d of b predicts %s with probability %s" \
                         % (rowIdx, aValue, aProb, rowIdx-checkpointAt, bValue, bProb))

            except StopIteration:
                break

        # clean up model checkpoint directories
        shutil.rmtree(getCheckpointParentDir(aExpDir))
        shutil.rmtree(getCheckpointParentDir(bExpDir))
        shutil.rmtree(getCheckpointParentDir(aPlusBExpDir))

        print "Predictions match!"