def _testSamePredictions(self, experiment, predSteps, checkpointAt,
                         predictionsFilename, additionalFields=None,
                         newSerialization=False):
  """ Test that we get the same predictions out from the following two
  scenarios:

  a_plus_b: Run the network for 'a' iterations followed by 'b' iterations
  a, followed by b: Run the network for 'a' iterations, save it, load it
                    back in, then run for 'b' iterations.

  Parameters:
  -----------------------------------------------------------------------
  experiment:       base directory of the experiment. This directory should
                    contain the following:
                      base.py
                      a_plus_b/description.py
                      a/description.py
                      b/description.py
                    The sub-directory description files should import base.py
                    and only change the first and last record used from the
                    data file.
  predSteps:        Number of steps ahead predictions are for
  checkpointAt:     Number of iterations that 'a' runs for.
                    IMPORTANT: This must match the number of records that
                    a/description.py runs for - it is NOT dynamically stuffed
                    into a/description.py.
  predictionsFilename: The name of the predictions file that the OPF
                    generates for this experiment (for example
                    'DefaultTask.NontemporalMultiStep.predictionLog.csv')
  additionalFields: Optional list of additional prediction-log column names
                    to compare between the two runs.
  newSerialization: Whether to use new capnproto serialization.
  """

  # Get the 3 sub-experiment directories
  aPlusBExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "a_plus_b")
  aExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "a")
  bExpDir = os.path.join(_EXPERIMENT_BASE, experiment, "b")

  # Run a+b
  args = self._createExperimentArgs(aPlusBExpDir,
                                    newSerialization=newSerialization)
  _aPlusBExp = runExperiment(args)

  # Run a, then copy the saved checkpoint into the b directory
  args = self._createExperimentArgs(aExpDir,
                                    newSerialization=newSerialization)
  _aExp = runExperiment(args)
  if os.path.exists(os.path.join(bExpDir, 'savedmodels')):
    shutil.rmtree(os.path.join(bExpDir, 'savedmodels'))
  shutil.copytree(src=os.path.join(aExpDir, 'savedmodels'),
                  dst=os.path.join(bExpDir, 'savedmodels'))

  args = self._createExperimentArgs(bExpDir,
                                    newSerialization=newSerialization,
                                    additionalArgs=['--load=DefaultTask'])
  _bExp = runExperiment(args)

  # Now, compare the predictions at the end of a+b to those in b.
  aPlusBPred = FileRecordStream(os.path.join(aPlusBExpDir, 'inference',
                                             predictionsFilename))
  bPred = FileRecordStream(os.path.join(bExpDir, 'inference',
                                        predictionsFilename))

  colNames = [x[0] for x in aPlusBPred.getFields()]
  actValueColIdx = colNames.index('multiStepPredictions.actual')
  predValueColIdx = colNames.index('multiStepPredictions.%d' % (predSteps))

  # Skip past the 'a' records in aPlusB
  for i in range(checkpointAt):
    aPlusBPred.next()

  # Now, read through the records that don't have predictions yet
  for i in range(predSteps):
    aPlusBPred.next()
    bPred.next()

  # Now, compare predictions in the two files
  rowIdx = checkpointAt + predSteps + 4 - 1
  epsilon = 0.0001
  while True:
    rowIdx += 1
    try:
      rowAPB = aPlusBPred.next()
      rowB = bPred.next()

      # Compare actuals
      self.assertEqual(rowAPB[actValueColIdx], rowB[actValueColIdx],
          "Mismatch in actual values: row %d of a+b has %s and row %d of "
          "b has %s" % (rowIdx, rowAPB[actValueColIdx],
                        rowIdx - checkpointAt, rowB[actValueColIdx]))

      # Compare predictions, within nearest epsilon
      predAPB = eval(rowAPB[predValueColIdx])
      predB = eval(rowB[predValueColIdx])

      # Sort with highest probabilities first
      predAPB = [(a, b) for b, a in predAPB.items()]
      predB = [(a, b) for b, a in predB.items()]
      predAPB.sort(reverse=True)
      predB.sort(reverse=True)

      if additionalFields is not None:
        for additionalField in additionalFields:
          fieldIdx = colNames.index(additionalField)
          self.assertEqual(rowAPB[fieldIdx], rowB[fieldIdx],
              "Mismatch in field '%s' values: row %d of a+b has value: (%s)\n"
              " and row %d of b has value: %s" %
              (additionalField, rowIdx, rowAPB[fieldIdx],
               rowIdx - checkpointAt, rowB[fieldIdx]))

      self.assertEqual(len(predAPB), len(predB),
          "Mismatch in predicted values: row %d of a+b has %d predictions: "
          "\n (%s) and row %d of b has %d predictions:\n (%s)" %
          (rowIdx, len(predAPB), predAPB, rowIdx - checkpointAt,
           len(predB), predB))

      for i in range(len(predAPB)):
        (aProb, aValue) = predAPB[i]
        (bProb, bValue) = predB[i]

        self.assertLess(abs(aValue - bValue), epsilon,
            "Mismatch in predicted values: row %d of a+b predicts value %s "
            "and row %d of b predicts %s" %
            (rowIdx, aValue, rowIdx - checkpointAt, bValue))

        self.assertLess(abs(aProb - bProb), epsilon,
            "Mismatch in probabilities: row %d of a+b predicts %s with "
            "probability %s and row %d of b predicts %s with probability %s" %
            (rowIdx, aValue, aProb, rowIdx - checkpointAt, bValue, bProb))

    except StopIteration:
      break

  # Clean up model checkpoint directories
  shutil.rmtree(getCheckpointParentDir(aExpDir))
  shutil.rmtree(getCheckpointParentDir(bExpDir))
  shutil.rmtree(getCheckpointParentDir(aPlusBExpDir))

  print "Predictions match!"
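# Illustrative caller for the helper above -- a sketch, not part of the
# original suite. A test method passes an experiment directory name plus
# step/checkpoint counts that match what its a/description.py actually runs.
# The experiment name and the counts below are assumptions for illustration.
def testNonTemporalMultiStepSketch(self):
  self._testSamePredictions(
      experiment="non_temporal_multi_step",   # hypothetical experiment dir
      predSteps=24,                            # assumed prediction horizon
      checkpointAt=250,                        # must match a/description.py
      predictionsFilename='DefaultTask.NontemporalMultiStep.predictionLog.csv')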
def testExperimentResults(self):
  """Run specific experiments and verify that they are producing the
  correct results.

  opfDir is the examples/opf directory in the install path and is used to
  find run_opf_experiment.py

  The testdir is the directory that contains the experiments we will be
  running. When running in the auto-build setup, this will be a temporary
  directory that has had this script, as well as the specific experiments
  we will be running, copied into it by the
  qa/autotest/prediction_results.py script.
  When running stand-alone from the command line, this will point to the
  examples/prediction directory in the install tree (same as predictionDir)
  """

  nupic_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           "..", "..", "..", "..")
  opfDir = os.path.join(nupic_dir, "examples", "opf")

  testDir = opfDir

  # The testdir is the directory that contains the experiments we will be
  # running. When running in the auto-build setup, this will be a temporary
  # directory that has had this script, as well as the specific experiments
  # we will be running, copied into it by the
  # qa/autotest/prediction_results.py script.
  # When running stand-alone from the command line, we can simply point to
  # the examples/prediction directory in the install tree.
  if not os.path.exists(os.path.join(testDir, "experiments/classification")):
    testDir = opfDir

  # Generate any dynamically generated datasets now
  command = ['python', os.path.join(testDir, 'experiments', 'classification',
                                    'makeDatasets.py')]
  retval = call(command)
  self.assertEqual(retval, 0)

  # Generate any dynamically generated datasets now
  command = ['python', os.path.join(testDir, 'experiments', 'multistep',
                                    'make_datasets.py')]
  retval = call(command)
  self.assertEqual(retval, 0)

  # Generate any dynamically generated datasets now
  command = ['python', os.path.join(testDir, 'experiments',
                                    'spatial_classification',
                                    'make_datasets.py')]
  retval = call(command)
  self.assertEqual(retval, 0)

  # Run from the test directory so that we can find our experiments
  os.chdir(testDir)

  runExperiment = os.path.join(nupic_dir, "scripts", "run_opf_experiment.py")

  # A list of experiments to run. Valid attributes:
  #   experimentDir - Required, path to the experiment directory containing
  #                   description.py
  #   args          - Optional. List of arguments for run_opf_experiment
  #   results       - A dictionary of expected results. The keys are tuples
  #                   containing (predictionLogFileName, columnName). The
  #                   value is a (min, max) expected value from the last row
  #                   in the prediction log.
  multistepTests = [
    # For this one, in theory the error for 1 step should be < 0.20
    { 'experimentDir': 'experiments/multistep/simple_0',
      'results': {
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.20),
      }
    },

    # For this one, in theory the error for 1 step should be < 0.50, but we
    # get slightly higher because our sample size is smaller than ideal
    { 'experimentDir': 'experiments/multistep/simple_0_f2',
      'results': {
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 0.66),
      }
    },

    # For this one, in theory the error for 1 step should be < 0.20
    { 'experimentDir': 'experiments/multistep/simple_1',
      'results': {
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.20),
      }
    },

    # For this test, we haven't figured out the theoretical error; this
    # error is determined empirically from actual results
    { 'experimentDir': 'experiments/multistep/simple_1_f2',
      'results': {
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 3.76),
      }
    },

    # For this one, in theory the error for 1 step should be < 0.20, but we
    # get slightly higher because our sample size is smaller than ideal
    { 'experimentDir': 'experiments/multistep/simple_2',
      'results': {
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.31),
      }
    },

    # For this one, in theory the error for 1 step should be < 0.10 and for
    # 3 steps < 0.30, but our actual results are better.
    { 'experimentDir': 'experiments/multistep/simple_3',
      'results': {
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                    (0.0, 0.06),
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=3:window=200:field=field1"):
                    (0.0, 0.20),
      }
    },

    # For this test, we haven't figured out the theoretical error; this
    # error is determined empirically from actual results
    { 'experimentDir': 'experiments/multistep/simple_3_f2',
      'results': {
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='aae':steps=1:window=200:field=field2"):
                    (0.0, 0.6),
        ('DefaultTask.TemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='aae':steps=3:window=200:field=field2"):
                    (0.0, 1.8),
      }
    },

    # Test missing record support.
    # Should have 0 error by the end of the dataset
    { 'experimentDir': 'experiments/missing_record/simple_0',
      'results': {
        ('DefaultTask.NontemporalMultiStep.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=25:field=field1"):
                    (1.0, 1.0),
      }
    },
  ]  # end of multistepTests

  classificationTests = [
    # ----------------------------------------------------------------------
    # Classification Experiments
    { 'experimentDir': 'experiments/classification/category_hub_TP_0',
      'results': {
        ('OnlineLearning.TemporalClassification.predictionLog.csv',
         'classification:avg_err:window=200'): (0.0, 0.020),
      }
    },

    { 'experimentDir': 'experiments/classification/category_TM_0',
      'results': {
        ('OnlineLearning.TemporalClassification.predictionLog.csv',
         'classification:avg_err:window=200'): (0.0, 0.045),
        ('OnlineLearning.TemporalClassification.predictionLog.csv',
         'classConfidences:neg_auc:computeEvery=10:window=200'): (-1.0, -0.98),
      }
    },

    { 'experimentDir': 'experiments/classification/category_TM_1',
      'results': {
        ('OnlineLearning.TemporalClassification.predictionLog.csv',
         'classification:avg_err:window=200'): (0.0, 0.005),
      }
    },

    { 'experimentDir': 'experiments/classification/scalar_TP_0',
      'results': {
        ('OnlineLearning.TemporalClassification.predictionLog.csv',
         'classification:avg_err:window=200'): (0.0, 0.155),
        ('OnlineLearning.TemporalClassification.predictionLog.csv',
         'classConfidences:neg_auc:computeEvery=10:window=200'): (-1.0, -0.900),
      }
    },

    { 'experimentDir': 'experiments/classification/scalar_TP_1',
      'results': {
        ('OnlineLearning.TemporalClassification.predictionLog.csv',
         'classification:avg_err:window=200'): (0.0, 0.03),
      }
    },
  ]  # End of classification tests

  spatialClassificationTests = [
    { 'experimentDir': 'experiments/spatial_classification/category_0',
      'results': {
        ('DefaultTask.NontemporalClassification.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=0:window=100:field=classification"):
                    (0.0, 0.05),
      }
    },

    { 'experimentDir': 'experiments/spatial_classification/category_1',
      'results': {
        ('DefaultTask.NontemporalClassification.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=0:window=100:field=classification"):
                    (0.0, 0.0),
      }
    },

    { 'experimentDir': 'experiments/spatial_classification/scalar_0',
      'results': {
        ('DefaultTask.NontemporalClassification.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='aae':steps=0:window=100:field=classification"):
                    (0.0, 0.025),
      }
    },

    { 'experimentDir': 'experiments/spatial_classification/scalar_1',
      'results': {
        ('DefaultTask.NontemporalClassification.predictionLog.csv',
         "multiStepBestPredictions:multiStep:errorMetric='aae':steps=0:window=100:field=classification"):
                    (-1e-10, 0.01),
      }
    },
  ]

  anomalyTests = [
    # ----------------------------------------------------------------------
    # Anomaly Experiments
    { 'experimentDir': 'experiments/anomaly/temporal/simple',
      'results': {
        ('DefaultTask.TemporalAnomaly.predictionLog.csv',
         'anomalyScore:passThruPrediction:window=1000:field=f'): (0.02, 0.04),
      }
    },
  ]  # End of anomaly tests

  tests = []
  tests += multistepTests
  tests += classificationTests
  tests += spatialClassificationTests
  tests += anomalyTests

  # Uncomment this to run only specific experiment(s)
  #tests = tests[7:8]

  # This contains a list of tuples: (expDir, key, results)
  summaryOfResults = []
  startTime = time.time()

  testIdx = -1
  for test in tests:
    testIdx += 1
    expDirectory = test['experimentDir']

    # -------------------------------------------------------------------
    # Remove files/directories generated by previous tests
    toDelete = []

    # Remove inference results
    path = os.path.join(expDirectory, "inference")
    toDelete.append(path)
    path = os.path.join(expDirectory, "savedmodels")
    toDelete.append(path)

    for path in toDelete:
      if not os.path.exists(path):
        continue
      print "Removing %s ..." % path
      if os.path.isfile(path):
        os.remove(path)
      else:
        shutil.rmtree(path)

    # ------------------------------------------------------------------------
    # Run the test.
    args = test.get('args', [])
    print "Running experiment %s ..." % (expDirectory)
    command = ['python', runExperiment, expDirectory] + args
    retVal = call(command)

    # A non-zero return code means the experiment run itself failed.
    if retVal:
      print "Details of failed test: %s" % test
      print ("TestIdx %d, OPF experiment '%s' failed with return code %i." %
             (testIdx, expDirectory, retVal))
    self.assertFalse(retVal)

    # -----------------------------------------------------------------------
    # Check the results
    for (key, expValues) in test['results'].items():
      (logFilename, colName) = key

      # Open the prediction log file
      logFile = FileRecordStream(os.path.join(expDirectory, 'inference',
                                              logFilename))
      colNames = [x[0] for x in logFile.getFields()]
      if not colName in colNames:
        print ("TestIdx %d: %s not one of the columns in "
               "prediction log file. Available column names are: %s" %
               (testIdx, colName, colNames))
      self.assertTrue(colName in colNames)
      colIndex = colNames.index(colName)

      # Read through to the last line; 'row' holds the final record when
      # StopIteration is raised.
      while True:
        try:
          row = logFile.next()
        except StopIteration:
          break
      result = row[colIndex]

      # Save summary of results
      summaryOfResults.append((expDirectory, colName, result))

      print "Actual result for %s, %s:" % (expDirectory, colName), result
      print "Expected range:", expValues
      failed = ((expValues[0] is not None and result < expValues[0]) or
                (expValues[1] is not None and result > expValues[1]))
      if failed:
        print ("TestIdx %d: Experiment %s failed.\nThe actual result"
               " for %s (%s) was outside the allowed range of %s" %
               (testIdx, expDirectory, colName, result, expValues))
      else:
        print "  Within expected range."
      self.assertFalse(failed)

  # =======================================================================
  # Print summary of results
  print
  print "Summary of results in all experiments run:"
  print "========================================="
  prevExpDir = None
  for (expDir, key, results) in summaryOfResults:
    if expDir != prevExpDir:
      print
      print expDir
      prevExpDir = expDir
    print "  %s: %s" % (key, results)

  print "\nElapsed time: %.1f seconds" % (time.time() - startTime)
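# Sketch of a new harness entry (hypothetical directory and bounds, shown
# for illustration only). Keys of 'results' are
# (predictionLogFileName, columnName) tuples; values are (min, max) bounds
# checked against the last row of that prediction log, and either bound may
# be None to leave that side unchecked.
exampleTest = {
  'experimentDir': 'experiments/multistep/simple_4',  # hypothetical dir
  'args': [],                                         # optional extra CLI args
  'results': {
    ('DefaultTask.TemporalMultiStep.predictionLog.csv',
     "multiStepBestPredictions:multiStep:errorMetric='avg_err':steps=1:window=200:field=field1"):
                (0.0, 0.25),                          # assumed bounds
  }
}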
def checkpoint(self, checkpointSink, maxRows):
  """ [virtual method override] Save a checkpoint of the prediction output
  stream. The checkpoint comprises up to maxRows of the most recent inference
  records.

  Parameters:
  ----------------------------------------------------------------------
  checkpointSink:     A File-like object where predictions checkpoint data,
                        if any, will be stored.
  maxRows:            Maximum number of most recent inference rows
                        to checkpoint.
  """

  checkpointSink.truncate()

  if self.__dataset is None:
    if self.__checkpointCache is not None:
      self.__checkpointCache.seek(0)
      shutil.copyfileobj(self.__checkpointCache, checkpointSink)
      checkpointSink.flush()
      return
    else:
      # Nothing to checkpoint
      return

  self.__dataset.flush()
  totalDataRows = self.__dataset.getDataRowCount()

  if totalDataRows == 0:
    # Nothing to checkpoint
    return

  # Open reader of prediction file (suppress missingValues conversion)
  reader = FileRecordStream(self.__datasetPath, missingValues=[])

  # Create CSV writer for writing checkpoint rows
  writer = csv.writer(checkpointSink)

  # Write the header row to checkpoint sink -- just field names
  writer.writerow(reader.getFieldNames())

  # Determine number of rows to checkpoint
  numToWrite = min(maxRows, totalDataRows)

  # Skip initial rows to get to the rows that we actually need to checkpoint
  numRowsToSkip = totalDataRows - numToWrite
  for i in xrange(numRowsToSkip):
    reader.next()

  # Write the data rows to checkpoint sink
  numWritten = 0
  while True:
    row = reader.getNextRecord()
    if row is None:
      break

    row = [str(element) for element in row]
    #print "DEBUG: _BasicPredictionWriter: checkpointing row: %r" % (row,)
    writer.writerow(row)
    numWritten += 1

  assert numWritten == numToWrite, \
    "numWritten (%s) != numToWrite (%s)" % (numWritten, numToWrite)

  checkpointSink.flush()

  return
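# Minimal usage sketch (assumptions noted below): checkpoint() only needs a
# writable file-like sink that supports truncate() and flush(), so an
# in-memory StringIO works. 'predictionWriter' is a stand-in name for an
# already-initialized writer instance whose prediction dataset has rows.
import StringIO

sink = StringIO.StringIO()
predictionWriter.checkpoint(checkpointSink=sink, maxRows=100)
checkpointCsv = sink.getvalue()  # header row plus up to 100 most recent rows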