def test_evaluate(self):
    # Duplicate metric instances collapse into a single entry keyed by name.
    evaluator = eval_metrics.Evaluator([
        metrics_online.StddevWithinRuns(),
        metrics_online.StddevWithinRuns()
    ])
    results = evaluator.evaluate(self.run_dirs)
    self.assertEqual(list(results.keys()), ['StddevWithinRuns'])
    self.assertTrue(np.greater(list(results.values()), 0.).all())
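
For context, the same Evaluator pattern could be used outside a test harness along the lines of the sketch below; the import paths and the run directories are assumptions, not taken from the example above.

# Minimal usage sketch. The rl_reliability_metrics import paths and the
# run directories are assumed, not part of the example above.
from rl_reliability_metrics.evaluation import eval_metrics
from rl_reliability_metrics.metrics import metrics_online

run_dirs = ['/tmp/my_experiment/run_0', '/tmp/my_experiment/run_1']  # hypothetical
evaluator = eval_metrics.Evaluator([metrics_online.StddevWithinRuns()])
results = evaluator.evaluate(run_dirs)
print(results['StddevWithinRuns'])
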
def test_evaluate_using_environment_steps(self):
    # Bind eval_points so the metric is evaluated at environment step 2001.
    gin.bind_parameter('metrics_online.StddevWithinRuns.eval_points',
                       [2001])
    metric_instances = [
        metrics_online.StddevWithinRuns(),
        metrics_online.StddevWithinRuns()
    ]
    evaluator = eval_metrics.Evaluator(
        metric_instances, timepoint_variable='Metrics/EnvironmentSteps')
    results = evaluator.evaluate(self.run_dirs)
    self.assertEqual(list(results.keys()), ['StddevWithinRuns'])
    self.assertTrue(np.greater(list(results.values()), 0.).all())
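
The gin.bind_parameter call above could also be expressed as a parsed config string; the sketch below assumes only the same configurable name already used in the binding.

import gin

# Equivalent eval_points override via a config string (sketch only; assumes
# the same configurable name as the bind_parameter call above).
gin.parse_config("""
metrics_online.StddevWithinRuns.eval_points = [2001]
""")
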
def test_evaluate_with_permutations(self):
    evaluator = eval_metrics.Evaluator([metrics_online.StddevWithinRuns()])
    n_permutations = 3
    permutation_start_idx = 100
    random_seed = 50
    outfile_prefix = os.path.join(FLAGS.test_tmpdir,
                                  'robustness_results_permuted_')
    results = evaluator.evaluate_with_permutations(
        self.run_dirs, self.run_dirs, outfile_prefix, n_permutations,
        permutation_start_idx, random_seed)

    # Check the length of the results.
    self.assertLen(results, n_permutations)

    # Check a single result.
    one_result = list(results.values())[0]['curves1']
    self.assertEqual(list(one_result.keys()), ['StddevWithinRuns'])
    self.assertTrue(np.greater(list(one_result.values()), 0.).all())

    # Check the output files.
    results_files = io_utils.paths_glob('%s*results.json' % outfile_prefix)
    self.assertLen(results_files, 1)

    # If run again with the same seed, the results should be the same.
    results_same = evaluator.evaluate_with_permutations(
        self.run_dirs, self.run_dirs, outfile_prefix, n_permutations,
        permutation_start_idx, random_seed)
    self._assert_results_same(results, results_same)

    # If run again with a different seed, the results should be different.
    results_different = evaluator.evaluate_with_permutations(
        self.run_dirs, self.run_dirs, outfile_prefix, n_permutations,
        permutation_start_idx, random_seed + 1)
    self._assert_results_different(results, results_different)
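
The output files matched by the '*results.json' glob above could be inspected directly with the standard library; the sketch below assumes only that they are valid JSON, since the test does not show their internal structure.

import glob
import json

# Hypothetical prefix mirroring the outfile_prefix used in the test above.
outfile_prefix = '/tmp/robustness_results_permuted_'
for path in glob.glob('%s*results.json' % outfile_prefix):
    with open(path) as f:
        permuted_results = json.load(f)
    print(path, type(permuted_results))
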
Example #4
def testCorrectStddevWithinRuns(self, timepoints, window_size, baseline,
                                expected):
    # Each curve is a 2 x T array: row 0 holds timepoints, row 1 holds values.
    curves = [
        np.array([[5, 7, 9], [1, 1, 1]]),
        np.array([[5, 7, 9, 11], [2, 3, 4, 5]]),
        np.array([[5, 7, 9, 10], [5, 4, 2, 1]])
    ]
    metric = metrics_online.StddevWithinRuns(window_size, timepoints,
                                             baseline)
    result = metric(curves)
    self.assertEqual(metric.name, 'StddevWithinRuns')
    np.testing.assert_allclose(result, expected)
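
The parameterized test shows that a metric instance can be called directly on a list of 2 x T curve arrays. The sketch below reuses those curves; the constructor values (window_size=2, timepoints=[7], baseline=None) and the import path are illustrative assumptions, not values from the test.

import numpy as np
from rl_reliability_metrics.metrics import metrics_online  # assumed import path

curves = [
    np.array([[5, 7, 9], [1, 1, 1]]),
    np.array([[5, 7, 9, 11], [2, 3, 4, 5]]),
    np.array([[5, 7, 9, 10], [5, 4, 2, 1]]),
]
# Argument order mirrors the test above: (window_size, timepoints, baseline).
metric = metrics_online.StddevWithinRuns(2, [7], None)
print(metric.name)     # 'StddevWithinRuns'
print(metric(curves))  # within-run standard deviation values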