Example #1
    def test_streaming(self):
        input_iter = iter(xrange(int(10000)))
        doubled_stream = vimap.ext.sugar.imap_ordered(
            lambda x: 2 * x,
            input_iter
        )

        # take a few from the doubled output stream
        consumed = tuple(itertools.islice(doubled_stream, 40))

        # exhaust the input
        unspooled_input = tuple(input_iter)

        # now take the rest from the output stream
        rest = tuple(doubled_stream)

        num_processed = len(consumed) + len(rest)

        T.assert_gt(
            len(unspooled_input),
            9000,
            message="Most inputs should not be processed (too much spooling / "
                    "not lazy). Only {0} remained.".format(len(unspooled_input))
        )
        assert num_processed + len(unspooled_input) == 10000, "Something got dropped"

        T.assert_equal(
            consumed + rest,
            tuple(2 * i for i in xrange(num_processed)),
            message="Processed inputs weren't the first in the stream, or are out of order."
        )
Example #3
    def test_monthly(self):
        cfg = parse_daily('1st day')
        sch = scheduler.GeneralScheduler(**cfg._asdict())
        next_run_date = sch.next_run_time(None)

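        # Expecting the next monthly ('1st day') run in July, which presumably
        # means the test fixture sets self.now to a date in June.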
        assert_gt(next_run_date, self.now)
        assert_equal(next_run_date.month, 7)
Example #4
    def test_performance(self):
        # NOTE: Avoid hyperthreading, which doesn't help performance
        # in our test case.
        num_workers = min(4, multiprocessing.cpu_count() / 2)
        T.assert_gt(num_workers, 1, "Too few cores to run performance test.")

        start = 15000
        num_inputs = 2 * num_workers
        inputs = tuple(xrange(start, start + num_inputs))

        def factor_sequential():
            for i in inputs:
                factorial(i)

        pool = vimap.pool.fork_identical(factorial_worker,
                                         num_workers=num_workers)

        def factor_parallel():
            pool.imap(inputs).block_ignore_output(close_if_done=False)

        speedup_ratio = self.get_speedup_factor(factor_sequential,
                                                factor_parallel, 4)
        efficiency = speedup_ratio / num_workers
        print("Easy performance test efficiency: {0:.1f}% ({1:.1f}x speedup)".
              format(efficiency * 100., speedup_ratio))
        T.assert_gt(efficiency, 0.65, "Failed performance test!!")
Example #5
    def test_gradient_descent_optimizer_constrained(self):
        """Check that gradient descent can find the global optimum (in a domain) when the true optimum is outside."""
        # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
        domain_bounds = [ClosedInterval(0.05, 0.32), ClosedInterval(0.05, 0.6), ClosedInterval(0.05, 0.32)]
        domain = TensorProductDomain(domain_bounds)
        gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)

        # Work out what the maximum point would be given the domain constraints (i.e., project to the nearest point on the domain)
        constrained_optimum_point = self.polynomial.optimum_point
        for i, bounds in enumerate(domain_bounds):
            if constrained_optimum_point[i] > bounds.max:
                constrained_optimum_point[i] = bounds.max
            elif constrained_optimum_point[i] < bounds.min:
                constrained_optimum_point[i] = bounds.min

        tolerance = 2.0e-13
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        gradient_descent_optimizer.objective_function.current_point = initial_guess
        initial_value = gradient_descent_optimizer.objective_function.compute_objective_function()
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point
        # Verify coordinates
        self.assert_vector_within_relative(output, constrained_optimum_point, tolerance)

        # Verify optimized value is better than initial guess
        final_value = self.polynomial.compute_objective_function()
        T.assert_gt(final_value, initial_value)

        # Verify derivative: only get 0 derivative if the coordinate lies inside domain boundaries
        gradient = self.polynomial.compute_grad_objective_function()
        for i, bounds in enumerate(domain_bounds):
            if bounds.is_inside(self.polynomial.optimum_point[i]):
                self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)
Example #6
    def test_performance(self):
        # NOTE: Avoid hyperthreading, which doesn't help performance
        # in our test case.
        num_workers = min(4, multiprocessing.cpu_count() / 2)
        T.assert_gt(num_workers, 1,
                    "Too few cores to run performance test.")

        start = 15000
        num_inputs = 2 * num_workers
        inputs = tuple(xrange(start, start + num_inputs))

        def factor_sequential():
            for i in inputs:
                factorial(i)

        pool = vimap.pool.fork_identical(factorial_worker,
                                         num_workers=num_workers)

        def factor_parallel():
            pool.imap(inputs).block_ignore_output(close_if_done=False)

        speedup_ratio = self.get_speedup_factor(factor_sequential,
                                                factor_parallel, 4)
        efficiency = speedup_ratio / num_workers
        print(
            "Easy performance test efficiency: "
            "{0:.1f}% ({1:.1f}x speedup)".format(
                efficiency * 100., speedup_ratio))
        T.assert_gt(efficiency, 0.65, "Failed performance test!!")
Example #7
    def test_chunking_really_is_faster(self):
        """Chunking should be faster when the tasks are really small (so queue
        communication overhead is the biggest factor).
        """
        inputs = tuple(xrange(1, 10)) * 1000
        normal_pool = vimap.pool.fork_identical(factorial_worker,
                                                num_workers=2)
        chunked_pool = vimap.pool.fork_identical_chunked(factorial_worker,
                                                         num_workers=2)

        def factor_normal():
            normal_pool.imap(inputs).block_ignore_output(
                close_if_done=False)

        def factor_chunked():
            chunked_pool.imap(inputs).block_ignore_output(
                close_if_done=False)

        speedup_ratio = self.get_speedup_factor(factor_normal,
                                                factor_chunked,
                                                2)
        print(
            "Chunked performance test: {0:.1f}x speedup".format(
                speedup_ratio))
        T.assert_gt(speedup_ratio, 5)
Example #8
    def test_cleanup_on_failure(self):
        FAIL_CONFIG = BASIC_CONFIG + dedent("""
        jobs:
          - name: "failjob"
            node: local
            schedule: "constant"
            actions:
              - name: "failaction"
                command: "failplz"
        """) + TOUCH_CLEANUP_FMT

        client = self.sandbox.client
        self.sandbox.save_config(FAIL_CONFIG)
        self.sandbox.trond()

        action_run_url = client.get_url('MASTER.failjob.0.failaction')
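        # Wait for the failing action to reach the FAIL state
        # (wait_on_sandbox presumably polls until the condition holds).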
        def wait_on_failaction():
            return client.action(action_run_url)['state'] == 'FAIL'
        sandbox.wait_on_sandbox(wait_on_failaction)

        action_run_url = client.get_url('MASTER.failjob.1.cleanup')
        def wait_on_cleanup():
            return client.action(action_run_url)['state'] == 'SUCC'
        sandbox.wait_on_sandbox(wait_on_cleanup)

        assert_gt(len(client.job(client.get_url('MASTER.failjob'))['runs']), 1)
Example #9
 def test_big_fork_test(self):
     """Tests that if we have one more input, the big fork performance test
     would fail. This makes sure the above test is really doing something.
     """
     time_sleep_s = 0.2
     test_time = self.run_big_fork_test(time_sleep_s, 70, 71, 1)
     T.assert_gt(test_time, time_sleep_s * 2)
Example #10
    def test_cleanup_on_failure(self):
        config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "failjob"
            node: local
            schedule: "constant"
            actions:
              - name: "failaction"
                command: "failplz"
        """) + TOUCH_CLEANUP_FMT
        self.start_with_config(config)

        action_run_url = self.client.get_url('MASTER.failjob.0.failaction')
        sandbox.wait_on_state(
            self.client.action_runs,
            action_run_url,
            actionrun.ActionRun.STATE_FAILED.name,
        )

        action_run_url = self.client.get_url('MASTER.failjob.1.cleanup')
        sandbox.wait_on_state(
            self.client.action_runs,
            action_run_url,
            actionrun.ActionRun.STATE_SUCCEEDED.name,
        )
        job_runs = self.client.job(
            self.client.get_url('MASTER.failjob'), )['runs']
        assert_gt(len(job_runs), 1)
Example #11
 def get_speedup_factor(self, baseline_fcn, optimized_fcn, num_tests):
     baseline_performance = timeit.timeit(baseline_fcn, number=num_tests)
     optimized_performance = timeit.timeit(optimized_fcn, number=num_tests)
     _message = "Performance test too fast, susceptible to overhead"
     T.assert_gt(baseline_performance, 0.005, _message)
     T.assert_gt(optimized_performance, 0.005, _message)
     return (baseline_performance / optimized_performance)
Example #12
    def test_multistart_monte_carlo_expected_improvement_optimization(self):
        """Check that multistart optimization (gradient descent) can find the optimum point to sample (using 2-EI)."""
        numpy.random.seed(7858)  # TODO(271): Monte Carlo only works for this seed
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
        domain, gaussian_process = self.gp_test_environments[index]

        max_num_steps = 75  # this is *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 50
        gamma = 0.2
        pre_mult = 1.5
        max_relative_change = 1.0
        tolerance = 3.0e-2  # really large tolerance b/c converging with monte-carlo (esp in Python) is expensive
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 2

        # Expand the domain so that we are definitely not doing constrained optimization
        expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim)
        num_to_sample = 2
        repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

        num_mc_iterations = 10000
        # Just any random point that won't be optimal
        points_to_sample = repeated_domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
        # Compute EI and its gradient for the sake of comparison
        ei_initial = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
        grad_ei_initial = ei_eval.compute_grad_expected_improvement()

        ei_optimizer = GradientDescentOptimizer(repeated_domain, ei_eval, gd_parameters)
        best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

        # Check that gradients are "small"
        ei_eval.current_point = best_point
        ei_final = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
        grad_ei_final = ei_eval.compute_grad_expected_improvement()
        self.assert_vector_within_relative(grad_ei_final, numpy.zeros(grad_ei_final.shape), tolerance)

        # Check that output is in the domain
        T.assert_equal(repeated_domain.check_point_inside(best_point), True)

        # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
        # EI should have improved
        T.assert_gt(ei_final, ei_initial)

        # grad EI should have improved
        for index in numpy.ndindex(grad_ei_final.shape):
            T.assert_lt(numpy.fabs(grad_ei_final[index]), numpy.fabs(grad_ei_initial[index]))
Example #13
    def test_verbose(self):
        stdout, stderr, returncode = self.run_job()
        assert_equal(stdout, '2\t"bar"\n1\t"foo"\n3\tnull\n')
        assert_not_equal(stderr, '')
        assert_equal(returncode, 0)
        normal_stderr = stderr

        stdout, stderr, returncode = self.run_job(['-v'])
        assert_equal(stdout, '2\t"bar"\n1\t"foo"\n3\tnull\n')
        assert_not_equal(stderr, '')
        assert_equal(returncode, 0)
        assert_gt(len(stderr), len(normal_stderr))
Example #15
    def test_multistart_qei_expected_improvement_dfo(self):
        """Check that multistart optimization (BFGS) can find the optimum point to sample (using 2-EI)."""
        numpy.random.seed(7860)
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
        domain, gaussian_process = self.gp_test_environments[index]

        tolerance = 6.0e-5
        num_multistarts = 3

        # Expand the domain so that we are definitely not doing constrained optimization
        expanded_domain = TensorProductDomain([ClosedInterval(-4.0, 3.0)] * self.dim)
        num_to_sample = 2
        repeated_domain = RepeatedDomain(num_to_sample, expanded_domain)

        num_mc_iterations = 100000
        # Just any random point that won't be optimal
        points_to_sample = repeated_domain.generate_random_point_in_domain()
        ei_eval = ExpectedImprovement(gaussian_process, points_to_sample, num_mc_iterations=num_mc_iterations)
        # Compute EI and its gradient for the sake of comparison
        ei_initial = ei_eval.compute_expected_improvement()

        ei_optimizer = LBFGSBOptimizer(repeated_domain, ei_eval, self.BFGS_parameters)
        best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

        # Check that gradients are "small" or on border. MC is very inaccurate near 0, so use finite difference
        # gradient instead.
        ei_eval.current_point = best_point
        ei_final = ei_eval.compute_expected_improvement()

        finite_diff_grad = numpy.zeros(best_point.shape)
        h_value = 0.00001
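        # Central-difference approximation of the EI gradient: perturb each
        # coordinate by +/- h_value and divide the EI change by 2 * h_value.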
        for i in range(best_point.shape[0]):
            for j in range(best_point.shape[1]):
                best_point[i, j] += h_value
                ei_eval.current_point = best_point
                ei_upper = ei_eval.compute_expected_improvement()
                best_point[i, j] -= 2 * h_value
                ei_eval.current_point = best_point
                ei_lower = ei_eval.compute_expected_improvement()
                best_point[i, j] += h_value
                finite_diff_grad[i, j] = (ei_upper - ei_lower) / (2 * h_value)

        self.assert_vector_within_relative(finite_diff_grad, numpy.zeros(finite_diff_grad.shape), tolerance)

        # Check that output is in the domain
        T.assert_true(repeated_domain.check_point_inside(best_point))

        # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
        # EI should have improved
        T.assert_gt(ei_final, ei_initial)
Example #16
    def test_integration(self):
        """Run a runner with self.reporter as a test reporter, and verify a bunch of stuff."""
        runner = TestRunner(DummyTestCase, test_reporters=[self.reporter])
        conn = self.reporter.conn

        # We're creating a new in-memory database in make_reporter, so we don't need to worry about rows from previous tests.
        (build, ) = list(conn.execute(Builds.select()))

        assert_equal(build['buildname'], 'a_build_name')
        assert_equal(build['branch'], 'a_branch_name')
        assert_equal(build['revision'],
                     'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef')

        # Method count should be None until we discover (which is part of running)
        assert_equal(build['method_count'], None)
        # End time should be None until we run.
        assert_equal(build['end_time'], None)

        assert runner.run()

        # Now that we've run the tests, get the build row again and check to see that things are updated.
        (updated_build, ) = list(conn.execute(Builds.select()))

        for key in updated_build.keys():
            if key not in ('end_time', 'run_time', 'method_count'):
                assert_equal(build[key], updated_build[key])

        assert_gt(updated_build['run_time'], 0)
        assert_in_range(updated_build['end_time'], 0, time.time())
        assert_equal(updated_build['method_count'], 2)

        # The discovery_failure column should exist and be False.
        assert 'discovery_failure' in build
        assert_equal(build['discovery_failure'], False)

        # Check that we have one failure and one pass, and that they're the right tests.
        test_results = list(
            conn.execute(
                SA.select(columns=TestResults.columns + Tests.columns,
                          from_obj=TestResults.join(
                              Tests, TestResults.c.test == Tests.c.id))))

        assert_equal(len(test_results), 2)
        (passed_test, ) = [r for r in test_results if not r['failure']]
        (failed_test, ) = [r for r in test_results if r['failure']]

        assert_equal(passed_test['method_name'], 'test_pass')
        assert_equal(failed_test['method_name'], 'test_fail')
Example #17
    def test_interface_returns_same_as_cpp(self):
        """Integration test for the /gp/hyper_opt endpoint."""
        moe_route = GP_HYPER_OPT_MOE_ROUTE
        for test_case in self.gp_test_environments:
            python_domain, python_gp = test_case
            python_cov, historical_data = python_gp.get_core_data_copy()

            json_payload = self._build_json_payload(python_domain, python_cov, historical_data)
            resp = self.testapp.post(moe_route.endpoint, json_payload)
            resp_schema = GpHyperOptResponse()
            resp_dict = resp_schema.deserialize(json.loads(resp.body))

            T.assert_in('covariance_info', resp_dict)
            T.assert_equal(resp_dict['covariance_info']['covariance_type'], python_cov.covariance_type)
            # The optimal hyperparameters should be greater than zero
            for hyperparameter in resp_dict['covariance_info']['hyperparameters']:
                T.assert_gt(hyperparameter, 0.0)
Example #18
    def test_integration(self):
        """Run a runner with self.reporter as a test reporter, and verify a bunch of stuff."""
        runner = TestRunner(DummyTestCase, test_reporters=[self.reporter])
        conn = self.reporter.conn

        # We're creating a new in-memory database in make_reporter, so we don't need to worry about rows from previous tests.
        (build,) = list(conn.execute(Builds.select()))

        assert_equal(build['buildname'], 'a_build_name')
        assert_equal(build['branch'], 'a_branch_name')
        assert_equal(build['revision'], 'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef')

        # Method count should be None until we discover (which is part of running)
        assert_equal(build['method_count'], None)
        # End time should be None until we run.
        assert_equal(build['end_time'], None)

        assert runner.run()

        # Now that we've run the tests, get the build row again and check to see that things are updated.
        (updated_build,) = list(conn.execute(Builds.select()))

        for key in updated_build.keys():
            if key not in ('end_time', 'run_time', 'method_count'):
                assert_equal(build[key], updated_build[key])

        assert_gt(updated_build['run_time'], 0)
        assert_in_range(updated_build['end_time'], 0, time.time())
        assert_equal(updated_build['method_count'], 2)

        # The discovery_failure column should exist and be False.
        assert 'discovery_failure' in build
        assert_equal(build['discovery_failure'], False)

        # Check that we have one failure and one pass, and that they're the right tests.
        test_results = list(conn.execute(SA.select(
            columns=TestResults.columns + Tests.columns,
            from_obj=TestResults.join(Tests, TestResults.c.test == Tests.c.id)
        )))

        assert_equal(len(test_results), 2)
        (passed_test,) = [r for r in test_results if not r['failure']]
        (failed_test,) = [r for r in test_results if r['failure']]

        assert_equal(passed_test['method_name'], 'test_pass')
        assert_equal(failed_test['method_name'], 'test_fail')
Example #19
    def test_chunking_really_is_faster(self):
        """Chunking should be faster when the tasks are really small (so queue
        communication overhead is the biggest factor).
        """
        inputs = tuple(xrange(1, 10)) * 1000
        normal_pool = vimap.pool.fork_identical(factorial_worker, num_workers=2)
        chunked_pool = vimap.pool.fork_identical_chunked(factorial_worker, num_workers=2)

        def factor_normal():
            normal_pool.imap(inputs).block_ignore_output(close_if_done=False)

        def factor_chunked():
            chunked_pool.imap(inputs).block_ignore_output(close_if_done=False)

        speedup_ratio = self.get_speedup_factor(factor_normal, factor_chunked, 2)
        print("Chunked performance test: {0:.1f}x speedup".format(speedup_ratio))
        T.assert_gt(speedup_ratio, 5)
Example #20
    def test_update_perf(self):
        """update() should be faster than lots of individual inserts"""

        # Knobs that control how long this test takes vs. how accurate it is
        # This test *should not flake*, but if you run into problems then you
        # should increase `insert_per_iter` (the test will take longer though)
        num_iters = 5
        insert_per_iter = 300
        min_ratio = 10

        # Setup dbs
        def setup_dbs(name):
            name = name + '%d'
            db_paths = [
                os.path.join(self.tmpdir, name % i)
                for i in xrange(num_iters)
            ]
            return [sqlite3dbm.sshelve.open(path) for path in db_paths]
        update_dbs = setup_dbs('update')
        insert_dbs = setup_dbs('insert')

        # Setup data
        insert_data = [
            ('foo%d' % i, 'bar%d' % i)
            for i in xrange(insert_per_iter)
        ]

        # Time updates
        update_start = time.time()
        for update_db in update_dbs:
            update_db.update(insert_data)
        update_time = time.time() - update_start

        # Time inserts
        insert_start = time.time()
        for insert_db in insert_dbs:
            for k, v in insert_data:
                insert_db[k] = v
        insert_time = time.time() - insert_start

        # Inserts should take a substantially greater amount of time
        testify.assert_gt(insert_time, min_ratio*update_time)
Example #21
    def test_interface_returns_same_as_cpp(self):
        """Integration test for the /gp/hyper_opt endpoint."""
        moe_route = GP_HYPER_OPT_MOE_ROUTE
        for test_case in self.gp_test_environments:
            python_domain, python_gp = test_case
            python_cov, historical_data = python_gp.get_core_data_copy()

            json_payload = self._build_json_payload(python_domain, python_cov,
                                                    historical_data)
            resp = self.testapp.post(moe_route.endpoint, json_payload)
            resp_schema = GpHyperOptResponse()
            resp_dict = resp_schema.deserialize(json.loads(resp.body))

            T.assert_in('covariance_info', resp_dict)
            T.assert_equal(resp_dict['covariance_info']['covariance_type'],
                           python_cov.covariance_type)
            # The optimal hyperparameters should be greater than zero
            for hyperparameter in resp_dict['covariance_info'][
                    'hyperparameters']:
                T.assert_gt(hyperparameter, 0.0)
Example #22
    def test_cleanup_on_failure(self):
        config = BASIC_CONFIG + dedent("""
        jobs:
          - name: "failjob"
            node: local
            schedule: "constant"
            actions:
              - name: "failaction"
                command: "failplz"
        """) + TOUCH_CLEANUP_FMT
        self.start_with_config(config)

        action_run_url = self.client.get_url('MASTER.failjob.0.failaction')
        sandbox.wait_on_state(self.client.action_runs, action_run_url,
            actionrun.ActionRun.STATE_FAILED.name)

        action_run_url = self.client.get_url('MASTER.failjob.1.cleanup')
        sandbox.wait_on_state(self.client.action_runs, action_run_url,
            actionrun.ActionRun.STATE_SUCCEEDED.name)
        job_runs = self.client.job(self.client.get_url('MASTER.failjob'))['runs']
        assert_gt(len(job_runs), 1)
Example #23
    def test_update_perf(self):
        """update() should be faster than lots of individual inserts"""

        # Knobs that control how long this test takes vs. how accurate it is
        # This test *should not flake*, but if you run into problems then you
        # should increase `insert_per_iter` (the test will take longer though)
        num_iters = 5
        insert_per_iter = 300
        min_ratio = 10

        # Setup dbs
        def setup_dbs(name):
            name = name + '%d'
            db_paths = [
                os.path.join(self.tmpdir, name % i) for i in xrange(num_iters)
            ]
            return [sqlite3dbm.sshelve.open(path) for path in db_paths]

        update_dbs = setup_dbs('update')
        insert_dbs = setup_dbs('insert')

        # Setup data
        insert_data = [('foo%d' % i, 'bar%d' % i)
                       for i in xrange(insert_per_iter)]

        # Time updates
        update_start = time.time()
        for update_db in update_dbs:
            update_db.update(insert_data)
        update_time = time.time() - update_start

        # Time inserts
        insert_start = time.time()
        for insert_db in insert_dbs:
            for k, v in insert_data:
                insert_db[k] = v
        insert_time = time.time() - insert_start

        # Inserts should take a substantially greater amount of time
        testify.assert_gt(insert_time, min_ratio * update_time)
Example #24
    def test_gradient_descent_optimizer_constrained(self):
        """Check that gradient descent can find the global optimum (in a domain) when the true optimum is outside."""
        # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
        domain_bounds = [
            ClosedInterval(0.05, 0.32),
            ClosedInterval(0.05, 0.6),
            ClosedInterval(0.05, 0.32)
        ]
        domain = TensorProductDomain(domain_bounds)
        gradient_descent_optimizer = GradientDescentOptimizer(
            domain, self.polynomial, self.gd_parameters)

        # Work out what the maximum point would be given the domain constraints (i.e., project to the nearest point on the domain)
        constrained_optimum_point = self.polynomial.optimum_point
        for i, bounds in enumerate(domain_bounds):
            if constrained_optimum_point[i] > bounds.max:
                constrained_optimum_point[i] = bounds.max
            elif constrained_optimum_point[i] < bounds.min:
                constrained_optimum_point[i] = bounds.min

        tolerance = 2.0e-13
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        gradient_descent_optimizer.objective_function.current_point = initial_guess
        initial_value = gradient_descent_optimizer.objective_function.compute_objective_function()
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point
        # Verify coordinates
        self.assert_vector_within_relative(output, constrained_optimum_point,
                                           tolerance)

        # Verify optimized value is better than initial guess
        final_value = self.polynomial.compute_objective_function()
        T.assert_gt(final_value, initial_value)

        # Verify derivative: only get 0 derivative if the coordinate lies inside domain boundaries
        gradient = self.polynomial.compute_grad_objective_function()
        for i, bounds in enumerate(domain_bounds):
            if bounds.is_inside(self.polynomial.optimum_point[i]):
                self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)
Example #25
    def test_can_get_all_job_flows(self):
        now = datetime.datetime.utcnow()

        NUM_JOB_FLOWS = 2222
        assert_gt(NUM_JOB_FLOWS, DEFAULT_MAX_JOB_FLOWS_RETURNED)

        for i in range(NUM_JOB_FLOWS):
            jfid = 'j-%04d' % i
            self.mock_emr_job_flows[jfid] = MockEmrObject(
                creationdatetime=to_iso8601(now - datetime.timedelta(minutes=i)),
                jobflowid=jfid)

        emr_conn = EMRJobRunner().make_emr_conn()

        # ordinary describe_jobflows() hits the limit on number of job flows
        some_jfs = emr_conn.describe_jobflows()
        assert_equal(len(some_jfs), DEFAULT_MAX_JOB_FLOWS_RETURNED)

        all_jfs = describe_all_job_flows(emr_conn)
        assert_equal(len(all_jfs), NUM_JOB_FLOWS)
        assert_equal(sorted(jf.jobflowid for jf in all_jfs),
                     [('j-%04d' % i) for i in range(NUM_JOB_FLOWS)])
Example #27
    def test_performance(self):
        # NOTE: Avoid hyperthreading, which doesn't help performance
        # in our test case.
        num_workers = min(8, multiprocessing.cpu_count() / 2)
        T.assert_gt(num_workers, 1, "Too few cores to run performance test.")

        inputs = tuple(xrange(7000, 7100))

        def factor_sequential():
            for i in inputs:
                factorial(i)

        pool = vimap.pool.fork_identical(factorial_worker, num_workers=num_workers)

        def factor_parallel():
            pool.imap(inputs).block_ignore_output(close_if_done=False)

        sequential_performance = timeit.timeit(factor_sequential, number=4)
        parallel_performance = timeit.timeit(factor_parallel, number=4)
        speedup_ratio = sequential_performance / parallel_performance
        linear_speedup_ratio = float(num_workers)
        efficiency = speedup_ratio / linear_speedup_ratio
        print("Easy performance test efficiency: {0:.1f}% ({1:.1f}x speedup)".format(efficiency * 100.0, speedup_ratio))
        T.assert_gt(efficiency, 0.70, "Failed performance test!!")
Example #28
    def test_integration(self):
        """Run a runner with self.reporter as a test reporter, and verify a bunch of stuff."""
        runner = TestRunner(DummyTestCase, test_reporters=[self.reporter])
        conn = self.reporter.conn

        # We're creating a new in-memory database in make_reporter, so we don't need to worry about rows from previous tests.
        (build,) = list(conn.execute(self.reporter.Builds.select()))

        assert_equal(build['buildname'], 'a_build_name')
        assert_equal(build['branch'], 'a_branch_name')
        assert_equal(build['revision'], 'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef')
        assert_equal(build['buildbot_run_id'], self.fake_buildbot_run_id)

        # Method count should be None until we discover (which is part of running)
        assert_equal(build['method_count'], None)
        # End time should be None until we run.
        assert_equal(build['end_time'], None)

        assert runner.run()

        # Now that we've run the tests, get the build row again and check to see that things are updated.
        (updated_build,) = list(conn.execute(self.reporter.Builds.select()))

        for key in updated_build.keys():
            if key not in ('end_time', 'run_time', 'method_count'):
                assert_equal(build[key], updated_build[key])

        assert_gt(updated_build['run_time'], 0)
        assert_in_range(updated_build['end_time'], 0, time.time())
        assert_equal(updated_build['method_count'], 3)

        # The discovery_failure column should exist and be False.
        assert 'discovery_failure' in build
        assert_equal(build['discovery_failure'], False)

        # Check test results.
        test_results = self._get_test_results(conn)
        assert_equal(len(test_results), 3)

        # Check that we have one pass and two failures, and that they're the right tests.
        (passed_test,) = [r for r in test_results if not r['failure']]
        (failed_test, failed_test_2) = [r for r in test_results if r['failure']]

        assert_equal(passed_test['method_name'], 'test_pass')
        assert_equal(passed_test.traceback, None)
        assert_equal(passed_test.error, None)

        assert_equal(failed_test['method_name'], 'test_fail')
        assert_equal(failed_test.traceback.split('\n'), [
            'Traceback (most recent call last):',
            RegexMatcher('  File "\./test/plugins/sql_reporter_test\.py", line \d+, in test_fail'),
            '    assert False',
            'AssertionError',
            '' # ends with newline
        ])
        assert_equal(failed_test.error, 'AssertionError')

        assert_equal(failed_test_2['method_name'], 'test_multiline')
        assert_equal(failed_test_2.traceback.split('\n'), [
            'Traceback (most recent call last):',
            RegexMatcher('  File "\./test/plugins/sql_reporter_test\.py", line \d+, in test_multiline'),
            '    3""")',
            'Exception: I love lines:',
            '    1',
            '        2',
            '            3',
            '' # ends with newline
        ])
        assert_equal(failed_test_2.error, 'Exception: I love lines:\n    1\n        2\n            3')
Example #29
 def _assert_range(self, x, lower, upper):
     assert_gt(x, lower)
     assert_lt(x, upper)
Example #31
    def test_integration(self):
        """Run a runner with self.reporter as a test reporter, and verify a bunch of stuff."""
        runner = TestRunner(DummyTestCase, test_reporters=[self.reporter])
        conn = self.reporter.conn

        # We're creating a new in-memory database in make_reporter, so we don't need to worry about rows from previous tests.
        (build, ) = list(conn.execute(self.reporter.Builds.select()))

        assert_equal(build['buildname'], 'a_build_name')
        assert_equal(build['branch'], 'a_branch_name')
        assert_equal(build['revision'],
                     'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef')
        assert_equal(build['buildbot_run_id'], self.fake_buildbot_run_id)

        # Method count should be None until we discover (which is part of running)
        assert_equal(build['method_count'], None)
        # End time should be None until we run.
        assert_equal(build['end_time'], None)

        assert runner.run()

        # Now that we've run the tests, get the build row again and check to see that things are updated.
        (updated_build, ) = list(conn.execute(self.reporter.Builds.select()))

        for key in updated_build.keys():
            if key not in ('end_time', 'run_time', 'method_count'):
                assert_equal(build[key], updated_build[key])

        assert_gt(updated_build['run_time'], 0)
        assert_in_range(updated_build['end_time'], 0, time.time())
        assert_equal(updated_build['method_count'], 3)

        # The discovery_failure column should exist and be False.
        assert 'discovery_failure' in build
        assert_equal(build['discovery_failure'], False)

        # Check test results.
        test_results = self._get_test_results(conn)
        assert_equal(len(test_results), 3)

        # Check that we have one pass and two failures, and that they're the right tests.
        (passed_test, ) = [r for r in test_results if not r['failure']]
        (failed_test,
         failed_test_2) = [r for r in test_results if r['failure']]

        assert_equal(passed_test['method_name'], 'test_pass')
        assert_equal(passed_test.traceback, None)
        assert_equal(passed_test.error, None)

        assert_equal(failed_test['method_name'], 'test_fail')
        assert_equal(
            failed_test.traceback.split('\n'),
            [
                'Traceback (most recent call last):',
                RegexMatcher(
                    '  File "(\./)?test/plugins/sql_reporter_test\.py", line \d+, in test_fail'
                ),
                '    assert False',
                'AssertionError',
                ''  # ends with newline
            ])
        assert_equal(failed_test.error, 'AssertionError')

        assert_equal(failed_test_2['method_name'], 'test_multiline')
        assert_equal(
            failed_test_2.traceback.split('\n'),
            [
                'Traceback (most recent call last):',
                RegexMatcher(
                    '  File "(\./)?test/plugins/sql_reporter_test\.py", line \d+, in test_multiline'
                ),
                '    3""")',
                'Exception: I love lines:',
                '    1',
                '        2',
                '            3',
                ''  # ends with newline
            ])
        assert_equal(
            failed_test_2.error,
            'Exception: I love lines:\n    1\n        2\n            3')
Example #32
    def test_monthly(self):
        sch = scheduler_from_config('1st day')
        next_run_date = sch.next_run_time(None)

        assert_gt(next_run_date, self.now)
        assert_equal(next_run_date.month, 7)