def run(spec_dict):
    """ Details of the scalability exercise can be specified in the code block
    below. Note that only deviations from the benchmark initialization files
    need to be addressed.
    """
    cleanup()

    os.mkdir("rslt")
    os.chdir("rslt")

    grid_slaves = spec_dict["slaves"]
    for fname in spec_dict["fnames"]:
        run_single(spec_dict, fname, grid_slaves)

    aggregate_information("scalability", spec_dict["fnames"])
    send_notification("scalability")

    os.chdir("../")
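
# Hedged usage sketch (not part of the original module): run() above only reads
# the "slaves" and "fnames" keys of spec_dict; the concrete values below are
# illustrative assumptions rather than respy's actual benchmark specification.
def example_scalability_request():
    spec_dict = {
        "slaves": [2, 4, 8],            # hypothetical numbers of slave processes
        "fnames": ["kw_data_one.ini"],  # hypothetical benchmark initialization file
    }
    run(spec_dict)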
def run(request, is_compile, is_background, is_strict, num_procs):
    """ Run the regression tests. """
    if is_compile:
        compile_package(True)

    # We can set up a multiprocessing pool right away.
    mp_pool = mp.Pool(num_procs)

    # The late import is required so a potentially just compiled FORTRAN
    # implementation is recognized. This is important for the creation of the
    # regression vault as we want to include FORTRAN use cases.
    from respy import RespyCls

    # Process command line arguments.
    is_creation = False
    is_investigation, is_check = False, False
    num_tests, idx = None, None

    if request[0] == "create":
        is_creation, num_tests = True, int(request[1])
    elif request[0] == "check":
        is_check, num_tests = True, int(request[1])
    elif request[0] == "investigate":
        is_investigation, idx = True, int(request[1])
    else:
        raise AssertionError("request in [create, check, investigate]")

    if num_tests is not None:
        assert num_tests > 0
    if idx is not None:
        assert idx >= 0

    if is_investigation:
        fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
        with open(fname, "rb") as p:
            tests = pickle.load(p)

        attr, crit_val = tests[idx]

        params_spec = _params_spec_from_attributes(attr)
        options_spec = _options_spec_from_attributes(attr)
        respy_obj = RespyCls(params_spec, options_spec)

        simulate_observed(respy_obj)

        result = respy_obj.fit()[1]
        np.testing.assert_almost_equal(result, crit_val, decimal=DECIMALS)

    if is_creation:
        # We maintain the separate execution in the case of a single processor
        # for debugging purposes. The error messages are generally much more
        # informative.
        if num_procs == 1:
            tests = []
            for idx in range(num_tests):
                tests += [create_single(idx)]
        else:
            tests = mp_pool.map(create_single, range(num_tests))

        with open(TEST_RESOURCES_DIR / "regression_vault.pickle", "wb") as p:
            pickle.dump(tests, p)

        return

    if is_check:
        fname = TEST_RESOURCES_DIR / "regression_vault.pickle"
        with open(fname, "rb") as p:
            tests = pickle.load(p)

        run_single = partial(check_single, tests)
        indices = list(range(num_tests))

        # We maintain the separate execution in the case of a single processor
        # for debugging purposes. The error messages are generally much more
        # informative.
        if num_procs == 1:
            ret = []
            for index in indices:
                ret += [run_single(index)]
                # We need an early termination if a strict test run is requested.
                if is_strict and (False in ret):
                    break
        else:
            ret = []
            for chunk in get_chunks(indices, num_procs):
                ret += mp_pool.map(run_single, chunk)
                # We need an early termination if a strict test run is requested.
                # So we check whether there are any failures in the last batch.
                if is_strict and (False in ret):
                    break

        # This allows calling this test from another script that runs other
        # tests as well.
        idx_failures = [i for i, x in enumerate(ret) if x not in [True, None]]
        is_failure = False in ret

        if len(idx_failures) > 0:
            is_failure = True

        if not is_background:
            send_notification(
                "regression", is_failed=is_failure, idx_failures=idx_failures
            )

        return not is_failure
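
# The chunking helper get_chunks() is used above but not shown in this excerpt.
# A minimal sketch consistent with its use here (splitting the test indices into
# consecutive batches so that a strict run can stop after the first failing
# batch) might look as follows; the real implementation may differ.
def get_chunks_sketch(candidates, num_procs):
    for i in range(0, len(candidates), num_procs):
        yield candidates[i : i + num_procs]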
def run(request, is_create, is_background, old_release, new_release):
    """ Test the different releases against each other. """
    cleanup()

    # Processing of command line arguments.
    if request[0] == "investigate":
        is_investigation, is_run = True, False
    elif request[0] == "run":
        is_investigation, is_run = False, True
    else:
        raise AssertionError("request in [run, investigate]")

    seed_investigation, hours = None, 0.0
    if is_investigation:
        seed_investigation = int(request[1])
        assert isinstance(seed_investigation, int)
    elif is_run:
        hours = float(request[1])
        assert hours > 0.0

    # Set up auxiliary information to construct commands.
    env_dir = os.environ["HOME"] + "/.envs"
    old_exec = env_dir + "/" + old_release + "/bin/python"
    new_exec = env_dir + "/" + new_release + "/bin/python"

    # Create fresh virtual environments if requested.
    if is_create:
        for release in [old_release, new_release]:
            cmd = ["virtualenv", env_dir + "/" + release, "--clear"]
            subprocess.check_call(cmd)

        # Set up the virtual environments with the two releases under
        # investigation.
        for which in ["old", "new"]:
            if which == "old":
                release, python_exec = old_release, old_exec
            elif which == "new":
                release, python_exec = new_release, new_exec
            else:
                raise AssertionError

            cmd = [python_exec, SCRIPT_FNAME, "upgrade"]
            subprocess.check_call(cmd)

            cmd = [python_exec, SCRIPT_FNAME, "prepare", release]
            subprocess.check_call(cmd)

    # Evaluation loop.
    start, timeout = datetime.now(), timedelta(hours=hours)
    num_tests, is_failure = 0, False

    while True:
        num_tests += 1

        # Set seed.
        if is_investigation:
            seed = seed_investigation
        else:
            seed = random.randrange(1, 100000)

        np.random.seed(seed)

        # The idea is to have all elements that are hand-crafted for the release
        # comparison in the function below.
        constr = {}
        constr["flag_estimation"] = True

        prepare_release_tests(constr, old_release, new_release)

        # We use the current release for the simulation of the underlying dataset.
        respy_obj = RespyCls("test.respy.ini")
        respy_obj.simulate()

        for which in ["old", "new"]:
            if which == "old":
                release, python_exec = old_release, old_exec
            elif which == "new":
                release, python_exec = new_release, new_exec
            else:
                raise AssertionError

            cmd = [python_exec, SCRIPT_FNAME, "estimate", which]
            subprocess.check_call(cmd)

        # Compare the resulting values of the criterion function.
        crit_val_old = pkl.load(open("old/crit_val.respy.pkl", "rb"))
        crit_val_new = pkl.load(open("new/crit_val.respy.pkl", "rb"))

        if not is_investigation:
            try:
                np.testing.assert_allclose(crit_val_old, crit_val_new)
            except AssertionError:
                is_failure = True
        else:
            np.testing.assert_allclose(crit_val_old, crit_val_new)

        is_timeout = timeout < datetime.now() - start

        if is_investigation or is_failure or is_timeout:
            break

    if not is_background and not is_investigation:
        send_notification(
            "release",
            hours=hours,
            is_failed=is_failure,
            seed=seed,
            num_tests=num_tests,
            old_release=old_release,
            new_release=new_release,
        )
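
# Hedged usage sketch (not part of the original script): the release labels and
# run length below are illustrative placeholders only.
def example_release_comparison():
    # Compare two hypothetical releases for one hour, creating fresh virtual
    # environments first and sending a notification once the loop finishes.
    run(
        request=("run", "1.0"),
        is_create=True,
        is_background=False,
        old_release="2.0.0",
        new_release="2.0.1",
    )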
def run(request, is_compile, is_background):
    """ Run the property test battery. """
    # Processing of command line arguments.
    if request[0] == "investigate":
        is_investigation, is_run = True, False
    elif request[0] == "run":
        is_investigation, is_run = False, True
    else:
        raise AssertionError("request in [run, investigate]")

    seed_investigation, hours = None, 0.0
    if is_investigation:
        seed_investigation = int(request[1])
        assert isinstance(seed_investigation, int)
    elif is_run:
        hours = float(request[1])
        assert hours > 0.0

    if not is_investigation:
        cleanup()

    if is_compile:
        compile_package(True)

    # Get a dictionary with all candidate test cases.
    test_dict = get_test_dict(PACKAGE_DIR / "respy" / "tests")

    # We initialize a dictionary that allows us to keep track of each test's
    # success or failure.
    full_test_record = {}
    for key_ in test_dict.keys():
        full_test_record[key_] = {}
        for value in test_dict[key_]:
            full_test_record[key_][value] = [0, 0]

    # Start with a clean slate.
    start, timeout = dt.datetime.now(), dt.timedelta(hours=hours)
    if not is_investigation:
        cleanup_testing_infrastructure(False)
        initialize_record_canvas(full_test_record, start, timeout)

    # Evaluation loop.
    while True:

        # Set seed.
        if is_investigation:
            seed = seed_investigation
        else:
            seed = random.randrange(1, 100000)

        np.random.seed(seed)

        module, method = get_random_request(test_dict)
        mod = importlib.import_module(module)
        test = getattr(mod.TestClass(), method)

        if is_investigation:
            print("... running ", module, method)

        # Run random test.
        is_success, msg = None, None

        # Create a fresh test directory.
        tmp_dir = get_random_dirname(5)
        if not is_investigation:
            os.mkdir(tmp_dir)
            os.chdir(tmp_dir)

        if not is_investigation:
            try:
                test()
                full_test_record[module][method][0] += 1
                is_success = True
            except Exception:
                full_test_record[module][method][1] += 1
                msg = traceback.format_exc()
                is_success = False
        else:
            test()

        if not is_investigation:
            os.chdir("../")

        # Record iteration.
        if not is_investigation:
            update_testing_record(
                module, method, seed, is_success, msg, full_test_record
            )
            cleanup_testing_infrastructure(True)

        # Timeout.
        if timeout < dt.datetime.now() - start:
            break

    if not is_investigation:
        finalize_testing_record(full_test_record)

    # This allows calling this test from another script that runs other tests
    # as well.
    if not is_background and not is_investigation:
        send_notification("property", hours=hours)
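
# The helper get_random_request() is used above but not shown in this excerpt.
# A minimal sketch, assuming test_dict maps importable module names to lists of
# test method names (as implied by the bookkeeping loop in run() above); the
# real helper may differ. It relies on the random module already imported for
# the seeding logic above.
def get_random_request_sketch(test_dict):
    module = random.choice(sorted(test_dict.keys()))
    method = random.choice(sorted(test_dict[module]))
    return module, method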