def testFailedActorInit(self):
    """Check that failures in an actor's constructor and method are reported.

    Starts Ray with ``num_workers=0`` in silent driver mode, creates an actor
    whose ``__init__`` raises, then calls a method that raises, and checks
    that each failure shows up as a ``b"task"`` entry in ``ray.error_info()``.
    """
    ray.init(num_workers=0, driver_mode=ray.SILENT_MODE)

    error_message1 = "actor constructor failed"
    error_message2 = "actor method failed"

    @ray.remote
    class FailedActor(object):
        def __init__(self):
            raise Exception(error_message1)

        def get_val(self):
            return 1

        def fail_method(self):
            raise Exception(error_message2)

    actor_handle = FailedActor.remote()

    # The raising constructor should produce the first task error.
    wait_for_errors(b"task", 1)
    self.assertEqual(len(ray.error_info()), 1)
    constructor_message = ray.error_info()[0][b"message"].decode("ascii")
    self.assertIn(error_message1, constructor_message)

    # The raising method should produce the second task error.
    actor_handle.fail_method.remote()
    wait_for_errors(b"task", 2)
    self.assertEqual(len(ray.error_info()), 2)
    method_message = ray.error_info()[1][b"message"].decode("ascii")
    self.assertIn(error_message2, method_message)
def testFailImportingActor(self):
    """Check the errors pushed when an actor class cannot be imported.

    The actor closes over a module loaded from a temporary file whose
    directory is appended to ``sys.path`` only after ``ray.init`` has run.
    The test waits for one ``b"register_actor"`` error, then for ``b"task"``
    errors from the constructor and from a later method call, and checks that
    fetching the method result raises instead of hanging.

    The temporary file and the ``sys.path`` entry are released even when an
    assertion fails, so a failure here does not leak state into later tests.
    """
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    # Create the contents of a temporary Python file.
    temporary_python_file = """
def temporary_helper_function():
    return 1
"""

    # The context manager closes (and thereby deletes) the temporary file on
    # every exit path.
    with tempfile.NamedTemporaryFile(suffix=".py") as f:
        f.write(temporary_python_file.encode("ascii"))
        f.flush()
        directory = os.path.dirname(f.name)
        # Get the module name and strip ".py" from the end.
        module_name = os.path.basename(f.name)[:-3]
        sys.path.append(directory)
        try:
            module = __import__(module_name)

            # Define an actor that closes over this temporary module. This
            # should fail when it is unpickled.
            @ray.remote
            class Foo(object):
                def __init__(self):
                    # Call the function the temporary module actually
                    # defines.
                    self.x = module.temporary_helper_function()

                def get_val(self):
                    return 1

            # There should be no errors yet.
            self.assertEqual(len(ray.error_info()), 0)

            # Create an actor.
            foo = Foo.remote()

            # Wait for the error to arrive.
            wait_for_errors(b"register_actor", 1)
            self.assertIn(b"No module named",
                          ray.error_info()[0][b"message"])

            # Wait for the error from when the __init__ tries to run.
            wait_for_errors(b"task", 1)
            self.assertIn(
                b"failed to be imported, and so cannot execute this method",
                ray.error_info()[1][b"message"])

            # Check that if we try to get the function it throws an
            # exception and does not hang.
            with self.assertRaises(Exception):
                ray.get(foo.get_val.remote())

            # Wait for the error from when the call to get_val.
            wait_for_errors(b"task", 2)
            self.assertIn(
                b"failed to be imported, and so cannot execute this method",
                ray.error_info()[2][b"message"])
        finally:
            # Clean up the junk we added to sys.path, by value so that an
            # unrelated entry appended in the meantime is not removed.
            sys.path.remove(directory)
def test_actor_scope_or_intentionally_killed_message(ray_start_regular):
    """Check that dropping or terminating an actor does not push an error.

    One actor handle is overwritten by a second one, the second actor is
    terminated explicitly, and after a one second pause ``ray.error_info()``
    is expected to be empty.
    """
    @ray.remote
    class Actor(object):
        pass

    # Rebinding the name discards the handle of the first actor.
    handle = Actor.remote()
    handle = Actor.remote()
    # Ask the second actor to terminate.
    handle.__ray_terminate__.remote()

    # Leave time for an error, if any, to be reported.
    time.sleep(1)
    nothing_reported = len(ray.error_info()) == 0
    assert nothing_reported, (
        "Should not have propogated an error - {}".format(ray.error_info()))
def testFailedFunctionToRun(self):
    """Check that a failing run-on-all-workers function reports errors.

    With two workers, the function raises on every process in worker mode,
    so two ``b"function_to_run"`` errors carrying its message are expected.
    """
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    def f(worker):
        # Only fail on the workers.
        if ray.worker.global_worker.mode == ray.WORKER_MODE:
            raise Exception("Function to run failed.")

    ray.worker.global_worker.run_function_on_all_workers(f)
    wait_for_errors(b"function_to_run", 2)

    # Check that the error message is in the task info.
    self.assertEqual(len(ray.error_info()), 2)
    for index in (0, 1):
        self.assertIn(b"Function to run failed.",
                      ray.error_info()[index][b"message"])
def testWorkerDying(self):
    """Check that a worker exiting mid-task produces a ``worker_died`` error."""
    ray.init(num_workers=0, driver_mode=ray.SILENT_MODE)

    # Define a remote function that will kill the worker that runs it.
    @ray.remote
    def f():
        eval("exit()")

    f.remote()

    wait_for_errors(b"worker_died", 1)
    self.assertEqual(len(ray.error_info()), 1)
    message = ray.error_info()[0][b"message"].decode("ascii")
    self.assertIn("A worker died or was killed while executing a task.",
                  message)
def test_fail_importing_actor(ray_start_regular):
    """Check the errors pushed when an actor class cannot be imported.

    The actor closes over a module loaded from a temporary file whose
    directory is appended to ``sys.path`` inside this test. The test waits
    for one ``REGISTER_ACTOR_PUSH_ERROR`` and for ``TASK_PUSH_ERROR`` entries
    from the constructor and from a later method call, and checks that
    fetching the method result raises instead of hanging.

    The temporary file and the ``sys.path`` entry are released even when an
    assertion fails, so a failure here does not leak state into later tests.
    """
    # Create the contents of a temporary Python file.
    temporary_python_file = """
def temporary_helper_function():
    return 1
"""

    # The context manager closes (and thereby deletes) the temporary file on
    # every exit path.
    with tempfile.NamedTemporaryFile(suffix=".py") as f:
        f.write(temporary_python_file.encode("ascii"))
        f.flush()
        directory = os.path.dirname(f.name)
        # Get the module name and strip ".py" from the end.
        module_name = os.path.basename(f.name)[:-3]
        sys.path.append(directory)
        try:
            module = __import__(module_name)

            # Define an actor that closes over this temporary module. This
            # should fail when it is unpickled.
            @ray.remote
            class Foo(object):
                def __init__(self):
                    # Call the function the temporary module actually
                    # defines.
                    self.x = module.temporary_helper_function()

                def get_val(self):
                    return 1

            # There should be no errors yet.
            assert len(ray.error_info()) == 0

            # Create an actor.
            foo = Foo.remote()

            # Wait for the error to arrive.
            wait_for_errors(ray_constants.REGISTER_ACTOR_PUSH_ERROR, 1)
            errors = relevant_errors(ray_constants.REGISTER_ACTOR_PUSH_ERROR)
            assert "No module named" in errors[0]["message"]

            # Wait for the error from when the __init__ tries to run.
            wait_for_errors(ray_constants.TASK_PUSH_ERROR, 1)
            errors = relevant_errors(ray_constants.TASK_PUSH_ERROR)
            assert (
                "failed to be imported, and so cannot execute this method"
                in errors[0]["message"])

            # Check that if we try to get the function it throws an
            # exception and does not hang.
            with pytest.raises(Exception):
                ray.get(foo.get_val.remote())

            # Wait for the error from when the call to get_val.
            wait_for_errors(ray_constants.TASK_PUSH_ERROR, 2)
            errors = relevant_errors(ray_constants.TASK_PUSH_ERROR)
            assert (
                "failed to be imported, and so cannot execute this method"
                in errors[1]["message"])
        finally:
            # Clean up the junk we added to sys.path, by value so that an
            # unrelated entry appended in the meantime is not removed.
            sys.path.remove(directory)
def wait_for_errors(error_check):
    """Poll ``ray.error_info()`` until ``error_check`` accepts the result.

    Args:
        error_check: Callable taking the current error list and returning a
            truthy value once the expected errors are present.

    Returns:
        The last error list that was fetched.

    Raises:
        AssertionError: If ``error_check`` still rejects the errors after
            100 polls spaced one second apart.
    """
    # Wait for errors from all the nondeterministic tasks.
    errors = []
    for _ in range(100):
        errors = ray.error_info()
        if error_check(errors):
            break
        time.sleep(1)

    # Make sure that enough errors came through.
    assert error_check(errors)
    return errors
def main(cmd_line_args):
    """Launch the configured experiment runs through Ray and time them.

    Seeds the ``random`` module, collects the first 100000 values yielded by
    ``gen_primes()`` as the seed pool, submits one ``run_experiment`` remote
    call per (domain, lookahead budget) pair, blocks until all of them
    finish, and prints the elapsed wall-clock time.

    Args:
        cmd_line_args: Not used by this function.
    """
    t0 = time.perf_counter()

    pending_runs = []
    seeds = []
    num_seeds = 100000

    # For reproducibility
    random.seed(1237)

    for prime in gen_primes():
        seeds.append(prime)
        if len(seeds) == num_seeds:
            break

    domains = [("Skiing-machado-sticky-v0", 3600)]
    lookahead_budgets = [100]

    for Domain, H in domains:
        for lookaheadBudget in lookahead_budgets:
            numberOfRuns = 100
            # One random draw per submitted configuration.
            seed = random.randint(0, 100000)
            numberRollouts = 1
            run_handle = run_experiment.remote(
                Domain, 99999999, lookaheadBudget, H, numberRollouts,
                numberOfRuns, seeds, seed, True, False)
            pending_runs.append(run_handle)

    # Block until every submitted run has finished.
    ray.get(pending_runs)
    ray.error_info()

    elapsed = time.perf_counter() - t0
    print("Time taken = {}".format(elapsed))
def testWorkerRaisingException(self):
    """Check the errors reported when a worker breaks after finishing a task.

    The remote function replaces the worker's
    ``_get_next_task_from_local_scheduler`` attribute with ``None``; one
    ``b"worker_crash"`` and one ``b"worker_died"`` error are then expected.
    """
    ray.init(num_workers=1, driver_mode=ray.SILENT_MODE)

    @ray.remote
    def f():
        ray.worker.global_worker._get_next_task_from_local_scheduler = None

    # Running this task should cause the worker to raise an exception after
    # the task has successfully completed.
    f.remote()

    for error_type in (b"worker_crash", b"worker_died"):
        wait_for_errors(error_type, 1)
    self.assertEqual(len(ray.error_info()), 2)
def testWorkerRaisingException(self):
    """Check the errors reported when a worker breaks after finishing a task.

    The remote function replaces the worker's
    ``_get_next_task_from_local_scheduler`` attribute with ``None``; one
    ``WORKER_CRASH_PUSH_ERROR`` and one ``WORKER_DIED_PUSH_ERROR`` are then
    expected.
    """
    ray.init(num_workers=1, driver_mode=ray.SILENT_MODE)

    @ray.remote
    def f():
        ray.worker.global_worker._get_next_task_from_local_scheduler = None

    # Running this task should cause the worker to raise an exception after
    # the task has successfully completed.
    f.remote()

    expected_types = (ray_constants.WORKER_CRASH_PUSH_ERROR,
                      ray_constants.WORKER_DIED_PUSH_ERROR)
    for error_type in expected_types:
        wait_for_errors(error_type, 1)
    self.assertEqual(len(ray.error_info()), 2)
def testFailedFunctionToRun(self):
    """Check that a failing run-on-all-workers function reports errors.

    With two workers, the function raises on every process in worker mode,
    so two ``FUNCTION_TO_RUN_PUSH_ERROR`` entries carrying its message are
    expected.
    """
    ray.init(num_workers=2)

    def f(worker):
        # Only fail on the workers.
        if ray.worker.global_worker.mode == ray.WORKER_MODE:
            raise Exception("Function to run failed.")

    ray.worker.global_worker.run_function_on_all_workers(f)
    wait_for_errors(ray_constants.FUNCTION_TO_RUN_PUSH_ERROR, 2)

    # Check that the error message is in the task info.
    error_info = ray.error_info()
    assert len(error_info) == 2
    for index in (0, 1):
        assert "Function to run failed." in error_info[index]["message"]
def wait_for_errors(self, error_check):
    """Poll ``ray.error_info()`` until ``error_check`` accepts the result.

    Args:
        error_check: Callable taking the current error list and returning a
            truthy value once the expected errors are present.

    Returns:
        The last error list that was fetched.

    Fails the test through ``self.assertTrue`` if ``error_check`` still
    rejects the errors after 100 polls spaced one second apart.
    """
    # Wait for errors from all the nondeterministic tasks.
    errors = []
    for _ in range(100):
        errors = ray.error_info()
        if error_check(errors):
            break
        time.sleep(1)

    # Make sure that enough errors came through.
    self.assertTrue(error_check(errors))
    return errors
def testFailImportingRemoteFunction(self):
    """Check the errors pushed when a remote function cannot be imported.

    The remote function closes over a module loaded from a temporary file
    whose directory is appended to ``sys.path`` only after ``ray.init`` has
    run. Two ``b"register_remote_function"`` errors are expected, and calling
    the function must raise rather than hang.

    The temporary file and the ``sys.path`` entry are released even when an
    assertion fails, so a failure here does not leak state into later tests.
    """
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    # Create the contents of a temporary Python file.
    temporary_python_file = """
def temporary_helper_function():
    return 1
"""

    # The context manager closes (and thereby deletes) the temporary file on
    # every exit path.
    with tempfile.NamedTemporaryFile(suffix=".py") as f:
        f.write(temporary_python_file.encode("ascii"))
        f.flush()
        directory = os.path.dirname(f.name)
        # Get the module name and strip ".py" from the end.
        module_name = os.path.basename(f.name)[:-3]
        sys.path.append(directory)
        try:
            module = __import__(module_name)

            # Define a function that closes over this temporary module. This
            # should fail when it is unpickled.
            @ray.remote
            def g():
                # Call the function the temporary module actually defines.
                return module.temporary_helper_function()

            wait_for_errors(b"register_remote_function", 2)
            self.assertIn(b"No module named",
                          ray.error_info()[0][b"message"])
            self.assertIn(b"No module named",
                          ray.error_info()[1][b"message"])

            # Check that if we try to call the function it throws an
            # exception and does not hang.
            for _ in range(10):
                self.assertRaises(Exception, lambda: ray.get(g.remote()))
        finally:
            # Clean up the junk we added to sys.path, by value so that an
            # unrelated entry appended in the meantime is not removed.
            sys.path.remove(directory)
def testFailImportingRemoteFunction(self):
    """Check the errors pushed when a remote function cannot be imported.

    The remote function closes over a module loaded from a temporary file
    whose directory is appended to ``sys.path`` only after ``ray.init`` has
    run. Two ``REGISTER_REMOTE_FUNCTION_PUSH_ERROR`` entries are expected,
    and calling the function must raise rather than hang.

    The temporary file and the ``sys.path`` entry are released even when an
    assertion fails, so a failure here does not leak state into later tests.
    """
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    # Create the contents of a temporary Python file.
    temporary_python_file = """
def temporary_helper_function():
    return 1
"""

    # The context manager closes (and thereby deletes) the temporary file on
    # every exit path.
    with tempfile.NamedTemporaryFile(suffix=".py") as f:
        f.write(temporary_python_file.encode("ascii"))
        f.flush()
        directory = os.path.dirname(f.name)
        # Get the module name and strip ".py" from the end.
        module_name = os.path.basename(f.name)[:-3]
        sys.path.append(directory)
        try:
            module = __import__(module_name)

            # Define a function that closes over this temporary module. This
            # should fail when it is unpickled.
            @ray.remote
            def g():
                # Call the function the temporary module actually defines.
                return module.temporary_helper_function()

            wait_for_errors(
                ray_constants.REGISTER_REMOTE_FUNCTION_PUSH_ERROR, 2)
            self.assertIn("No module named", ray.error_info()[0]["message"])
            self.assertIn("No module named", ray.error_info()[1]["message"])

            # Check that if we try to call the function it throws an
            # exception and does not hang.
            for _ in range(10):
                self.assertRaises(Exception, lambda: ray.get(g.remote()))
        finally:
            # Clean up the junk we added to sys.path, by value so that an
            # unrelated entry appended in the meantime is not removed.
            sys.path.remove(directory)
def _collect_distributed_fits(self, n_min=0):
    """Fold finished moment updates into ``self.moments.moments``.

    Repeatedly waits for one of the pending object ids in
    ``self.moment_updates`` to become ready, fetches its value and adds it
    in place to ``self.moments.moments``, until at most ``n_min`` ids remain
    pending. Progress and any entries from ``ray.error_info()`` are printed
    on every iteration.

    Args:
        n_min: Number of pending updates allowed to remain; negative values
            are treated as 0.

    Returns:
        The number of updates that were collected by this call.
    """
    n_min = max(0, n_min)
    pending = self.moment_updates
    initial_count = len(pending)

    while len(pending) > n_min:
        errors = ray.error_info()
        if errors:
            print('errors:', errors)
        print('n min =', n_min, 'remaining =', len(pending),
              'object ids =', pending)

        # Block until one update is ready; the rest stay pending.
        ready_id, pending = ray.wait(pending, num_returns=1)
        finished = ready_id[0]
        print('processing', finished)
        update = ray.get(finished)
        self.moments.moments += update

    # Remember which updates are still outstanding.
    self.moment_updates = pending
    return initial_count - len(pending)
def testWorkerDying(self):
    """Check that a worker exiting mid-task produces a worker-died error."""
    ray.init(num_workers=0)

    # Define a remote function that will kill the worker that runs it.
    @ray.remote
    def f():
        eval("exit()")

    f.remote()

    wait_for_errors(ray_constants.WORKER_DIED_PUSH_ERROR, 1)

    error_info = ray.error_info()
    assert len(error_info) == 1
    message = error_info[0]["message"]
    assert "died or was killed while executing" in message
def test_failed_actor_init(ray_start_regular):
    """Check the errors reported for an actor whose constructor raises.

    Creates an actor whose ``__init__`` raises, then calls a method that
    also raises, and checks that each step adds a ``TASK_PUSH_ERROR`` entry
    to ``ray.error_info()``.
    """
    error_message1 = "actor constructor failed"
    error_message2 = "actor method failed"

    @ray.remote
    class FailedActor(object):
        def __init__(self):
            raise Exception(error_message1)

        def fail_method(self):
            raise Exception(error_message2)

    actor_handle = FailedActor.remote()

    # Make sure that we get errors from a failed constructor.
    wait_for_errors(ray_constants.TASK_PUSH_ERROR, 1)
    assert len(ray.error_info()) == 1
    constructor_message = ray.error_info()[0]["message"]
    assert error_message1 in constructor_message

    # Make sure that we get errors from a failed method.
    actor_handle.fail_method.remote()
    wait_for_errors(ray_constants.TASK_PUSH_ERROR, 2)
    assert len(ray.error_info()) == 2
    # NOTE(review): the second error is checked against error_message1 (the
    # constructor message), not error_message2 -- presumably a method call on
    # an actor whose constructor failed reports the constructor failure;
    # confirm this is intended.
    method_message = ray.error_info()[1]["message"]
    assert error_message1 in method_message
def testFailImportingEnvironmentVariable(self):
    """Check that a failing environment-variable initializer is reported.

    The initializer raises on processes in worker mode, and two
    ``b"register_environment_variable"`` errors are awaited before the
    message of the first reported error is checked.
    """
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    # This will throw an exception when the environment variable is imported
    # on the workers.
    def initializer():
        running_on_worker = (
            ray.worker.global_worker.mode == ray.WORKER_MODE)
        if running_on_worker:
            raise Exception("The initializer failed.")
        return 0

    ray.env.foo = ray.EnvironmentVariable(initializer)
    wait_for_errors(b"register_environment_variable", 2)

    # Check that the error message is in the task info.
    first_message = ray.error_info()[0][b"message"]
    self.assertIn(b"The initializer failed.", first_message)

    ray.worker.cleanup()
def testFailImportingReusableVariable(self):
    """Check that a failing reusable-variable initializer is reported.

    The initializer raises on processes in worker mode, and the first
    ``"ReusableVariableImportError"`` entry is expected to carry its message.
    """
    ray.init(start_ray_local=True, num_workers=2,
             driver_mode=ray.SILENT_MODE)

    # This will throw an exception when the reusable variable is imported on
    # the workers.
    def initializer():
        running_on_worker = (
            ray.worker.global_worker.mode == ray.WORKER_MODE)
        if running_on_worker:
            raise Exception("The initializer failed.")
        return 0

    ray.reusables.foo = ray.Reusable(initializer)
    wait_for_errors("ReusableVariableImportError", 1)

    # Check that the error message is in the task info.
    import_errors = ray.error_info()["ReusableVariableImportError"]
    first_message = import_errors[0]["message"]
    self.assertTrue("The initializer failed." in first_message)

    ray.worker.cleanup()
def testFailReinitializingVariable(self):
    """Check that a failing environment-variable reinitializer is reported.

    A remote task touches ``ray.env.foo``; the variable's reinitializer
    always raises, so one ``b"reinitialize_environment_variable"`` error
    carrying its message is expected.
    """
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    def initializer():
        return 0

    def reinitializer(foo):
        raise Exception("The reinitializer failed.")

    ray.env.foo = ray.EnvironmentVariable(initializer, reinitializer)

    @ray.remote
    def use_foo():
        ray.env.foo

    use_foo.remote()
    wait_for_errors(b"reinitialize_environment_variable", 1)

    # Check that the error message is in the task info.
    first_message = ray.error_info()[0][b"message"]
    self.assertIn(b"The reinitializer failed.", first_message)

    ray.worker.cleanup()
def testFailReinitializingVariable(self):
    """Check that a failing reusable-variable reinitializer is reported.

    A remote task touches ``ray.reusables.foo``; the variable's reinitializer
    always raises, so the first ``"ReusableVariableReinitializeError"`` entry
    is expected to carry its message.
    """
    ray.init(start_ray_local=True, num_workers=2,
             driver_mode=ray.SILENT_MODE)

    def initializer():
        return 0

    def reinitializer(foo):
        raise Exception("The reinitializer failed.")

    ray.reusables.foo = ray.Reusable(initializer, reinitializer)

    @ray.remote
    def use_foo():
        ray.reusables.foo

    use_foo.remote()
    wait_for_errors("ReusableVariableReinitializeError", 1)

    # Check that the error message is in the task info.
    reinit_errors = ray.error_info()["ReusableVariableReinitializeError"]
    first_message = reinit_errors[0]["message"]
    self.assertTrue("The reinitializer failed." in first_message)

    ray.worker.cleanup()
def main(cmd_line_args):
    """Launch the full experiment sweep through Ray and time it.

    Seeds the ``random`` module, collects the first 100000 values yielded by
    ``gen_primes()`` as the seed pool, and for each of ten initial states
    submits ``run_experiment`` remote calls for the Antishape/Combolock,
    GridWorld and CTP domains over their lookahead budgets. It then blocks
    until all runs finish and prints the elapsed wall-clock time.

    Args:
        cmd_line_args: Not used by this function.
    """
    t0 = time.perf_counter()

    listOfRuns = []
    seeds = []
    num_seeds = 100000

    # For reproducibility
    random.seed(1337)

    for prime in gen_primes():
        seeds.append(prime)
        if len(seeds) == num_seeds:
            break

    numberOfRuns = 20
    numberRollouts = 1

    def submit(domain, horizon, budget, first_flag, second_flag):
        # Draw the per-run seed right before submitting so that the sequence
        # of random draws follows submission order.
        seed = random.randint(0, 100000)
        listOfRuns.append(
            run_experiment.remote(domain, 99999999, budget, horizon,
                                  numberRollouts, numberOfRuns, seeds, seed,
                                  first_flag, second_flag))

    for intState in range(10):
        # Antishape and Combolock chains.
        for num_states in [10, 50]:
            chain_domains = [
                ("Antishape-{}-initS{}-v2".format(num_states, intState),
                 4 * num_states),
                ("Combolock-{}-initS{}-v2".format(num_states, intState),
                 4 * num_states),
            ]
            for Domain, H in chain_domains:
                for lookaheadBudget in [100, 500, 1000]:
                    submit(Domain, H, lookaheadBudget, False, False)

        # GridWorld variants; only version 5 passes True as the first of the
        # two trailing booleans.
        for gridVersion in [2, 3, 5]:
            for gridDim in [10, 20, 50]:
                Domain = "GridWorld-{}x{}-initS{}-v{}".format(
                    gridDim, gridDim, intState, gridVersion)
                H = gridDim * 5
                for lookaheadBudget in [100, 1000, 10000]:
                    submit(Domain, H, lookaheadBudget, gridVersion == 5,
                           False)

        # CTP grids; both trailing booleans are True.
        for gridDim in [10, 20]:
            Domain = "CTP-{}x{}-initS{}-v1".format(gridDim, gridDim,
                                                   intState)
            H = gridDim * 5
            for lookaheadBudget in [100, 1000, 10000]:
                submit(Domain, H, lookaheadBudget, True, True)

    # Block until every submitted run has finished.
    ray.get(listOfRuns)
    ray.error_info()

    elapsed = time.perf_counter() - t0
    print("Time taken = {}".format(elapsed))
def relevant_errors(error_type):
    """Return the entries of ``ray.error_info()`` whose type is ``error_type``.

    Args:
        error_type: Value compared against each entry's ``"type"`` field.

    Returns:
        A list of the matching entries, in the order they were reported.
    """
    matching = []
    for info in ray.error_info():
        if info["type"] == error_type:
            matching.append(info)
    return matching
def test_error_isolation(ray_start_head):
    """Check that errors are only delivered to the driver that caused them.

    This driver runs a failing remote function and waits for its error. A
    second driver, started from a script against the same Redis address, must
    start with no errors and see only its own failure, and this driver must
    still see only its original error afterwards.

    Args:
        ray_start_head: Fixture value used as the Redis address of the
            cluster.
    """
    redis_address = ray_start_head
    # Connect a driver to the Ray cluster.
    ray.init(redis_address=redis_address)

    # There shouldn't be any errors yet.
    assert len(ray.error_info()) == 0

    error_string1 = "error_string1"
    error_string2 = "error_string2"

    @ray.remote
    def f():
        raise Exception(error_string1)

    # Run a remote function that throws an error.
    with pytest.raises(Exception):
        ray.get(f.remote())

    # Wait for the error to appear in Redis. Give up after 100 seconds so a
    # missing (or duplicated) error fails the test instead of hanging it.
    deadline = time.time() + 100
    while len(ray.error_info()) != 1:
        assert time.time() < deadline, (
            "Timed out waiting for exactly one error, got {}".format(
                ray.error_info()))
        time.sleep(0.1)
        print("Waiting for error to appear.")

    # Make sure we got the error.
    assert len(ray.error_info()) == 1
    assert error_string1 in ray.error_info()[0]["message"]

    # Start another driver and make sure that it does not receive this
    # error. Make the other driver throw an error, and make sure it
    # receives that error.
    driver_script = """
import ray
import time

ray.init(redis_address="{}")

time.sleep(1)
assert len(ray.error_info()) == 0

@ray.remote
def f():
    raise Exception("{}")

try:
    ray.get(f.remote())
except Exception as e:
    pass

while len(ray.error_info()) != 1:
    print(len(ray.error_info()))
    time.sleep(0.1)
assert len(ray.error_info()) == 1

assert "{}" in ray.error_info()[0]["message"]

print("success")
""".format(redis_address, error_string2, error_string2)

    out = run_string_as_driver(driver_script)
    # Make sure the other driver succeeded.
    assert "success" in out

    # Make sure that the other error message doesn't show up for this
    # driver.
    assert len(ray.error_info()) == 1
    assert error_string1 in ray.error_info()[0]["message"]
def testErrorIsolation(self):
    """Check that errors are only delivered to the driver that caused them.

    Starts a Ray head node with ``start_ray_script``, connects this driver,
    runs a failing remote function and waits for its error. A second driver,
    run as a separate ``python`` process against the same Redis address, must
    start with no errors and see only its own failure, and this driver must
    still see only its original error afterwards.

    The worker cleanup and ``stop_ray_script`` run on every exit path, so a
    failed assertion does not leave the cluster running.
    """
    # Start the Ray processes on this machine.
    out = subprocess.check_output(
        [start_ray_script, "--head"]).decode("ascii")
    try:
        # Get the redis address from the output.
        redis_substring_prefix = "redis_address=\""
        redis_address_location = out.find(redis_substring_prefix) + len(
            redis_substring_prefix)
        redis_address = out[redis_address_location:]
        redis_address = redis_address.split("\"")[0]

        # Connect a driver to the Ray cluster.
        ray.init(redis_address=redis_address, driver_mode=ray.SILENT_MODE)
        try:
            # There shouldn't be any errors yet.
            self.assertEqual(len(ray.error_info()), 0)

            error_string1 = "error_string1"
            error_string2 = "error_string2"

            @ray.remote
            def f():
                raise Exception(error_string1)

            # Run a remote function that throws an error.
            with self.assertRaises(Exception):
                ray.get(f.remote())

            # Wait for the error to appear in Redis. Give up after 100
            # seconds so a missing (or duplicated) error fails the test
            # instead of hanging it.
            deadline = time.time() + 100
            while len(ray.error_info()) != 1:
                self.assertLess(
                    time.time(), deadline,
                    "Timed out waiting for exactly one error.")
                time.sleep(0.1)
                print("Waiting for error to appear.")

            # Make sure we got the error.
            self.assertEqual(len(ray.error_info()), 1)
            self.assertIn(error_string1,
                          ray.error_info()[0][b"message"].decode("ascii"))

            # Start another driver and make sure that it does not receive
            # this error. Make the other driver throw an error, and make
            # sure it receives that error.
            driver_script = """
import ray
import time

ray.init(redis_address="{}")

time.sleep(1)
assert len(ray.error_info()) == 0

@ray.remote
def f():
    raise Exception("{}")

try:
    ray.get(f.remote())
except Exception as e:
    pass

while len(ray.error_info()) != 1:
    print(len(ray.error_info()))
    time.sleep(0.1)
assert len(ray.error_info()) == 1

assert "{}" in ray.error_info()[0][b"message"].decode("ascii")

print("success")
""".format(redis_address, error_string2, error_string2)

            # Save the driver script as a file so we can call it using
            # subprocess.
            with tempfile.NamedTemporaryFile() as f:
                f.write(driver_script.encode("ascii"))
                f.flush()
                out = subprocess.check_output(
                    ["python", f.name]).decode("ascii")

            # Make sure the other driver succeeded.
            self.assertIn("success", out)

            # Make sure that the other error message doesn't show up for
            # this driver.
            self.assertEqual(len(ray.error_info()), 1)
            self.assertIn(error_string1,
                          ray.error_info()[0][b"message"].decode("ascii"))
        finally:
            # Disconnect this driver even if an assertion failed.
            ray.worker.cleanup()
    finally:
        # Always stop the Ray processes started above.
        subprocess.Popen([stop_ray_script]).wait()
def testErrorIsolation(self):
    """Check that errors are only delivered to the driver that caused them.

    This driver connects to ``self.redis_address``, runs a failing remote
    function and waits for its error. A second driver, started from a script
    against the same address, must start with no errors and see only its own
    failure, and this driver must still see only its original error
    afterwards.
    """
    # Connect a driver to the Ray cluster.
    ray.init(redis_address=self.redis_address, driver_mode=ray.SILENT_MODE)

    # There shouldn't be any errors yet.
    self.assertEqual(len(ray.error_info()), 0)

    error_string1 = "error_string1"
    error_string2 = "error_string2"

    @ray.remote
    def f():
        raise Exception(error_string1)

    # Run a remote function that throws an error.
    with self.assertRaises(Exception):
        ray.get(f.remote())

    # Wait for the error to appear in Redis. Give up after 100 seconds so a
    # missing (or duplicated) error fails the test instead of hanging it.
    deadline = time.time() + 100
    while len(ray.error_info()) != 1:
        self.assertLess(time.time(), deadline,
                        "Timed out waiting for exactly one error.")
        time.sleep(0.1)
        print("Waiting for error to appear.")

    # Make sure we got the error.
    self.assertEqual(len(ray.error_info()), 1)
    self.assertIn(error_string1,
                  ray.error_info()[0][b"message"].decode("ascii"))

    # Start another driver and make sure that it does not receive this
    # error. Make the other driver throw an error, and make sure it
    # receives that error.
    driver_script = """
import ray
import time

ray.init(redis_address="{}")

time.sleep(1)
assert len(ray.error_info()) == 0

@ray.remote
def f():
    raise Exception("{}")

try:
    ray.get(f.remote())
except Exception as e:
    pass

while len(ray.error_info()) != 1:
    print(len(ray.error_info()))
    time.sleep(0.1)
assert len(ray.error_info()) == 1

assert "{}" in ray.error_info()[0][b"message"].decode("ascii")

print("success")
""".format(self.redis_address, error_string2, error_string2)

    out = run_string_as_driver(driver_script)
    # Make sure the other driver succeeded.
    self.assertIn("success", out)

    # Make sure that the other error message doesn't show up for this
    # driver.
    self.assertEqual(len(ray.error_info()), 1)
    self.assertIn(error_string1,
                  ray.error_info()[0][b"message"].decode("ascii"))
def relevant_errors(error_type):
    """Return the entries of ``ray.error_info()`` whose type is ``error_type``.

    Args:
        error_type: Value compared against each entry's ``b"type"`` field.

    Returns:
        A list of the matching entries, in the order they were reported.
    """
    matching = []
    for info in ray.error_info():
        if info[b"type"] == error_type:
            matching.append(info)
    return matching
def testIncorrectMethodCalls(self):
    """Check the errors reported for actor calls with the wrong arity.

    Constructs an actor and calls one of its methods with too few and too
    many arguments, checking after each call that one more ``b"task"`` error
    with the expected argument-count message has been reported. Finally
    checks that accessing a method that does not exist raises
    ``AttributeError``.
    """
    ray.init(num_workers=0, driver_mode=ray.SILENT_MODE)

    @ray.actor
    class Actor(object):
        def __init__(self, missing_variable_name):
            pass

        def get_val(self, x):
            pass

    def check_latest_error(expected_count, python3_fragment):
        # Wait until ``expected_count`` task errors exist, then check the
        # text of the newest one. The expected text depends on the
        # interpreter's major version.
        wait_for_errors(b"task", expected_count)
        self.assertEqual(len(ray.error_info()), expected_count)
        if sys.version_info >= (3, 0):
            expected_fragment = python3_fragment
        else:
            expected_fragment = "takes exactly 2 arguments"
        latest = ray.error_info()[expected_count - 1]
        self.assertIn(expected_fragment, latest[b"message"].decode("ascii"))

    # Make sure that we get errors if we call the constructor incorrectly.
    # TODO(rkn): These errors should instead be thrown when the method is
    # called.

    # Create an actor with too few arguments.
    a = Actor()
    check_latest_error(1, "missing 1 required")

    # Create an actor with too many arguments.
    a = Actor(1, 2)
    check_latest_error(2, "but 3 were given")

    # Create an actor the correct number of arguments.
    a = Actor(1)

    # Call a method with too few arguments.
    a.get_val()
    check_latest_error(3, "missing 1 required")

    # Call a method with too many arguments.
    a.get_val(1, 2)
    check_latest_error(4, "but 3 were given")

    # Call a method that doesn't exist.
    with self.assertRaises(AttributeError):
        a.nonexistent_method()

    ray.worker.cleanup()
train_loss=loss.item(), best_test_acc=100. * test_correct / test_num) # report metrics tune.register_trainable("train", train) all_trials = tune.run_experiments({ "awesome": { "run": "train", "repeat": 1, # "trial_resources": { # "cpu": 8, # "gpu": 1, # }, "stop": { "best_test_acc": 90, }, #"stop": {"epoch": 1}, "config": { "lr": tune.grid_search(list(uniform.rvs(0, size=3))), "momentum": tune.grid_search(list(uniform.rvs(0, size=1))), }, "local_dir": "ray_results", "max_failures": 1 } }) ray.error_info() ray.global_state.log_files()