def stop_experiment(experiment_name, experiment_config_filename):
    """Delete the running instances that belong to |experiment_name|.

    The compute zone is read from the 'cloud_compute_zone' key of the YAML
    file |experiment_config_filename|. Instances whose name starts with
    'r-' + |experiment_name| are selected, plus the dispatcher instance when
    it appears in the instance listing.

    Returns:
        0 when there was nothing to delete or the deletion succeeded,
        1 when gcloud.delete_instances reported failure.
    """
    running = gcloud.list_instances()
    zone = yaml_utils.read(experiment_config_filename)['cloud_compute_zone']

    # NOTE(review): this is a plain prefix match, so it would presumably also
    # select instances of another experiment whose name merely begins with
    # |experiment_name| -- confirm against the instance naming scheme.
    runner_prefix = 'r-' + experiment_name
    to_delete = [name for name in running if name.startswith(runner_prefix)]

    dispatcher = experiment_utils.get_dispatcher_instance_name(experiment_name)
    if dispatcher in running:
        to_delete.append(dispatcher)
    else:
        logger.warning('Dispatcher instance not running, skip.')

    if not to_delete:
        logger.warning('No experiment instances found, no work to do.')
        return 0

    if gcloud.delete_instances(to_delete, zone):
        logger.info('Successfully stopped experiment.')
        return 0

    logger.error('Failed to stop experiment instances.')
    return 1
def stop_experiment(experiment_name, experiment_config_filename):
    """Delete the running instances that belong to |experiment_name|.

    The project and zone are read from the 'cloud_project' and
    'cloud_compute_zone' keys of the YAML file |experiment_config_filename|.
    The dispatcher instance (when it appears in the instance listing) is
    queued first, followed by every instance whose name starts with
    'r-' + |experiment_name|.

    Returns:
        True when there was nothing to delete or the deletion succeeded,
        False when gcloud.delete_instances reported failure.

    Raises:
        NotImplementedError: if the config has a truthy 'local_experiment'.
    """
    config = yaml_utils.read(experiment_config_filename)
    if config.get('local_experiment', False):
        raise NotImplementedError(
            'Local experiment stop logic is not implemented.')

    project = config['cloud_project']
    zone = config['cloud_compute_zone']

    gce.initialize()
    # Materialise the listing once: it is used for a membership test and
    # then iterated again for the prefix filter.
    running = list(gce.get_instances(project, zone))

    dispatcher = experiment_utils.get_dispatcher_instance_name(experiment_name)
    if dispatcher in running:
        to_delete = [dispatcher]
    else:
        logger.warning('Dispatcher instance not running, skip.')
        to_delete = []

    # NOTE(review): this is a plain prefix match, so it would presumably also
    # select instances of another experiment whose name merely begins with
    # |experiment_name| -- confirm against the instance naming scheme.
    runner_prefix = 'r-' + experiment_name
    to_delete += [name for name in running if name.startswith(runner_prefix)]

    if not to_delete:
        logger.warning('No experiment instances found, no work to do.')
        return True

    if gcloud.delete_instances(to_delete, zone):
        logger.info('Successfully stopped experiment.')
        return True

    logger.error('Failed to stop experiment instances.')
    return False
def __init__(self, config: Dict):
    """Keep |config| and derive the dispatcher instance name from it.

    Args:
        config: Mapping that must contain an 'experiment' key; its value is
            passed to experiment_utils.get_dispatcher_instance_name.
    """
    experiment = config['experiment']
    self.config = config
    self.process = None  # Nothing is assigned here until later.
    self.instance_name = experiment_utils.get_dispatcher_instance_name(
        experiment)
def test_get_dispatcher_instance_name():
    """Checks the dispatcher instance name derived from an experiment name."""
    experiment = 'experiment-a'
    instance_name = experiment_utils.get_dispatcher_instance_name(experiment)
    assert instance_name == 'd-experiment-a'