예제 #1
0
def stop_experiment(experiment_name, experiment_config_filename):
    """Delete the instances that belong to |experiment_name|.

    Args:
        experiment_name: Name used to derive the trial instance prefix and
            the dispatcher instance name.
        experiment_config_filename: Path of the YAML experiment config; its
            'cloud_compute_zone' entry is passed to the delete call.

    Returns:
        0 if there was nothing to delete or the deletion succeeded, 1 if
        gcloud.delete_instances reported failure.
    """
    all_instances = gcloud.list_instances()

    config = yaml_utils.read(experiment_config_filename)
    zone = config['cloud_compute_zone']

    # Trial instances are recognised purely by name prefix.
    # NOTE(review): a prefix match also selects instances of any other
    # experiment whose name starts with |experiment_name| - confirm that the
    # naming scheme rules this out.
    runner_prefix = 'r-' + experiment_name
    to_delete = []
    for name in all_instances:
        if name.startswith(runner_prefix):
            to_delete.append(name)

    # The dispatcher goes last in the deletion list, after the trials.
    dispatcher = experiment_utils.get_dispatcher_instance_name(
        experiment_name)
    if dispatcher in all_instances:
        to_delete.append(dispatcher)
    else:
        logger.warning('Dispatcher instance not running, skip.')

    if not to_delete:
        logger.warning('No experiment instances found, no work to do.')
        return 0

    if gcloud.delete_instances(to_delete, zone):
        logger.info('Successfully stopped experiment.')
        return 0

    logger.error('Failed to stop experiment instances.')
    return 1
예제 #2
0
def stop_experiment(experiment_name, experiment_config_filename):
    """Delete the cloud instances that belong to |experiment_name|.

    Args:
        experiment_name: Name used to derive the dispatcher instance name and
            the trial instance prefix.
        experiment_config_filename: Path of the YAML experiment config. The
            'cloud_project' and 'cloud_compute_zone' entries are required;
            'local_experiment' is optional and defaults to False.

    Returns:
        True if there was nothing to delete or the deletion succeeded, False
        if gcloud.delete_instances reported failure.

    Raises:
        NotImplementedError: If the config marks the experiment as local.
    """
    config = yaml_utils.read(experiment_config_filename)
    is_local = config.get('local_experiment', False)
    if is_local:
        raise NotImplementedError(
            'Local experiment stop logic is not implemented.')

    project = config['cloud_project']
    zone = config['cloud_compute_zone']

    # Instances are listed through gce but deleted through gcloud below.
    gce.initialize()
    running = list(gce.get_instances(project, zone))

    # The dispatcher, when present, is first in the deletion list.
    dispatcher = experiment_utils.get_dispatcher_instance_name(
        experiment_name)
    if dispatcher in running:
        to_delete = [dispatcher]
    else:
        logger.warning('Dispatcher instance not running, skip.')
        to_delete = []

    # Trial instances are recognised purely by name prefix.
    # NOTE(review): a prefix match also selects instances of any other
    # experiment whose name starts with |experiment_name| - confirm that the
    # naming scheme rules this out.
    runner_prefix = 'r-' + experiment_name
    for name in running:
        if name.startswith(runner_prefix):
            to_delete.append(name)

    if not to_delete:
        logger.warning('No experiment instances found, no work to do.')
        return True

    if gcloud.delete_instances(to_delete, zone):
        logger.info('Successfully stopped experiment.')
        return True

    logger.error('Failed to stop experiment instances.')
    return False
예제 #3
0
 def __init__(self, config: Dict):
     """Store |config| and derive the dispatcher instance name from it.

     Args:
         config: Experiment configuration; must contain an 'experiment'
             key, which is passed to get_dispatcher_instance_name.
     """
     self.config = config
     experiment_name = config['experiment']
     self.instance_name = experiment_utils.get_dispatcher_instance_name(
         experiment_name)
     # Starts out as None; nothing in this constructor assigns a process.
     self.process = None
예제 #4
0
def test_get_dispatcher_instance_name():
    """Checks the dispatcher instance name derived for 'experiment-a'."""
    expected_name = 'd-experiment-a'
    actual_name = experiment_utils.get_dispatcher_instance_name(
        'experiment-a')
    assert actual_name == expected_name