def test_delete_instances_less_than_batch_size(mocked_execute):
    """Check delete_instances when given fewer instances than a batch.

    Five instances are passed in, and the most recent command handed to
    |mocked_execute| must be one gcloud delete naming all five of them.
    """
    zone = 'us-central1-a'
    instances = ['instance-{}'.format(index) for index in range(5)]
    mocked_execute.return_value = new_process.ProcessResult(0, '', False)

    # Without -q gcloud would stop and prompt "Y/N?".
    expected_command = ['gcloud', 'compute', 'instances', 'delete', '-q']
    expected_command.extend(instances)
    expected_command.extend(['--zone', zone])

    gcloud.delete_instances(instances, zone)

    mocked_execute.assert_called_with(expected_command, expect_zero=False)
def end_expired_trials(experiment_config: dict):
    """End every expired trial of the experiment in |experiment_config|.

    Each expired trial gets its time_ended set to the current time, and the
    instances of those trials that are still listed as running are deleted.
    The trials are saved to the database only when nothing needed deleting
    or the deletion succeeded. Always returns None.
    """
    experiment = experiment_config['experiment']
    expired_trials = get_expired_trials(experiment,
                                        experiment_config['max_total_time'])

    end_time = datetime_now()
    expired_instance_names = []
    for trial in expired_trials:
        instance_name = experiment_utils.get_trial_instance_name(
            experiment, trial.id)
        expired_instance_names.append(instance_name)
        trial.time_ended = end_time

    # Test the collected names rather than |expired_trials| itself: the
    # trials object stays truthy until it has been evaluated.
    if not expired_instance_names:
        return

    # Only instances that are still running need to be deleted.
    running = gcloud.list_instances()
    deletable = [name for name in expired_instance_names if name in running]
    if deletable:
        deleted = gcloud.delete_instances(
            deletable,
            experiment_config['cloud_compute_zone'],
            write_to_stdout=False)
        if not deleted:
            # Some deletions failed and we cannot tell which ones, so leave
            # the database untouched and let the next call of this function
            # retry.
            return

    db_utils.bulk_save(expired_trials)
def stop_experiment(experiment_name, experiment_config_filename):
    """Stop the experiment described by |experiment_config_filename|.

    Deletes the dispatcher instance (if it is running) and every instance
    whose name starts with 'r-' followed by |experiment_name|.

    Returns True when there was nothing to delete or the deletion succeeded,
    and False when gcloud.delete_instances reported a failure.

    Raises NotImplementedError when the config marks the experiment as a
    local experiment.
    """
    config = yaml_utils.read(experiment_config_filename)
    is_local = config.get('local_experiment', False)
    if is_local:
        raise NotImplementedError(
            'Local experiment stop logic is not implemented.')

    project = config['cloud_project']
    zone = config['cloud_compute_zone']

    gce.initialize()
    running = list(gce.get_instances(project, zone))

    # The dispatcher goes first, followed by the trial instances.
    to_delete = []
    dispatcher = experiment_utils.get_dispatcher_instance_name(
        experiment_name)
    if dispatcher in running:
        to_delete.append(dispatcher)
    else:
        logger.warning('Dispatcher instance not running, skip.')

    prefix = 'r-' + experiment_name
    for name in running:
        if name.startswith(prefix):
            to_delete.append(name)

    if not to_delete:
        logger.warning('No experiment instances found, no work to do.')
        return True

    stopped = gcloud.delete_instances(to_delete, zone)
    if stopped:
        logger.info('Successfully stopped experiment.')
        return True

    logger.error('Failed to stop experiment instances.')
    return False
def stop_experiment(experiment_name, experiment_config_filename):
    """Stop the experiment described by |experiment_config_filename|.

    Deletes every running instance whose name starts with 'r-' followed by
    |experiment_name|, plus the dispatcher instance if it is running.

    Returns 0 when there was nothing to delete or the deletion succeeded,
    and 1 when gcloud.delete_instances reported a failure.
    """
    running = gcloud.list_instances()
    config = yaml_utils.read(experiment_config_filename)
    zone = config['cloud_compute_zone']

    # Trial instances come first; the dispatcher is appended after them.
    prefix = 'r-' + experiment_name
    to_delete = []
    for name in running:
        if name.startswith(prefix):
            to_delete.append(name)

    dispatcher = experiment_utils.get_dispatcher_instance_name(
        experiment_name)
    if dispatcher in running:
        to_delete.append(dispatcher)
    else:
        logger.warning('Dispatcher instance not running, skip.')

    if not to_delete:
        logger.warning('No experiment instances found, no work to do.')
        return 0

    stopped = gcloud.delete_instances(to_delete, zone)
    if stopped:
        logger.info('Successfully stopped experiment.')
        return 0

    logger.error('Failed to stop experiment instances.')
    return 1
def test_delete_instances_greater_than_batch_size(mocked_execute):
    """Check delete_instances when given more instances than a batch.

    103 instances are passed in, and |mocked_execute| must have seen two
    consecutive gcloud delete commands: one naming instances 0-99 and one
    naming instances 100-102.
    """
    zone = 'us-central1-a'
    instances = ['instance-{}'.format(index) for index in range(103)]
    mocked_execute.return_value = new_process.ProcessResult(0, '', False)

    gcloud.delete_instances(instances, zone)

    expected_calls = []
    for batch in (range(100), range(100, 103)):
        # Without -q gcloud would stop and prompt "Y/N?".
        command = ['gcloud', 'compute', 'instances', 'delete', '-q']
        command.extend('instance-{}'.format(index) for index in batch)
        command.extend(['--zone', zone])
        expected_calls.append(mock.call(command, expect_zero=False))

    mocked_execute.assert_has_calls(expected_calls)
def delete_instances(instances, experiment_config):
    """Delete those of |instances| that GCE currently lists.

    The project and zone are read from |experiment_config|. Only names
    returned by gce.get_instances that also appear in |instances| are
    passed on, in the order GCE returned them. Returns whatever
    gcloud.delete_instances returns.
    """
    project = experiment_config['cloud_project']
    zone = experiment_config['cloud_compute_zone']

    existing_targets = []
    for running_name in gce.get_instances(project, zone):
        if running_name in instances:
            existing_targets.append(running_name)

    return gcloud.delete_instances(existing_targets, zone)
def test_delete_instances_fail(mocked_execute):
    """Check that delete_instances reports failure with a falsy result.

    The mocked process result has return code 1, so delete_instances must
    return a falsy value while still having issued the delete command.
    """
    zone = 'us-central1-a'
    instances = ['instance-{}'.format(index) for index in range(5)]
    mocked_execute.return_value = new_process.ProcessResult(1, 'Error', False)

    expected_command = ['gcloud', 'compute', 'instances', 'delete', '-q']
    expected_command.extend(instances)
    expected_command.extend(['--zone', zone])

    outcome = gcloud.delete_instances(instances, zone)

    assert not outcome
    mocked_execute.assert_called_with(expected_command, expect_zero=False)
def delete_instances(instances, experiment_config):
    """Delete those of |instances| that gcloud currently lists.

    Names are kept in the order they appear in |instances|. The zone is
    read from |experiment_config|. Returns whatever gcloud.delete_instances
    returns.
    """
    currently_running = gcloud.list_instances()

    existing_targets = []
    for name in instances:
        if name in currently_running:
            existing_targets.append(name)

    zone = experiment_config['cloud_compute_zone']
    return gcloud.delete_instances(existing_targets, zone)