def test_force_removal(self):
    """Test the mode of removal which should not rely on the host being accessible"""
    host = synthetic_host("myaddress")
    synthetic_volume_full(host)

    self.assertEqual(Volume.objects.count(), 1)
    self.assertEqual(VolumeNode.objects.count(), 1)

    # The host disappears, never to be seen again
    MockAgentRpc.succeed = False
    try:
        JobSchedulerClient.command_run_jobs(
            [{"class_name": "ForceRemoveHostJob", "args": {"host_id": host.id}}], "Test host force remove"
        )
        self.drain_progress()
    finally:
        MockAgentRpc.succeed = True

    with self.assertRaises(ManagedHost.DoesNotExist):
        ManagedHost.objects.get(address="myaddress")

    self.assertEqual(Volume.objects.count(), 0)
    self.assertEqual(VolumeNode.objects.count(), 0)

def test_cancellation(self):
    """
    While an agent rpc is in flight, check that issuing a cancellation on the manager
    results in a cancel message being sent to the agent, and the command completing
    promptly on the manager.
    """
    agent_session_id = self._open_sessions()
    request_action = self._request_action()
    rpc_request = self._handle_action_receive(agent_session_id, request_action.actions[0])
    command = self._get_command(request_action.command_id)
    for job in command.jobs.all():
        JobSchedulerClient.cancel_job(job.id)

    # The command should get cancelled promptly
    command = self._wait_for_command(command.id, RABBITMQ_GRACE_PERIOD)
    self.assertTrue(command.cancelled)
    self.assertFalse(command.errored)

    # A cancellation for the agent rpc should have been sent to the agent
    cancellation_message = self._receive_messages(1)[0]
    self.assertDictEqual(
        cancellation_message["body"],
        {"type": "ACTION_CANCEL", "id": rpc_request["id"], "action": None, "args": None},
    )

def test_force_removal_with_filesystem(self):
    """Test that when a filesystem depends on a host, the filesystem
    is deleted along with the host when doing a force remove"""
    host = synthetic_host("myaddress")
    self.create_simple_filesystem()
    from chroma_core.models import ManagedMgs, ManagedMdt, ManagedOst, ManagedFilesystem

    self.fs = self.set_and_assert_state(self.fs, "available")
    self.assertState(self.mgt.managedtarget_ptr, "mounted")
    self.assertState(self.mdt.managedtarget_ptr, "mounted")
    self.assertState(self.ost.managedtarget_ptr, "mounted")
    self.assertEqual(ManagedFilesystem.objects.get(pk=self.fs.pk).state, "available")

    # The host disappears, never to be seen again
    MockAgentRpc.succeed = False
    try:
        JobSchedulerClient.command_run_jobs(
            [{"class_name": "ForceRemoveHostJob", "args": {"host_id": host.id}}], "Test host force remove"
        )
        self.drain_progress()
    finally:
        MockAgentRpc.succeed = True

    with self.assertRaises(ManagedHost.DoesNotExist):
        ManagedHost.objects.get(address="myaddress")

    self.assertEqual(ManagedMgs.objects.count(), 0)
    self.assertEqual(ManagedOst.objects.count(), 0)
    self.assertEqual(ManagedMdt.objects.count(), 0)
    self.assertEqual(Volume.objects.count(), 0)
    self.assertEqual(VolumeNode.objects.count(), 0)
    self.assertEqual(ManagedFilesystem.objects.count(), 0)

def test_cancel_pending(self):
    """Test cancelling a Job which is in state 'pending'"""
    self.set_state_delayed([(self.host.lnet_configuration, "lnet_unloaded")])
    pending_jobs = Job.objects.filter(state="pending")

    # stop lnet, unload lnet
    self.assertEqual(pending_jobs.count(), 2)

    # This is the one we cancelled explicitly
    cancelled_job = pending_jobs[0]

    # This one should be cancelled as a result of cancelling its dependency
    consequentially_cancelled_job = pending_jobs[1]

    JobSchedulerClient.cancel_job(pending_jobs[0].id)
    self.drain_progress()

    cancelled_job = freshen(cancelled_job)
    consequentially_cancelled_job = freshen(consequentially_cancelled_job)

    self.assertEqual(cancelled_job.state, "complete")
    self.assertEqual(cancelled_job.errored, False)
    self.assertEqual(cancelled_job.cancelled, True)

    self.assertEqual(consequentially_cancelled_job.state, "complete")
    self.assertEqual(consequentially_cancelled_job.errored, False)
    self.assertEqual(consequentially_cancelled_job.cancelled, True)

    pending_jobs = Job.objects.filter(state="pending")
    self.assertEqual(pending_jobs.count(), 0)

    self.assertFalse(self.job_scheduler._lock_cache.get_by_job(cancelled_job))

def test_cancel_tasked(self):
    """Test that cancelling a Job which is in state 'tasked'
    involves calling the cancel method on RunJobThread"""

    cancel_bak = RunJobThread.cancel
    RunJobThread.cancel = mock.Mock()

    from tests.unit.chroma_core.helpers import log

    def spawn_job(job):
        log.debug("neutered spawn_job")
        thread = mock.Mock()
        self.job_scheduler._run_threads[job.id] = thread

    spawn_bak = JobScheduler._spawn_job
    self.job_scheduler._spawn_job = mock.Mock(side_effect=spawn_job)

    try:
        self.set_state_delayed([(self.lnet_configuration, "lnet_down")])

        # Start our mock thread 'running'
        self.job_scheduler._run_next()

        job = Job.objects.get(state="tasked")
        JobSchedulerClient.cancel_job(job.id)

        # That call to cancel should have reached the thread
        self.assertEqual(self.job_scheduler._run_threads[job.id].cancel.call_count, 1)
        self.assertFalse(self.job_scheduler._lock_cache.get_by_job(job))
    finally:
        RunJobThread.cancel = cancel_bak
        JobScheduler._spawn_job = spawn_bak

def _nids_delete(self, obj_list):
    delete_list = [{"network_interface": nid.network_interface_id, "lnd_network": -1} for nid in obj_list]

    if len(delete_list) > 0:
        JobSchedulerClient.update_nids(delete_list)

def obj_update(self, bundle, **kwargs):
    job = Job.objects.get(pk=kwargs["pk"])
    new_state = bundle.data["state"]

    if new_state == "cancelled":
        JobSchedulerClient.cancel_job(job.pk)
        # Re-read the job so the response reflects the cancellation
        job = Job.objects.get(pk=kwargs["pk"])

    bundle.obj = job
    return bundle

def update_client_mounts(self):
    # Client mount audit comes in via metrics due to the way the
    # ClientAudit is implemented.
    try:
        client_mounts = self.host_data["metrics"]["raw"]["lustre_client_mounts"]
    except KeyError:
        client_mounts = []

    # If lustre_client_mounts is None then nothing has changed since the last update, so we can
    # just return. Not the same as an empty list [], which means no mounts.
    if client_mounts is None:
        return

    expected_fs_mounts = LustreClientMount.objects.select_related("filesystem").filter(host=self.host)
    actual_fs_mounts = [m["mountspec"].split(":/")[1] for m in client_mounts]

    # Don't bother with the rest if there's nothing to do.
    if len(expected_fs_mounts) == 0 and len(actual_fs_mounts) == 0:
        return

    for expected_mount in expected_fs_mounts:
        if expected_mount.active and expected_mount.filesystem.name not in actual_fs_mounts:
            update = dict(state="unmounted", mountpoint=None)
            job_scheduler_notify.notify(expected_mount, self.started_at, update)
            log.info("updated mount %s on %s -> inactive" % (expected_mount.mountpoint, self.host))

    for actual_mount in client_mounts:
        fsname = actual_mount["mountspec"].split(":/")[1]
        try:
            mount = [m for m in expected_fs_mounts if m.filesystem.name == fsname][0]
            log.debug("mount: %s" % mount)
            if not mount.active:
                update = dict(state="mounted", mountpoint=actual_mount["mountpoint"])
                job_scheduler_notify.notify(mount, self.started_at, update)
                log.info("updated mount %s on %s -> active" % (actual_mount["mountpoint"], self.host))
        except IndexError:
            log.info("creating new mount %s on %s" % (actual_mount["mountpoint"], self.host))
            filesystem = ManagedFilesystem.objects.get(name=fsname)
            JobSchedulerClient.create_client_mount(self.host, filesystem, actual_mount["mountpoint"])

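# Illustrative shape of the raw metric payload consumed above; the field names are
# taken from the parsing code, the values are made up:
#
#   self.host_data["metrics"]["raw"]["lustre_client_mounts"] == [
#       {"mountspec": "10.0.0.1@tcp:/testfs", "mountpoint": "/mnt/testfs"},
#   ]
#
# "mountspec" is split on ":/" to recover the filesystem name ("testfs") and
# "mountpoint" is recorded against the corresponding LustreClientMount row.
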
def _request_action(self, state="lnet_up"):
    # Start a job which should generate an action
    command_id = JobSchedulerClient.command_set_state(
        [(self.host.lnet_configuration.content_type.natural_key(), self.host.lnet_configuration.id, state)],
        "Test",
    )
    command = self._get_command(command_id)
    self.assertEqual(len(command.jobs.all()), 1)
    self.last_action = time.time()

    # These have to be hardcoded and kept up to date, which is a bit crappy, but at least they
    # are in one place, and we assert when we don't know what to do.
    # This is basically describing the messages that we expect to receive when a command is sent.
    # We can then receive and validate each message as it arrives.
    if state == "lnet_up":
        actions = ["start_lnet", "device_plugin"]  # It will do start_lnet, followed by 1 request for data.
    elif state == "lnet_down":
        actions = ["stop_lnet", "device_plugin"]  # It will do stop_lnet, followed by 1 request for data.
    else:
        raise AssertionError("Unknown state '%s' requested for _request_action" % state)

    return self.ActionsRequested(command_id, actions)

def obj_create(self, bundle, **kwargs):
    request = bundle.request

    self.is_valid(bundle)
    if bundle.errors:
        raise ImmediateHttpResponse(
            response=self.error_response(bundle.request, bundle.errors[self._meta.resource_name])
        )

    # Set up an errors dict in the bundle to allow us to carry
    # hydration errors through to validation.
    setattr(bundle, 'data_errors', defaultdict(list))

    bundle.data['content_type'] = ContentType.objects.get_for_model(KIND_TO_KLASS[bundle.data['kind']]).natural_key()

    # Should really only be doing one validation pass, but this works
    # OK for now.  It's better than raising a 404 or duplicating the
    # filesystem validation failure if it doesn't exist, anyhow.
    self.is_valid(bundle)

    targets, command = JobSchedulerClient.create_targets([bundle.data])

    if request.method == 'POST':
        raise custom_response(
            self,
            request,
            http.HttpAccepted,
            {
                'command': dehydrate_command(command),
                'target': self.full_dehydrate(self.build_bundle(obj=targets[0])).data,
            },
        )

def obj_update(self, bundle, request, **kwargs):
    bundle.obj = self.cached_obj_get(request=request, **self.remove_api_resource_names(kwargs))

    stateful_object = bundle.obj

    dry_run = bundle.data.get('dry_run', False)
    if 'state' in bundle.data:
        new_state = bundle.data['state']

        if dry_run:
            # FIXME: should this be a GET to something like /foo/transitions/from/to/
            # to get information about that transition?
            if stateful_object.state == new_state:
                report = []
            else:
                report = JobSchedulerClient.get_transition_consequences(stateful_object, new_state)
            raise custom_response(self, request, http.HttpResponse, report)
        else:
            try:
                command = Command.set_state([(stateful_object, new_state)])
            except SchedulingError as e:
                raise custom_response(self, request, http.HttpBadRequest, {'state': e.message})

            if command:
                raise custom_response(self, request, http.HttpAccepted, {'command': dehydrate_command(command)})
            else:
                raise custom_response(self, request, http.HttpNoContent, None)

def patch_list(self, request, **kwargs):
    """
    Specialization of patch_list to do bulk target creation in a single RPC to
    job_scheduler (and consequently in a single command).
    """
    deserialized = self.deserialize(
        request, request.raw_post_data, format=request.META.get('CONTENT_TYPE', 'application/json')
    )

    if "objects" not in deserialized:
        raise BadRequest("Invalid data sent.")

    if len(deserialized["objects"]) and 'put' not in self._meta.detail_allowed_methods:
        raise ImmediateHttpResponse(response=http.HttpMethodNotAllowed())

    # If any of the included targets is not a creation, then
    # skip to a normal PATCH instead of this special case one
    for target_data in deserialized['objects']:
        if 'id' in target_data or 'resource_uri' in target_data:
            return super(TargetResource, self).patch_list(request, **kwargs)

    # Validate and prepare each target dict for consumption by job_scheduler
    for target_data in deserialized['objects']:
        data = self.alter_deserialized_detail_data(request, target_data)
        bundle = self.build_bundle(data=dict_strip_unicode_keys(data))
        bundle.request = request
        self.is_valid(bundle)

        target_data['content_type'] = ContentType.objects.get_for_model(
            KIND_TO_KLASS[target_data['kind']]
        ).natural_key()

    targets, command = JobSchedulerClient.create_targets(deserialized['objects'])

    raise custom_response(
        self,
        request,
        http.HttpAccepted,
        {
            'command': dehydrate_command(command),
            'targets': [self.get_resource_uri(target) for target in targets],
        },
    )

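# A hypothetical PATCH body taken by the bulk-creation path above. The shape is
# inferred from the code (each object is a new target, so no 'id' or
# 'resource_uri' keys are present); the per-target fields shown are illustrative:
#
#   {"objects": [{"kind": "OST", "volume_id": 1, "filesystem_id": 1},
#                {"kind": "OST", "volume_id": 2, "filesystem_id": 1}]}
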
def test_onejob(self):
    # Our self.host is initially lnet_up
    self.assertEqual(LNetConfiguration.objects.get(pk=self.lnet_configuration.pk).state, "lnet_up")

    # This tests a state transition which is done by a single job
    command_id = JobSchedulerClient.command_run_jobs(
        [{"class_name": "UpdateDevicesJob", "args": {"hosts": [api.get_resource_uri(self.host)]}}],
        "Test single job action",
    )
    self.drain_progress()

    self.assertEqual(Command.objects.get(pk=command_id).complete, True)
    self.assertEqual(Command.objects.get(pk=command_id).jobs.count(), 1)

    # Test that if I try to run the same again I get None
    command = Command.set_state([(freshen(self.lnet_configuration), "lnet_up")])
    self.assertEqual(command, None)

def obj_update(self, bundle, **kwargs):
    if "pk" in kwargs:
        return super(LNetConfigurationResource, self).obj_update(bundle, **kwargs)

    lnet_configurations_data = bundle.data.get("objects", [bundle.data])

    lnet_configuration = []
    for lnet_configuration_data in lnet_configurations_data:
        lnet_configuration.append(
            {"host_id": lnet_configuration_data["host"]["id"], "state": lnet_configuration_data["state"]}
        )

    command_id = JobSchedulerClient.update_lnet_configuration(lnet_configuration)

    try:
        command = Command.objects.get(pk=command_id)
    except ObjectDoesNotExist:
        command = None

    raise custom_response(self, bundle.request, http.HttpAccepted, {"command": dehydrate_command(command)})

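# A hypothetical bulk PUT body accepted by the non-"pk" branch above; the key
# names follow the fields read out of each lnet_configuration_data dict:
#
#   {"objects": [{"host": {"id": 1}, "state": "lnet_up"},
#                {"host": {"id": 2}, "state": "lnet_down"}]}
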
def obj_create(self, bundle, **kwargs):
    request = bundle.request

    for job in bundle.data["jobs"]:
        # FIXME: HYD-1367: This is a hack to work around the inability of
        # the Job class to handle m2m references properly, serializing hosts
        # to a list of IDs understood by the HostListMixin class
        if "hosts" in job["args"]:
            job_ids = []
            for uri in job["args"]["hosts"]:
                job_ids.append(HostResource().get_via_uri(uri, bundle.request).id)
            del job["args"]["hosts"]
            job["args"]["host_ids"] = json.dumps(job_ids)

    from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerClient

    try:
        command_id = JobSchedulerClient.command_run_jobs(bundle.data["jobs"], bundle.data["message"])
    except SchedulingError as e:
        raise custom_response(self, request, http.HttpBadRequest, {"state": e.message})

    bundle.obj = Command.objects.get(pk=command_id)
    return bundle

def _update_corosync_configuration(self, corosync_configuration, request, **kwargs):
    network_interface_ids = [
        resolve(interwork_interface)[2]['pk']
        for interwork_interface in corosync_configuration['network_interfaces']
    ]

    return self.BulkActionResult(
        dehydrate_command(
            JobSchedulerClient.update_corosync_configuration(
                corosync_configuration_id=corosync_configuration['id'],
                mcast_port=corosync_configuration['mcast_port'],
                network_interface_ids=network_interface_ids,
            )
        ),
        None,
        None,
    )

def alter_list_data_to_serialize(self, request, to_be_serialized):
    """Post process available jobs and state transitions

    This method is a TastyPie hook that is called after all fields
    have been dehydrated.  The available_* methods are no longer
    dehydrated one at a time.  Instead, they are all done in two
    batched calls, and set in the return datastructure here.

    to_be_serialized is a list of TastyPie Bundles composing some
    subclass of StatefulObjects under the key 'objects'.

    Returns an updated copy of the input dict.
    """
    batch = []
    for bundle in to_be_serialized['objects']:
        so_ct_key = ContentType.objects.get_for_model(bundle.obj.downcast()).natural_key()
        batch.append((so_ct_key, bundle.obj.id))

    computed_transitions = JobSchedulerClient.available_transitions(batch)
    computed_jobs = JobSchedulerClient.available_jobs(batch)

    # decorate the transition lists with verbs
    # and install in the bundle for return
    for idx, bundle in enumerate(to_be_serialized['objects']):
        obj_transitions_states_and_verbs = computed_transitions[str(bundle.obj.id)]
        obj_jobs = computed_jobs[str(bundle.obj.id)]

        # TODO: available_transitions is deprecated, use available_actions
        bundle.data['available_transitions'] = obj_transitions_states_and_verbs

        # TODO: available_jobs is deprecated, use available_actions
        bundle.data['available_jobs'] = obj_jobs

        available_actions = sorted(
            obj_transitions_states_and_verbs + obj_jobs,
            key=lambda action: action['display_order'],
        )
        bundle.data['available_actions'] = available_actions

    return to_be_serialized

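# Illustrative shape of one bundle after the hook above runs. 'display_order' is
# the only field the code relies on; the other keys in each action dict are
# assumptions for the sake of the example:
#
#   bundle.data['available_actions'] == [
#       {'verb': 'Stop', 'state': 'stopped', 'display_order': 1},
#       {'verb': 'Force Remove', 'class_name': 'ForceRemoveHostJob', 'display_order': 2},
#   ]
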
def test_mgs_nid_change(self):
    mgs = synthetic_host("mgs")
    mds = synthetic_host("mds")
    oss = synthetic_host("oss")

    from chroma_core.models import (
        ManagedMgs,
        ManagedMdt,
        ManagedOst,
        ManagedFilesystem,
        ManagedTarget,
        ManagedTargetMount,
    )

    self.mgt, mgt_tms = ManagedMgs.create_for_volume(synthetic_volume_full(mgs).id, name="MGS")
    self.fs = ManagedFilesystem.objects.create(mgs=self.mgt, name="testfs")
    self.mdt, mdt_tms = ManagedMdt.create_for_volume(synthetic_volume_full(mds).id, filesystem=self.fs)
    self.ost, ost_tms = ManagedOst.create_for_volume(synthetic_volume_full(oss).id, filesystem=self.fs)

    ObjectCache.add(ManagedFilesystem, self.fs)
    for target in [self.mgt, self.ost, self.mdt]:
        ObjectCache.add(ManagedTarget, target.managedtarget_ptr)
    for tm in chain(mgt_tms, mdt_tms, ost_tms):
        ObjectCache.add(ManagedTargetMount, tm)

    self.fs = self.set_and_assert_state(self.fs, "available")

    self.mock_servers["mgs"]["nids"] = [Nid.Nid("192.168.0.99", "tcp", 0)]
    self.assertNidsCorrect(mgs)

    JobSchedulerClient.command_run_jobs(
        [{"class_name": "UpdateNidsJob", "args": {"hosts": [api.get_resource_uri(mgs)]}}], "Test update nids"
    )
    self.drain_progress()

    # The -3 looks past the start/stop that happens after writeconf
    self.assertEqual(MockAgentRpc.host_calls[mgs][-3][0], "writeconf_target")
    self.assertEqual(MockAgentRpc.host_calls[mds][-3][0], "writeconf_target")
    self.assertEqual(MockAgentRpc.host_calls[oss][-3][0], "writeconf_target")
    self.assertState(self.fs, "stopped")

def obj_create(self, bundle, **kwargs):
    request = bundle.request
    ostpool_id, command_id = JobSchedulerClient.create_ostpool(bundle.data)
    command = Command.objects.get(pk=command_id)

    raise custom_response(self, request, http.HttpAccepted, {"command": dehydrate_command(command)})

def _pool_delete(self, request, obj_list):
    commands = []
    for obj in obj_list:
        command_id = JobSchedulerClient.delete_ostpool(obj.id)
        command = Command.objects.get(pk=command_id)
        commands.append(dehydrate_command(command))

    raise custom_response(self, request, http.HttpAccepted, {"commands": commands})

def obj_update(self, bundle, request, **kwargs):
    bundle.obj = self.cached_obj_get(request=request, **self.remove_api_resource_names(kwargs))

    if hasattr(bundle.obj, 'content_type'):
        obj = bundle.obj.downcast()
    else:
        obj = bundle.obj

    # FIXME HYD-1032: PUTing modified conf_params and modified state in the same request will
    # cause one of those two things to be ignored.
    if 'conf_params' not in bundle.data or isinstance(obj, ManagedMgs):
        super(ConfParamResource, self).obj_update(bundle, request, **kwargs)

    try:
        conf_params = bundle.data['conf_params']
    except KeyError:
        # TODO: pass in whole objects every time so that I can legitimately
        # validate the presence of this field
        pass
    else:
        # Belt-and-braces: child classes should have validated first, but let's
        # make sure (bad conf params can be very harmful)
        errors = chroma_core.lib.conf_param.validate_conf_params(obj.__class__, conf_params)
        if errors:
            raise custom_response(self, request, http.HttpBadRequest, {'conf_params': errors})

        # Store the conf params
        mgs_id = chroma_core.lib.conf_param.set_conf_params(obj, conf_params)

        # If we were returned an MGS, then something has changed, and we will
        # kick off a command to apply the changes to the filesystem
        if mgs_id:
            command_id = JobSchedulerClient.command_run_jobs(
                [{'class_name': 'ApplyConfParams', 'args': {'mgs_id': mgs_id}}],
                "Updating configuration parameters",
            )
            raise custom_response(
                self,
                request,
                http.HttpAccepted,
                {
                    'command': dehydrate_command(Command.objects.get(pk=command_id)),
                    self.Meta.resource_name: self.alter_detail_data_to_serialize(
                        request, self.full_dehydrate(bundle)
                    ).data,
                },
            )
        else:
            return super(ConfParamResource, self).obj_update(bundle, request, **kwargs)

    return bundle

def _test_host_contact(self):
    command = JobSchedulerClient.test_host_contact('test-server')
    self.drain_progress()

    job = Job.objects.filter(command__pk=command.id)[0]
    step_result = StepResult.objects.filter(job__pk=job.id)[0]

    self.assertEqual(self.expected_result, json.loads(step_result.result))

def test_lnet_dependency(self):
    """Test that if I try to stop LNet on a host where a target is running,
    stopping the target is calculated as a dependency of that"""
    self.mgt.managedtarget_ptr = self.set_and_assert_state(self.mgt.managedtarget_ptr, "mounted")
    self.lnet_configuration = self.assertState(self.host.lnet_configuration, "lnet_up")
    consequences = JobSchedulerClient.get_transition_consequences(self.host.lnet_configuration, "lnet_down")

    self.assertEqual(len(consequences["dependency_jobs"]), 1)
    self.assertEqual(consequences["dependency_jobs"][0]["class"], "StopTargetJob")

def assertNidsCorrect(self, host):
    JobSchedulerClient.command_run_jobs(
        [{"class_name": "UpdateDevicesJob", "args": {"hosts": [api.get_resource_uri(host)]}}], "Test update of nids"
    )
    self.drain_progress()

    mock_nids = set(
        [str(Nid.nid_tuple_to_string(Nid.Nid(n[0], n[1], n[2]))) for n in self.mock_servers[host.address]["nids"]]
    )
    recorded_nids = set([str(n.nid_string) for n in Nid.objects.filter(lnet_configuration__host=host)])

    self.assertSetEqual(mock_nids, recorded_nids)

def obj_create(self, bundle, **kwargs):
    command_id = JobSchedulerClient.configure_stratagem(bundle.data)

    try:
        command = Command.objects.get(pk=command_id)
    except ObjectDoesNotExist:
        command = None

    raise custom_response(self, bundle.request, http.HttpAccepted, {"command": dehydrate_command(command)})

def obj_create(self, bundle, **kwargs):
    request = bundle.request
    host = self.fields["host"].hydrate(bundle).obj
    filesystem = self.fields["filesystem"].hydrate(bundle).obj
    mountpoint = bundle.data["mountpoint"]

    client_mount = JobSchedulerClient.create_client_mount(host, filesystem, mountpoint)

    args = dict(client_mount=self.prepare_mount(client_mount))
    raise custom_response(self, request, http.HttpAccepted, args)

def handle_long_polling_dispatch(self, request_type, request, **kwargs):
    table_timestamps = None

    if (self.long_polling_tables is not None) and (request.method.lower() in ['get']):
        log.debug("Long Polling Request: %s" % request.GET)

        # Allow 2 methods so it can be tested easily in a browser. Don't use 'last_modified' in request.GET
        # below because QueryDicts seem to not implement 'in' as we would expect.
        if request.GET.get('last_modified') is not None:
            table_timestamps = request.GET['last_modified']
        elif request.META.get('HTTP_IF_NONE_MATCH') is not None:
            table_timestamps = request.META['HTTP_IF_NONE_MATCH']

    if table_timestamps is not None:
        # The caller sends 0 at the beginning as an initializer; after that they return what they received.
        if table_timestamps in ['0', 0]:
            table_timestamps = {'max_timestamp': 0}
        else:
            table_timestamps = json.loads(table_timestamps)

        table_timestamps = JobSchedulerClient.wait_table_change(
            table_timestamps,
            [table._meta.db_table for table in self.long_polling_tables],
            settings.LONG_POLL_TIMEOUT_SECONDS,
        )

        if table_timestamps:
            # We want the super of the thing that called us, because it might have other overloads
            response = super(self.__class__, self).dispatch(request_type, request, **kwargs)

            if request.GET.get('last_modified') is not None:
                # Expensive but reliable method; this is only used when a user types from a browser,
                # and only works for json, but that is all we support and the real method is the ETag.
                # Take out the spaces because it makes copy-paste debug easier.
                content_data = json.loads(response.content)
                content_data['meta']['last_modified'] = json.dumps(table_timestamps).replace(' ', '')
                response.content = json.dumps(content_data)

            response['ETag'] = json.dumps(table_timestamps)
            log.debug("Long Polling response: %s\n" % response)
        else:
            raise ImmediateHttpResponse(HttpNotModified("Timeout waiting for data change"))
    else:
        # We want the super of the thing that called us, because it might have other overloads
        response = super(self.__class__, self).dispatch(request_type, request, **kwargs)

    return response

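# A minimal client-side sketch of the long-polling protocol implemented above,
# assuming the 'requests' library and a caller-supplied URL. The '0' initializer,
# the If-None-Match header, the 304 timeout response and the echoed ETag mirror
# the server code; everything else is illustrative.
import requests

def poll_changes(url):
    last_modified = "0"  # initializer expected by handle_long_polling_dispatch
    while True:
        response = requests.get(url, headers={"If-None-Match": last_modified})
        if response.status_code == 304:
            # Server timed out waiting for a table change; just ask again
            continue
        response.raise_for_status()
        last_modified = response.headers["ETag"]  # return what we received
        yield response.json()
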
def test_cancel_complete(self):
    """Test cancelling a Job which is in state 'complete': should be a no-op"""
    self.set_state_delayed([(self.lnet_configuration, "lnet_down")])
    job = Job.objects.get(state="pending")

    # Run, check that it goes to successful state
    self.set_state_complete()
    job = freshen(job)
    self.assertEqual(job.state, "complete")
    self.assertEqual(job.cancelled, False)
    self.assertEqual(job.errored, False)

    # Try to cancel, check that it is not modified
    JobSchedulerClient.cancel_job(job.id)
    job = freshen(job)
    self.assertEqual(job.state, "complete")
    self.assertEqual(job.cancelled, False)
    self.assertEqual(job.errored, False)

    self.assertFalse(self.job_scheduler._lock_cache.get_by_job(job))