def _query_and_own_runnable_job_token(self, workflow, instance):
    """Attempt to own a runnable job token from a given workflow instance.

    Ask the master for at most one token under the RUNNABLE job-state
    prefix of the workflow instance and claim it under this worker's name.
    The ownership lasts for a limited time so it has to be periodically
    renewed.  On success the token is stored in self._owned_job_token.  A
    TokenMasterException raised by the master call is logged and swallowed.

    Args:
        workflow: The name of the workflow whose jobs should be considered.
        instance: The workflow instance whose jobs should be considered.
    """
    assert not self._owned_job_token
    name = Name(workflow=workflow,
                instance=instance,
                job_state=Name.RUNNABLE_STATE)
    query = Query()
    query.namePrefix = name.get_job_state_prefix()
    query.maxTokens = 1
    request = QueryAndOwnRequest()
    request.query = query
    # Truncate to whole seconds so the lease deadline is an integer
    # timestamp, the same form the scheduler uses for schedule tokens.
    request.expirationTime = int(time.time()) + Worker._LEASE_TIME_SEC
    request.owner = self._name
    try:
        response = self._client.query_and_own(request)
    except TokenMasterException:
        LOG.exception('error sending request %s', request)
    else:
        if response.tokens:
            # maxTokens is 1, so at most one token can come back.
            assert len(response.tokens) == 1
            self._owned_job_token = response.tokens[0]
def test_query_and_own(self):
    # One token whose lease has already expired ...
    expired_token = self._trie['/some_dir/some_token_0']
    expired_token.owner = 'some_owner'
    expired_token.expirationTime = 10  # in the past

    # ... and one token that is still owned by somebody else.
    owned_token = self._trie['/some_dir/some_token_1']
    owned_token.owner = 'some_owner'
    owned_token.expirationTime = sys.maxint  # in the future

    some_query = Query()
    some_query.namePrefix = ''
    some_query.maxTokens = 200
    request = QueryAndOwnRequest()
    request.owner = 'some_other_owner'
    request.expirationTime = sys.maxint
    request.query = some_query

    transaction = QueryAndOwnTransaction()
    transaction.prepare(request)
    response = transaction.commit(self._trie,
                                  self._get_blessed_version(),
                                  self._store)

    # Should have owned all tokens but two: the blessed version and the one
    # token that is already owned.
    self.assertEqual(len(self._trie) - 2, len(response.tokens))
    for token in response.tokens:
        # assertEqual is used throughout; assertEquals is a deprecated alias.
        self.assertEqual('some_other_owner', token.owner)
        self.assertEqual(sys.maxint, token.expirationTime)
def _own_schedule_token_list(self):
    """Attempt to own some schedule tokens.

    Only unowned tokens will be considered.  Unowned schedules are ready to
    run.  The ownership of the qualifying schedule tokens lasts for a
    limited time so it has to be periodically renewed if the schedule takes
    longer than that to run.

    At most self._SCHEDULE_GANG_SIZE tokens are requested.  The non-empty
    ones are stored in self._owned_schedule_token_list.  A
    TokenMasterException raised by the master call is logged and swallowed.
    """
    # NOTE(review): this asserts on _owned_schedule_token while the result
    # below is stored in _owned_schedule_token_list -- confirm which
    # attribute is intended.
    assert not self._owned_schedule_token
    query = Query()
    query.namePrefix = Name.SCHEDULE_PREFIX
    query.maxTokens = self._SCHEDULE_GANG_SIZE
    request = QueryAndOwnRequest()
    request.query = query
    request.expirationTime = int(time.time()) + Scheduler._LEASE_TIME_SEC
    request.owner = self._name
    try:
        response = self._client.query_and_own(request)
        if response.tokens:
            assert len(response.tokens) <= self._SCHEDULE_GANG_SIZE
            self._owned_schedule_token_list = [
                token for token in response.tokens if token
            ]
            LOG.info("got %d schedule token(s) from master.",
                     len(self._owned_schedule_token_list))
    except TokenMasterException:
        # Say which request failed instead of logging an empty message.
        LOG.exception('error sending request %s', request)
def _own_schedule_token_list(self):
    """Attempt to own some schedule tokens.

    Only unowned tokens will be considered.  Unowned schedules are ready to
    run.  The ownership of the qualifying schedule tokens lasts for a
    limited time so it has to be periodically renewed if the schedule takes
    longer than that to run.

    Up to self._SCHEDULE_GANG_SIZE tokens are requested from the master and
    every non-empty token in the response is kept in
    self._owned_schedule_token_list.  A TokenMasterException raised by the
    master call is logged and swallowed.
    """
    # NOTE(review): the precondition checks _owned_schedule_token but the
    # tokens are stored in _owned_schedule_token_list -- confirm which
    # attribute is intended.
    assert not self._owned_schedule_token
    query = Query()
    query.namePrefix = Name.SCHEDULE_PREFIX
    query.maxTokens = self._SCHEDULE_GANG_SIZE
    request = QueryAndOwnRequest()
    request.query = query
    request.expirationTime = int(time.time()) + Scheduler._LEASE_TIME_SEC
    request.owner = self._name
    try:
        response = self._client.query_and_own(request)
        if response.tokens:
            assert len(response.tokens) <= self._SCHEDULE_GANG_SIZE
            self._owned_schedule_token_list = [
                token for token in response.tokens if token
            ]
            LOG.info(
                "got %d schedule token(s) from master.",
                len(self._owned_schedule_token_list)
            )
    except TokenMasterException:
        # Include the failing request; an empty message gives no context.
        LOG.exception('error sending request %s', request)
def test_query(self):
    # A handler backed by a fresh ephemeral store, queried with a single
    # match-everything query.
    handler = MasterHandler(EphemeralStore())

    match_all = Query()
    match_all.maxTokens = 10
    match_all.namePrefix = ''

    request = QueryRequest()
    request.queries = [match_all]

    response = handler.query(request)

    # The single query yields a single entry in the response.
    self.assertEqual(1, len(response.tokens))
def _refresh_actions(self):
    """Reload actions from the master.

    Builds one signal-prefix query for the top level, plus one for the
    workflow and one for the instance when those attributes are set.  All
    of them go to the master in a single request, and the flattened list of
    returned tokens is handed to self._dedup_actions.
    """
    scope = Name()
    # The same Name is narrowed step by step, so each later prefix also
    # carries the fields assigned before it.
    prefixes = [scope.get_signal_prefix()]
    if self._workflow:
        scope.workflow = self._workflow
        prefixes.append(scope.get_signal_prefix())
    if self._instance:
        scope.instance = self._instance
        prefixes.append(scope.get_signal_prefix())

    queries = []
    for prefix in prefixes:
        signal_query = Query()
        signal_query.namePrefix = prefix
        queries.append(signal_query)

    response = self._client.query(QueryRequest(queries=queries))
    # response.tokens holds one token list per query; flatten them.
    signal_tokens = [token
                     for token_list in response.tokens
                     for token in token_list]
    self._dedup_actions(signal_tokens)
def test_query_and_own(self):
    # Try to claim tokens from a handler backed by a fresh ephemeral store.
    handler = MasterHandler(EphemeralStore())

    match_all = Query()
    match_all.maxTokens = 10
    match_all.namePrefix = ''

    request = QueryAndOwnRequest()
    request.query = match_all
    request.owner = 'some_owner'
    request.expirationTime = sys.maxint

    response = handler.query_and_own(request)

    # No token is expected to be handed out.
    self.assertEqual(0, len(response.tokens))
def test_recursive(self):
    # Recursive, forced Rm of '/some_path' against a mocked client.
    Options = collections.namedtuple('args',
                                     'recursive force command_args')
    command = Rm()
    command.prepare(Options(recursive=True,
                            force=True,
                            command_args=['/some_path']))

    token = Token(version=10,
                  name='/some_path/some_token',
                  owner='some_owner',
                  expirationTime=10,
                  data='some_data')
    client = mock.Mock()
    client.query.return_value = QueryResponse(tokens=[[token]])
    client.modify.return_value = ModifyResponse()

    output = command.execute(client, None)

    # The command must query the prefix once and then delete the token
    # that the query returned.
    expected_query = QueryRequest(queries=[Query(namePrefix='/some_path')])
    client.query.assert_called_once_with(expected_query)
    expected_modify = ModifyRequest(deletes=[token])
    client.modify.assert_called_once_with(expected_modify)

    self.assertEqual('removed 1 token(s)\n', output)
def _make_runnable(self, workflow, instance):
    """Attempt to make jobs in a given workflow instance runnable.

    Query all job tokens under the WAITING state prefix of the workflow
    instance and pass each one to self._make_job_runnable, stopping at the
    first failure.

    Args:
        workflow: The name of the workflow whose jobs should be considered.
        instance: The workflow instance whose jobs should be considered.

    Returns:
        True if there were no errors during communication with the master,
        otherwise False.
    """
    waiting = Name(workflow=workflow,
                   instance=instance,
                   job_state=Name.WAITING_STATE)
    # TODO(pawel): to prevent multiple workers from trying to make the
    # same job runnable at the same time, this should be a
    # QueryAndOwnRequest.  Note that the current implementation is correct,
    # just inefficient.
    request = QueryRequest(
        queries=[Query(namePrefix=waiting.get_job_state_prefix())])
    try:
        response = self._client.query(request)
    except TokenMasterException:
        LOG.exception('error sending request %s', request)
        return False
    # One query was sent, so exactly one token list is expected back.
    assert len(response.tokens) == 1
    waiting_tokens = response.tokens[0]
    # all() short-circuits on the first job that could not be processed.
    return all(self._make_job_runnable(job_token)
               for job_token in waiting_tokens)
def test_recursive(self):
    # Recursive Cat of '/some_path' against a mocked client holding one
    # token under that prefix.
    Options = collections.namedtuple('args', 'recursive command_args')
    command = Cat()
    command.prepare(Options(recursive=True, command_args=['/some_path']))

    token = Token(version=10,
                  name='/some_path/some_token',
                  owner='some_owner',
                  expirationTime=10,
                  data='some_data')
    client = mock.Mock()
    client.query.return_value = QueryResponse(tokens=[[token]])

    output = command.execute(client, None)

    expected_request = QueryRequest(
        queries=[Query(namePrefix='/some_path')])
    client.query.assert_called_once_with(expected_request)

    expected_output = ('total 1\nToken(version=10, owner=some_owner, '
                       'expirationTime=1970-01-01 00:00:10 UTC, '
                       'priority=0.000000, name=/some_path/some_token, '
                       'data=some_data)\n')
    self.assertEqual(expected_output, output)
def _get_token(self, name):
    """Query the client for `name` and return the single matching token.

    Asserts that the response has exactly one token list and that the list
    has exactly one token.
    """
    request = QueryRequest(queries=[Query(namePrefix=name)])
    response = self._client.query(request)
    self.assertEqual(1, len(response.tokens))
    matches = response.tokens[0]
    self.assertEqual(1, len(matches))
    return matches[0]
def _is_done(self, workflow, instance):
    """Check if the workflow instance is done.

    A workflow is done if it does not have runnable jobs.

    Args:
        workflow: The name of the workflow to check.
        instance: The workflow instance to check.

    Returns:
        True if we are certain that the workflow is not running.  Otherwise
        False.  If there were any errors during communication with the
        master, the return value is False.
    """
    # Attempt to make the workflow runnable and verify that no WAITING job
    # tokens were changed in the meantime.
    name = Name(workflow=workflow,
                instance=instance,
                job_state=Name.WAITING_STATE)
    query = Query(namePrefix=name.get_job_state_prefix())
    request = QueryRequest(queries=[query])
    try:
        snapshot = Snapshot(self._client, request)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        LOG.exception('error sending request %s', request)
        return False
    if not self._make_runnable(workflow, instance):
        return False
    if not self._has_no_runnable_jobs(workflow, instance):
        return False
    try:
        # A truthy refresh() result makes the answer False (not done).
        return not snapshot.refresh()
    except Exception:
        LOG.exception('error sending request %s', request)
        return False
def _make_job_runnable(self, job_token):
    """Attempt to make a job runnable.

    Query event tokens in job inputs.  If a combination of triggering
    events exists, remove those events and make the job runnable.
    Otherwise, do nothing.

    Args:
        job_token: The job token to make runnable.

    Returns:
        True if there were no errors during communication with the master,
        otherwise False.
    """
    # NOTE(review): pickle.loads is unsafe on untrusted data -- confirm
    # that job token data can only come from a trusted master.
    job = pickle.loads(job_token.data)
    job_name = Name.from_job_token_name(job_token.name)

    # TODO(pawel): handle jobs with no dependencies
    assert job.inputs

    # One query per job input, each asking for at most one event token.
    input_queries = []
    for input_name in job.inputs:
        input_prefix = Name()
        input_prefix.workflow = job_name.workflow
        input_prefix.instance = job_name.instance
        input_prefix.job = job_name.job
        input_prefix.input = input_name
        input_query = Query()
        input_query.namePrefix = input_prefix.get_input_prefix()
        input_query.maxTokens = 1
        input_queries.append(input_query)
    request = QueryRequest(queries=input_queries)

    try:
        response = self._client.query(request)
    except TokenMasterException:
        # TODO(pawel): add a retry count and fail if a limit is reached.
        LOG.exception('error sending request %s', request)
        return False

    triggering_events = Worker._get_triggering_events(response.tokens)
    if not triggering_events:
        # Nothing to trigger on; this is not an error.
        return True
    return self._move_job_token_to_runnable(job_token, triggering_events)
def _read_tokens_from_client(self, client):
    """Read archived job tokens from the client.

    Queries everything under the workflow prefix built from self._workflow
    and self._instance, then passes the returned tokens to the job-token
    and event-token filters.

    Args:
        client: The client to read tokens from.
    """
    workflow_name = Name(workflow=self._workflow, instance=self._instance)
    workflow_query = Query(namePrefix=workflow_name.get_workflow_prefix())
    response = client.query(QueryRequest(queries=[workflow_query]))
    # One query was sent, so exactly one token list is expected back.
    assert len(response.tokens) == 1
    workflow_tokens = response.tokens[0]
    self._filter_job_tokens(workflow_tokens)
    self._filter_event_tokens(workflow_tokens)
def test_empty(self):
    # Non-recursive Cat of '/some_path' when the client returns an empty
    # response.
    Options = collections.namedtuple('args', 'recursive command_args')
    command = Cat()
    command.prepare(Options(recursive=False, command_args=['/some_path']))

    client = mock.Mock()
    client.query.return_value = QueryResponse()

    output = command.execute(client, None)

    expected_request = QueryRequest(
        queries=[Query(namePrefix='/some_path')])
    client.query.assert_called_once_with(expected_request)
    self.assertEqual('total 0\n', output)
def _get_signal_token(self, action):
    """Retrieve signal for a specific action from the master.

    Args:
        action: The action to get signal for.

    Returns:
        The signal token if found, otherwise None.
    """
    signal_name = Name(workflow=self._workflow,
                       instance=self._instance,
                       signal=Signal.action_to_string(action))
    signal_query = Query()
    signal_query.namePrefix = signal_name.get_signal_token_name()
    request = QueryRequest(queries=[signal_query])

    response = self._client.query(request)
    # One query was sent, so exactly one token list is expected back.
    assert len(response.tokens) == 1
    matches = response.tokens[0]
    if matches:
        # The full token name was used as the prefix; more than one match
        # is treated as a programming error.
        assert len(matches) == 1
        return matches[0]
    return None
def test_query(self):
    # Two queries in one request: a directory prefix capped at 10 tokens
    # and a narrower token-name prefix capped at 100.
    dir_query = Query()
    dir_query.namePrefix = '/some_dir'
    dir_query.maxTokens = 10

    token_query = Query()
    token_query.namePrefix = '/some_dir/some_token_0'
    token_query.maxTokens = 100

    request = QueryRequest()
    request.queries = [dir_query, token_query]

    transaction = QueryTransaction()
    transaction.prepare(request)
    response = transaction.commit(self._trie,
                                  self._get_blessed_version(),
                                  self._store)

    # One result list per query, in request order.
    self.assertEqual(2, len(response.tokens))
    dir_tokens, prefixed_tokens = response.tokens[0], response.tokens[1]

    self.assertEqual(10, len(dir_tokens))
    for token in dir_tokens:
        self.assertTrue(token.name.startswith('/some_dir'))
        self.assertEqual(9, token.priority)

    self.assertEqual(11, len(prefixed_tokens))
    for token in prefixed_tokens:
        self.assertTrue(token.name.startswith('/some_dir/some_token_0'))
def _has_no_runnable_jobs(self, workflow, instance):
    """Check if the workflow instance does not contain runnable jobs.

    Args:
        workflow: The name of the workflow to check.
        instance: The workflow instance to check.

    Returns:
        True if we are certain that the workflow has no runnable jobs.
        Otherwise False.  If there were any errors during communication
        with the master, the return value is False.
    """
    runnable = Name(workflow=workflow,
                    instance=instance,
                    job_state=Name.RUNNABLE_STATE)
    request = QueryRequest(
        queries=[Query(namePrefix=runnable.get_job_state_prefix())])
    try:
        response = self._client.query(request)
    except TokenMasterException:
        LOG.exception('error sending request %s', request)
        return False
    # One query was sent, so exactly one token list is expected back.
    assert len(response.tokens) == 1
    # An empty list means nothing is in the RUNNABLE state.
    return not response.tokens[0]
def _get_tokens(prefix, recursive, client):
    """Get tokens for a given name prefix.

    Args:
        prefix: The token name prefix to match.
        recursive: If False, only a token whose name fully matches the
            prefix is retrieved (at most one).  Otherwise, all tokens with
            names starting with the prefix are retrieved.
        client: The client to use when communicating with the master.

    Returns:
        List of tokens matching a given prefix.
    """
    response = client.query(
        QueryRequest(queries=[Query(namePrefix=prefix)]))
    if not response.tokens:
        return []
    # One query was sent, so exactly one token list is expected back.
    assert len(response.tokens) == 1
    candidates = response.tokens[0]
    if recursive:
        # Every token returned for the prefix qualifies.
        return list(candidates)
    # Non-recursive: stop at the first token whose name is exactly the
    # prefix.
    for token in candidates:
        if token.name == prefix:
            return [token]
    return []