def test_restart_finishes_job(self):
    """Check that a job killed out-of-band is reported complete by a new app.

    Submits a long-running job, polls for its external id, kills it through
    a DRMAA session, then starts a second app and expects exactly one status
    update message whose status is ``"complete"``.

    NOTE(review): block nesting was reconstructed from a flattened source;
    confirm the kill is meant to happen after the first app context exits.
    """
    test = "restart_finishes"
    with self._setup_app_provider(test) as app_provider:
        job_id = '12345'

        with app_provider.new_app() as app:
            manager = app.only_manager
            job_info = {
                'job_id': job_id,
                'command_line': 'sleep 1000',
                'setup': True,
            }
            submit_job(manager, job_info)

            # Poll up to 10 times (50ms apart) for the external id.
            external_id = None
            for _ in range(10):
                time.sleep(.05)
                # TODO: unfortunate breaking of abstractions here.
                external_id = manager._proxied_manager._external_id(job_id)
                if external_id:
                    break
            if external_id is None:
                # Explicit raise so the check survives ``python -O``.
                raise AssertionError("Test failed, couldn't get external id for job id.")

        drmaa_session = DrmaaSessionFactory().get()
        try:
            drmaa_session.kill(external_id)
        finally:
            # Always release the session, even when the kill itself fails.
            drmaa_session.close()

        consumer = self._status_update_consumer(test)
        consumer.start()

        with app_provider.new_app() as app:
            consumer.wait_for_messages()

        consumer.join()
        assert len(consumer.messages) == 1, len(consumer.messages)
        assert consumer.messages[0]["status"] == "complete"
def test_staging_failure_fires_failed_status(self):
    """Expect one ``"failed"`` status update when the staging description is invalid.

    NOTE(review): block nesting was reconstructed from a flattened source;
    confirm the sleep belongs inside the app context.
    """
    test = "stating_failure_fires_failed"
    with self._setup_app_provider(test, manager_type="queued_python") as app_provider:
        job_id = '12345'

        status_consumer = self._status_update_consumer(test)
        status_consumer.start()

        with app_provider.new_app() as app:
            # Invalid staging description...
            bad_staging = {"setup": [{"moo": "cow"}]}
            job_description = {
                'job_id': job_id,
                'command_line': 'sleep 1000',
                'setup': True,
                'remote_staging': bad_staging,
            }
            # TODO: redo this with submit_job coming through MQ for test consistency.
            submit_job(app.only_manager, job_description)
            import time
            time.sleep(2)

        status_consumer.wait_for_messages()
        status_consumer.join()

        received = status_consumer.messages
        assert len(received) == 1, len(received)
        assert received[0]["status"] == "failed"
def test_setup_failure_fires_failed_status(self):
    """Expect one ``"failed"`` status update when job setup cannot succeed.

    A regular file is written at the path formed from the staging directory
    and the job id before the job is submitted.

    NOTE(review): block nesting was reconstructed from a flattened source;
    confirm the submit is meant to run inside the app context.
    """
    test = "stating_failure_fires_failed"
    with self._setup_app_provider(test, manager_type="queued_python") as app_provider:
        job_id = '12345'

        status_consumer = self._status_update_consumer(test)
        status_consumer.start()

        with app_provider.new_app() as app:
            manager = app.only_manager
            job_description = {
                'job_id': job_id,
                'command_line': 'sleep 1000',
                'setup': True,
            }

            blocker_path = os.path.join(app_provider.staging_directory, job_id)
            with open(blocker_path, "w") as blocker:
                blocker.write("File where staging directory should be, setup should fail now.")

            # TODO: redo this with submit_job coming through MQ for test consistency,
            # would eliminate the need for the exception catch as well.
            try:
                submit_job(manager, job_description)
            except Exception:
                pass

        status_consumer.wait_for_messages()
        status_consumer.join()

        received = status_consumer.messages
        assert len(received) == 1, len(received)
        assert received[0]["status"] == "failed"
def __process_setup_message(manager, body, message):
    """Submit the job described by a setup-queue message, then acknowledge it.

    Any ``Exception`` raised while extracting the job id or submitting the
    job is logged and swallowed; the message is acknowledged either way.

    :param manager: manager passed through to ``submit_job``.
    :param body: message payload the job id is read from.
    :param message: message object; ``ack()`` is called on it.
    """
    # Bind up front: if extracting the id raises, the except handler still
    # needs a value to log instead of failing on an unbound local (which
    # would also skip the ack below).
    job_id = None
    try:
        job_id = __client_job_id_from_body(body)
        if not job_id:
            # Explicit raise rather than assert so the check survives ``-O``.
            raise ValueError('Could not parse job id from body: %s' % body)
        log.debug("Received message in setup queue for Pulsar job id: %s", job_id)
        manager_endpoint_util.submit_job(manager, body)
    except Exception:
        log.exception("Failed to setup job %s obtained via message queue.", job_id or 'unknown')
    message.ack()
def run_server_for_job(args):
    """Build a manager from ``args``, run one job to completion, then shut down.

    Any failure while loading the job config, submitting, or waiting is
    logged rather than propagated; the app is shut down in every case.
    """
    manager, app = manager_from_args(PulsarManagerConfigBuilder(args))
    try:
        config = _load_job_config(args)
        submit_job(manager, config)
        wait_for_job(manager, config)
    except BaseException:
        log.exception("Failure submitting or waiting on job.")
    finally:
        app.shutdown()
def __process_setup_message(manager, body, message):
    """Submit the job described by a setup-queue message, then acknowledge it.

    Messages that are already acknowledged are skipped entirely. Otherwise
    any ``Exception`` raised while extracting the job id or submitting the
    job is logged and swallowed, and the message is acknowledged.

    :param manager: manager passed through to ``submit_job``.
    :param body: message payload the job id is read from.
    :param message: message object; ``acknowledged`` is read and ``ack()``
        is called on it.
    """
    if message.acknowledged:
        log.info("Message is already acknowledged (by an upstream callback?), Pulsar will not handle this message")
        return

    # Bind up front: if extracting the id raises, the except handler still
    # needs a value to log instead of failing on an unbound local (which
    # would also skip the ack below).
    job_id = None
    try:
        job_id = __client_job_id_from_body(body)
        if not job_id:
            # Explicit raise rather than assert so the check survives ``-O``.
            raise ValueError('Could not parse job id from body: %s' % body)
        log.debug("Received message in setup queue for Pulsar job id: %s", job_id)
        manager_endpoint_util.submit_job(manager, body)
    except Exception:
        log.exception("Failed to setup job %s obtained via message queue.", job_id or 'unknown')
    message.ack()
def submit(manager, job_id, command_line, params='{}', dependencies_description='null', setup_params='{}', remote_staging='{}', env='[]'):
    """Decode the JSON-encoded arguments and submit the resulting job config.

    ``params``, ``setup_params``, ``dependencies_description``, ``env`` and
    ``remote_staging`` are JSON strings; ``job_id`` and ``command_line`` are
    passed through unchanged.
    """
    # Decode in a fixed order so the first malformed argument is the one
    # that raises.
    raw_values = (params, setup_params, dependencies_description, env, remote_staging)
    parsed_params, parsed_setup, parsed_deps, parsed_env, parsed_staging = (
        loads(raw) for raw in raw_values
    )
    job_config = {
        'job_id': job_id,
        'command_line': command_line,
        'setup_params': parsed_setup,
        'submit_params': parsed_params,
        'dependencies_description': parsed_deps,
        'env': parsed_env,
        'remote_staging': parsed_staging,
    }
    submit_job(manager, job_config)
def run_task():
    """Run one task's job with a freshly built manager and report its state.

    Reads ``task``, ``driver`` and ``self`` from the enclosing scope. The
    task data supplies the manager options and the job config; status
    updates are sent after submission (``TASK_RUNNING``) and after the wait
    returns (``TASK_FINISHED``). Failures are logged and re-raised.
    """
    try:
        log.info("Running task %s", task.task_id.value)
        task_data = from_base64_json(task.data)
        manager_options = task_data["manager"]
        config_builder = PulsarManagerConfigBuilder(**manager_options)
        manager, pulsar_app = manager_from_args(config_builder)
        try:
            job_config = task_data["job"]
            submit_job(manager, job_config)
            self.__task_update(driver, task, mesos_pb2.TASK_RUNNING)
            wait_for_job(manager, job_config)
            self.__task_update(driver, task, mesos_pb2.TASK_FINISHED)
        finally:
            # Shut the app down on failure too, not only on the happy path.
            pulsar_app.shutdown()
    except Exception:
        log.exception("Failed to run, update, or monitor task %s" % task)
        raise
def main(args=None):
    """Parse command-line options, run a single job to completion, and shut down.

    Accepts ``--file`` and ``--base64`` plus whatever options
    ``PulsarManagerConfigBuilder.populate_options`` registers. Failures
    while loading, submitting, or waiting are logged rather than raised.
    """
    parser = ArgumentParser(description=DESCRIPTION)
    for flag in ("--file", "--base64"):
        parser.add_argument(flag, default=None)
    PulsarManagerConfigBuilder.populate_options(parser)
    args = parser.parse_args(args)

    manager, app = manager_from_args(PulsarManagerConfigBuilder(args))
    try:
        config = __load_job_config(args)
        submit_job(manager, config)
        wait_for_job(manager, config)
    except BaseException:
        log.exception("Failure submitting or waiting on job.")
    finally:
        app.shutdown()
def submit(manager, job_id, command_line, params='{}', dependencies_description='null', setup_params='{}', remote_staging='{}', env='[]', submit_extras='{}'):
    """Decode the JSON-encoded arguments and submit the resulting job config.

    ``params``, ``setup_params``, ``dependencies_description``, ``env``,
    ``remote_staging`` and ``submit_extras`` are JSON strings. The decoded
    ``submit_extras`` is merged into the config last, so its keys override
    the standard ones.
    """
    # Decode in a fixed order so the first malformed argument is the one
    # that raises.
    raw_values = (params, setup_params, dependencies_description, env, remote_staging, submit_extras)
    parsed_params, parsed_setup, parsed_deps, parsed_env, parsed_staging, extras = (
        loads(raw) for raw in raw_values
    )
    job_config = {
        'job_id': job_id,
        'command_line': command_line,
        'setup_params': parsed_setup,
        'submit_params': parsed_params,
        'dependencies_description': parsed_deps,
        'env': parsed_env,
        'remote_staging': parsed_staging,
    }
    job_config.update(extras)
    submit_job(manager, job_config)