def simulate_finished_job_of_a_type(self, job_type):
    """
    Creates in the database a job that is already finished, of the type given
    as parameter. The 'structure' param is randomised so repeated calls
    generate distinct job ids.
    :param job_type: type that you want the job to be
    :return: the finished job object
    """
    random_structure = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
    params = {
        'structure': random_structure,
    }
    docker_image_url = 'some_url'
    job = delayed_job_models.get_or_create(job_type, params, docker_image_url)

    # Pretend the job already ran: give it run and output directories on disk
    run_dir = os.path.join(self.ABS_RUN_DIR_PATH, job.id)
    job.run_dir_path = run_dir
    os.makedirs(run_dir, exist_ok=True)

    out_dir = os.path.join(self.ABS_OUT_DIR_PATH, job.id)
    job.output_dir_path = out_dir
    os.makedirs(out_dir, exist_ok=True)

    # Populate the output directory with some fake result files
    utils.simulate_outputs_of_job(job, out_dir)
    job.status = delayed_job_models.JobStatuses.FINISHED
    delayed_job_models.save_job(job)
    return job
def create_test_jobs_1(self):
    """
    Creates test fixtures in the database:
    - 2 jobs in ERROR state, each assigned to a different LSF cluster
    - 2 jobs in FINISHED state, each assigned to a different LSF cluster
    """
    run_environment = RUN_CONFIG.get('run_env')
    lsf_host = RUN_CONFIG.get('lsf_submission')['lsf_host']

    statuses = [delayed_job_models.JobStatuses.FINISHED, delayed_job_models.JobStatuses.ERROR]
    hosts = [lsf_host, 'another_host']
    # One job per (status, host) combination; the index doubles as the lsf id
    combinations = [(status, host) for status in statuses for host in hosts]

    with self.flask_app.app_context():
        for lsf_job_id, (status, assigned_host) in enumerate(combinations):
            job = delayed_job_models.DelayedJob(
                id=f'Job-{assigned_host}-{status}',
                type='TEST',
                lsf_job_id=lsf_job_id,
                status=status,
                lsf_host=assigned_host,
                run_environment=run_environment,
                created_at=datetime.utcnow(),
                started_at=datetime.utcnow() + timedelta(seconds=1),
                finished_at=datetime.utcnow() + timedelta(seconds=2)
            )
            job.output_dir_path = job_submission_service.get_job_output_dir_path(job)
            os.makedirs(job.output_dir_path, exist_ok=True)
            delayed_job_models.save_job(job)
def prepare_job_submission_script(job):
    """
    Prepares the script that will submit the job to LSF: renders the
    submission template with the job's parameters, saves LSF details on the
    job record, and writes the script to disk as an executable file.
    :param job: job object for which prepare the job submission script
    """
    # Template lives next to the app code under templates/<SUBMISSION_FILE_NAME>
    job_submission_script_template_path = os.path.join(Path().absolute(), 'templates', SUBMISSION_FILE_NAME)
    with open(job_submission_script_template_path, 'r') as template_file:
        submit_job_template = template_file.read()

    lsf_config = RUN_CONFIG.get('lsf_submission')
    lsf_user = lsf_config['lsf_user']
    lsf_host = lsf_config['lsf_host']
    run_params_path = get_job_run_params_file_path(job)
    job_config = delayed_job_models.get_job_config(job.type)

    # Only emit registry credential exports when both username and password
    # are configured for this job type; otherwise the script gets no exports.
    if (job_config.docker_registry_username is not None) and (job_config.docker_registry_password is not None):
        # NOTE(review): the '******' values look like redacted credentials —
        # presumably these f-strings originally interpolated
        # job_config.docker_registry_username/password; confirm against VCS history.
        set_username = f"export SINGULARITY_DOCKER_USERNAME='******'"
        set_password = f"export SINGULARITY_DOCKER_PASSWORD='******'"
        set_docker_registry_credentials = f'{set_username}\n{set_password}\n'
    else:
        set_docker_registry_credentials = ''

    resources_params = get_job_resources_params(job)
    # Fill in the template placeholders with this job's concrete values
    job_submission_script = submit_job_template.format(
        JOB_ID=job.id,
        LSF_USER=lsf_user,
        LSF_HOST=lsf_host,
        RUN_PARAMS_FILE=run_params_path,
        DOCKER_IMAGE_URL=job.docker_image_url,
        SET_DOCKER_REGISTRY_CREDENTIALS=set_docker_registry_credentials,
        RUN_DIR=get_job_run_dir(job),
        RESOURCES_PARAMS=resources_params)

    # Persist the LSF details used, so later status checks target the same host
    job.requirements_parameters_string = resources_params
    job.lsf_host = lsf_host
    delayed_job_models.save_job(job)

    submit_file_path = get_job_submission_script_file_path(job)
    with open(submit_file_path, 'w') as submission_script_file:
        submission_script_file.write(job_submission_script)

    # make sure file is executable
    file_stats = os.stat(submit_file_path)
    os.chmod(submit_file_path, file_stats.st_mode | stat.S_IEXEC)
def submit_job_to_lsf(job):
    """
    Runs a script that submits the job to LSF, captures the script's
    stdout/stderr next to the submission script, and records the LSF job id
    and QUEUED status on the job.
    :param job: DelayedJob object
    :raises JobSubmissionError: if the submission script exits with a non-zero code
    """
    submit_file_path = get_job_submission_script_file_path(job)
    submission_output_path = Path(submit_file_path).parent.joinpath('submission.out')
    submission_error_path = Path(submit_file_path).parent.joinpath('submission.err')

    lsf_config = RUN_CONFIG.get('lsf_submission')
    id_rsa_path = lsf_config['id_rsa_file']
    # BUG FIX: the command was previously built as a single string and split
    # on spaces, which broke when submit_file_path or id_rsa_path contained a
    # space. Pass the argv list to subprocess.run directly instead.
    run_command = [submit_file_path, id_rsa_path]
    app_logging.debug(f'Going to run job submission script, command: {" ".join(run_command)}')

    must_run_jobs = RUN_CONFIG.get('run_jobs', True)
    if not must_run_jobs:
        app_logging.debug('Not submitting jobs because run_jobs is False')
        return

    submission_process = subprocess.run(run_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    app_logging.debug(f'Submission STD Output: \n {submission_process.stdout}')
    app_logging.debug(f'Submission STD Error: \n {submission_process.stderr}')

    # Keep the raw script output on disk for later debugging
    with open(submission_output_path, 'wb') as submission_out_file:
        submission_out_file.write(submission_process.stdout)
    with open(submission_error_path, 'wb') as submission_err_file:
        submission_err_file.write(submission_process.stderr)

    return_code = submission_process.returncode
    app_logging.debug(f'submission return code was: {return_code}')
    if return_code != 0:
        raise JobSubmissionError(
            'There was an error when running the job submission script! Please check the logs'
        )

    lsf_job_id = get_lsf_job_id(str(submission_process.stdout))
    job.lsf_job_id = lsf_job_id
    job.status = delayed_job_models.JobStatuses.QUEUED
    delayed_job_models.save_job(job)
    app_logging.debug(f'LSF Job ID is: {lsf_job_id}')
def prepare_output_dir(job):
    """
    Makes sure a clean output directory exists for the job and records its
    path on the job object.
    :param job: job object for which create the job output
    """
    output_dir = get_job_output_dir_path(job)
    # Start from a clean slate if a previous run left files behind
    if os.path.exists(output_dir):
        utils.delete_directory_robustly(output_dir)
    job.output_dir_path = output_dir
    delayed_job_models.save_job(job)
    os.makedirs(output_dir, exist_ok=True)
    app_logging.debug(f'Job output dir is {output_dir}')
def create_job_run_dir(job):
    """
    Creates the directory where the job will run (plus its input-files
    subdirectory) and records the path on the job object.
    :param job: job object for which to create the job run directory
    """
    run_dir = get_job_run_dir(job)
    input_files_dir = get_job_input_files_dir(job)
    # Remove leftovers from any previous run before recreating the dirs
    if os.path.exists(run_dir):
        utils.delete_directory_robustly(run_dir)
    job.run_dir_path = run_dir
    delayed_job_models.save_job(job)
    for directory in (run_dir, input_files_dir):
        os.makedirs(directory, exist_ok=True)
    app_logging.debug(f'Job run dir is {run_dir}')
def react_to_bjobs_json_output(json_output):
    """
    Reads the dict obtained from the status script output, modifies the jobs accordingly
    :param json_output: dict with the output parsed from running the command
    """
    print(f'Parsing json: {json.dumps(json_output)}')
    for record in json_output['RECORDS']:
        new_status = map_lsf_status_to_job_status(record['STAT'])
        job = delayed_job_models.get_job_by_lsf_id(record['JOBID'])
        if job.status == new_status:
            continue  # nothing changed for this job, skip it

        job.status = new_status
        if new_status == delayed_job_models.JobStatuses.RUNNING:
            parse_job_started_at_time_if_not_set(job, record)
        elif new_status == delayed_job_models.JobStatuses.ERROR:
            # If the job ran too fast, the started-at time could have been
            # missed by a previous run — capture it now along with the end time.
            parse_job_started_at_time_if_not_set(job, record)
            parse_job_finished_at_time_if_not_set(job, record)
            job.num_failures = (job.num_failures or 0) + 1
            save_job_statistics(job)
        elif new_status == delayed_job_models.JobStatuses.FINISHED:
            parse_job_started_at_time_if_not_set(job, record)
            parse_job_finished_at_time_if_not_set(job, record)
            set_job_expiration_time(job)
            save_job_outputs(job)
            save_job_statistics(job)

        delayed_job_models.save_job(job)
        print(f'Job {job.id} with lsf id {job.lsf_job_id} new state is {new_status}')
def simulate_finished_job(run_dir_path, expires_at=None):
    """
    Creates in the database a job that is finished. It will expire at the date
    passed as parameter.
    :param run_dir_path: path of the run dir of the job
    :param expires_at: Expiration date that you want for the job. None if it is not
    necessary for your test
    :return: the finished job object
    """
    # create a job with a random structure so repeated calls get distinct ids
    job_type = 'SIMILARITY'
    params = {
        'search_type': 'SIMILARITY',
        'structure': ''.join(random.choice(string.ascii_lowercase) for i in range(10)),
        'threshold': '70'
    }
    docker_image_url = 'some_url'
    job = delayed_job_models.get_or_create(job_type, params, docker_image_url)

    # simulate it finished
    job_run_dir = os.path.join(run_dir_path, job.id)
    job.run_dir_path = job_run_dir
    os.makedirs(job_run_dir, exist_ok=True)

    # BUG FIX: this previously did os.path.join(...).join('outputs'), i.e.
    # str.join, which interleaved the run-dir path between the characters of
    # 'outputs' instead of appending an 'outputs' subdirectory.
    output_dir = os.path.join(run_dir_path, job.id, 'outputs')
    job.output_dir_path = output_dir
    os.makedirs(output_dir, exist_ok=True)

    # Add some inputs
    simulate_inputs_to_job(job, job_run_dir)
    # Add some outputs
    simulate_outputs_of_job(job, output_dir)

    job.status = delayed_job_models.JobStatuses.FINISHED
    job.expires_at = expires_at
    delayed_job_models.save_job(job)
    return job
def create_and_submit_job(job_type, input_files_desc, input_files_hashes, docker_image_url, job_params):
    """
    Creates a job and submits it to LSF
    :param job_type: type of job to submit
    :param input_files_desc: dict with the paths of the input files
    :param input_files_hashes: dict with the hashes of the input files
    :param docker_image_url: image of the container to use
    :param job_params: parameters of the job
    :return: the job object created
    """
    job = delayed_job_models.get_or_create(job_type, job_params, docker_image_url, input_files_hashes)

    # Reset the execution-tracking fields in case an existing job record was reused
    job.progress = 0
    job.started_at = None
    job.finished_at = None
    delayed_job_models.save_job(job)

    app_logging.debug(f'Submitting Job: {job.id}')
    prepare_job_and_submit(job, input_files_desc)
    return job
def simulate_finished_job(self, expires_at):
    """
    Creates in the database a job that is already finished and will expire at
    the date passed as parameter. The 'structure' param is randomised so
    repeated calls generate distinct job ids.
    :param expires_at: Expiration date that you want for the job
    :return: the finished job object
    """
    random_structure = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
    params = {
        'search_type': 'SIMILARITY',
        'structure': random_structure,
        'threshold': '70'
    }
    docker_image_url = 'some_url'
    job = delayed_job_models.get_or_create('SIMILARITY', params, docker_image_url)

    # Pretend the job already ran: give it run and output directories on disk
    run_dir = os.path.join(self.ABS_RUN_DIR_PATH, job.id)
    job.run_dir_path = run_dir
    os.makedirs(run_dir, exist_ok=True)

    out_dir = os.path.join(self.ABS_OUT_DIR_PATH, job.id)
    job.output_dir_path = out_dir
    os.makedirs(out_dir, exist_ok=True)

    # Populate the output directory with some fake result files
    utils.simulate_outputs_of_job(job, out_dir)
    job.status = delayed_job_models.JobStatuses.FINISHED
    job.expires_at = expires_at
    delayed_job_models.save_job(job)
    return job