def test_calculate_job_input_hash():
    """Test calculate_job_input_hash."""
    workflow_json = {}
    spec_with_workspace = {"workflow_workspace": "test"}
    spec_without_workspace = {}
    # The hash must ignore the workspace path, so both specs collapse
    # to the same value.
    hash_with = calculate_job_input_hash(spec_with_workspace, workflow_json)
    hash_without = calculate_job_input_hash(spec_without_workspace,
                                            workflow_json)
    assert hash_with == hash_without
def _update_job_cache(msg):
    """Update caching information for finished job.

    Looks up the ``JobCache`` row matching ``msg['caching_info']['job_id']``
    and, if found, refreshes its ``parameters`` (input hash),
    ``result_path`` and ``workspace_hash`` columns, then re-adds the row to
    the session.  Returns ``None`` in all cases; the useful work is the
    session mutation.

    NOTE(review): this function mutates ``msg['caching_info']['job_spec']``
    in place (rewrites ``cmd`` and deletes ``workflow_workspace``) —
    callers holding a reference to that dict will observe the changes.

    :param msg: message dict; assumes it carries a ``caching_info`` mapping
        with ``job_id``, ``workflow_workspace``, ``job_spec`` (containing a
        ``cmd`` string), ``workflow_json`` and ``result_path`` keys —
        TODO confirm against the producer of these messages.
    """
    # Find the cache row for this job; bail out below if none exists.
    cached_job = Session.query(JobCache).filter_by(
        job_id=msg['caching_info'].get('job_id')).first()
    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg['caching_info'].get('workflow_workspace'))
        # Keep only the previously recorded files that still show up in the
        # workspace's current access-time listing.
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        # No cache entry for this job id — nothing to update.
        return
    cmd = msg['caching_info']['job_spec']['cmd']
    # removes cd to workspace, to be refactored
    clean_cmd = ';'.join(cmd.split(';')[1:])
    msg['caching_info']['job_spec']['cmd'] = clean_cmd
    # The workspace path is machine-specific, so it must not contribute to
    # the input hash.
    if 'workflow_workspace' in msg['caching_info']['job_spec']:
        del msg['caching_info']['job_spec']['workflow_workspace']
    input_hash = calculate_job_input_hash(msg['caching_info']['job_spec'],
                                          msg['caching_info']['workflow_json'])
    workspace_hash = calculate_hash_of_dir(
        msg['caching_info'].get('workflow_workspace'), input_files)
    # calculate_hash_of_dir signals failure with -1; skip the update then.
    if workspace_hash == -1:
        return
    cached_job.parameters = input_hash
    cached_job.result_path = msg['caching_info'].get('result_path')
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
def _update_job_cache(msg):
    """Update caching information for finished job."""
    info = msg["caching_info"]
    cached_job = (Session.query(JobCache)
                  .filter_by(job_id=info.get("job_id"))
                  .first())
    # No cache row for this job id — nothing to update.
    if not cached_job:
        return
    file_access_times = calculate_file_access_time(
        info.get("workflow_workspace"))
    # Retain only the recorded files still present in the workspace listing.
    input_files = [name for name in cached_job.access_times
                   if name in file_access_times]
    # removes cd to workspace, to be refactored
    original_cmd = info["job_spec"]["cmd"]
    info["job_spec"]["cmd"] = ";".join(original_cmd.split(";")[1:])
    # Drop the machine-specific workspace path before hashing the spec.
    info["job_spec"].pop("workflow_workspace", None)
    input_hash = calculate_job_input_hash(info["job_spec"],
                                          info["workflow_json"])
    workspace_hash = calculate_hash_of_dir(
        info.get("workflow_workspace"), input_files)
    # -1 marks a hashing failure; leave the cache row untouched.
    if workspace_hash == -1:
        return
    cached_job.parameters = input_hash
    cached_job.result_path = info.get("result_path")
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
def job_is_cached(job_spec, workflow_json, workflow_workspace):
    """Check if job result exists in the cache."""
    input_hash = calculate_job_input_hash(job_spec, workflow_json)
    workspace_hash = calculate_hash_of_dir(workflow_workspace)
    # -1 marks a hashing failure; treat it as a cache miss.
    if workspace_hash == -1:
        return None
    match = (Session.query(JobCache)
             .filter_by(parameters=input_hash,
                        workspace_hash=workspace_hash)
             .first())
    if not match:
        return None
    return {'result_path': match.result_path,
            'job_id': match.job_id}