Example #1
def test_calculate_hash_of_dir(sample_workflow_workspace):
    """Test calculate_hash_of_dir."""
    non_existing_dir_hash = calculate_hash_of_dir('a/b/c')
    assert non_existing_dir_hash == -1
    sample_workflow_workspace_path = next(sample_workflow_workspace('sample'))
    dir_hash = calculate_hash_of_dir(sample_workflow_workspace_path)
    assert dir_hash == '8d287a3e2240b1762862d485a424363c'
    include_only_path = os.path.join(sample_workflow_workspace_path, 'code',
                                     'worldpopulation.ipynb')
    hash_of_single_file = calculate_hash_of_dir(sample_workflow_workspace_path,
                                                [include_only_path])
    assert hash_of_single_file == '18ce945e21ab4db472525abe1e0f8080'
    empty_dir_hash = calculate_hash_of_dir(sample_workflow_workspace_path, [])
    md5_hash = md5()
    assert empty_dir_hash == md5_hash.hexdigest()
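The assertions above pin down the behaviour of calculate_hash_of_dir (a REANA utility): a non-existent directory yields -1, the whole workspace is hashed by default, a second argument restricts hashing to the listed files, and an empty list therefore reproduces the digest of a fresh MD5 object. A minimal sketch under those assumptions follows; the real helper's traversal order and error handling may differ.

import os
from hashlib import md5


def calculate_hash_of_dir_sketch(directory, include_only_list=None):
    """Hypothetical sketch: MD5 digest over file contents of a directory.

    Mirrors the behaviour exercised by the test above; it is not the real
    ``calculate_hash_of_dir`` implementation.
    """
    if not os.path.isdir(directory):
        return -1  # matches the non-existing directory assertion
    dir_hash = md5()
    for root, _dirs, files in sorted(os.walk(directory)):
        for filename in sorted(files):
            file_path = os.path.join(root, filename)
            # Skip files outside the include list when one is given;
            # an empty list therefore hashes nothing at all.
            if include_only_list is not None and file_path not in include_only_list:
                continue
            with open(file_path, 'rb') as f:
                dir_hash.update(f.read())
    return dir_hash.hexdigest()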
Example #2
def test_calculate_hash_of_dir(sample_workflow_workspace):
    """Test calculate_hash_of_dir."""
    non_existing_dir_hash = calculate_hash_of_dir("a/b/c")
    assert non_existing_dir_hash == -1
    sample_workflow_workspace_path = next(sample_workflow_workspace("sample"))
    dir_hash = calculate_hash_of_dir(sample_workflow_workspace_path)
    assert dir_hash == "8d287a3e2240b1762862d485a424363c"
    include_only_path = os.path.join(sample_workflow_workspace_path, "code",
                                     "worldpopulation.ipynb")
    hash_of_single_file = calculate_hash_of_dir(sample_workflow_workspace_path,
                                                [include_only_path])
    assert hash_of_single_file == "18ce945e21ab4db472525abe1e0f8080"
    empty_dir_hash = calculate_hash_of_dir(sample_workflow_workspace_path, [])
    md5_hash = md5()
    assert empty_dir_hash == md5_hash.hexdigest()
Example #3
def _update_job_cache(msg):
    """Update caching information for finished job."""
    cached_job = Session.query(JobCache).filter_by(
        job_id=msg['caching_info'].get('job_id')).first()

    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg['caching_info'].get('workflow_workspace'))
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        return
    cmd = msg['caching_info']['job_spec']['cmd']
    # Drop the leading 'cd <workspace>' segment of the command (to be refactored)
    clean_cmd = ';'.join(cmd.split(';')[1:])
    msg['caching_info']['job_spec']['cmd'] = clean_cmd

    if 'workflow_workspace' in msg['caching_info']['job_spec']:
        del msg['caching_info']['job_spec']['workflow_workspace']
    input_hash = calculate_job_input_hash(msg['caching_info']['job_spec'],
                                          msg['caching_info']['workflow_json'])
    workspace_hash = calculate_hash_of_dir(
        msg['caching_info'].get('workflow_workspace'), input_files)
    if workspace_hash == -1:
        return

    cached_job.parameters = input_hash
    cached_job.result_path = msg['caching_info'].get('result_path')
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
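_update_job_cache relies on two further helpers: calculate_file_access_time, which maps workspace files to their last access times so that only files the job actually touched count as inputs, and calculate_job_input_hash, which fingerprints the job specification together with the workflow definition. Plausible sketches of both are given below, assuming JSON-serialisable inputs; everything beyond what the function above visibly uses is an assumption, not the actual REANA implementation.

import json
import os
from hashlib import md5


def calculate_file_access_time_sketch(workflow_workspace):
    """Hypothetical sketch: map every file in the workspace to its atime."""
    access_times = {}
    for root, _dirs, files in os.walk(workflow_workspace):
        for filename in files:
            file_path = os.path.join(root, filename)
            access_times[file_path] = os.path.getatime(file_path)
    return access_times


def calculate_job_input_hash_sketch(job_spec, workflow_json):
    """Hypothetical sketch: MD5 over the serialised job spec and workflow."""
    # Note that _update_job_cache above deletes 'workflow_workspace' from
    # job_spec before hashing, so the digest stays stable across workspaces.
    job_md5_buffer = md5()
    job_md5_buffer.update(json.dumps(job_spec, sort_keys=True).encode('utf-8'))
    job_md5_buffer.update(json.dumps(workflow_json, sort_keys=True).encode('utf-8'))
    return job_md5_buffer.hexdigest()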
Example #4
def _update_job_cache(msg):
    """Update caching information for finished job."""
    cached_job = (Session.query(JobCache).filter_by(
        job_id=msg["caching_info"].get("job_id")).first())

    input_files = []
    if cached_job:
        file_access_times = calculate_file_access_time(
            msg["caching_info"].get("workflow_workspace"))
        for filename in cached_job.access_times:
            if filename in file_access_times:
                input_files.append(filename)
    else:
        return
    cmd = msg["caching_info"]["job_spec"]["cmd"]
    # Drop the leading 'cd <workspace>' segment of the command (to be refactored)
    clean_cmd = ";".join(cmd.split(";")[1:])
    msg["caching_info"]["job_spec"]["cmd"] = clean_cmd

    if "workflow_workspace" in msg["caching_info"]["job_spec"]:
        del msg["caching_info"]["job_spec"]["workflow_workspace"]
    input_hash = calculate_job_input_hash(msg["caching_info"]["job_spec"],
                                          msg["caching_info"]["workflow_json"])
    workspace_hash = calculate_hash_of_dir(
        msg["caching_info"].get("workflow_workspace"), input_files)
    if workspace_hash == -1:
        return

    cached_job.parameters = input_hash
    cached_job.result_path = msg["caching_info"].get("result_path")
    cached_job.workspace_hash = workspace_hash
    Session.add(cached_job)
Example #5
def test_calculate_hash_of_dir(sample_workflow_workspace):  # noqa: F811
    """Test calculate_hash_of_dir."""
    non_existing_dir_hash = calculate_hash_of_dir("a/b/c")
    assert non_existing_dir_hash == -1

    test_workspace_path = pkg_resources.resource_filename(
        "pytest_reana", "test_workspace")
    sample_workflow_workspace_path = next(sample_workflow_workspace("sample"))
    shutil.rmtree(sample_workflow_workspace_path)
    shutil.copytree(test_workspace_path, sample_workflow_workspace_path)
    dir_hash = calculate_hash_of_dir(sample_workflow_workspace_path)
    assert dir_hash == "cb2669b4d7651aa717b6952fce85575f"
    include_only_path = os.path.join(sample_workflow_workspace_path, "code",
                                     "worldpopulation.ipynb")
    hash_of_single_file = calculate_hash_of_dir(sample_workflow_workspace_path,
                                                [include_only_path])
    assert hash_of_single_file == "18ce945e21ab4db472525abe1e0f8080"
    empty_dir_hash = calculate_hash_of_dir(sample_workflow_workspace_path, [])
    md5_hash = md5()
    assert empty_dir_hash == md5_hash.hexdigest()
    shutil.rmtree(sample_workflow_workspace_path)
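The sample_workflow_workspace fixture used above comes from the pytest_reana plugin and is treated as a factory returning a generator (hence the next(...) call). A hypothetical stand-in with the same calling convention, for running a variant of this test outside REANA, might look like the sketch below; only the factory/generator shape is taken from the tests above, the rest is an assumption.

import os
import shutil
import tempfile

import pytest


@pytest.fixture
def sample_workflow_workspace():
    """Hypothetical stand-in: yield a factory for throw-away workspaces."""
    created = []

    def _workspace(workflow_name):
        base = tempfile.mkdtemp()
        path = os.path.join(base, workflow_name)
        os.makedirs(os.path.join(path, "code"))
        created.append(base)
        yield path  # tests call next() on this generator, as above

    yield _workspace
    for base in created:
        shutil.rmtree(base, ignore_errors=True)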
Example #6
def job_is_cached(job_spec, workflow_json, workflow_workspace):
    """Check if job result exists in the cache."""
    input_hash = calculate_job_input_hash(job_spec, workflow_json)
    workspace_hash = calculate_hash_of_dir(workflow_workspace)
    if workspace_hash == -1:
        return None

    cached_job = Session.query(JobCache).filter_by(
        parameters=input_hash,
        workspace_hash=workspace_hash).first()
    if cached_job:
        return {'result_path': cached_job.result_path,
                'job_id': cached_job.job_id}
    else:
        return None
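Taken together, job_is_cached and _update_job_cache describe a simple result cache: look the job up before submitting it, and refresh its cache entry once it finishes. A hedged sketch of a caller wiring the two together is shown below; submit_job is a placeholder, not a REANA API.

def submit_job(job_spec):
    """Placeholder: stand-in for the component that actually runs the job."""


def run_job_with_cache_sketch(job_spec, workflow_json, workflow_workspace, msg):
    """Hypothetical caller combining job_is_cached and _update_job_cache."""
    cached = job_is_cached(job_spec, workflow_json, workflow_workspace)
    if cached:
        # Reuse the previous result instead of re-running the job.
        return cached['result_path']

    submit_job(job_spec)

    # Once the job has finished, refresh its cache entry so that the next
    # identical submission in an identical workspace can be skipped.
    _update_job_cache(msg)
    return msg['caching_info'].get('result_path')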