Example #1
def test_import_library_require_permissions():
    """Verify library creation (import) is off by default."""
    app = _mock_app()
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")

    library = model.Library(name="my library 1",
                            description="my library description",
                            synopsis="my synopsis")
    root_folder = model.LibraryFolder(name="my library 1",
                                      description='folder description')
    library.root_folder = root_folder
    sa_session.add_all((library, root_folder))
    sa_session.flush()

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory,
                                         app=app) as export_store:
        export_store.export_library(library)

    error_caught = False
    try:
        import_model_store = store.get_import_model_store_for_directory(
            temp_directory, app=app, user=u)
        import_model_store.perform_import()
    except AssertionError:
        # TODO: throw and catch a better exception...
        error_caught = True

    assert error_caught
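
For contrast, a minimal sketch (not part of the original test; it reuses the options shown in the library examples below) of how the same import is permitted once explicit ImportOptions are supplied:

import_options = store.ImportOptions(allow_library_creation=True)
import_model_store = store.get_import_model_store_for_directory(
    temp_directory, app=app, user=u, import_options=import_options)
import_model_store.perform_import()  # succeeds instead of raising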
Example #2
def test_edit_metadata_files():
    app = _mock_app(store_by="uuid")
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    d1 = _create_datasets(sa_session, h, 1, extension="bam")[0]
    sa_session.add_all((h, d1))
    sa_session.flush()
    index = NamedTemporaryFile("w")
    index.write("cool bam index")
    metadata_dict = {
        "bam_index":
        MetadataTempFile.from_JSON({
            "kwds": {},
            "filename": index.name
        })
    }
    d1.metadata.from_JSON_dict(json_dict=metadata_dict)
    assert d1.metadata.bam_index
    assert isinstance(d1.metadata.bam_index, model.MetadataFile)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(
            temp_directory, app=app, for_edit=True,
            strip_metadata_files=False) as export_store:
        export_store.add_dataset(d1)

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)
    sa_session.flush()
    _perform_import_from_directory(temp_directory, app, u, import_history,
                                   store.ImportOptions(allow_edit=True))
Example #3
def test_model_create_context_persist_error_hda():
    work_directory = mkdtemp()
    with open(os.path.join(work_directory, "file1.txt"), "w") as f:
        f.write("hello world\nhello world line 2")
    target = {
        "destination": {
            "type": "hdas",
        },
        "elements": [{
            "error_message": "Failed to download some URL I guess",
        }],
    }
    app = _mock_app(store_by="uuid")
    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(
            temp_directory, serialize_dataset_objects=True) as export_store:
        persist_target_to_export_store(target, export_store, app.object_store,
                                       work_directory)

    import_history = _import_directory_to_history(app, temp_directory,
                                                  work_directory)

    assert len(import_history.datasets) == 1
    imported_hda = import_history.datasets[0]
    assert imported_hda.state == "error"
    assert imported_hda.info == "Failed to download some URL I guess"
Example #4
    def setup_job(self,
                  history,
                  store_directory,
                  include_hidden=False,
                  include_deleted=False,
                  compressed=True):
        """Perform setup for job to export a history into an archive.

        Method generates attribute files for export, sets the corresponding attributes
        in the jeha object, and returns a command line for running the job. The command
        line includes the command, inputs, and options; it does not include the output
        file because it must be set at runtime.
        """
        app = self.app

        # Symlink files on export; on the worker, files will be tarred up in a dereferenced manner.
        with store.DirectoryModelExportStore(
                store_directory, app=app,
                export_files="symlink") as export_store:
            export_store.export_history(history,
                                        include_hidden=include_hidden,
                                        include_deleted=include_deleted)

        #
        # Create and return command line for running tool.
        #
        options = f"--galaxy-version '{VERSION_MAJOR}'"
        if compressed:
            options += " -G"
        return f"{options} {store_directory}"
Example #5
    def setup_job(self, jeha, include_hidden=False, include_deleted=False):
        """ Perform setup for job to export a history into an archive. Method generates
            attribute files for export, sets the corresponding attributes in the jeha
            object, and returns a command line for running the job. The command line
            includes the command, inputs, and options; it does not include the output
            file because it must be set at runtime. """

        app = self.app

        #
        # Create attributes/metadata files for export.
        #
        jeha.dataset.create_extra_files_path()
        temp_output_dir = jeha.dataset.extra_files_path

        history = jeha.history
        history_attrs_filename = os.path.join(temp_output_dir,
                                              ATTRS_FILENAME_HISTORY)
        jeha.history_attrs_filename = history_attrs_filename

        # Symlink files on export; on the worker, files will be tarred up in a dereferenced manner.
        with store.DirectoryModelExportStore(
                temp_output_dir, app=app,
                export_files="symlink") as export_store:
            export_store.export_history(history,
                                        include_hidden=include_hidden,
                                        include_deleted=include_deleted)

        #
        # Create and return command line for running tool.
        #
        options = "--galaxy-version '%s'" % VERSION_MAJOR
        if jeha.compressed:
            options += " -G"
        return "{} {}".format(options, temp_output_dir)
Example #6
def test_import_export_composite_datasets():
    app = _mock_app()
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    d1 = _create_datasets(sa_session, h, 1, extension="html")[0]
    d1.dataset.create_extra_files_path()
    sa_session.add_all((h, d1))
    sa_session.flush()

    primary = NamedTemporaryFile("w")
    primary.write("cool primary file")
    primary.flush()
    app.object_store.update_from_file(d1.dataset,
                                      file_name=primary.name,
                                      create=True,
                                      preserve_symlinks=True)

    composite1 = NamedTemporaryFile("w")
    composite1.write("cool composite file")
    composite1.flush()

    app.object_store.update_from_file(d1.dataset,
                                      extra_dir=os.path.normpath(
                                          os.path.join(d1.extra_files_path,
                                                       "parent_dir")),
                                      alt_name="child_file",
                                      file_name=composite1.name,
                                      create=True,
                                      preserve_symlinks=True)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory,
                                         app=app,
                                         export_files="copy") as export_store:
        export_store.add_dataset(d1)

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)
    sa_session.flush()
    _perform_import_from_directory(temp_directory, app, u, import_history)
    assert len(import_history.datasets) == 1
    import_dataset = import_history.datasets[0]
    root_extra_files_path = import_dataset.extra_files_path
    assert len(os.listdir(root_extra_files_path)) == 1
    assert os.listdir(root_extra_files_path)[0] == "parent_dir"
    composite_sub_dir = os.path.join(root_extra_files_path, "parent_dir")
    child_files = os.listdir(composite_sub_dir)
    assert len(child_files) == 1
    with open(os.path.join(composite_sub_dir, child_files[0]), "r") as f:
        contents = f.read()
        assert contents == "cool composite file"
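
For reference, a hedged variant of the export above (export_files="symlink" is the mode used by the setup_job examples; this block is illustrative, not part of the original test): the store can symlink rather than copy the underlying files.

with store.DirectoryModelExportStore(temp_directory,
                                     app=app,
                                     export_files="symlink") as export_store:
    export_store.add_dataset(d1)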
Example #7
def test_persist_target_hdca():
    work_directory = mkdtemp()
    with open(os.path.join(work_directory, "file1.txt"), "w") as f:
        f.write("hello world\nhello world line 2")
    with open(os.path.join(work_directory, "file2.txt"), "w") as f:
        f.write("file 2 contents")

    target = {
        "destination": {
            "type": "hdca",
        },
        "name":
        "My HDCA",
        "collection_type":
        "list",
        "elements": [{
            "filename": "file1.txt",
            "ext": "txt",
            "dbkey": "hg19",
            "info": "dataset info",
            "name": "my file",
        }, {
            "filename": "file2.txt",
            "ext": "txt",
            "dbkey": "hg18",
            "info": "dataset info 2",
            "name": "my file 2",
        }]
    }

    app = _mock_app(store_by="uuid")
    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(
            temp_directory, serialize_dataset_objects=True) as export_store:
        persist_target_to_export_store(target, export_store, app.object_store,
                                       work_directory)

    import_history = _import_directory_to_history(app, temp_directory,
                                                  work_directory)
    assert len(import_history.dataset_collections) == 1
    assert len(import_history.datasets) == 2

    import_hdca = import_history.dataset_collections[0]
    datasets = import_hdca.dataset_instances
    assert len(datasets) == 2
    dataset0 = datasets[0]
    dataset1 = datasets[1]

    with open(dataset0.file_name, "r") as f:
        assert f.read().startswith("hello world\n")
    with open(dataset1.file_name, "r") as f:
        assert f.read().startswith("file 2 contents")
Example #8
def _setup_simple_export(export_kwds):
    app = _mock_app()

    u, h, d1, d2, j = _setup_simple_cat_job(app)

    sa_session = app.model.context

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory,
                                         app=app,
                                         **export_kwds) as export_store:
        export_store.add_dataset(d1)
        export_store.add_dataset(d2)

    return app, h, temp_directory, import_history
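
A hedged usage sketch of this helper (the export keyword mirrors the "copy" mode used in the composite-dataset example above; the follow-up comment describes the pattern the other tests apply):

app, h, temp_directory, import_history = _setup_simple_export({"export_files": "copy"})
# temp_directory now holds the serialized datasets and can be imported into
# import_history via _perform_import_from_directory, as in the other examples.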
Example #9
def _import_library_target(target, work_directory):
    app = _mock_app(store_by="uuid")
    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(
            temp_directory, app=app,
            serialize_dataset_objects=True) as export_store:
        persist_target_to_export_store(target, export_store, app.object_store,
                                       work_directory)

    u = model.User(email="*****@*****.**", password="******")

    import_options = store.ImportOptions(allow_dataset_object_edit=True,
                                         allow_library_creation=True)
    import_model_store = store.get_import_model_store_for_directory(
        temp_directory, app=app, user=u, import_options=import_options)
    import_model_store.perform_import()

    sa_session = app.model.context
    return sa_session
Example #10
def test_model_create_context_persist_hdas():
    work_directory = mkdtemp()
    with open(os.path.join(work_directory, "file1.txt"), "w") as f:
        f.write("hello world\nhello world line 2")
    target = {
        "destination": {
            "type": "hdas",
        },
        "elements": [{
            "filename": "file1.txt",
            "ext": "txt",
            "dbkey": "hg19",
            "name": "my file",
            "md5": "e5d21b1ea57fc9a31f8ea0110531bf3d",
            "tags": ["name:value"]
        }],
    }
    app = _mock_app()
    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(
            temp_directory, serialize_dataset_objects=True) as export_store:
        persist_target_to_export_store(target, export_store, app.object_store,
                                       work_directory)

    import_history = _import_directory_to_history(app, temp_directory,
                                                  work_directory)

    assert len(import_history.datasets) == 1
    imported_hda = import_history.datasets[0]
    assert imported_hda.ext == "txt"
    assert imported_hda.name == "my file"
    assert imported_hda.metadata.data_lines == 2
    assert len(imported_hda.dataset.hashes) == 1
    assert imported_hda.dataset.hashes[0].hash_value == "e5d21b1ea57fc9a31f8ea0110531bf3d"
    tags = imported_hda.tags
    assert len(tags) == 1
    assert tags[0].value == "value"

    with open(imported_hda.file_name) as f:
        assert f.read().startswith("hello world\n")
Example #11
def test_import_export_library():
    """Test basics of library, library folder, and library dataset import/export."""
    app = _mock_app()
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")

    library = model.Library(name="my library 1",
                            description="my library description",
                            synopsis="my synopsis")
    root_folder = model.LibraryFolder(name="my library 1",
                                      description='folder description')
    library.root_folder = root_folder
    sa_session.add_all((library, root_folder))
    sa_session.flush()

    subfolder = model.LibraryFolder(name="sub folder 1",
                                    description="sub folder")
    root_folder.add_folder(subfolder)
    sa_session.add(subfolder)

    ld = model.LibraryDataset(folder=root_folder,
                              name="my name",
                              info="my library dataset")
    ldda = model.LibraryDatasetDatasetAssociation(create_dataset=True,
                                                  flush=False)
    ld.library_dataset_dataset_association = ldda
    root_folder.add_library_dataset(ld)

    sa_session.add(ld)
    sa_session.add(ldda)

    sa_session.flush()
    assert len(root_folder.datasets) == 1
    assert len(root_folder.folders) == 1

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory,
                                         app=app) as export_store:
        export_store.export_library(library)

    import_model_store = store.get_import_model_store_for_directory(
        temp_directory,
        app=app,
        user=u,
        import_options=store.ImportOptions(allow_library_creation=True))
    import_model_store.perform_import()

    all_libraries = sa_session.query(model.Library).all()
    assert len(all_libraries) == 2, len(all_libraries)
    all_lddas = sa_session.query(model.LibraryDatasetDatasetAssociation).all()
    assert len(all_lddas) == 2, len(all_lddas)

    new_library = [l for l in all_libraries if l.id != library.id][0]
    assert new_library.name == "my library 1"
    assert new_library.description == "my library description"
    assert new_library.synopsis == "my synopsis"

    new_root = new_library.root_folder
    assert new_root
    assert new_root.name == "my library 1"

    assert len(new_root.folders) == 1
    assert len(new_root.datasets) == 1
Example #12
def test_import_export_edit_collection():
    """Test modifying existing collections with imports."""
    app = _mock_app()
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    c1 = model.DatasetCollection(collection_type="list", populated=False)
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")

    sa_session.add(hc1)
    sa_session.add(h)
    sa_session.flush()

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory,
                                         app=app,
                                         for_edit=True) as export_store:
        export_store.add_dataset_collection(hc1)

    # Fabricate edited metadata for the collection...
    collections_metadata_path = os.path.join(temp_directory,
                                             store.ATTRS_FILENAME_COLLECTIONS)
    datasets_metadata_path = os.path.join(temp_directory,
                                          store.ATTRS_FILENAME_DATASETS)
    with open(collections_metadata_path, "r") as f:
        hdcas_metadata = json.load(f)

    assert len(hdcas_metadata) == 1
    hdca_metadata = hdcas_metadata[0]
    assert hdca_metadata
    assert "id" in hdca_metadata
    assert "collection" in hdca_metadata
    collection_metadata = hdca_metadata["collection"]
    assert "populated_state" in collection_metadata
    assert collection_metadata[
        "populated_state"] == model.DatasetCollection.populated_states.NEW

    collection_metadata[
        "populated_state"] = model.DatasetCollection.populated_states.OK

    d1 = model.HistoryDatasetAssociation(extension="txt",
                                         create_dataset=True,
                                         flush=False)
    d1.hid = 1
    d2 = model.HistoryDatasetAssociation(extension="txt",
                                         create_dataset=True,
                                         flush=False)
    d2.hid = 2
    serialization_options = model.SerializationOptions(for_edit=True)
    dataset_list = [
        d1.serialize(app.security, serialization_options),
        d2.serialize(app.security, serialization_options)
    ]

    dc = model.DatasetCollection(
        id=collection_metadata["id"],
        collection_type="list",
        element_count=2,
    )
    dc.populated_state = model.DatasetCollection.populated_states.OK
    dce1 = model.DatasetCollectionElement(
        element=d1,
        element_index=0,
        element_identifier="first",
    )
    dce2 = model.DatasetCollectionElement(
        element=d2,
        element_index=1,
        element_identifier="second",
    )
    dc.elements = [dce1, dce2]
    with open(datasets_metadata_path, "w") as datasets_f:
        json.dump(dataset_list, datasets_f)

    hdca_metadata["collection"] = dc.serialize(app.security,
                                               serialization_options)
    with open(collections_metadata_path, "w") as collections_f:
        json.dump(hdcas_metadata, collections_f)

    _perform_import_from_directory(temp_directory, app, u, import_history,
                                   store.ImportOptions(allow_edit=True))

    sa_session.refresh(c1)
    assert c1.populated_state == model.DatasetCollection.populated_states.OK, c1.populated_state
    assert len(c1.elements) == 2
Example #13
def set_metadata_portable():
    import galaxy.model
    tool_job_working_directory = os.path.abspath(os.getcwd())
    metadata_tmp_files_dir = os.path.join(tool_job_working_directory,
                                          "metadata")
    galaxy.model.metadata.MetadataTempFile.tmp_dir = metadata_tmp_files_dir

    metadata_params_path = os.path.join("metadata", "params.json")
    try:
        with open(metadata_params_path, "r") as f:
            metadata_params = json.load(f)
    except IOError:
        raise Exception("Failed to find metadata/params.json from cwd [%s]" %
                        tool_job_working_directory)
    datatypes_config = metadata_params["datatypes_config"]
    job_metadata = metadata_params["job_metadata"]
    provided_metadata_style = metadata_params.get("provided_metadata_style")
    max_metadata_value_size = metadata_params.get(
        "max_metadata_value_size") or 0
    outputs = metadata_params["outputs"]

    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    tool_provided_metadata = load_job_metadata(job_metadata,
                                               provided_metadata_style)

    def set_meta(new_dataset_instance, file_dict):
        set_meta_with_tool_provided(new_dataset_instance, file_dict,
                                    set_meta_kwds, datatypes_registry,
                                    max_metadata_value_size)

    object_store_conf_path = os.path.join("metadata", "object_store_conf.json")
    extended_metadata_collection = os.path.exists(object_store_conf_path)

    object_store = None
    job_context = None
    version_string = ""

    export_store = None
    if extended_metadata_collection:
        from galaxy.tool_util.parser.stdio import ToolStdioRegex, ToolStdioExitCode
        tool_dict = metadata_params["tool"]
        stdio_exit_code_dicts, stdio_regex_dicts = tool_dict[
            "stdio_exit_codes"], tool_dict["stdio_regexes"]
        stdio_exit_codes = list(map(ToolStdioExitCode, stdio_exit_code_dicts))
        stdio_regexes = list(map(ToolStdioRegex, stdio_regex_dicts))

        with open(object_store_conf_path, "r") as f:
            config_dict = json.load(f)
        from galaxy.objectstore import build_object_store_from_config
        assert config_dict is not None
        object_store = build_object_store_from_config(None,
                                                      config_dict=config_dict)
        galaxy.model.Dataset.object_store = object_store

        outputs_directory = os.path.join(tool_job_working_directory, "outputs")
        if not os.path.exists(outputs_directory):
            outputs_directory = tool_job_working_directory

        # TODO: constants...
        if os.path.exists(os.path.join(outputs_directory, "tool_stdout")):
            with open(os.path.join(outputs_directory, "tool_stdout"),
                      "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "tool_stderr"),
                      "rb") as f:
                tool_stderr = f.read()
        elif os.path.exists(os.path.join(outputs_directory, "stdout")):
            # Pulsar style working directory.
            with open(os.path.join(outputs_directory, "stdout"), "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "stderr"), "rb") as f:
                tool_stderr = f.read()

        job_id_tag = metadata_params["job_id_tag"]

        # TODO: this clearly needs to be refactored, nothing in runners should be imported here..
        from galaxy.job_execution.output_collect import default_exit_code_file, read_exit_code_from
        exit_code_file = default_exit_code_file(".", job_id_tag)
        tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag)

        from galaxy.tool_util.output_checker import check_output, DETECTED_JOB_STATE
        check_output_detected_state, tool_stdout, tool_stderr, job_messages = check_output(
            stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr,
            tool_exit_code, job_id_tag)
        if check_output_detected_state == DETECTED_JOB_STATE.OK and not tool_provided_metadata.has_failed_outputs(
        ):
            final_job_state = galaxy.model.Job.states.OK
        else:
            final_job_state = galaxy.model.Job.states.ERROR

        from pulsar.client.staging import COMMAND_VERSION_FILENAME
        version_string = ""
        if os.path.exists(COMMAND_VERSION_FILENAME):
            version_string = open(COMMAND_VERSION_FILENAME).read()

        # TODO: handle outputs_to_working_directory?
        from galaxy.util.expressions import ExpressionContext
        job_context = ExpressionContext(
            dict(stdout=tool_stdout, stderr=tool_stderr))

        # Load outputs.
        import_model_store = store.imported_store_for_metadata(
            'metadata/outputs_new', object_store=object_store)
        export_store = store.DirectoryModelExportStore(
            'metadata/outputs_populated',
            serialize_dataset_objects=True,
            for_edit=True)

    for output_name, output_dict in outputs.items():
        if extended_metadata_collection:
            dataset_instance_id = output_dict["id"]
            dataset = import_model_store.sa_session.query(
                galaxy.model.HistoryDatasetAssociation).find(
                    dataset_instance_id)
            assert dataset is not None
        else:
            filename_in = os.path.join("metadata/metadata_in_%s" % output_name)
            dataset = cPickle.load(open(filename_in,
                                        'rb'))  # load DatasetInstance

        filename_kwds = os.path.join("metadata/metadata_kwds_%s" % output_name)
        filename_out = os.path.join("metadata/metadata_out_%s" % output_name)
        filename_results_code = os.path.join("metadata/metadata_results_%s" %
                                             output_name)
        override_metadata = os.path.join("metadata/metadata_override_%s" %
                                         output_name)
        dataset_filename_override = output_dict["filename_override"]

        # Same block as below...
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset.dataset.external_filename = dataset_filename_override
            store_by = metadata_params.get("object_store_store_by", "id")
            extra_files_dir_name = "dataset_%s_files" % getattr(
                dataset.dataset, store_by)
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory, "working",
                             extra_files_dir_name))
            dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(
                output_name, dataset.dataset.id, dataset.dataset.uuid)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(
                        metadata_file_override):
                    metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(
                        metadata_file_override)
                setattr(dataset.metadata, metadata_name,
                        metadata_file_override)
            if output_dict.get("validate", False):
                set_validated_state(dataset)
            set_meta(dataset, file_dict)

            if extended_metadata_collection:
                meta = tool_provided_metadata.get_dataset_meta(
                    output_name, dataset.dataset.id, dataset.dataset.uuid)
                if meta:
                    context = ExpressionContext(meta, job_context)
                else:
                    context = job_context

                # Lazy and unattached
                # if getattr(dataset, "hidden_beneath_collection_instance", None):
                #    dataset.visible = False
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip(
                    ) + "\n" + context['stdout'].strip()
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = dataset.info.rstrip(
                    ) + "\n" + context['stderr'].strip()
                dataset.tool_version = version_string
                dataset.set_size()
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                object_store.update_from_file(dataset.dataset, create=True)
                from galaxy.job_execution.output_collect import collect_extra_files
                collect_extra_files(object_store, dataset, ".")
                if galaxy.model.Job.states.ERROR == final_job_state:
                    dataset.blurb = "error"
                    dataset.mark_unhidden()
                else:
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get('ext', 'data')
                        dataset.init_meta(copy_from=dataset)

                    # This has already been done:
                    # else:
                    #     self.external_output_metadata.load_metadata(dataset, output_name, self.sa_session, working_directory=self.working_directory, remote_metadata_directory=remote_metadata_directory)
                    line_count = context.get('line_count', None)
                    try:
                        # Certain datatype's set_peek methods contain a line_count argument
                        dataset.set_peek(line_count=line_count)
                    except TypeError:
                        # ... and others don't
                        dataset.set_peek()

                from galaxy.jobs import TOOL_PROVIDED_JOB_METADATA_KEYS
                for context_key in TOOL_PROVIDED_JOB_METADATA_KEYS:
                    if context_key in context:
                        context_value = context[context_key]
                        setattr(dataset, context_key, context_value)

                if extended_metadata_collection:
                    export_store.add_dataset(dataset)
                else:
                    cPickle.dump(dataset, open(filename_out, 'wb+'))
            else:
                dataset.metadata.to_JSON_dict(
                    filename_out)  # write out results of set_meta

            json.dump((True, 'Metadata has been set successfully'),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has succeeded
        except Exception:
            json.dump((False, traceback.format_exc()),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has failed somehow

    if extended_metadata_collection:
        # discover extra outputs...
        from galaxy.job_execution.output_collect import collect_dynamic_outputs, collect_primary_datasets, SessionlessJobContext

        job_context = SessionlessJobContext(
            metadata_params, tool_provided_metadata, object_store,
            export_store, import_model_store,
            os.path.join(tool_job_working_directory, "working"))

        output_collections = {}
        for name, output_collection in metadata_params[
                "output_collections"].items():
            output_collections[name] = import_model_store.sa_session.query(
                galaxy.model.HistoryDatasetCollectionAssociation).find(
                    output_collection["id"])
        outputs = {}
        for name, output in metadata_params["outputs"].items():
            outputs[name] = import_model_store.sa_session.query(
                galaxy.model.HistoryDatasetAssociation).find(output["id"])

        input_ext = json.loads(metadata_params["job_params"].get(
            "__input_ext", '"data"'))
        collect_primary_datasets(
            job_context,
            outputs,
            input_ext=input_ext,
        )
        collect_dynamic_outputs(job_context, output_collections)

    if export_store:
        export_store._finalize()
    write_job_metadata(tool_job_working_directory, job_metadata, set_meta,
                       tool_provided_metadata)
Example #14
def set_metadata_portable():
    tool_job_working_directory = os.path.abspath(os.getcwd())
    metadata_tmp_files_dir = os.path.join(tool_job_working_directory,
                                          "metadata")
    MetadataTempFile.tmp_dir = metadata_tmp_files_dir

    metadata_params_path = os.path.join("metadata", "params.json")
    try:
        with open(metadata_params_path) as f:
            metadata_params = json.load(f)
    except OSError:
        raise Exception(
            f"Failed to find metadata/params.json from cwd [{tool_job_working_directory}]"
        )
    datatypes_config = metadata_params["datatypes_config"]
    job_metadata = metadata_params["job_metadata"]
    provided_metadata_style = metadata_params.get("provided_metadata_style")
    max_metadata_value_size = metadata_params.get(
        "max_metadata_value_size") or 0
    outputs = metadata_params["outputs"]

    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    tool_provided_metadata = load_job_metadata(job_metadata,
                                               provided_metadata_style)

    def set_meta(new_dataset_instance, file_dict):
        set_meta_with_tool_provided(new_dataset_instance, file_dict,
                                    set_meta_kwds, datatypes_registry,
                                    max_metadata_value_size)

    object_store_conf_path = os.path.join("metadata", "object_store_conf.json")
    extended_metadata_collection = os.path.exists(object_store_conf_path)

    object_store = None
    job_context = None
    version_string = ""

    export_store = None
    final_job_state = Job.states.OK
    if extended_metadata_collection:
        tool_dict = metadata_params["tool"]
        stdio_exit_code_dicts, stdio_regex_dicts = tool_dict[
            "stdio_exit_codes"], tool_dict["stdio_regexes"]
        stdio_exit_codes = list(map(ToolStdioExitCode, stdio_exit_code_dicts))
        stdio_regexes = list(map(ToolStdioRegex, stdio_regex_dicts))

        with open(object_store_conf_path) as f:
            config_dict = json.load(f)
        assert config_dict is not None
        object_store = build_object_store_from_config(None,
                                                      config_dict=config_dict)
        Dataset.object_store = object_store

        outputs_directory = os.path.join(tool_job_working_directory, "outputs")
        if not os.path.exists(outputs_directory):
            outputs_directory = tool_job_working_directory

        # TODO: constants...
        if os.path.exists(os.path.join(outputs_directory, "tool_stdout")):
            with open(os.path.join(outputs_directory, "tool_stdout"),
                      "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "tool_stderr"),
                      "rb") as f:
                tool_stderr = f.read()
        elif os.path.exists(os.path.join(tool_job_working_directory,
                                         "stdout")):
            with open(os.path.join(tool_job_working_directory, "stdout"),
                      "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(tool_job_working_directory, "stderr"),
                      "rb") as f:
                tool_stderr = f.read()
        elif os.path.exists(os.path.join(outputs_directory, "stdout")):
            # Pulsar style output directory? Was this ever used - did this ever work?
            with open(os.path.join(outputs_directory, "stdout"), "rb") as f:
                tool_stdout = f.read()

            with open(os.path.join(outputs_directory, "stderr"), "rb") as f:
                tool_stderr = f.read()
        else:
            wdc = os.listdir(tool_job_working_directory)
            odc = os.listdir(outputs_directory)
            error_desc = "Failed to find tool_stdout or tool_stderr for this job, cannot collect metadata"
            error_extra = f"Working dir contents [{wdc}], output directory contents [{odc}]"
            log.warn(f"{error_desc}. {error_extra}")
            raise Exception(error_desc)

        job_id_tag = metadata_params["job_id_tag"]

        exit_code_file = default_exit_code_file(".", job_id_tag)
        tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag)

        check_output_detected_state, tool_stdout, tool_stderr, job_messages = check_output(
            stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr,
            tool_exit_code, job_id_tag)
        if check_output_detected_state == DETECTED_JOB_STATE.OK and not tool_provided_metadata.has_failed_outputs(
        ):
            final_job_state = Job.states.OK
        else:
            final_job_state = Job.states.ERROR

        version_string = ""
        if os.path.exists(COMMAND_VERSION_FILENAME):
            version_string = open(COMMAND_VERSION_FILENAME).read()

        expression_context = ExpressionContext(
            dict(stdout=tool_stdout, stderr=tool_stderr))

        # Load outputs.
        export_store = store.DirectoryModelExportStore(
            'metadata/outputs_populated',
            serialize_dataset_objects=True,
            for_edit=True,
            strip_metadata_files=False,
            serialize_jobs=False)
    try:
        import_model_store = store.imported_store_for_metadata(
            'metadata/outputs_new', object_store=object_store)
    except AssertionError:
        # Remove in 21.09, this should only happen for jobs that started on <= 20.09 and finish now
        import_model_store = None

    job_context = SessionlessJobContext(
        metadata_params,
        tool_provided_metadata,
        object_store,
        export_store,
        import_model_store,
        os.path.join(tool_job_working_directory, "working"),
        final_job_state=final_job_state,
    )

    unnamed_id_to_path = {}
    for unnamed_output_dict in job_context.tool_provided_metadata.get_unnamed_outputs(
    ):
        destination = unnamed_output_dict["destination"]
        elements = unnamed_output_dict["elements"]
        destination_type = destination["type"]
        if destination_type == 'hdas':
            for element in elements:
                filename = element.get('filename')
                if filename:
                    unnamed_id_to_path[element['object_id']] = os.path.join(
                        job_context.job_working_directory, filename)

    for output_name, output_dict in outputs.items():
        dataset_instance_id = output_dict["id"]
        klass = getattr(
            galaxy.model,
            output_dict.get('model_class', 'HistoryDatasetAssociation'))
        dataset = None
        if import_model_store:
            dataset = import_model_store.sa_session.query(klass).find(
                dataset_instance_id)
        if dataset is None:
            # legacy check for jobs that started before 21.01, remove on 21.05
            filename_in = os.path.join(f"metadata/metadata_in_{output_name}")
            import pickle
            dataset = pickle.load(open(filename_in,
                                       'rb'))  # load DatasetInstance
        assert dataset is not None

        filename_kwds = os.path.join(f"metadata/metadata_kwds_{output_name}")
        filename_out = os.path.join(f"metadata/metadata_out_{output_name}")
        filename_results_code = os.path.join(
            f"metadata/metadata_results_{output_name}")
        override_metadata = os.path.join(
            f"metadata/metadata_override_{output_name}")
        dataset_filename_override = output_dict["filename_override"]
        # pre-20.05 this was a per job parameter and not a per dataset parameter, drop in 21.XX
        legacy_object_store_store_by = metadata_params.get(
            "object_store_store_by", "id")

        # Same block as below...
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset.dataset.external_filename = unnamed_id_to_path.get(
                dataset_instance_id, dataset_filename_override)
            store_by = output_dict.get("object_store_store_by",
                                       legacy_object_store_store_by)
            extra_files_dir_name = f"dataset_{getattr(dataset.dataset, store_by)}_files"
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory, "working",
                             extra_files_dir_name))
            dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(
                output_name, dataset.dataset.id, dataset.dataset.uuid)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if MetadataTempFile.is_JSONified_value(metadata_file_override):
                    metadata_file_override = MetadataTempFile.from_JSON(
                        metadata_file_override)
                setattr(dataset.metadata, metadata_name,
                        metadata_file_override)
            if output_dict.get("validate", False):
                set_validated_state(dataset)
            if dataset_instance_id not in unnamed_id_to_path:
                # We're going to run through set_metadata in collect_dynamic_outputs with more contextual metadata,
                # so skip set_meta here.
                set_meta(dataset, file_dict)

            if extended_metadata_collection:
                meta = tool_provided_metadata.get_dataset_meta(
                    output_name, dataset.dataset.id, dataset.dataset.uuid)
                if meta:
                    context = ExpressionContext(meta, expression_context)
                else:
                    context = expression_context

                # Lazy and unattached
                # if getattr(dataset, "hidden_beneath_collection_instance", None):
                #    dataset.visible = False
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stdout'].strip()}"
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}"
                dataset.tool_version = version_string
                dataset.set_size()
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                if dataset_filename_override and dataset_filename_override != dataset.file_name:
                    # This has to be a job with outputs_to_working_directory set.
                    # We update the object store with the created output file.
                    object_store.update_from_file(
                        dataset.dataset,
                        file_name=dataset_filename_override,
                        create=True)
                collect_extra_files(object_store, dataset, ".")
                if Job.states.ERROR == final_job_state:
                    dataset.blurb = "error"
                    dataset.mark_unhidden()
                else:
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get('ext', 'data')
                        dataset.init_meta(copy_from=dataset)

                    # This has already been done:
                    # else:
                    #     self.external_output_metadata.load_metadata(dataset, output_name, self.sa_session, working_directory=self.working_directory, remote_metadata_directory=remote_metadata_directory)
                    line_count = context.get('line_count', None)
                    try:
                        # Certain datatype's set_peek methods contain a line_count argument
                        dataset.set_peek(line_count=line_count)
                    except TypeError:
                        # ... and others don't
                        dataset.set_peek()

                for context_key in TOOL_PROVIDED_JOB_METADATA_KEYS:
                    if context_key in context:
                        context_value = context[context_key]
                        setattr(dataset, context_key, context_value)
                # We never want to persist the external_filename.
                dataset.dataset.external_filename = None
                export_store.add_dataset(dataset)
            else:
                dataset.metadata.to_JSON_dict(
                    filename_out)  # write out results of set_meta

            json.dump((True, 'Metadata has been set successfully'),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has succeeded
        except Exception:
            json.dump((False, traceback.format_exc()),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has failed somehow

    if extended_metadata_collection:
        # discover extra outputs...
        output_collections = {}
        for name, output_collection in metadata_params[
                "output_collections"].items():
            output_collections[name] = import_model_store.sa_session.query(
                HistoryDatasetCollectionAssociation).find(
                    output_collection["id"])
        outputs = {}
        for name, output in metadata_params["outputs"].items():
            klass = getattr(
                galaxy.model,
                output.get('model_class', 'HistoryDatasetAssociation'))
            outputs[name] = import_model_store.sa_session.query(klass).find(
                output["id"])

        input_ext = json.loads(metadata_params["job_params"].get(
            "__input_ext", '"data"'))
        collect_primary_datasets(
            job_context,
            outputs,
            input_ext=input_ext,
        )
        collect_dynamic_outputs(job_context, output_collections)

    if export_store:
        export_store._finalize()
    write_job_metadata(tool_job_working_directory, job_metadata, set_meta,
                       tool_provided_metadata)
Example #15
def set_metadata_portable():
    tool_job_working_directory = os.path.abspath(os.getcwd())
    metadata_tmp_files_dir = os.path.join(tool_job_working_directory, "metadata")
    MetadataTempFile.tmp_dir = metadata_tmp_files_dir

    metadata_params = get_metadata_params(tool_job_working_directory)
    datatypes_config = metadata_params["datatypes_config"]
    job_metadata = metadata_params["job_metadata"]
    provided_metadata_style = metadata_params.get("provided_metadata_style")
    max_metadata_value_size = metadata_params.get("max_metadata_value_size") or 0
    max_discovered_files = metadata_params.get("max_discovered_files")
    outputs = metadata_params["outputs"]

    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)
    tool_provided_metadata = load_job_metadata(job_metadata, provided_metadata_style)

    def set_meta(new_dataset_instance, file_dict):
        set_meta_with_tool_provided(new_dataset_instance, file_dict, set_meta_kwds, datatypes_registry, max_metadata_value_size)

    try:
        object_store = get_object_store(tool_job_working_directory=tool_job_working_directory)
    except (FileNotFoundError, AssertionError):
        object_store = None
    extended_metadata_collection = bool(object_store)
    job_context = None
    version_string = None

    export_store = None
    final_job_state = Job.states.OK
    job_messages = []
    if extended_metadata_collection:
        tool_dict = metadata_params["tool"]
        stdio_exit_code_dicts, stdio_regex_dicts = tool_dict["stdio_exit_codes"], tool_dict["stdio_regexes"]
        stdio_exit_codes = list(map(ToolStdioExitCode, stdio_exit_code_dicts))
        stdio_regexes = list(map(ToolStdioRegex, stdio_regex_dicts))

        outputs_directory = os.path.join(tool_job_working_directory, "outputs")
        if not os.path.exists(outputs_directory):
            outputs_directory = tool_job_working_directory

        # TODO: constants...
        locations = [
            (outputs_directory, 'tool_'),
            (tool_job_working_directory, ''),
            (outputs_directory, ''),  # Pulsar style output directory? Was this ever used - did this ever work?
        ]
        for directory, prefix in locations:
            if os.path.exists(os.path.join(directory, f"{prefix}stdout")):
                with open(os.path.join(directory, f"{prefix}stdout"), 'rb') as f:
                    tool_stdout = f.read(MAX_STDIO_READ_BYTES)
                with open(os.path.join(directory, f"{prefix}stderr"), 'rb') as f:
                    tool_stderr = f.read(MAX_STDIO_READ_BYTES)
                break
        else:
            if os.path.exists(os.path.join(tool_job_working_directory, 'task_0')):
                # We have a task splitting job
                tool_stdout = b''
                tool_stderr = b''
                paths = Path(tool_job_working_directory).glob('task_*')
                for path in paths:
                    with open(path / 'outputs' / 'tool_stdout', 'rb') as f:
                        task_stdout = f.read(MAX_STDIO_READ_BYTES)
                        if task_stdout:
                            tool_stdout = b"%s[%s stdout]\n%s\n" % (tool_stdout, path.name.encode(), task_stdout)
                    with open(path / 'outputs' / 'tool_stderr', 'rb') as f:
                        task_stderr = f.read(MAX_STDIO_READ_BYTES)
                        if task_stderr:
                            tool_stderr = b"%s[%s stdout]\n%s\n" % (tool_stderr, path.name.encode(), task_stderr)
            else:
                wdc = os.listdir(tool_job_working_directory)
                odc = os.listdir(outputs_directory)
                error_desc = "Failed to find tool_stdout or tool_stderr for this job, cannot collect metadata"
                error_extra = f"Working dir contents [{wdc}], output directory contents [{odc}]"
                log.warn(f"{error_desc}. {error_extra}")
                raise Exception(error_desc)

        job_id_tag = metadata_params["job_id_tag"]

        exit_code_file = default_exit_code_file(".", job_id_tag)
        tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag)

        check_output_detected_state, tool_stdout, tool_stderr, job_messages = check_output(stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr, tool_exit_code, job_id_tag)
        if check_output_detected_state == DETECTED_JOB_STATE.OK and not tool_provided_metadata.has_failed_outputs():
            final_job_state = Job.states.OK
        else:
            final_job_state = Job.states.ERROR

        version_string_path = os.path.join('outputs', COMMAND_VERSION_FILENAME)
        version_string = collect_shrinked_content_from_path(version_string_path)

        expression_context = ExpressionContext(dict(stdout=tool_stdout[:255], stderr=tool_stderr[:255]))

        # Load outputs.
        export_store = store.DirectoryModelExportStore('metadata/outputs_populated', serialize_dataset_objects=True, for_edit=True, strip_metadata_files=False, serialize_jobs=True)
    try:
        import_model_store = store.imported_store_for_metadata('metadata/outputs_new', object_store=object_store)
    except AssertionError:
        # Remove in 21.09, this should only happen for jobs that started on <= 20.09 and finish now
        import_model_store = None

    tool_script_file = os.path.join(tool_job_working_directory, 'tool_script.sh')
    job = None
    if import_model_store and export_store:
        job = next(iter(import_model_store.sa_session.objects[Job].values()))

    job_context = SessionlessJobContext(
        metadata_params,
        tool_provided_metadata,
        object_store,
        export_store,
        import_model_store,
        os.path.join(tool_job_working_directory, "working"),
        final_job_state=final_job_state,
        max_discovered_files=max_discovered_files,
    )

    if extended_metadata_collection:
        # discover extra outputs...
        output_collections = {}
        for name, output_collection in metadata_params["output_collections"].items():
            # TODO: remove HistoryDatasetCollectionAssociation fallback on 22.01, model_class used to not be serialized prior to 21.09
            model_class = output_collection.get('model_class', 'HistoryDatasetCollectionAssociation')
            collection = import_model_store.sa_session.query(getattr(galaxy.model, model_class)).find(output_collection["id"])
            output_collections[name] = collection
        output_instances = {}
        for name, output in metadata_params["outputs"].items():
            klass = getattr(galaxy.model, output.get('model_class', 'HistoryDatasetAssociation'))
            output_instances[name] = import_model_store.sa_session.query(klass).find(output["id"])

        input_ext = json.loads(metadata_params["job_params"].get("__input_ext") or '"data"')
        try:
            collect_primary_datasets(
                job_context,
                output_instances,
                input_ext=input_ext,
            )
            collect_dynamic_outputs(job_context, output_collections)
        except MaxDiscoveredFilesExceededError as e:
            final_job_state = Job.states.ERROR
            job_messages.append(str(e))
        if job:
            job.job_messages = job_messages
            job.state = final_job_state
        if os.path.exists(tool_script_file):
            with open(tool_script_file) as command_fh:
                command_line_lines = []
                for i, line in enumerate(command_fh):
                    if i == 0 and line.endswith('COMMAND_VERSION 2>&1;'):
                        # Don't record version command as part of command line
                        continue
                    command_line_lines.append(line)
                job.command_line = "".join(command_line_lines).strip()
                export_store.export_job(job, include_job_data=False)

    unnamed_id_to_path = {}
    for unnamed_output_dict in job_context.tool_provided_metadata.get_unnamed_outputs():
        destination = unnamed_output_dict["destination"]
        elements = unnamed_output_dict["elements"]
        destination_type = destination["type"]
        if destination_type == 'hdas':
            for element in elements:
                filename = element.get('filename')
                object_id = element.get('object_id')
                if filename and object_id:
                    unnamed_id_to_path[object_id] = os.path.join(job_context.job_working_directory, filename)

    for output_name, output_dict in outputs.items():
        dataset_instance_id = output_dict["id"]
        klass = getattr(galaxy.model, output_dict.get('model_class', 'HistoryDatasetAssociation'))
        dataset = None
        if import_model_store:
            dataset = import_model_store.sa_session.query(klass).find(dataset_instance_id)
        if dataset is None:
            # legacy check for jobs that started before 21.01, remove on 21.05
            filename_in = os.path.join(f"metadata/metadata_in_{output_name}")
            import pickle
            dataset = pickle.load(open(filename_in, 'rb'))  # load DatasetInstance
        assert dataset is not None

        filename_kwds = os.path.join(f"metadata/metadata_kwds_{output_name}")
        filename_out = os.path.join(f"metadata/metadata_out_{output_name}")
        filename_results_code = os.path.join(f"metadata/metadata_results_{output_name}")
        override_metadata = os.path.join(f"metadata/metadata_override_{output_name}")
        dataset_filename_override = output_dict["filename_override"]
        # pre-20.05 this was a per job parameter and not a per dataset parameter, drop in 21.XX
        legacy_object_store_store_by = metadata_params.get("object_store_store_by", "id")

        # Same block as below...
        set_meta_kwds = stringify_dictionary_keys(json.load(open(filename_kwds)))  # load kwds; need to ensure our keywords are not unicode
        try:
            external_filename = unnamed_id_to_path.get(dataset_instance_id, dataset_filename_override)
            if not os.path.exists(external_filename):
                matches = glob.glob(external_filename)
                assert len(matches) == 1, f"More than one file matched by output glob '{external_filename}'"
                external_filename = matches[0]
                assert safe_contains(tool_job_working_directory, external_filename), f"Cannot collect output '{external_filename}' from outside of working directory"
                created_from_basename = os.path.relpath(external_filename, os.path.join(tool_job_working_directory, 'working'))
                dataset.dataset.created_from_basename = created_from_basename
            # override filename if we're dealing with outputs to working directory and dataset is not linked to
            link_data_only = metadata_params.get("link_data_only")
            if not link_data_only:
                # Only set external filename if we're dealing with files in job working directory.
                # Fixes link_data_only uploads
                dataset.dataset.external_filename = external_filename
                store_by = output_dict.get("object_store_store_by", legacy_object_store_store_by)
                extra_files_dir_name = f"dataset_{getattr(dataset.dataset, store_by)}_files"
                files_path = os.path.abspath(os.path.join(tool_job_working_directory, "working", extra_files_dir_name))
                dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(output_name, dataset.dataset.id, dataset.dataset.uuid)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if MetadataTempFile.is_JSONified_value(metadata_file_override):
                    metadata_file_override = MetadataTempFile.from_JSON(metadata_file_override)
                setattr(dataset.metadata, metadata_name, metadata_file_override)
            if output_dict.get("validate", False):
                set_validated_state(dataset)
            if dataset_instance_id not in unnamed_id_to_path:
                # We're going to run through set_metadata in collect_dynamic_outputs with more contextual metadata,
                # so skip set_meta here.
                set_meta(dataset, file_dict)
                if extended_metadata_collection:
                    collect_extra_files(object_store, dataset, ".")
                    dataset.state = dataset.dataset.state = final_job_state

            if extended_metadata_collection:
                if not link_data_only and os.path.getsize(external_filename):
                    # Here we might be updating a disk based objectstore when outputs_to_working_directory is used,
                    # or a remote object store from its cache path.
                    object_store.update_from_file(dataset.dataset, file_name=external_filename, create=True)
                # TODO: merge expression_context into tool_provided_metadata so we don't have to special case this (here and in _finish_dataset)
                meta = tool_provided_metadata.get_dataset_meta(output_name, dataset.dataset.id, dataset.dataset.uuid)
                if meta:
                    context = ExpressionContext(meta, expression_context)
                else:
                    context = expression_context
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = (dataset.info or '')
                if context['stdout'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stdout'].strip()}"
                if context['stderr'].strip():
                    # Ensure white space between entries
                    dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}"
                dataset.tool_version = version_string
                if 'uuid' in context:
                    dataset.dataset.uuid = context['uuid']
                if not final_job_state == Job.states.ERROR:
                    line_count = context.get('line_count', None)
                    try:
                        # Certain datatype's set_peek methods contain a line_count argument
                        dataset.set_peek(line_count=line_count)
                    except TypeError:
                        # ... and others don't
                        dataset.set_peek()
                for context_key in TOOL_PROVIDED_JOB_METADATA_KEYS:
                    if context_key in context:
                        context_value = context[context_key]
                        setattr(dataset, context_key, context_value)
                # We only want to persist the external_filename if the dataset has been linked in.
                if not link_data_only:
                    dataset.dataset.external_filename = None
                    dataset.dataset.extra_files_path = None
                export_store.add_dataset(dataset)
            else:
                dataset.metadata.to_JSON_dict(filename_out)  # write out results of set_meta

            json.dump((True, 'Metadata has been set successfully'), open(filename_results_code, 'wt+'))  # setting metadata has succeeded
        except Exception:
            json.dump((False, traceback.format_exc()), open(filename_results_code, 'wt+'))  # setting metadata has failed somehow

    if export_store:
        export_store._finalize()
    write_job_metadata(tool_job_working_directory, job_metadata, set_meta, tool_provided_metadata)