コード例 #1
0
def persist_elements_to_hdca(model_persistence_context,
                             elements,
                             hdca,
                             collector=None):
    """Discover the files described by ``elements`` and populate ``hdca``.

    Walks the (possibly nested) ``elements`` structure, resolves each leaf
    element to a discovered file, and then builds the HDCA's backing
    collection from those files.

    :param model_persistence_context: context exposing
        ``job_working_directory`` and ``populate_collection_elements``.
    :param elements: list of element dicts; an element with an ``"elements"``
        key is a nested sub-collection, otherwise it is a leaf dataset.
    :param hdca: history dataset collection association whose ``collection``
        will be populated.
    :param collector: optional dataset collector passed through to
        ``discovered_file_for_element``.
    """
    # Ordered mapping path -> discovered file; order determines element order.
    filenames = odict.odict()

    def add_to_discovered_files(elements, parent_identifiers=None):
        # BUG FIX: the default was a mutable list (parent_identifiers=[]),
        # which is shared across calls; use a None sentinel instead.
        if parent_identifiers is None:
            parent_identifiers = []
        for element in elements:
            if "elements" in element:
                # Nested collection: recurse, extending the identifier path.
                add_to_discovered_files(element["elements"],
                                        parent_identifiers + [element["name"]])
            else:
                discovered_file = discovered_file_for_element(
                    element,
                    model_persistence_context.job_working_directory,
                    parent_identifiers,
                    collector=collector)
                filenames[discovered_file.path] = discovered_file

    add_to_discovered_files(elements)

    collection = hdca.collection
    collection_builder = builder.BoundCollectionBuilder(collection)
    model_persistence_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
    )
    collection_builder.populate()
コード例 #2
0
def persist_elements_to_hdca(
        model_persistence_context: Union['JobContext', 'SessionlessJobContext',
                                         SessionlessModelPersistenceContext],
        elements,
        hdca,
        collector=None):
    """Resolve discovered files for ``elements`` and build ``hdca``'s collection.

    Leaf element dicts are turned into discovered files (keyed by path, in
    discovery order); dicts carrying an ``"elements"`` key are walked
    recursively as nested sub-collections. The resulting file mapping is then
    handed to the persistence context to populate the bound collection.
    """
    filenames = {}

    def _walk(element_list, identifier_path=None):
        # Depth-first traversal; identifier_path records the chain of
        # enclosing sub-collection names for each leaf.
        identifier_path = identifier_path or []
        for entry in element_list:
            if "elements" not in entry:
                # Leaf: resolve to a discovered file, keyed by its path.
                found = discovered_file_for_element(
                    entry,
                    model_persistence_context,
                    identifier_path,
                    collector=collector)
                filenames[found.path] = found
            else:
                _walk(entry["elements"], identifier_path + [entry["name"]])

    _walk(elements)

    target_collection = hdca.collection
    bound_builder = builder.BoundCollectionBuilder(target_collection)
    model_persistence_context.populate_collection_elements(
        target_collection,
        bound_builder,
        filenames,
    )
    bound_builder.populate()
コード例 #3
0
def test_job_context_discover_outputs_flushes_once(mocker):
    """Regression test: populating collection elements for discovered outputs
    must not flush the SQLAlchemy session per element.

    Builds a minimal job + history, discovers 10 output files via a
    ``__name__`` file-pattern collector, then spies on ``sa_session.flush``
    while the collection is populated. The spy must record zero calls; the
    single explicit flush happens afterwards, under test control.
    """
    app = _mock_app()
    sa_session = app.model.context
    # mocker is a pytest-mock fixture

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    tool = Tool(app)
    tool_provided_metadata = None
    job = model.Job()
    job.history = h
    sa_session.add(job)
    sa_session.flush()
    # Working directory seeded with the 10 fixture files the collector finds.
    job_working_directory = tempfile.mkdtemp()
    setup_data(job_working_directory)
    permission_provider = PermissionProvider()
    metadata_source_provider = MetadataSourceProvider()
    object_store = app.object_store
    input_dbkey = '?'
    final_job_state = 'ok'
    # Collect files by name pattern; each file becomes one list element.
    collection_description = FilePatternDatasetCollectionDescription(
        pattern="__name__")
    collection = model.DatasetCollection(collection_type='list',
                                         populated=False)
    sa_session.add(collection)
    job_context = JobContext(tool, tool_provided_metadata, job,
                             job_working_directory, permission_provider,
                             metadata_source_provider, input_dbkey,
                             object_store, final_job_state)
    collection_builder = builder.BoundCollectionBuilder(collection)
    dataset_collectors = [dataset_collector(collection_description)]
    output_name = 'output'
    filenames = job_context.find_files(output_name, collection,
                                       dataset_collectors)
    assert len(filenames) == 10
    # Attach the spy only now, so the setup flushes above are not counted.
    spy = mocker.spy(sa_session, 'flush')
    job_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
        name=output_name,
        metadata_source_name='',
        final_job_state=job_context.final_job_state,
    )
    collection_builder.populate()
    # Core assertion: populating 10 elements triggered no flush at all.
    assert spy.call_count == 0
    sa_session.flush()
    assert len(collection.dataset_instances) == 10
    # Fixture files are 1 byte each (see setup_data) — TODO confirm.
    assert collection.dataset_instances[0].dataset.file_size == 1
コード例 #4
0
 def collection_builder_for(self, dataset_collection):
     """Return a bound collection builder wrapping *dataset_collection*."""
     bound = builder.BoundCollectionBuilder(dataset_collection)
     return bound
コード例 #5
0
ファイル: output_collect.py プロジェクト: scrathat/galaxy
def collect_dynamic_outputs(
    job_context,
    output_collections,
):
    """Gather dynamically discovered job outputs into their destinations.

    Two phases:

    1. Unnamed outputs declared in the tool-provided metadata (e.g.
       ``galaxy.json``) are routed by their ``destination.type`` to a library
       folder, an HDCA, or stand-alone history datasets.
    2. Declared output collections with ``dynamic_structure`` have their
       elements discovered from the job working directory and populated.

    :param job_context: discovery context (tool metadata, history/library
        accessors, ``find_files``/``populate_collection_elements``).
    :param output_collections: mapping of output name -> HDCA (normal jobs)
        or bare DatasetCollection (mapped-over jobs).
    """
    # unmapped outputs do not correspond to explicit outputs of the tool, they were inferred entirely
    # from the tool provided metadata (e.g. galaxy.json).
    for unnamed_output_dict in job_context.tool_provided_metadata.get_unnamed_outputs():
        assert "destination" in unnamed_output_dict
        assert "elements" in unnamed_output_dict
        destination = unnamed_output_dict["destination"]
        elements = unnamed_output_dict["elements"]

        assert "type" in destination
        destination_type = destination["type"]
        assert destination_type in ["library_folder", "hdca", "hdas"]

        # three destination types we need to handle here - "library_folder" (place discovered files in a library folder),
        # "hdca" (place discovered files in a history dataset collection), and "hdas" (place discovered files in a history
        # as stand-alone datasets).
        if destination_type == "library_folder":
            # populate a library folder (needs to be already have been created)
            library_folder = job_context.get_library_folder(destination)
            persist_elements_to_folder(job_context, elements, library_folder)
        elif destination_type == "hdca":
            # create or populate a dataset collection in the history
            assert "collection_type" in unnamed_output_dict
            object_id = destination.get("object_id")
            if object_id:
                # Target an existing HDCA identified by the metadata.
                hdca = job_context.get_hdca(object_id)
            else:
                # No target given: create a fresh HDCA with an uninitialized
                # tree of the requested collection type.
                name = unnamed_output_dict.get("name", "unnamed collection")
                collection_type = unnamed_output_dict["collection_type"]
                collection_type_description = COLLECTION_TYPE_DESCRIPTION_FACTORY.for_collection_type(collection_type)
                structure = UninitializedTree(collection_type_description)
                hdca = job_context.create_hdca(name, structure)
            persist_elements_to_hdca(job_context, elements, hdca, collector=DEFAULT_DATASET_COLLECTOR)
        elif destination_type == "hdas":
            persist_hdas(elements, job_context)

    for name, has_collection in output_collections.items():
        output_collection_def = job_context.output_collection_def(name)
        if not output_collection_def:
            continue

        if not output_collection_def.dynamic_structure:
            continue

        # Could be HDCA for normal jobs or a DC for mapping
        # jobs.
        if hasattr(has_collection, "collection"):
            collection = has_collection.collection
        else:
            collection = has_collection

        # We are adding dynamic collections, which may be precreated, but their actually state is still new!
        collection.populated_state = collection.populated_states.NEW

        try:
            collection_builder = builder.BoundCollectionBuilder(collection)
            dataset_collectors = [dataset_collector(description) for description in output_collection_def.dataset_collector_descriptions]
            output_name = output_collection_def.name
            filenames = job_context.find_files(output_name, collection, dataset_collectors)
            job_context.populate_collection_elements(
                collection,
                collection_builder,
                filenames,
                name=output_collection_def.name,
                metadata_source_name=output_collection_def.metadata_source,
            )
            collection_builder.populate()
        except Exception:
            # Best-effort: a discovery failure marks this collection failed
            # rather than aborting collection of the remaining outputs.
            log.exception("Problem gathering output collection.")
            collection.handle_population_failed("Problem building datasets for collection.")

        job_context.add_dataset_collection(has_collection)