def persist_elements_to_hdca(model_persistence_context, elements, hdca, collector=None):
    """Discover files described by ``elements`` and populate ``hdca``'s collection.

    Walks the (possibly nested) ``elements`` structure depth-first; leaf
    elements are resolved to discovered files which are then handed to the
    persistence context to build the bound collection.

    :param model_persistence_context: context providing ``job_working_directory``
        and ``populate_collection_elements``.
    :param elements: list of element dicts; a dict with an ``"elements"`` key is
        a nested sub-collection, anything else is a leaf dataset description.
    :param hdca: history dataset collection association whose ``.collection``
        will be populated.
    :param collector: optional dataset collector forwarded to
        ``discovered_file_for_element``.
    """
    # Plain dict preserves insertion order on Python 3.7+, so the ordered-dict
    # wrapper is unnecessary; discovery order is kept intact.
    filenames = {}

    def add_to_discovered_files(elements, parent_identifiers=None):
        # None sentinel instead of a mutable default argument.
        if parent_identifiers is None:
            parent_identifiers = []
        for element in elements:
            if "elements" in element:
                # Nested collection: recurse with the element's name appended
                # to the identifier path.
                add_to_discovered_files(element["elements"], parent_identifiers + [element["name"]])
            else:
                discovered_file = discovered_file_for_element(
                    element, model_persistence_context.job_working_directory, parent_identifiers, collector=collector)
                filenames[discovered_file.path] = discovered_file

    add_to_discovered_files(elements)
    collection = hdca.collection
    collection_builder = builder.BoundCollectionBuilder(collection)
    model_persistence_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
    )
    collection_builder.populate()
def persist_elements_to_hdca(
        model_persistence_context: Union['JobContext', 'SessionlessJobContext', SessionlessModelPersistenceContext],
        elements,
        hdca,
        collector=None):
    """Populate ``hdca``'s backing collection from discovered output elements.

    ``elements`` is traversed depth-first: entries carrying an ``"elements"``
    key describe nested sub-collections, all other entries are leaves resolved
    via ``discovered_file_for_element``. The discovered files are then passed
    to the persistence context to build and populate the bound collection.
    """
    discovered = {}

    def walk(element_list, identifier_path):
        for item in element_list:
            if "elements" in item:
                # Sub-collection: descend with this element's name appended.
                walk(item["elements"], identifier_path + [item["name"]])
            else:
                found = discovered_file_for_element(
                    item, model_persistence_context, identifier_path, collector=collector)
                discovered[found.path] = found

    walk(elements, [])
    target_collection = hdca.collection
    bound_builder = builder.BoundCollectionBuilder(target_collection)
    model_persistence_context.populate_collection_elements(
        target_collection,
        bound_builder,
        discovered,
    )
    bound_builder.populate()
def test_job_context_discover_outputs_flushes_once(mocker):
    """Verify populating collection elements defers the session flush.

    Discovers 10 output files into a dataset collection and asserts that
    ``populate_collection_elements`` + ``populate`` trigger no flush on their
    own — the single explicit ``flush()`` afterwards persists everything.
    """
    app = _mock_app()
    sa_session = app.model.context
    # mocker is a pytest-mock fixture
    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)
    tool = Tool(app)
    tool_provided_metadata = None
    job = model.Job()
    job.history = h
    sa_session.add(job)
    sa_session.flush()
    # Working directory seeded with the fixture outputs to be discovered.
    job_working_directory = tempfile.mkdtemp()
    setup_data(job_working_directory)
    permission_provider = PermissionProvider()
    metadata_source_provider = MetadataSourceProvider()
    object_store = app.object_store
    input_dbkey = '?'
    final_job_state = 'ok'
    # Collect files by pattern, using the file name as the element identifier.
    collection_description = FilePatternDatasetCollectionDescription(
        pattern="__name__")
    collection = model.DatasetCollection(collection_type='list', populated=False)
    sa_session.add(collection)
    job_context = JobContext(tool, tool_provided_metadata, job, job_working_directory, permission_provider, metadata_source_provider, input_dbkey, object_store, final_job_state)
    collection_builder = builder.BoundCollectionBuilder(collection)
    dataset_collectors = [dataset_collector(collection_description)]
    output_name = 'output'
    filenames = job_context.find_files(output_name, collection, dataset_collectors)
    # The fixture directory is expected to contain exactly 10 output files.
    assert len(filenames) == 10
    # Spy installed only now, so earlier setup flushes are not counted.
    spy = mocker.spy(sa_session, 'flush')
    job_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
        name=output_name,
        metadata_source_name='',
        final_job_state=job_context.final_job_state,
    )
    collection_builder.populate()
    # No flush should have happened while building the collection elements.
    assert spy.call_count == 0
    sa_session.flush()
    assert len(collection.dataset_instances) == 10
    assert collection.dataset_instances[0].dataset.file_size == 1
def collection_builder_for(self, dataset_collection):
    """Return a ``BoundCollectionBuilder`` wrapping ``dataset_collection``."""
    bound_builder = builder.BoundCollectionBuilder(dataset_collection)
    return bound_builder
def collect_dynamic_outputs(
    job_context,
    output_collections,
):
    """Collect dynamically discovered job outputs into their destinations.

    First handles unnamed outputs declared via tool-provided metadata
    (e.g. galaxy.json), then populates any declared output collections with
    dynamic structure.
    """
    # unmapped outputs do not correspond to explicit outputs of the tool, they were inferred entirely
    # from the tool provided metadata (e.g. galaxy.json).
    for unnamed_output_dict in job_context.tool_provided_metadata.get_unnamed_outputs():
        assert "destination" in unnamed_output_dict
        assert "elements" in unnamed_output_dict
        destination = unnamed_output_dict["destination"]
        elements = unnamed_output_dict["elements"]
        assert "type" in destination
        destination_type = destination["type"]
        assert destination_type in ["library_folder", "hdca", "hdas"]
        # three destination types we need to handle here - "library_folder" (place discovered files in a library folder),
        # "hdca" (place discovered files in a history dataset collection), and "hdas" (place discovered files in a history
        # as stand-alone datasets).
        if destination_type == "library_folder":
            # populate a library folder (needs to already have been created)
            library_folder = job_context.get_library_folder(destination)
            persist_elements_to_folder(job_context, elements, library_folder)
        elif destination_type == "hdca":
            # create or populate a dataset collection in the history
            assert "collection_type" in unnamed_output_dict
            object_id = destination.get("object_id")
            if object_id:
                # An existing HDCA was pre-created; populate it in place.
                hdca = job_context.get_hdca(object_id)
            else:
                # No target HDCA yet — create one with the declared
                # collection type and an uninitialized structure.
                name = unnamed_output_dict.get("name", "unnamed collection")
                collection_type = unnamed_output_dict["collection_type"]
                collection_type_description = COLLECTION_TYPE_DESCRIPTION_FACTORY.for_collection_type(collection_type)
                structure = UninitializedTree(collection_type_description)
                hdca = job_context.create_hdca(name, structure)
            persist_elements_to_hdca(job_context, elements, hdca, collector=DEFAULT_DATASET_COLLECTOR)
        elif destination_type == "hdas":
            persist_hdas(elements, job_context)

    for name, has_collection in output_collections.items():
        output_collection_def = job_context.output_collection_def(name)
        if not output_collection_def:
            continue

        if not output_collection_def.dynamic_structure:
            # Static collections are handled elsewhere; only dynamic ones
            # need discovery here.
            continue

        # Could be HDCA for normal jobs or a DC for mapping
        # jobs.
        if hasattr(has_collection, "collection"):
            collection = has_collection.collection
        else:
            collection = has_collection

        # We are adding dynamic collections, which may be precreated, but their actual state is still new!
        collection.populated_state = collection.populated_states.NEW

        try:
            collection_builder = builder.BoundCollectionBuilder(collection)
            dataset_collectors = [dataset_collector(description) for description in output_collection_def.dataset_collector_descriptions]
            output_name = output_collection_def.name
            filenames = job_context.find_files(output_name, collection, dataset_collectors)
            job_context.populate_collection_elements(
                collection,
                collection_builder,
                filenames,
                name=output_collection_def.name,
                metadata_source_name=output_collection_def.metadata_source,
            )
            collection_builder.populate()
        except Exception:
            # Discovery/population failure marks the collection failed rather
            # than failing the whole job.
            log.exception("Problem gathering output collection.")
            collection.handle_population_failed("Problem building datasets for collection.")
        job_context.add_dataset_collection(has_collection)