def _new_pair_collection(self):
    """Build a minimal 'paired' DatasetCollection with forward/reverse txt HDAs.

    Returns the DatasetCollection; ids are assigned manually since nothing
    is persisted here.
    """
    hda_forward = self._new_hda(contents="Forward dataset.")
    hda_forward.id = 1
    hda_forward.extension = "txt"
    hda_reverse = self._new_hda(contents="Reverse dataset.")
    hda_reverse.id = 2
    hda_reverse.extension = "txt"

    collection = model.DatasetCollection()
    collection.id = 1
    element_forward = model.DatasetCollectionElement(
        collection=collection,
        element=hda_forward,
        element_index=0,
        element_identifier="forward",
    )
    element_forward.id = 1
    element_reverse = model.DatasetCollectionElement(
        collection=collection,
        element=hda_reverse,
        # BUG FIX: was element_index=0, duplicating the forward element's
        # index; the reverse element of a pair is index 1.
        element_index=1,
        element_identifier="reverse",
    )
    element_reverse.id = 2
    collection.collection_type = "paired"
    return collection
def test_export_copied_collection():
    """A collection and its in-history copy round-trip through export/import
    with hids and copied_from links intact."""
    app, sa_session, history = _setup_history_for_export("Collection History with copied collection")
    dataset1, dataset2 = _create_datasets(sa_session, history, 2)

    pair = model.DatasetCollection(collection_type="paired")
    hdca = model.HistoryDatasetCollectionAssociation(history=history, hid=3, collection=pair, name="HistoryCollectionTest1")
    history.hid_counter = 4
    forward_element = model.DatasetCollectionElement(collection=pair, element=dataset1, element_identifier="forward", element_index=0)
    reverse_element = model.DatasetCollectionElement(collection=pair, element=dataset2, element_identifier="reverse", element_index=1)
    sa_session.add_all((forward_element, reverse_element, dataset1, dataset2, hdca))
    sa_session.flush()

    hdca_copy = hdca.copy(element_destination=history)
    history.add_pending_items()
    # Copying the pair adds two datasets plus the collection: 4 -> 7.
    assert history.hid_counter == 7
    sa_session.add(hdca_copy)
    sa_session.flush()
    assert hdca_copy.copied_from_history_dataset_collection_association == hdca

    imported_history = _import_export(app, history)
    assert imported_history.hid_counter == 7
    assert len(imported_history.dataset_collections) == 2
    assert len(imported_history.datasets) == 4
    _assert_distinct_hids(imported_history)

    imported_by_hid = _hid_dict(imported_history)
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]
def __assert_output_format_is(expected, output, input_extensions=None, param_context=None, add_collection=False):
    """Assert that determine_output_format resolves *output* to *expected*.

    input_extensions: iterable of (input name, extension) pairs used to build
        input HDAs; the last extension becomes the fallback format.
    param_context: tool parameter context forwarded verbatim.
    add_collection: when True, also supply a paired input collection ("hdcai").
    """
    # BUG FIX: avoid mutable default arguments ([] is shared across calls).
    input_extensions = [] if input_extensions is None else input_extensions
    param_context = [] if param_context is None else param_context
    inputs = {}
    last_ext = "data"
    for i, (name, ext) in enumerate(input_extensions, start=1):
        hda = model.HistoryDatasetAssociation(extension=ext)
        hda.metadata.random_field = str(i)  # Populate a random metadata field for testing
        inputs[name] = hda
        last_ext = ext
    input_collections = {}
    if add_collection:
        hda_forward = model.HistoryDatasetAssociation(extension="txt")
        hda_reverse = model.HistoryDatasetAssociation(extension="txt")
        c1 = model.DatasetCollection(collection_type="pair")
        hc1 = model.HistoryDatasetCollectionAssociation(collection=c1, name="HistoryCollectionTest1")
        dce1 = model.DatasetCollectionElement(collection=c1, element=hda_forward, element_identifier="forward", element_index=0)
        dce2 = model.DatasetCollectionElement(collection=c1, element=hda_reverse, element_identifier="reverse", element_index=1)
        c1.elements = [dce1, dce2]
        input_collections["hdcai"] = [(hc1, False)]
    actual_format = determine_output_format(output, param_context, inputs, input_collections, last_ext)
    assert actual_format == expected, "Actual format %s, does not match expected %s" % (actual_format, expected)
def test_dataset_instance_order(self):
    """dataset_instances of a list:paired collection come back in element
    order, regardless of the (deliberately shuffled) persistence order."""
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    h1 = model.History(name="History 1", user=u)
    elements = []
    list_pair = model.DatasetCollection(collection_type="list:paired")
    for i in range(20):
        pair = model.DatasetCollection(collection_type="pair")
        forward = model.HistoryDatasetAssociation(extension="txt", history=h1, name=f"forward_{i}", create_dataset=True, sa_session=model.session)
        reverse = model.HistoryDatasetAssociation(extension="bam", history=h1, name=f"reverse_{i}", create_dataset=True, sa_session=model.session)
        dce1 = model.DatasetCollectionElement(collection=pair, element=forward, element_identifier=f"forward_{i}", element_index=1)
        dce2 = model.DatasetCollectionElement(collection=pair, element=reverse, element_identifier=f"reverse_{i}", element_index=2)
        to_persist = [(forward, reverse), (dce1, dce2)]
        self.persist(pair)
        # Alternate the persistence order of each pair's members to prove the
        # final ordering is driven by element_index, not insertion order.
        for item in to_persist:
            if i % 2:
                self.persist(item[0])
                self.persist(item[1])
            else:
                self.persist(item[1])
                self.persist(item[0])
        elements.append(model.DatasetCollectionElement(collection=list_pair, element=pair, element_index=i, element_identifier=str(i)))
    self.persist(list_pair)
    # Outer elements are also persisted in random order for the same reason.
    random.shuffle(elements)
    for item in elements:
        self.persist(item)
    forward = []
    reverse = []
    # dataset_instances flattens the nesting; even positions are the forward
    # member of each pair, odd positions the reverse member.
    for i, dataset_instance in enumerate(list_pair.dataset_instances):
        if i % 2:
            reverse.append(dataset_instance)
        else:
            forward.append(dataset_instance)
    assert all(d.name == f"forward_{i}" for i, d in enumerate(forward))
    assert all(d.name == f"reverse_{i}" for i, d in enumerate(reverse))
def test_collections_in_histories(self):
    """A paired collection attached to a history survives persistence and
    supports element lookup by identifier."""
    model = self.model
    user = model.User(email="*****@*****.**", password="******")
    history = model.History(name="History 1", user=user)
    left_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    right_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    pair = model.DatasetCollection(collection_type="pair")
    hdca = model.HistoryDatasetCollectionAssociation(history=history, collection=pair, name="HistoryCollectionTest1")
    left_element = model.DatasetCollectionElement(collection=pair, element=left_hda, element_identifier="left")
    right_element = model.DatasetCollectionElement(collection=pair, element=right_hda, element_identifier="right")
    self.persist(user, history, left_hda, right_hda, pair, hdca, left_element, right_element)

    loaded_dataset_collection = (
        self.query(model.HistoryDatasetCollectionAssociation)
        .filter(model.HistoryDatasetCollectionAssociation.name == "HistoryCollectionTest1")
        .first()
        .collection
    )
    self.assertEqual(len(loaded_dataset_collection.elements), 2)
    assert loaded_dataset_collection.collection_type == "pair"
    assert loaded_dataset_collection["left"] == left_element
    assert loaded_dataset_collection["right"] == right_element
def test_export_collection_with_copied_datasets_and_overlapping_hids():
    """Export succeeds when a collection references datasets that live in a
    different history while copies of them (with overlapping hids) live here."""
    app, sa_session, history = _setup_history_for_export("Collection History with dataset from other history")
    dataset_history = model.History(name="Dataset History", user=history.user)
    d1, d2 = _create_datasets(sa_session, dataset_history, 2)
    sa_session.add_all((d1, d2, dataset_history))
    sa_session.flush()

    app.object_store.update_from_file(d1, file_name="test-data/1.txt", create=True)
    app.object_store.update_from_file(d2, file_name="test-data/2.bed", create=True)

    d1_copy = d1.copy()
    d2_copy = d2.copy()
    d1_copy.history = history
    d2_copy.history = history

    pair = model.DatasetCollection(collection_type="paired")
    hdca = model.HistoryDatasetCollectionAssociation(history=history, hid=3, collection=pair, name="HistoryCollectionTest1")
    history.hid_counter = 5
    # Deliberately reference the *original* datasets from the other history,
    # not the in-history copies.
    forward_element = model.DatasetCollectionElement(collection=pair, element=d1, element_identifier="forward", element_index=0)
    reverse_element = model.DatasetCollectionElement(collection=pair, element=d2, element_identifier="reverse", element_index=1)
    sa_session.add_all((forward_element, reverse_element, d1_copy, d2_copy, hdca))
    sa_session.flush()

    _import_export(app, history)
def test_export_collection_hids():
    """HIDs of a collection (3) and its member datasets (1, 2) are preserved
    across an export/import round trip."""
    app, sa_session, history = _setup_history_for_export("Collection History with dataset from this history")
    dataset1, dataset2 = _create_datasets(sa_session, history, 2)
    pair = model.DatasetCollection(collection_type="paired")
    hdca = model.HistoryDatasetCollectionAssociation(history=history, hid=3, collection=pair, name="HistoryCollectionTest1")
    history.hid_counter = 4
    forward_element = model.DatasetCollectionElement(collection=pair, element=dataset1, element_identifier="forward", element_index=0)
    reverse_element = model.DatasetCollectionElement(collection=pair, element=dataset2, element_identifier="reverse", element_index=1)
    sa_session.add_all((forward_element, reverse_element, dataset1, dataset2, hdca))
    sa_session.flush()

    imported_history = _import_export(app, history)
    assert imported_history.hid_counter == 4, imported_history.hid_counter
    assert len(imported_history.dataset_collections) == 1
    assert len(imported_history.datasets) == 2
    for imported_hdca in imported_history.dataset_collections:
        assert imported_hdca.hid == 3, imported_hdca.hid
    for imported_hda in imported_history.datasets:
        assert imported_hda.hid in [1, 2], imported_hda.hid
    _assert_distinct_hids(imported_history)
def test_export_copied_objects_copied_outside_history():
    """Copies routed through a *different* history break the direct
    copied-from chain (hids 7-9); import rewires them to the in-history
    ancestors (hids 4-6)."""
    app, sa_session, history = _setup_history_for_export("Collection History with copied objects")
    dataset1, dataset2 = _create_datasets(sa_session, history, 2)
    pair = model.DatasetCollection(collection_type="paired")
    hdca1 = model.HistoryDatasetCollectionAssociation(history=history, hid=3, collection=pair, name="HistoryCollectionTest1")
    history.hid_counter = 4
    forward_element = model.DatasetCollectionElement(collection=pair, element=dataset1, element_identifier="forward", element_index=0)
    reverse_element = model.DatasetCollectionElement(collection=pair, element=dataset2, element_identifier="reverse", element_index=1)
    sa_session.add_all((forward_element, reverse_element, dataset1, dataset2, hdca1))
    sa_session.flush()

    # First copy stays in the same history...
    hdca2 = hdca1.copy(element_destination=history)
    history.add_dataset_collection(hdca2)
    sa_session.add(hdca2)

    # ...the second copy detours through an unrelated history...
    other_history = model.History(name=history.name + "-other", user=history.user)
    sa_session.add(other_history)
    hdca3 = hdca2.copy(element_destination=other_history)
    other_history.add_dataset_collection(hdca3)
    sa_session.add(hdca3)
    sa_session.flush()

    # ...and the third copy comes back into the original history.
    hdca4 = hdca3.copy(element_destination=history)
    history.add_dataset_collection(hdca4)
    sa_session.add(hdca4)
    sa_session.flush()

    assert history.hid_counter == 10
    original_by_hid = _hid_dict(history)
    assert original_by_hid[7].copied_from_history_dataset_association != original_by_hid[4]
    assert original_by_hid[8].copied_from_history_dataset_association != original_by_hid[5]
    assert original_by_hid[9].copied_from_history_dataset_collection_association != original_by_hid[6]

    imported_history = _import_export(app, history)
    assert imported_history.hid_counter == 10
    assert len(imported_history.dataset_collections) == 3
    assert len(imported_history.datasets) == 6
    _assert_distinct_hids(imported_history)

    imported_by_hid = _hid_dict(imported_history)
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]
    assert imported_by_hid[7].copied_from_history_dataset_association == imported_by_hid[4]
    assert imported_by_hid[8].copied_from_history_dataset_association == imported_by_hid[5]
    assert imported_by_hid[9].copied_from_history_dataset_collection_association == imported_by_hid[6]
def test_history_collection_copy(list_size=NUM_DATASETS):
    """Copying a history carries over its list:paired collections, dataset
    contents, and per-item annotations.

    list_size: number of pairs in each list:paired collection.
    """
    with _setup_mapping_and_user() as (test_config, object_store, model, old_history):
        for collection_index in range(NUM_COLLECTIONS):
            hdas = []
            # BUG FIX: this inner loop previously reused `i`, shadowing the
            # enclosing collection loop's index.
            for hda_index in range(list_size * 2):
                hda_path = test_config.write("moo", "test_metadata_original_%d" % hda_index)
                hda = _create_hda(model, object_store, old_history, hda_path, visible=False, include_metadata_file=False)
                hdas.append(hda)
            list_elements = []
            list_collection = model.DatasetCollection(collection_type="list:paired")
            for j in range(list_size):
                paired_collection = model.DatasetCollection(collection_type="paired")
                forward_dce = model.DatasetCollectionElement(collection=paired_collection, element=hdas[j * 2])
                reverse_dce = model.DatasetCollectionElement(collection=paired_collection, element=hdas[j * 2 + 1])
                paired_collection.elements = [forward_dce, reverse_dce]
                paired_collection_element = model.DatasetCollectionElement(collection=list_collection, element=paired_collection)
                list_elements.append(paired_collection_element)
                model.context.add_all([forward_dce, reverse_dce, paired_collection_element])
            list_collection.elements = list_elements
            history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=list_collection)
            history_dataset_collection.user = old_history.user
            model.context.add(history_dataset_collection)
            model.context.flush()
            old_history.add_dataset_collection(history_dataset_collection)
            history_dataset_collection.add_item_annotation(model.context, old_history.user, history_dataset_collection, "annotation #%d" % history_dataset_collection.hid)
            model.context.flush()
            # Smoke-read the annotation back; the real assertions happen on
            # the copied history below.
            annotation_str = history_dataset_collection.get_item_annotation_str(model.context, old_history.user, history_dataset_collection)
        # Saving magic SA invocations for detecting full flushes that may harm performance.
        # from sqlalchemy import event
        # @event.listens_for(model.context, "before_flush")
        # def track_instances_before_flush(session, context, instances):
        #     if not instances:
        #         print("FULL FLUSH...")
        #     else:
        #         print("Flushing just %s" % instances)
        history_copy_timer = ExecutionTimer()
        new_history = old_history.copy(target_user=old_history.user)
        print("history copied %s" % history_copy_timer)
        # Every copied dataset keeps its content ("moo" = 3 bytes) and its
        # hid-keyed annotation.
        for hda in new_history.active_datasets:
            assert hda.get_size() == 3
            annotation_str = hda.get_item_annotation_str(model.context, old_history.user, hda)
            assert annotation_str == "annotation #%d" % hda.hid, annotation_str
        assert len(new_history.active_dataset_collections) == NUM_COLLECTIONS
        for hdca in new_history.active_dataset_collections:
            annotation_str = hdca.get_item_annotation_str(model.context, old_history.user, hdca)
            assert annotation_str == "annotation #%d" % hdca.hid, annotation_str
def test_populated_optimized_ok(self):
    """A flat pair with both elements present reports populated via both the
    plain and the optimized property."""
    model = self.model
    user = model.User(email="*****@*****.**", password="******")
    history = model.History(name="History 1", user=user)
    forward_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    reverse_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    pair = model.DatasetCollection(collection_type='paired')
    forward_element = model.DatasetCollectionElement(collection=pair, element=forward_hda, element_identifier="forward", element_index=0)
    reverse_element = model.DatasetCollectionElement(collection=pair, element=reverse_hda, element_identifier="reverse", element_index=1)
    model.session.add_all([forward_hda, reverse_hda, pair, forward_element, reverse_element])
    model.session.flush()
    assert pair.populated
    assert pair.populated_optimized
def test_dataset_dbkeys_and_extensions_summary(self):
    """The summary aggregates the distinct dbkeys and extensions of all
    datasets in the collection."""
    model = self.model
    user = model.User(email="*****@*****.**", password="******")
    history = model.History(name="History 1", user=user)
    bam_hda = model.HistoryDatasetAssociation(extension="bam", dbkey="hg19", history=history, create_dataset=True, sa_session=model.session)
    txt_hda = model.HistoryDatasetAssociation(extension="txt", dbkey="hg19", history=history, create_dataset=True, sa_session=model.session)
    pair = model.DatasetCollection(collection_type='paired')
    forward_element = model.DatasetCollectionElement(collection=pair, element=bam_hda, element_identifier="forward", element_index=0)
    reverse_element = model.DatasetCollectionElement(collection=pair, element=txt_hda, element_identifier="reverse", element_index=1)
    hdca = model.HistoryDatasetCollectionAssociation(collection=pair, history=history)
    model.session.add_all([bam_hda, txt_hda, pair, forward_element, reverse_element, hdca])
    model.session.flush()
    dbkeys, extensions = hdca.dataset_dbkeys_and_extensions_summary
    assert dbkeys == {"hg19"}
    assert extensions == {"bam", "txt"}
def precreate_dataset_collection(self, structure, allow_unitialized_element=True):
    """Create a placeholder (populated=False) DatasetCollection mirroring
    *structure*.

    When the structure is a leaf or its children are unknown and
    allow_unitialized_element is True, the UNINITIALIZED_ELEMENT sentinel is
    returned instead of a collection.
    """
    has_structure = not structure.is_leaf and structure.children_known
    if not has_structure and allow_unitialized_element:
        return model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
    # The collection shell is built the same way whether or not children are
    # known; the original duplicated these three lines across two branches.
    collection_type_description = structure.collection_type_description
    dataset_collection = model.DatasetCollection(populated=False)
    dataset_collection.collection_type = collection_type_description.collection_type
    if has_structure:
        elements = []
        for index, (identifier, substructure) in enumerate(structure.children):
            # TODO: Open question - populate these now or later?
            if substructure.is_leaf:
                element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
            else:
                element = self.precreate_dataset_collection(
                    substructure, allow_unitialized_element=allow_unitialized_element
                )
            element = model.DatasetCollectionElement(
                element=element,
                element_identifier=identifier,
                element_index=index,
            )
            elements.append(element)
        dataset_collection.elements = elements
        dataset_collection.element_count = len(elements)
    return dataset_collection
def test_collections_in_library_folders(self):
    """Library dataset associations can be elements of a pair collection and
    persist alongside the library hierarchy."""
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    root_folder = model.LibraryFolder(name="RootFolder")
    # FIX: the library was bound to `l`, an ambiguous single-letter name
    # (easily confused with `1`, flagged by linters as E741).
    library = model.Library(name="Library1", root_folder=root_folder)
    ld1 = model.LibraryDataset()
    ld2 = model.LibraryDataset()
    # NOTE(review): both LDDAs reference ld1; ld2 is persisted but otherwise
    # unused — confirm that is intended.
    ldda1 = model.LibraryDatasetDatasetAssociation(extension="txt", library_dataset=ld1)
    ldda2 = model.LibraryDatasetDatasetAssociation(extension="txt", library_dataset=ld1)
    c1 = model.DatasetCollection(collection_type="pair")
    dce1 = model.DatasetCollectionElement(collection=c1, element=ldda1)
    dce2 = model.DatasetCollectionElement(collection=c1, element=ldda2)
    self.persist(u, library, root_folder, ld1, ld2, c1, ldda1, ldda2, dce1, dce2)
def __init__(self, implicit_output_name=None, job=None, hid=1):
    """Mock history dataset collection association containing a single 'ok'
    element whose creating-job association points at *job*.

    implicit_output_name: name recorded for the implicit collection output.
    job: the Job attached to the element's creating association.
    hid: history item number shown to the user.
    """
    self.id = 124
    self.copied_from_history_dataset_collection_association = None
    self.history_content_type = "dataset_collection"
    self.implicit_output_name = implicit_output_name
    # BUG FIX: hid was hard-coded to 1, silently ignoring the parameter;
    # behavior is unchanged for callers relying on the default.
    self.hid = hid
    self.collection = model.DatasetCollection()
    self.creating_job_associations = []
    element = model.DatasetCollectionElement(
        collection=self.collection,
        element=model.HistoryDatasetAssociation(),
        element_index=0,
        element_identifier="moocow",
    )
    element.dataset_instance.dataset = model.Dataset()
    element.dataset_instance.dataset.state = "ok"
    creating = model.JobToOutputDatasetAssociation(
        implicit_output_name,
        element.dataset_instance,
    )
    creating.job = job
    element.dataset_instance.creating_job_associations = [
        creating,
    ]
    self.collection.elements = [element]
def test_nested_collection_attributes(self):
    """Exercise _get_nested_collection_attributes and the convenience
    properties built on it across nested (list:paired, list:list:paired)
    collections, including metadata-file path reporting."""
    model = self.model
    u = model.User(email="*****@*****.**", password="******")
    h1 = model.History(name="History 1", user=u)
    d1 = model.HistoryDatasetAssociation(extension="bam", history=h1, create_dataset=True, sa_session=model.session)
    # Give the bam HDA two metadata files (bai + csi index) so the
    # *metadata_files properties below have something to report.
    index = NamedTemporaryFile("w")
    index.write("cool bam index")
    index2 = NamedTemporaryFile("w")
    index2.write("cool bam index 2")
    metadata_dict = {"bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name}), "bam_csi_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index2.name})}
    d1.metadata.from_JSON_dict(json_dict=metadata_dict)
    assert d1.metadata.bam_index
    assert d1.metadata.bam_csi_index
    assert isinstance(d1.metadata.bam_index, model.MetadataFile)
    assert isinstance(d1.metadata.bam_csi_index, model.MetadataFile)
    d2 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
    # c2 = list:paired wrapping pair c1; c4 = list:list:paired wrapping c2;
    # c3 is an empty list:list used for the empty-collection assertions.
    c1 = model.DatasetCollection(collection_type='paired')
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
    c2 = model.DatasetCollection(collection_type="list:paired")
    dce3 = model.DatasetCollectionElement(collection=c2, element=c1, element_identifier="inner_list", element_index=0)
    c3 = model.DatasetCollection(collection_type="list:list")
    c4 = model.DatasetCollection(collection_type="list:list:paired")
    dce4 = model.DatasetCollectionElement(collection=c4, element=c2, element_identifier="outer_list", element_index=0)
    model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4])
    model.session.flush()
    # One element_identifier_N column per nesting level, plus requested
    # hda/dataset attributes.
    q = c2._get_nested_collection_attributes(element_attributes=('element_identifier',), hda_attributes=('extension',), dataset_attributes=('state',))
    assert [(r.keys()) for r in q] == [['element_identifier_0', 'element_identifier_1', 'extension', 'state'], ['element_identifier_0', 'element_identifier_1', 'extension', 'state']]
    assert q.all() == [('inner_list', 'forward', 'bam', 'new'), ('inner_list', 'reverse', 'txt', 'new')]
    q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation,))
    assert q.all() == [d1, d2]
    q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation, model.Dataset))
    assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)]
    # Assert properties that use _get_nested_collection_attributes return correct content
    assert c2.dataset_instances == [d1, d2]
    assert c2.dataset_elements == [dce1, dce2]
    assert c2.dataset_action_tuples == []
    assert c2.populated_optimized
    assert c2.dataset_states_and_extensions_summary == ({'new'}, {'txt', 'bam'})
    assert c2.element_identifiers_extensions_paths_and_metadata_files == [[('inner_list', 'forward'), 'bam', 'mock_dataset_14.dat', [('bai', 'mock_dataset_14.dat'), ('bam.csi', 'mock_dataset_14.dat')]], [('inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat', []]]
    # Empty collections yield empty results rather than erroring.
    assert c3.dataset_instances == []
    assert c3.dataset_elements == []
    assert c3.dataset_states_and_extensions_summary == (set(), set())
    q = c4._get_nested_collection_attributes(element_attributes=('element_identifier',))
    assert q.all() == [('outer_list', 'inner_list', 'forward'), ('outer_list', 'inner_list', 'reverse')]
    assert c4.dataset_elements == [dce1, dce2]
    assert c4.element_identifiers_extensions_and_paths == [(('outer_list', 'inner_list', 'forward'), 'bam', 'mock_dataset_14.dat'), (('outer_list', 'inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat')]
def test_populated_optimized_empty_list_list_ok(self):
    """An empty list nested inside a list:list still counts as populated for
    both the inner and outer collections."""
    model = self.model
    inner_list = model.DatasetCollection(collection_type='list')
    outer_list = model.DatasetCollection(collection_type='list:list')
    element = model.DatasetCollectionElement(collection=outer_list, element=inner_list, element_identifier="empty_list", element_index=0)
    model.session.add_all([inner_list, outer_list, element])
    model.session.flush()
    for collection in (inner_list, outer_list):
        assert collection.populated
        assert collection.populated_optimized
def test_collection_get_interface(self):
    """Indexing a collection (collection[i]) returns the element at index i."""
    model = self.model
    user = model.User(email="*****@*****.**", password="******")
    history = model.History(name="History 1", user=user)
    hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
    collection = model.DatasetCollection(collection_type="list")
    element_count = 100
    dces = [
        model.DatasetCollectionElement(collection=collection, element=hda, element_identifier=f"{index}", element_index=index)
        for index in range(element_count)
    ]
    self.persist(user, history, hda, collection, *dces, flush=False, expunge=False)
    model.session.flush()
    for index, dce in enumerate(dces):
        assert collection[index] == dce
def precreate_dataset_collection(self, structure, allow_unitialized_element=True, completed_collection=None, implicit_output_name=None):
    """Create a placeholder (populated=False) DatasetCollection mirroring
    *structure*, optionally reusing elements from an already-completed
    collection.

    completed_collection / implicit_output_name: when both are supplied, each
    child position is looked up in the completed collection's job outputs and
    the existing dataset collection is reused instead of a fresh placeholder.
    """
    has_structure = not structure.is_leaf and structure.children_known
    if not has_structure and allow_unitialized_element:
        return model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
    # The collection shell is built the same way whether or not children are
    # known; the original duplicated these three lines across two branches.
    collection_type_description = structure.collection_type_description
    dataset_collection = model.DatasetCollection(populated=False)
    dataset_collection.collection_type = collection_type_description.collection_type
    if has_structure:
        elements = []
        for index, (identifier, substructure) in enumerate(structure.children):
            # TODO: Open question - populate these now or later?
            element = None
            if completed_collection and implicit_output_name:
                job = completed_collection[index]
                if job:
                    it = (
                        jtiodca.dataset_collection
                        for jtiodca in job.output_dataset_collections
                        if jtiodca.name == implicit_output_name
                    )
                    element = next(it, None)
            if element is None:
                if substructure.is_leaf:
                    element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
                else:
                    element = self.precreate_dataset_collection(
                        substructure, allow_unitialized_element=allow_unitialized_element
                    )
            element = model.DatasetCollectionElement(
                collection=dataset_collection,
                element=element,
                element_identifier=identifier,
                element_index=index,
            )
            elements.append(element)
        dataset_collection.element_count = len(elements)
    return dataset_collection
def test_import_export_edit_collection():
    """Test modifying existing collections with imports.

    Exports an unpopulated list collection, hand-edits the exported metadata
    (marking it populated and attaching two elements), then re-imports with
    allow_edit=True and verifies the original collection was updated in place.
    """
    app = _mock_app()
    sa_session = app.model.context
    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    c1 = model.DatasetCollection(collection_type="list", populated=False)
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")

    sa_session.add(hc1)
    sa_session.add(h)
    sa_session.flush()

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory, app=app, for_edit=True) as export_store:
        export_store.add_dataset_collection(hc1)

    # Fabric editing metadata for collection...
    collections_metadata_path = os.path.join(temp_directory, store.ATTRS_FILENAME_COLLECTIONS)
    datasets_metadata_path = os.path.join(temp_directory, store.ATTRS_FILENAME_DATASETS)
    with open(collections_metadata_path, "r") as f:
        hdcas_metadata = json.load(f)

    assert len(hdcas_metadata) == 1
    hdca_metadata = hdcas_metadata[0]
    assert hdca_metadata
    assert "id" in hdca_metadata
    assert "collection" in hdca_metadata
    collection_metadata = hdca_metadata["collection"]
    assert "populated_state" in collection_metadata
    assert collection_metadata["populated_state"] == model.DatasetCollection.populated_states.NEW

    # Flip the exported collection to populated and give it two elements.
    collection_metadata["populated_state"] = model.DatasetCollection.populated_states.OK

    d1 = model.HistoryDatasetAssociation(extension="txt", create_dataset=True, flush=False)
    d1.hid = 1
    d2 = model.HistoryDatasetAssociation(extension="txt", create_dataset=True, flush=False)
    d2.hid = 2
    serialization_options = model.SerializationOptions(for_edit=True)
    dataset_list = [
        d1.serialize(app.security, serialization_options),
        d2.serialize(app.security, serialization_options)
    ]

    # Reuse the exported collection's id so the import edits it in place.
    dc = model.DatasetCollection(
        id=collection_metadata["id"],
        collection_type="list",
        element_count=2,
    )
    dc.populated_state = model.DatasetCollection.populated_states.OK
    dce1 = model.DatasetCollectionElement(
        element=d1,
        element_index=0,
        element_identifier="first",
    )
    dce2 = model.DatasetCollectionElement(
        element=d2,
        element_index=1,
        element_identifier="second",
    )
    dc.elements = [dce1, dce2]
    with open(datasets_metadata_path, "w") as datasets_f:
        json.dump(dataset_list, datasets_f)

    hdca_metadata["collection"] = dc.serialize(app.security, serialization_options)
    with open(collections_metadata_path, "w") as collections_f:
        json.dump(hdcas_metadata, collections_f)

    _perform_import_from_directory(temp_directory, app, u, import_history, store.ImportOptions(allow_edit=True))

    sa_session.refresh(c1)
    assert c1.populated_state == model.DatasetCollection.populated_states.OK, c1.populated_state
    assert len(c1.elements) == 2
def test_export_collection_with_mapping_history():
    """Mapped-over jobs and their ImplicitCollectionJobs structure survive a
    history export/import round trip."""
    app, sa_session, h = _setup_history_for_export("Collection Mapping History")
    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)

    input_collection = model.DatasetCollection(collection_type="list")
    input_hdca = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=input_collection, name="HistoryCollectionTest1")
    input_element1 = model.DatasetCollectionElement(collection=input_collection, element=d1, element_identifier="el1", element_index=0)
    input_element2 = model.DatasetCollectionElement(collection=input_collection, element=d2, element_identifier="el2", element_index=1)

    output_collection = model.DatasetCollection(collection_type="list")
    output_hdca = model.HistoryDatasetCollectionAssociation(
        history=h, hid=2, collection=output_collection, name="HistoryCollectionTest2")
    output_element1 = model.DatasetCollectionElement(collection=output_collection, element=d3, element_identifier="el1", element_index=0)
    output_element2 = model.DatasetCollectionElement(collection=output_collection, element=d4, element_identifier="el2", element_index=1)
    output_hdca.add_implicit_input_collection("input1", input_hdca)

    # One job per mapped-over element.
    j1 = model.Job()
    j1.user = h.user
    j1.tool_id = "cat1"
    j1.add_input_dataset("input1", d1)
    j1.add_output_dataset("out_file1", d3)

    j2 = model.Job()
    j2.user = h.user
    j2.tool_id = "cat1"
    j2.add_input_dataset("input1", d2)
    j2.add_output_dataset("out_file1", d4)

    sa_session.add_all((input_element1, input_element2, output_element1, output_element2, input_hdca, output_hdca, j1, j2))
    sa_session.flush()

    implicit_collection_jobs = model.ImplicitCollectionJobs()
    j1.add_output_dataset_collection("out_file1", output_hdca)  # really?
    ija1 = model.ImplicitCollectionJobsJobAssociation()
    ija1.order_index = 0
    ija1.implicit_collection_jobs = implicit_collection_jobs
    ija1.job = j1

    j2.add_output_dataset_collection("out_file1", output_hdca)  # really?
    ija2 = model.ImplicitCollectionJobsJobAssociation()
    ija2.order_index = 1
    ija2.implicit_collection_jobs = implicit_collection_jobs
    ija2.job = j2

    sa_session.add_all((implicit_collection_jobs, ija1, ija2))
    sa_session.flush()

    imported_history = _import_export(app, h)
    assert len(imported_history.jobs) == 2
    imported_job0 = imported_history.jobs[0]
    imported_icj = imported_job0.implicit_collection_jobs_association.implicit_collection_jobs
    assert imported_icj
    assert len(imported_icj.jobs) == 2, len(imported_icj.jobs)
def test_export_collection_history():
    """Export/import a history holding a paired collection, a nested
    list:paired collection, and a job that consumes one and produces the
    other; verify structure and job wiring survive the round trip."""
    app, sa_session, h = _setup_history_for_export("Collection History")
    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)

    # c2 nests a paired collection (cleaf) inside a one-element list.
    c2 = model.DatasetCollection(collection_type="list:paired")
    hc2 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=2, collection=c2, name="HistoryCollectionTest2")
    cleaf = model.DatasetCollection(collection_type="paired")
    dce2leaf1 = model.DatasetCollectionElement(collection=cleaf, element=d3, element_identifier="forward", element_index=0)
    dce2leaf2 = model.DatasetCollectionElement(collection=cleaf, element=d4, element_identifier="reverse", element_index=1)
    dce21 = model.DatasetCollectionElement(collection=c2, element=cleaf, element_identifier="listel", element_index=0)

    # A job linking the two collections as input and output.
    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.add_input_dataset_collection("input1_collect", hc1)
    j.add_output_dataset_collection("output_collect", hc2)
    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(dce21)
    sa_session.add(dce2leaf1)
    sa_session.add(dce2leaf2)
    sa_session.add(hc1)
    sa_session.add(hc2)
    sa_session.add(j)
    sa_session.flush()

    imported_history = _import_export(app, h)

    datasets = imported_history.datasets
    assert len(datasets) == 4
    dataset_collections = list(
        imported_history.contents_iter(types=["dataset_collection"]))
    assert len(dataset_collections) == 2

    imported_hdca1 = dataset_collections[0]
    imported_hdca2 = dataset_collections[1]
    imported_collection_2 = imported_hdca2.collection
    assert imported_hdca1.collection.collection_type == "paired"
    assert imported_collection_2.collection_type == "list:paired"
    assert len(imported_collection_2.elements) == 1
    imported_top_level_element = imported_collection_2.elements[0]
    assert imported_top_level_element.element_identifier == "listel", imported_top_level_element.element_identifier
    assert imported_top_level_element.element_index == 0, imported_top_level_element.element_index
    imported_nested_collection = imported_top_level_element.child_collection
    assert len(imported_nested_collection.elements) == 2
    assert imported_nested_collection.collection_type == "paired", imported_nested_collection.collection_type

    assert len(imported_history.jobs) == 1
    imported_job = imported_history.jobs[0]
    assert imported_job
    assert len(imported_job.input_dataset_collections) == 1, len(
        imported_job.input_dataset_collections)
    assert len(imported_job.output_dataset_collection_instances) == 1
    # The imported job is a new row, not the original.
    assert imported_job.id != j.id