    def _new_pair_collection(self):
        hda_forward = self._new_hda(contents="Forward dataset.")
        hda_forward.id = 1
        hda_forward.extension = "txt"
        hda_reverse = self._new_hda(contents="Reverse dataset.")
        hda_reverse.id = 2
        hda_reverse.extension = "txt"

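        # Assemble a two-element "paired" collection: forward at element_index 0, reverse at element_index 1.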
        collection = model.DatasetCollection()
        collection.id = 1
        element_forward = model.DatasetCollectionElement(
            collection=collection,
            element=hda_forward,
            element_index=0,
            element_identifier="forward",
        )
        element_forward.id = 1
        element_reverse = model.DatasetCollectionElement(
            collection=collection,
            element=hda_reverse,
            element_index=1,
            element_identifier="reverse",
        )
        element_reverse.id = 2
        collection.collection_type = "paired"
        return collection
Example #2
def test_export_copied_collection():
    app, sa_session, h = _setup_history_for_export("Collection History with copied collection")

    d1, d2 = _create_datasets(sa_session, h, 2)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=3, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 4
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)

    sa_session.add_all((dce1, dce2, d1, d2, hc1))
    sa_session.flush()

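    # Copying the HDCA with element_destination=h also copies its element HDAs into the history;
    # add_pending_items then assigns hids 4-6, advancing the counter from 4 to 7.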
    hc2 = hc1.copy(element_destination=h)
    h.add_pending_items()
    assert h.hid_counter == 7

    sa_session.add(hc2)
    sa_session.flush()

    assert hc2.copied_from_history_dataset_collection_association == hc1

    imported_history = _import_export(app, h)
    assert imported_history.hid_counter == 7

    assert len(imported_history.dataset_collections) == 2
    assert len(imported_history.datasets) == 4

    _assert_distinct_hids(imported_history)
    imported_by_hid = _hid_dict(imported_history)
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]
Example #3
def __assert_output_format_is(expected, output, input_extensions=[], param_context=[], add_collection=False):
    inputs = {}
    last_ext = "data"
    i = 1
    for name, ext in input_extensions:
        hda = model.HistoryDatasetAssociation(extension=ext)
        hda.metadata.random_field = str(i)  # Populate a random metadata field for testing
        inputs[name] = hda
        last_ext = ext
        i += 1

    input_collections = {}
    if add_collection:
        hda_forward = model.HistoryDatasetAssociation(extension="txt")
        hda_reverse = model.HistoryDatasetAssociation(extension="txt")
        c1 = model.DatasetCollection(collection_type="pair")
        hc1 = model.HistoryDatasetCollectionAssociation(collection=c1, name="HistoryCollectionTest1")

        dce1 = model.DatasetCollectionElement(collection=c1, element=hda_forward, element_identifier="forward", element_index=0)
        dce2 = model.DatasetCollectionElement(collection=c1, element=hda_reverse, element_identifier="reverse", element_index=1)
        c1.elements = [dce1, dce2]

        input_collections["hdcai"] = [(hc1, False)]

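    # last_ext (the extension of the final input, or "data" when there are none) is passed as the
    # fallback extension, presumably used when the output's format is derived from its inputs.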
    actual_format = determine_output_format(output, param_context, inputs, input_collections, last_ext)
    assert actual_format == expected, "Actual format %s does not match expected %s" % (actual_format, expected)
Example #4
 def test_dataset_instance_order(self):
     model = self.model
     u = model.User(email="*****@*****.**", password="******")
     h1 = model.History(name="History 1", user=u)
     elements = []
     list_pair = model.DatasetCollection(collection_type="list:paired")
     for i in range(20):
         pair = model.DatasetCollection(collection_type="pair")
         forward = model.HistoryDatasetAssociation(extension="txt", history=h1, name=f"forward_{i}", create_dataset=True, sa_session=model.session)
         reverse = model.HistoryDatasetAssociation(extension="bam", history=h1, name=f"reverse_{i}", create_dataset=True, sa_session=model.session)
         dce1 = model.DatasetCollectionElement(collection=pair, element=forward, element_identifier=f"forward_{i}", element_index=1)
         dce2 = model.DatasetCollectionElement(collection=pair, element=reverse, element_identifier=f"reverse_{i}", element_index=2)
         to_persist = [(forward, reverse), (dce1, dce2)]
         self.persist(pair)
         for item in to_persist:
             if i % 2:
                 self.persist(item[0])
                 self.persist(item[1])
             else:
                 self.persist(item[1])
                 self.persist(item[0])
         elements.append(model.DatasetCollectionElement(collection=list_pair, element=pair, element_index=i, element_identifier=str(i)))
     self.persist(list_pair)
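     # Persist the outer list elements in shuffled order; dataset_instances below should still
     # come back ordered by element_index rather than by insertion order.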
     random.shuffle(elements)
     for item in elements:
         self.persist(item)
     forward = []
     reverse = []
     for i, dataset_instance in enumerate(list_pair.dataset_instances):
         if i % 2:
             reverse.append(dataset_instance)
         else:
             forward.append(dataset_instance)
     assert all(d.name == f"forward_{i}" for i, d in enumerate(forward))
     assert all(d.name == f"reverse_{i}" for i, d in enumerate(reverse))
Example #5
    def test_collections_in_histories(self):
        model = self.model

        u = model.User(email="*****@*****.**", password="******")
        h1 = model.History(name="History 1", user=u)
        d1 = model.HistoryDatasetAssociation(extension="txt",
                                             history=h1,
                                             create_dataset=True,
                                             sa_session=model.session)
        d2 = model.HistoryDatasetAssociation(extension="txt",
                                             history=h1,
                                             create_dataset=True,
                                             sa_session=model.session)

        c1 = model.DatasetCollection(collection_type="pair")
        hc1 = model.HistoryDatasetCollectionAssociation(
            history=h1, collection=c1, name="HistoryCollectionTest1")

        dce1 = model.DatasetCollectionElement(collection=c1,
                                              element=d1,
                                              element_identifier="left")
        dce2 = model.DatasetCollectionElement(collection=c1,
                                              element=d2,
                                              element_identifier="right")

        self.persist(u, h1, d1, d2, c1, hc1, dce1, dce2)

        loaded_dataset_collection = self.query(
            model.HistoryDatasetCollectionAssociation).filter(
                model.HistoryDatasetCollectionAssociation.name ==
                "HistoryCollectionTest1").first().collection
        self.assertEqual(len(loaded_dataset_collection.elements), 2)
        assert loaded_dataset_collection.collection_type == "pair"
        assert loaded_dataset_collection["left"] == dce1
        assert loaded_dataset_collection["right"] == dce2
Example #6
def test_export_collection_with_copied_datasets_and_overlapping_hids():
    app, sa_session, h = _setup_history_for_export("Collection History with dataset from other history")

    dataset_history = model.History(name="Dataset History", user=h.user)

    d1, d2 = _create_datasets(sa_session, dataset_history, 2)

    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(dataset_history)
    sa_session.flush()

    app.object_store.update_from_file(d1, file_name="test-data/1.txt", create=True)
    app.object_store.update_from_file(d2, file_name="test-data/2.bed", create=True)

    d1_copy = d1.copy()
    d2_copy = d2.copy()

    d1_copy.history = h
    d2_copy.history = h

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=3, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 5
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)

    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(d1_copy)
    sa_session.add(d2_copy)
    sa_session.add(hc1)
    sa_session.flush()

    _import_export(app, h)
Example #7
def test_export_collection_hids():
    app, sa_session, h = _setup_history_for_export("Collection History with dataset from this history")

    d1, d2 = _create_datasets(sa_session, h, 2)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=3, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 4
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)

    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(d1)
    sa_session.add(d2)
    sa_session.add(hc1)
    sa_session.flush()

    imported_history = _import_export(app, h)

    assert imported_history.hid_counter == 4, imported_history.hid_counter
    assert len(imported_history.dataset_collections) == 1
    assert len(imported_history.datasets) == 2
    for hdca in imported_history.dataset_collections:
        assert hdca.hid == 3, hdca.hid
    for hda in imported_history.datasets:
        assert hda.hid in [1, 2], hda.hid
    _assert_distinct_hids(imported_history)
Example #8
def test_export_copied_objects_copied_outside_history():
    app, sa_session, h = _setup_history_for_export("Collection History with copied objects")

    d1, d2 = _create_datasets(sa_session, h, 2)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=3, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 4
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)

    sa_session.add_all((dce1, dce2, d1, d2, hc1))
    sa_session.flush()

    hc2 = hc1.copy(element_destination=h)
    h.add_dataset_collection(hc2)

    sa_session.add(hc2)

    other_h = model.History(name=h.name + "-other", user=h.user)
    sa_session.add(other_h)

    hc3 = hc2.copy(element_destination=other_h)
    other_h.add_dataset_collection(hc3)
    sa_session.add(hc3)
    sa_session.flush()

    hc4 = hc3.copy(element_destination=h)
    h.add_dataset_collection(hc4)
    sa_session.add(hc4)
    sa_session.flush()

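    # hids 1-3 belong to d1, d2 and hc1; copying hc1 into h adds hids 4-6; copying hc3 back from
    # other_h adds 7-9; hc3 itself lives in other_h, so h's counter ends at 10.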
    assert h.hid_counter == 10

    original_by_hid = _hid_dict(h)
    assert original_by_hid[7].copied_from_history_dataset_association != original_by_hid[4]
    assert original_by_hid[8].copied_from_history_dataset_association != original_by_hid[5]
    assert original_by_hid[9].copied_from_history_dataset_collection_association != original_by_hid[6]

    imported_history = _import_export(app, h)

    assert imported_history.hid_counter == 10
    assert len(imported_history.dataset_collections) == 3
    assert len(imported_history.datasets) == 6

    _assert_distinct_hids(imported_history)
    imported_by_hid = _hid_dict(imported_history)
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]

    assert imported_by_hid[7].copied_from_history_dataset_association == imported_by_hid[4]
    assert imported_by_hid[8].copied_from_history_dataset_association == imported_by_hid[5]
    assert imported_by_hid[9].copied_from_history_dataset_collection_association == imported_by_hid[6]
Example #9
def test_history_collection_copy(list_size=NUM_DATASETS):
    with _setup_mapping_and_user() as (test_config, object_store, model, old_history):
        for i in range(NUM_COLLECTIONS):
            hdas = []
            for i in range(list_size * 2):
                hda_path = test_config.write("moo", "test_metadata_original_%d" % i)
                hda = _create_hda(model, object_store, old_history, hda_path, visible=False, include_metadata_file=False)
                hdas.append(hda)

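            # Assemble a list:paired collection: each of the list_size outer elements wraps a
            # paired collection built from two of the HDAs created above.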
            list_elements = []
            list_collection = model.DatasetCollection(collection_type="list:paired")
            for j in range(list_size):
                paired_collection = model.DatasetCollection(collection_type="paired")
                forward_dce = model.DatasetCollectionElement(collection=paired_collection, element=hdas[j * 2])
                reverse_dce = model.DatasetCollectionElement(collection=paired_collection, element=hdas[j * 2 + 1])
                paired_collection.elements = [forward_dce, reverse_dce]
                paired_collection_element = model.DatasetCollectionElement(collection=list_collection, element=paired_collection)
                list_elements.append(paired_collection_element)
                model.context.add_all([forward_dce, reverse_dce, paired_collection_element])
            list_collection.elements = list_elements
            history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=list_collection)
            history_dataset_collection.user = old_history.user
            model.context.add(history_dataset_collection)

            model.context.flush()
            old_history.add_dataset_collection(history_dataset_collection)
            history_dataset_collection.add_item_annotation(model.context, old_history.user, history_dataset_collection, "annotation #%d" % history_dataset_collection.hid)

        model.context.flush()
        annotation_str = history_dataset_collection.get_item_annotation_str(model.context, old_history.user, history_dataset_collection)

        # Saving magic SA invocations for detecting full flushes that may harm performance.
        # from sqlalchemy import event
        # @event.listens_for(model.context, "before_flush")
        # def track_instances_before_flush(session, context, instances):
        #     if not instances:
        #         print("FULL FLUSH...")
        #     else:
        #         print("Flushing just %s" % instances)

        history_copy_timer = ExecutionTimer()
        new_history = old_history.copy(target_user=old_history.user)
        print("history copied %s" % history_copy_timer)

        for i, hda in enumerate(new_history.active_datasets):
            assert hda.get_size() == 3
            annotation_str = hda.get_item_annotation_str(model.context, old_history.user, hda)
            assert annotation_str == "annotation #%d" % hda.hid, annotation_str

        assert len(new_history.active_dataset_collections) == NUM_COLLECTIONS
        for hdca in new_history.active_dataset_collections:
            annotation_str = hdca.get_item_annotation_str(model.context, old_history.user, hdca)
            assert annotation_str == "annotation #%d" % hdca.hid, annotation_str
Example #10
 def test_populated_optimized_ok(self):
     model = self.model
     u = model.User(email="*****@*****.**", password="******")
     h1 = model.History(name="History 1", user=u)
     d1 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
     d2 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
     c1 = model.DatasetCollection(collection_type='paired')
     dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
     dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
     model.session.add_all([d1, d2, c1, dce1, dce2])
     model.session.flush()
     assert c1.populated
     assert c1.populated_optimized
Example #11
 def test_dataset_dbkeys_and_extensions_summary(self):
     model = self.model
     u = model.User(email="*****@*****.**", password="******")
     h1 = model.History(name="History 1", user=u)
     d1 = model.HistoryDatasetAssociation(extension="bam", dbkey="hg19", history=h1, create_dataset=True, sa_session=model.session)
     d2 = model.HistoryDatasetAssociation(extension="txt", dbkey="hg19", history=h1, create_dataset=True, sa_session=model.session)
     c1 = model.DatasetCollection(collection_type='paired')
     dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
     dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
     hdca = model.HistoryDatasetCollectionAssociation(collection=c1, history=h1)
     model.session.add_all([d1, d2, c1, dce1, dce2, hdca])
     model.session.flush()
     assert hdca.dataset_dbkeys_and_extensions_summary[0] == {"hg19"}
     assert hdca.dataset_dbkeys_and_extensions_summary[1] == {"bam", "txt"}
Example #12
    def precreate_dataset_collection(self,
                                     structure,
                                     allow_unitialized_element=True):
        has_structure = not structure.is_leaf and structure.children_known
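        # Without known children, either return the shared UNINITIALIZED_ELEMENT placeholder or
        # create an empty, unpopulated collection of the right collection type.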
        if not has_structure and allow_unitialized_element:
            dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
        elif not has_structure:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
        else:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
            elements = []
            for index, (identifier,
                        substructure) in enumerate(structure.children):
                # TODO: Open question - populate these now or later?
                if substructure.is_leaf:
                    element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
                else:
                    element = self.precreate_dataset_collection(
                        substructure,
                        allow_unitialized_element=allow_unitialized_element)

                element = model.DatasetCollectionElement(
                    element=element,
                    element_identifier=identifier,
                    element_index=index,
                )
                elements.append(element)
            dataset_collection.elements = elements
            dataset_collection.element_count = len(elements)

        return dataset_collection
Example #13
    def test_collections_in_library_folders(self):
        model = self.model

        u = model.User(email="*****@*****.**", password="******")
        lf = model.LibraryFolder(name="RootFolder")
        l = model.Library(name="Library1", root_folder=lf)
        ld1 = model.LibraryDataset()
        ld2 = model.LibraryDataset()

        ldda1 = model.LibraryDatasetDatasetAssociation(extension="txt", library_dataset=ld1)
        ldda2 = model.LibraryDatasetDatasetAssociation(extension="txt", library_dataset=ld1)

        c1 = model.DatasetCollection(collection_type="pair")
        dce1 = model.DatasetCollectionElement(collection=c1, element=ldda1)
        dce2 = model.DatasetCollectionElement(collection=c1, element=ldda2)
        self.persist(u, l, lf, ld1, ld2, c1, ldda1, ldda2, dce1, dce2)
Example #14
 def __init__(self, implicit_output_name=None, job=None, hid=1):
     self.id = 124
     self.copied_from_history_dataset_collection_association = None
     self.history_content_type = "dataset_collection"
     self.implicit_output_name = implicit_output_name
     self.hid = hid
     self.collection = model.DatasetCollection()
     self.creating_job_associations = []
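     # Single mock element whose dataset is already in "ok" state and is linked back to the
     # creating job through a JobToOutputDatasetAssociation.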
     element = model.DatasetCollectionElement(
         collection=self.collection,
         element=model.HistoryDatasetAssociation(),
         element_index=0,
         element_identifier="moocow",
     )
     element.dataset_instance.dataset = model.Dataset()
     element.dataset_instance.dataset.state = "ok"
     creating = model.JobToOutputDatasetAssociation(
         implicit_output_name,
         element.dataset_instance,
     )
     creating.job = job
     element.dataset_instance.creating_job_associations = [
         creating,
     ]
     self.collection.elements = [element]
Example #15
 def test_nested_collection_attributes(self):
     model = self.model
     u = model.User(email="*****@*****.**", password="******")
     h1 = model.History(name="History 1", user=u)
     d1 = model.HistoryDatasetAssociation(extension="bam", history=h1, create_dataset=True, sa_session=model.session)
     index = NamedTemporaryFile("w")
     index.write("cool bam index")
     index2 = NamedTemporaryFile("w")
     index2.write("cool bam index 2")
     metadata_dict = {"bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name}), "bam_csi_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index2.name})}
     d1.metadata.from_JSON_dict(json_dict=metadata_dict)
     assert d1.metadata.bam_index
     assert d1.metadata.bam_csi_index
     assert isinstance(d1.metadata.bam_index, model.MetadataFile)
     assert isinstance(d1.metadata.bam_csi_index, model.MetadataFile)
     d2 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
     c1 = model.DatasetCollection(collection_type='paired')
     dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
     dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
     c2 = model.DatasetCollection(collection_type="list:paired")
     dce3 = model.DatasetCollectionElement(collection=c2, element=c1, element_identifier="inner_list", element_index=0)
     c3 = model.DatasetCollection(collection_type="list:list")
     c4 = model.DatasetCollection(collection_type="list:list:paired")
     dce4 = model.DatasetCollectionElement(collection=c4, element=c2, element_identifier="outer_list", element_index=0)
     model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4])
     model.session.flush()
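     # _get_nested_collection_attributes flattens the nested collection into one row per leaf
     # dataset, with one element_identifier column per nesting level plus the requested
     # HDA/dataset attributes.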
     q = c2._get_nested_collection_attributes(element_attributes=('element_identifier',), hda_attributes=('extension',), dataset_attributes=('state',))
     assert [(r.keys()) for r in q] == [['element_identifier_0', 'element_identifier_1', 'extension', 'state'], ['element_identifier_0', 'element_identifier_1', 'extension', 'state']]
     assert q.all() == [('inner_list', 'forward', 'bam', 'new'), ('inner_list', 'reverse', 'txt', 'new')]
     q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation,))
     assert q.all() == [d1, d2]
     q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation, model.Dataset))
     assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)]
     # Assert properties that use _get_nested_collection_attributes return correct content
     assert c2.dataset_instances == [d1, d2]
     assert c2.dataset_elements == [dce1, dce2]
     assert c2.dataset_action_tuples == []
     assert c2.populated_optimized
     assert c2.dataset_states_and_extensions_summary == ({'new'}, {'txt', 'bam'})
     assert c2.element_identifiers_extensions_paths_and_metadata_files == [[('inner_list', 'forward'), 'bam', 'mock_dataset_14.dat', [('bai', 'mock_dataset_14.dat'), ('bam.csi', 'mock_dataset_14.dat')]], [('inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat', []]]
     assert c3.dataset_instances == []
     assert c3.dataset_elements == []
     assert c3.dataset_states_and_extensions_summary == (set(), set())
     q = c4._get_nested_collection_attributes(element_attributes=('element_identifier',))
     assert q.all() == [('outer_list', 'inner_list', 'forward'), ('outer_list', 'inner_list', 'reverse')]
     assert c4.dataset_elements == [dce1, dce2]
     assert c4.element_identifiers_extensions_and_paths == [(('outer_list', 'inner_list', 'forward'), 'bam', 'mock_dataset_14.dat'), (('outer_list', 'inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat')]
Example #16
 def test_populated_optimized_empty_list_list_ok(self):
     model = self.model
     c1 = model.DatasetCollection(collection_type='list')
     c2 = model.DatasetCollection(collection_type='list:list')
     dce1 = model.DatasetCollectionElement(collection=c2, element=c1, element_identifier="empty_list", element_index=0)
     model.session.add_all([c1, c2, dce1])
     model.session.flush()
     assert c1.populated
     assert c1.populated_optimized
     assert c2.populated
     assert c2.populated_optimized
Example #17
 def test_collection_get_interface(self):
     model = self.model
     u = model.User(email="*****@*****.**", password="******")
     h1 = model.History(name="History 1", user=u)
     d1 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
     c1 = model.DatasetCollection(collection_type="list")
     elements = 100
     dces = [model.DatasetCollectionElement(collection=c1, element=d1, element_identifier=f"{i}", element_index=i) for i in range(elements)]
     self.persist(u, h1, d1, c1, *dces, flush=False, expunge=False)
     model.session.flush()
     for i in range(elements):
         assert c1[i] == dces[i]
Example #18
    def precreate_dataset_collection(self,
                                     structure,
                                     allow_unitialized_element=True,
                                     completed_collection=None,
                                     implicit_output_name=None):
        has_structure = not structure.is_leaf and structure.children_known
        if not has_structure and allow_unitialized_element:
            dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
        elif not has_structure:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
        else:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
            elements = []
            for index, (identifier,
                        substructure) in enumerate(structure.children):
                # TODO: Open question - populate these now or later?
                element = None
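                # Reuse the collection already produced by the completed job's implicit output,
                # when one exists, instead of creating a placeholder element.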
                if completed_collection and implicit_output_name:
                    job = completed_collection[index]
                    if job:
                        it = (jtiodca.dataset_collection
                              for jtiodca in job.output_dataset_collections
                              if jtiodca.name == implicit_output_name)
                        element = next(it, None)
                if element is None:
                    if substructure.is_leaf:
                        element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
                    else:
                        element = self.precreate_dataset_collection(
                            substructure,
                            allow_unitialized_element=allow_unitialized_element
                        )

                element = model.DatasetCollectionElement(
                    collection=dataset_collection,
                    element=element,
                    element_identifier=identifier,
                    element_index=index,
                )
                elements.append(element)
            dataset_collection.element_count = len(elements)

        return dataset_collection
Example #19
def test_import_export_edit_collection():
    """Test modifying existing collections with imports."""
    app = _mock_app()
    sa_session = app.model.context

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    c1 = model.DatasetCollection(collection_type="list", populated=False)
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")

    sa_session.add(hc1)
    sa_session.add(h)
    sa_session.flush()

    import_history = model.History(name="Test History for Import", user=u)
    sa_session.add(import_history)

    temp_directory = mkdtemp()
    with store.DirectoryModelExportStore(temp_directory,
                                         app=app,
                                         for_edit=True) as export_store:
        export_store.add_dataset_collection(hc1)

    # Fabricate edited metadata for the collection...
    collections_metadata_path = os.path.join(temp_directory,
                                             store.ATTRS_FILENAME_COLLECTIONS)
    datasets_metadata_path = os.path.join(temp_directory,
                                          store.ATTRS_FILENAME_DATASETS)
    with open(collections_metadata_path, "r") as f:
        hdcas_metadata = json.load(f)

    assert len(hdcas_metadata) == 1
    hdca_metadata = hdcas_metadata[0]
    assert hdca_metadata
    assert "id" in hdca_metadata
    assert "collection" in hdca_metadata
    collection_metadata = hdca_metadata["collection"]
    assert "populated_state" in collection_metadata
    assert collection_metadata[
        "populated_state"] == model.DatasetCollection.populated_states.NEW

    collection_metadata[
        "populated_state"] = model.DatasetCollection.populated_states.OK

    d1 = model.HistoryDatasetAssociation(extension="txt",
                                         create_dataset=True,
                                         flush=False)
    d1.hid = 1
    d2 = model.HistoryDatasetAssociation(extension="txt",
                                         create_dataset=True,
                                         flush=False)
    d2.hid = 2
    serialization_options = model.SerializationOptions(for_edit=True)
    dataset_list = [
        d1.serialize(app.security, serialization_options),
        d2.serialize(app.security, serialization_options)
    ]

    dc = model.DatasetCollection(
        id=collection_metadata["id"],
        collection_type="list",
        element_count=2,
    )
    dc.populated_state = model.DatasetCollection.populated_states.OK
    dce1 = model.DatasetCollectionElement(
        element=d1,
        element_index=0,
        element_identifier="first",
    )
    dce2 = model.DatasetCollectionElement(
        element=d2,
        element_index=1,
        element_identifier="second",
    )
    dc.elements = [dce1, dce2]
    with open(datasets_metadata_path, "w") as datasets_f:
        json.dump(dataset_list, datasets_f)

    hdca_metadata["collection"] = dc.serialize(app.security,
                                               serialization_options)
    with open(collections_metadata_path, "w") as collections_f:
        json.dump(hdcas_metadata, collections_f)

    _perform_import_from_directory(temp_directory, app, u, import_history,
                                   store.ImportOptions(allow_edit=True))

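    # Importing with allow_edit=True updates the existing, unpopulated collection in place
    # rather than creating a new one.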
    sa_session.refresh(c1)
    assert c1.populated_state == model.DatasetCollection.populated_states.OK, c1.populated_state
    assert len(c1.elements) == 2
Example #20
def test_export_collection_with_mapping_history():
    app, sa_session, h = _setup_history_for_export(
        "Collection Mapping History")

    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)

    c1 = model.DatasetCollection(collection_type="list")
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")
    dce1 = model.DatasetCollectionElement(collection=c1,
                                          element=d1,
                                          element_identifier="el1",
                                          element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1,
                                          element=d2,
                                          element_identifier="el2",
                                          element_index=1)

    c2 = model.DatasetCollection(collection_type="list")
    hc2 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=2, collection=c2, name="HistoryCollectionTest2")
    dce3 = model.DatasetCollectionElement(collection=c2,
                                          element=d3,
                                          element_identifier="el1",
                                          element_index=0)
    dce4 = model.DatasetCollectionElement(collection=c2,
                                          element=d4,
                                          element_identifier="el2",
                                          element_index=1)

    hc2.add_implicit_input_collection("input1", hc1)

    j1 = model.Job()
    j1.user = h.user
    j1.tool_id = "cat1"
    j1.add_input_dataset("input1", d1)
    j1.add_output_dataset("out_file1", d3)

    j2 = model.Job()
    j2.user = h.user
    j2.tool_id = "cat1"
    j2.add_input_dataset("input1", d2)
    j2.add_output_dataset("out_file1", d4)

    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(dce3)
    sa_session.add(dce4)
    sa_session.add(hc1)
    sa_session.add(hc2)
    sa_session.add(j1)
    sa_session.add(j2)
    sa_session.flush()

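    # Tie the two mapped-over jobs together through a shared ImplicitCollectionJobs record so the
    # mapping relationship can be reconstructed on import.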
    implicit_collection_jobs = model.ImplicitCollectionJobs()
    j1.add_output_dataset_collection("out_file1", hc2)  # really?
    ija1 = model.ImplicitCollectionJobsJobAssociation()
    ija1.order_index = 0
    ija1.implicit_collection_jobs = implicit_collection_jobs
    ija1.job = j1

    j2.add_output_dataset_collection("out_file1", hc2)  # really?
    ija2 = model.ImplicitCollectionJobsJobAssociation()
    ija2.order_index = 1
    ija2.implicit_collection_jobs = implicit_collection_jobs
    ija2.job = j2

    sa_session.add(implicit_collection_jobs)
    sa_session.add(ija1)
    sa_session.add(ija2)
    sa_session.flush()

    imported_history = _import_export(app, h)
    assert len(imported_history.jobs) == 2
    imported_job0 = imported_history.jobs[0]

    imported_icj = imported_job0.implicit_collection_jobs_association.implicit_collection_jobs
    assert imported_icj
    assert len(imported_icj.jobs) == 2, len(imported_icj.jobs)
Example #21
def test_export_collection_history():
    app, sa_session, h = _setup_history_for_export("Collection History")

    d1, d2, d3, d4 = _create_datasets(sa_session, h, 4)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=1, collection=c1, name="HistoryCollectionTest1")

    dce1 = model.DatasetCollectionElement(collection=c1,
                                          element=d1,
                                          element_identifier="forward",
                                          element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1,
                                          element=d2,
                                          element_identifier="reverse",
                                          element_index=1)

    c2 = model.DatasetCollection(collection_type="list:paired")
    hc2 = model.HistoryDatasetCollectionAssociation(
        history=h, hid=2, collection=c2, name="HistoryCollectionTest2")

    cleaf = model.DatasetCollection(collection_type="paired")
    dce2leaf1 = model.DatasetCollectionElement(collection=cleaf,
                                               element=d3,
                                               element_identifier="forward",
                                               element_index=0)
    dce2leaf2 = model.DatasetCollectionElement(collection=cleaf,
                                               element=d4,
                                               element_identifier="reverse",
                                               element_index=1)

    dce21 = model.DatasetCollectionElement(collection=c2,
                                           element=cleaf,
                                           element_identifier="listel",
                                           element_index=0)

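    # A single job consumes the pair as an input collection and produces the nested list:paired
    # collection as output.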
    j = model.Job()
    j.user = h.user
    j.tool_id = "cat1"
    j.add_input_dataset_collection("input1_collect", hc1)
    j.add_output_dataset_collection("output_collect", hc2)

    sa_session.add(dce1)
    sa_session.add(dce2)
    sa_session.add(dce21)
    sa_session.add(dce2leaf1)
    sa_session.add(dce2leaf2)
    sa_session.add(hc1)
    sa_session.add(hc2)
    sa_session.add(j)
    sa_session.flush()

    imported_history = _import_export(app, h)

    datasets = imported_history.datasets
    assert len(datasets) == 4

    dataset_collections = list(
        imported_history.contents_iter(types=["dataset_collection"]))
    assert len(dataset_collections) == 2

    imported_hdca1 = dataset_collections[0]
    imported_hdca2 = dataset_collections[1]

    imported_collection_2 = imported_hdca2.collection
    assert imported_hdca1.collection.collection_type == "paired"
    assert imported_collection_2.collection_type == "list:paired"

    assert len(imported_collection_2.elements) == 1
    imported_top_level_element = imported_collection_2.elements[0]
    assert imported_top_level_element.element_identifier == "listel", imported_top_level_element.element_identifier
    assert imported_top_level_element.element_index == 0, imported_top_level_element.element_index
    imported_nested_collection = imported_top_level_element.child_collection
    assert len(imported_nested_collection.elements) == 2
    assert imported_nested_collection.collection_type == "paired", imported_nested_collection.collection_type

    assert len(imported_history.jobs) == 1
    imported_job = imported_history.jobs[0]
    assert imported_job
    assert len(imported_job.input_dataset_collections) == 1, len(
        imported_job.input_dataset_collections)
    assert len(imported_job.output_dataset_collection_instances) == 1
    assert imported_job.id != j.id