Example #1
0
 def test_dataset_instance_order(self):
     """Element order of a list:paired collection must follow element_index,
     regardless of the order in which members were persisted."""
     model = self.model
     user = model.User(email="*****@*****.**", password="******")
     history = model.History(name="History 1", user=user)
     list_pair = model.DatasetCollection(collection_type="list:paired")
     elements = []
     for i in range(20):
         pair = model.DatasetCollection(collection_type="pair")
         forward = model.HistoryDatasetAssociation(extension="txt", history=history, name=f"forward_{i}", create_dataset=True, sa_session=model.session)
         reverse = model.HistoryDatasetAssociation(extension="bam", history=history, name=f"reverse_{i}", create_dataset=True, sa_session=model.session)
         dce1 = model.DatasetCollectionElement(collection=pair, element=forward, element_identifier=f"forward_{i}", element_index=1)
         dce2 = model.DatasetCollectionElement(collection=pair, element=reverse, element_identifier=f"reverse_{i}", element_index=2)
         self.persist(pair)
         # Alternate persistence order so database insertion order cannot
         # accidentally coincide with the declared element order.
         for first, second in [(forward, reverse), (dce1, dce2)]:
             if i % 2:
                 self.persist(first)
                 self.persist(second)
             else:
                 self.persist(second)
                 self.persist(first)
         elements.append(model.DatasetCollectionElement(collection=list_pair, element=pair, element_index=i, element_identifier=str(i)))
     self.persist(list_pair)
     # Persist the outer elements in random order for the same reason.
     random.shuffle(elements)
     for element in elements:
         self.persist(element)
     forward_instances = []
     reverse_instances = []
     for position, dataset_instance in enumerate(list_pair.dataset_instances):
         (reverse_instances if position % 2 else forward_instances).append(dataset_instance)
     assert all(d.name == f"forward_{i}" for i, d in enumerate(forward_instances))
     assert all(d.name == f"reverse_{i}" for i, d in enumerate(reverse_instances))
Example #2
0
    def precreate_dataset_collection(self,
                                     structure,
                                     allow_unitialized_element=True):
        """Pre-create an (unpopulated) collection skeleton for ``structure``.

        Returns the ``UNINITIALIZED_ELEMENT`` sentinel when the structure's
        children are unknown and uninitialized elements are allowed, otherwise
        an unpopulated ``model.DatasetCollection`` mirroring the known
        structure (recursing into non-leaf children).  NOTE: the parameter
        name ``allow_unitialized_element`` carries a historical misspelling
        ("unitialized"); it is part of the public signature, so it is kept.
        """
        # "Has structure" means the children of this node are already known.
        has_structure = not structure.is_leaf and structure.children_known
        if not has_structure and allow_unitialized_element:
            # Children unknown - defer by returning the shared sentinel.
            dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
        elif not has_structure:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
        else:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
            elements = []
            for index, (identifier,
                        substructure) in enumerate(structure.children):
                # TODO: Open question - populate these now or later?
                if substructure.is_leaf:
                    element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
                else:
                    # Non-leaf child: recurse to build its nested skeleton.
                    element = self.precreate_dataset_collection(
                        substructure,
                        allow_unitialized_element=allow_unitialized_element)

                element = model.DatasetCollectionElement(
                    element=element,
                    element_identifier=identifier,
                    element_index=index,
                )
                elements.append(element)
            dataset_collection.elements = elements
            dataset_collection.element_count = len(elements)

        return dataset_collection
Example #3
0
 def test_populated_optimized_empty_list_list_ok(self):
     """An empty list nested inside a list:list still counts as populated."""
     model = self.model
     inner = model.DatasetCollection(collection_type='list')
     outer = model.DatasetCollection(collection_type='list:list')
     element = model.DatasetCollectionElement(collection=outer, element=inner, element_identifier="empty_list", element_index=0)
     model.session.add_all([inner, outer, element])
     model.session.flush()
     for collection in (inner, outer):
         assert collection.populated
         assert collection.populated_optimized
def test_history_collection_copy(list_size=NUM_DATASETS):
    """Copy a history populated with list:paired collections.

    Verifies the copy carries over datasets, collections, and per-item
    annotations, and that hids line up on the copied items.
    """
    with _setup_mapping_and_user() as (test_config, object_store, model, old_history):
        # Fix: this loop variable used to be ``i``, which the hda loop below
        # silently shadowed; the repeat count itself is unused.
        for _ in range(NUM_COLLECTIONS):
            hdas = []
            for i in range(list_size * 2):
                hda_path = test_config.write("moo", "test_metadata_original_%d" % i)
                hda = _create_hda(model, object_store, old_history, hda_path, visible=False, include_metadata_file=False)
                hdas.append(hda)

            list_elements = []
            list_collection = model.DatasetCollection(collection_type="list:paired")
            for j in range(list_size):
                # Pair up consecutive hdas: (2j, 2j+1) -> one paired collection.
                paired_collection = model.DatasetCollection(collection_type="paired")
                forward_dce = model.DatasetCollectionElement(collection=paired_collection, element=hdas[j * 2])
                reverse_dce = model.DatasetCollectionElement(collection=paired_collection, element=hdas[j * 2 + 1])
                paired_collection.elements = [forward_dce, reverse_dce]
                paired_collection_element = model.DatasetCollectionElement(collection=list_collection, element=paired_collection)
                list_elements.append(paired_collection_element)
                model.context.add_all([forward_dce, reverse_dce, paired_collection_element])
            list_collection.elements = list_elements
            history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=list_collection)
            history_dataset_collection.user = old_history.user
            model.context.add(history_dataset_collection)

            model.context.flush()
            old_history.add_dataset_collection(history_dataset_collection)
            history_dataset_collection.add_item_annotation(model.context, old_history.user, history_dataset_collection, "annotation #%d" % history_dataset_collection.hid)

        model.context.flush()
        # NOTE(review): reads the annotation of the *last* collection created
        # above; the value is immediately overwritten in the loops below.
        annotation_str = history_dataset_collection.get_item_annotation_str(model.context, old_history.user, history_dataset_collection)

        # Saving magic SA invocations for detecting full flushes that may harm performance.
        # from sqlalchemy import event
        # @event.listens_for(model.context, "before_flush")
        # def track_instances_before_flush(session, context, instances):
        #     if not instances:
        #         print("FULL FLUSH...")
        #     else:
        #         print("Flushing just %s" % instances)

        history_copy_timer = ExecutionTimer()
        new_history = old_history.copy(target_user=old_history.user)
        print("history copied %s" % history_copy_timer)

        # Fix: the enumerate index here was unused.
        for hda in new_history.active_datasets:
            assert hda.get_size() == 3
            annotation_str = hda.get_item_annotation_str(model.context, old_history.user, hda)
            assert annotation_str == "annotation #%d" % hda.hid, annotation_str

        assert len(new_history.active_dataset_collections) == NUM_COLLECTIONS
        for hdca in new_history.active_dataset_collections:
            annotation_str = hdca.get_item_annotation_str(model.context, old_history.user, hdca)
            assert annotation_str == "annotation #%d" % hdca.hid, annotation_str
Example #5
0
def __assert_output_format_is(expected, output, input_extensions=None, param_context=None, add_collection=False):
    """Assert that ``determine_output_format`` selects ``expected``.

    ``input_extensions`` is an iterable of ``(name, extension)`` pairs used to
    build input HDAs; ``param_context`` is forwarded unchanged.  Fix: the
    defaults were mutable ``[]`` literals (shared across calls); ``None``
    sentinels preserve the same behavior without that pitfall.
    """
    if input_extensions is None:
        input_extensions = []
    if param_context is None:
        param_context = []
    inputs = {}
    last_ext = "data"
    # enumerate replaces the previous manual ``i = 1`` counter.
    for i, (name, ext) in enumerate(input_extensions, start=1):
        hda = model.HistoryDatasetAssociation(extension=ext)
        hda.metadata.random_field = str(i)  # Populate a random metadata field for testing
        inputs[name] = hda
        last_ext = ext

    input_collections = {}
    if add_collection:
        # Minimal paired collection wrapped in an HDCA for collection inputs.
        hda_forward = model.HistoryDatasetAssociation(extension="txt")
        hda_reverse = model.HistoryDatasetAssociation(extension="txt")
        c1 = model.DatasetCollection(collection_type="pair")
        hc1 = model.HistoryDatasetCollectionAssociation(collection=c1, name="HistoryCollectionTest1")

        dce1 = model.DatasetCollectionElement(collection=c1, element=hda_forward, element_identifier="forward", element_index=0)
        dce2 = model.DatasetCollectionElement(collection=c1, element=hda_reverse, element_identifier="reverse", element_index=1)
        c1.elements = [dce1, dce2]

        input_collections["hdcai"] = [(hc1, False)]

    actual_format = determine_output_format(output, param_context, inputs, input_collections, last_ext)
    assert actual_format == expected, "Actual format %s, does not match expected %s" % (actual_format, expected)
Example #6
0
def test_export_copied_collection():
    # A history holding a paired collection plus an in-history copy of it
    # should import with both collections, all four HDAs, and intact
    # copied_from_* links between the matching hids.
    app, sa_session, h = _setup_history_for_export("Collection History with copied collection")

    d1, d2 = _create_datasets(sa_session, h, 2)

    c1 = model.DatasetCollection(collection_type="paired")
    hc1 = model.HistoryDatasetCollectionAssociation(history=h, hid=3, collection=c1, name="HistoryCollectionTest1")
    h.hid_counter = 4
    dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
    dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)

    sa_session.add_all((dce1, dce2, d1, d2, hc1))
    sa_session.flush()

    # Copying within the same history consumes three more hids (4, 5, 6).
    hc2 = hc1.copy(element_destination=h)
    h.add_pending_items()
    assert h.hid_counter == 7

    sa_session.add(hc2)
    sa_session.flush()

    assert hc2.copied_from_history_dataset_collection_association == hc1

    imported_history = _import_export(app, h)
    assert imported_history.hid_counter == 7

    assert len(imported_history.dataset_collections) == 2
    assert len(imported_history.datasets) == 4

    _assert_distinct_hids(imported_history)
    imported_by_hid = _hid_dict(imported_history)
    # The copied_from links must be remapped onto the imported objects.
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]
Example #7
0
    def test_collections_in_histories(self):
        """Persist a paired collection in a history and load it back by name."""
        model = self.model

        user = model.User(email="*****@*****.**", password="******")
        history = model.History(name="History 1", user=user)
        dataset_one = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
        dataset_two = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)

        collection = model.DatasetCollection(collection_type="pair")
        history_collection = model.HistoryDatasetCollectionAssociation(history=history, collection=collection, name="HistoryCollectionTest1")

        left_element = model.DatasetCollectionElement(collection=collection, element=dataset_one, element_identifier="left")
        right_element = model.DatasetCollectionElement(collection=collection, element=dataset_two, element_identifier="right")

        self.persist(user, history, dataset_one, dataset_two, collection, history_collection, left_element, right_element)

        # Reload via a fresh query to exercise persistence, not object identity.
        name_matches = model.HistoryDatasetCollectionAssociation.name == "HistoryCollectionTest1"
        loaded = self.query(model.HistoryDatasetCollectionAssociation).filter(name_matches).first().collection
        self.assertEqual(len(loaded.elements), 2)
        assert loaded.collection_type == "pair"
        assert loaded["left"] == left_element
        assert loaded["right"] == right_element
Example #8
0
    def __create_dataset_collection(
        self,
        trans,
        collection_type,
        element_identifiers=None,
        elements=None,
    ):
        """Create a ``model.DatasetCollection`` of ``collection_type``.

        Supply either ``element_identifiers`` (external request; resolved to
        objects here) or ``elements`` (internal request; used as-is).  Raises
        ``RequestParameterInvalidException`` if both are missing or if
        ``collection_type`` is falsy.
        """
        if element_identifiers is None and elements is None:
            raise RequestParameterInvalidException(
                ERROR_INVALID_ELEMENTS_SPECIFICATION)
        if not collection_type:
            raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)
        collection_type_description = self.collection_type_descriptions.for_collection_type(
            collection_type)
        # If we have elements, this is an internal request, don't need to load
        # objects from identifiers.
        if elements is None:
            if collection_type_description.has_subcollections():
                # Nested collection - recursively create collections and update identifiers.
                self.__recursively_create_collections(trans,
                                                      element_identifiers)
            elements = self.__load_elements(trans, element_identifiers)
        # else if elements is set, it better be an ordered dict!

        if elements is not self.ELEMENTS_UNINITIALIZED:
            type_plugin = collection_type_description.rank_type_plugin()
            dataset_collection = builder.build_collection(
                type_plugin, elements)
        else:
            # Elements not yet known - create the collection unpopulated.
            dataset_collection = model.DatasetCollection(populated=False)
        dataset_collection.collection_type = collection_type
        return dataset_collection
Example #9
0
    def create_dataset_collection(self, trans, collection_type, element_identifiers=None, elements=None,
                                  hide_source_items=None):
        """Create a ``model.DatasetCollection`` of ``collection_type``.

        Supply either ``element_identifiers`` (external request; resolved
        here) or ``elements`` (internal request; used as-is).  When
        ``hide_source_items`` is truthy, the source datasets of the new
        collection are made invisible.
        """
        if element_identifiers is None and elements is None:
            raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION)
        if not collection_type:
            raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)
        collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
        # If we have elements, this is an internal request, don't need to load
        # objects from identifiers.
        if elements is None:
            if collection_type_description.has_subcollections():
                # Nested collection - recursively create collections and update identifiers.
                self.__recursively_create_collections(trans, element_identifiers)
            new_collection = False
            for element_identifier in element_identifiers:
                # NOTE(review): if several identifiers match this "empty
                # new_collection" shape, each iteration overwrites ``elements``
                # and only the last one wins - confirm this is intended.
                if element_identifier.get("src") == "new_collection" and element_identifier.get('collection_type') == '':
                    new_collection = True
                    elements = self.__load_elements(trans, element_identifier['element_identifiers'])
            if not new_collection:
                elements = self.__load_elements(trans, element_identifiers)

        # else if elements is set, it better be an ordered dict!

        if elements is not self.ELEMENTS_UNINITIALIZED:
            type_plugin = collection_type_description.rank_type_plugin()
            dataset_collection = builder.build_collection(type_plugin, elements)
            if hide_source_items:
                log.debug("Hiding source items during dataset collection creation")
                for dataset in dataset_collection.dataset_instances:
                    dataset.visible = False
        else:
            # Elements not yet known - create the collection unpopulated.
            dataset_collection = model.DatasetCollection(populated=False)
        dataset_collection.collection_type = collection_type
        return dataset_collection
Example #10
0
 def _new_collection_for_elements(self, elements):
     """Wrap ``elements`` in a fresh DatasetCollection, indexing them in order."""
     collection = model.DatasetCollection()
     for position, element in enumerate(elements):
         element.element_index = position
         element.collection = collection
     collection.elements = elements
     return collection
Example #11
0
    def _new_pair_collection(self):
        """Build an in-memory "paired" collection of two txt HDAs.

        Returns a ``model.DatasetCollection`` with forward/reverse elements at
        indices 0 and 1.
        """
        hda_forward = self._new_hda(contents="Forward dataset.")
        hda_forward.id = 1
        hda_forward.extension = "txt"
        hda_reverse = self._new_hda(contents="Reverse dataset.")
        hda_reverse.id = 2
        hda_reverse.extension = "txt"

        collection = model.DatasetCollection()
        collection.id = 1
        collection.collection_type = "paired"
        element_forward = model.DatasetCollectionElement(
            collection=collection,
            element=hda_forward,
            element_index=0,
            element_identifier="forward",
        )
        element_forward.id = 1
        element_reverse = model.DatasetCollectionElement(
            collection=collection,
            element=hda_reverse,
            # Fix: was element_index=0, duplicating the forward element's
            # index; a pair's reverse element belongs at index 1.
            element_index=1,
            element_identifier="reverse",
        )
        element_reverse.id = 2
        return collection
 def __init__(self, implicit_output_name=None, job=None, hid=1):
     """Stub dataset-collection association holding one element "moocow".

     The element's dataset is in state "ok" and is tied to ``job`` through a
     ``JobToOutputDatasetAssociation`` named ``implicit_output_name``.
     """
     self.id = 124
     self.copied_from_history_dataset_collection_association = None
     self.history_content_type = "dataset_collection"
     self.implicit_output_name = implicit_output_name
     # Fix: honor the ``hid`` argument - it was hard-coded to 1, silently
     # ignoring the parameter (default behavior is unchanged).
     self.hid = hid
     self.collection = model.DatasetCollection()
     self.creating_job_associations = []
     element = model.DatasetCollectionElement(
         collection=self.collection,
         element=model.HistoryDatasetAssociation(),
         element_index=0,
         element_identifier="moocow",
     )
     element.dataset_instance.dataset = model.Dataset()
     element.dataset_instance.dataset.state = "ok"
     creating = model.JobToOutputDatasetAssociation(
         implicit_output_name,
         element.dataset_instance,
     )
     creating.job = job
     element.dataset_instance.creating_job_associations = [
         creating,
     ]
     self.collection.elements = [element]
Example #13
0
def test_export_collection_with_copied_datasets_and_overlapping_hids():
    """Export a history whose collection references datasets owned by another
    history, while copies of those datasets live in the exported history."""
    app, sa_session, history = _setup_history_for_export("Collection History with dataset from other history")

    dataset_history = model.History(name="Dataset History", user=history.user)

    original_one, original_two = _create_datasets(sa_session, dataset_history, 2)

    sa_session.add_all((original_one, original_two, dataset_history))
    sa_session.flush()

    app.object_store.update_from_file(original_one, file_name="test-data/1.txt", create=True)
    app.object_store.update_from_file(original_two, file_name="test-data/2.bed", create=True)

    # Copies of the foreign datasets become members of the exported history.
    copy_one = original_one.copy()
    copy_two = original_two.copy()
    copy_one.history = history
    copy_two.history = history

    pair = model.DatasetCollection(collection_type="paired")
    hdca = model.HistoryDatasetCollectionAssociation(history=history, hid=3, collection=pair, name="HistoryCollectionTest1")
    history.hid_counter = 5
    forward = model.DatasetCollectionElement(collection=pair, element=original_one, element_identifier="forward", element_index=0)
    reverse = model.DatasetCollectionElement(collection=pair, element=original_two, element_identifier="reverse", element_index=1)

    sa_session.add_all((forward, reverse, copy_one, copy_two, hdca))
    sa_session.flush()

    _import_export(app, history)
Example #14
0
    def create_dataset_collection(self, trans, collection_type, element_identifiers=None, elements=None,
                                  hide_source_items=None, copy_elements=False, history=None):
        """Create a ``model.DatasetCollection`` of ``collection_type``.

        Supply either ``element_identifiers`` (resolved to elements via
        ``_element_identifiers_to_elements``) or prebuilt ``elements``.
        ``hide_source_items``, ``copy_elements``, and ``history`` are
        forwarded to the element-resolution / recursive-creation helpers.
        """
        # Make sure at least one of these is None.
        assert element_identifiers is None or elements is None

        if element_identifiers is None and elements is None:
            raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION)
        if not collection_type:
            raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)

        collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
        has_subcollections = collection_type_description.has_subcollections()
        # If we have elements, this is an internal request, don't need to load
        # objects from identifiers.
        if elements is None:
            elements = self._element_identifiers_to_elements(trans,
                                                             collection_type_description=collection_type_description,
                                                             element_identifiers=element_identifiers,
                                                             hide_source_items=hide_source_items,
                                                             copy_elements=copy_elements,
                                                             history=history)
        else:
            if has_subcollections:
                # Nested collection - recursively create collections as needed.
                self.__recursively_create_collections_for_elements(trans, elements, hide_source_items, copy_elements=copy_elements, history=history)
        # else if elements is set, it better be an ordered dict!

        if elements is not self.ELEMENTS_UNINITIALIZED:
            type_plugin = collection_type_description.rank_type_plugin()
            dataset_collection = builder.build_collection(type_plugin, elements)
        else:
            # Elements not yet known - create the collection unpopulated.
            dataset_collection = model.DatasetCollection(populated=False)
        dataset_collection.collection_type = collection_type
        return dataset_collection
Example #15
0
def test_export_collection_hids():
    """An imported history preserves the hids of a simple paired collection."""
    app, sa_session, history = _setup_history_for_export("Collection History with dataset from this history")

    dataset_one, dataset_two = _create_datasets(sa_session, history, 2)

    pair = model.DatasetCollection(collection_type="paired")
    hdca = model.HistoryDatasetCollectionAssociation(history=history, hid=3, collection=pair, name="HistoryCollectionTest1")
    history.hid_counter = 4
    forward = model.DatasetCollectionElement(collection=pair, element=dataset_one, element_identifier="forward", element_index=0)
    reverse = model.DatasetCollectionElement(collection=pair, element=dataset_two, element_identifier="reverse", element_index=1)

    sa_session.add_all((forward, reverse, dataset_one, dataset_two, hdca))
    sa_session.flush()

    imported_history = _import_export(app, history)

    assert imported_history.hid_counter == 4, imported_history.hid_counter
    assert len(imported_history.dataset_collections) == 1
    assert len(imported_history.datasets) == 2
    for imported_hdca in imported_history.dataset_collections:
        assert imported_hdca.hid == 3, imported_hdca.hid
    for imported_hda in imported_history.datasets:
        assert imported_hda.hid in [1, 2], imported_hda.hid
    _assert_distinct_hids(imported_history)
Example #16
0
    def create_dataset_collection(self, trans, collection_type, element_identifiers=None, elements=None,
                                  hide_source_items=None):
        """Create a ``model.DatasetCollection`` of ``collection_type`` from
        either element identifiers (external request) or prebuilt elements
        (internal request); optionally hide the source datasets."""
        # Make sure at least one of these is None.
        assert element_identifiers is None or elements is None

        if elements is None and element_identifiers is None:
            raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION)
        if not collection_type:
            raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)

        collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)

        if elements is None:
            # External request: resolve the identifiers into model objects.
            elements = self._element_identifiers_to_elements(trans, collection_type_description, element_identifiers)
        # else if elements is set, it better be an ordered dict!

        if elements is self.ELEMENTS_UNINITIALIZED:
            dataset_collection = model.DatasetCollection(populated=False)
        else:
            type_plugin = collection_type_description.rank_type_plugin()
            dataset_collection = builder.build_collection(type_plugin, elements)
            if hide_source_items:
                log.debug("Hiding source items during dataset collection creation")
                for dataset in dataset_collection.dataset_instances:
                    dataset.visible = False
        dataset_collection.collection_type = collection_type
        return dataset_collection
Example #17
0
 def test_nested_collection_attributes(self):
     """Exercise _get_nested_collection_attributes and the convenience
     properties built on it, across flat and nested collections."""
     model = self.model
     u = model.User(email="*****@*****.**", password="******")
     h1 = model.History(name="History 1", user=u)
     d1 = model.HistoryDatasetAssociation(extension="bam", history=h1, create_dataset=True, sa_session=model.session)
     # Attach two metadata files (bam index + csi index) to d1 via the
     # MetadataTempFile JSON round-trip.
     index = NamedTemporaryFile("w")
     index.write("cool bam index")
     index2 = NamedTemporaryFile("w")
     index2.write("cool bam index 2")
     metadata_dict = {"bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name}), "bam_csi_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index2.name})}
     d1.metadata.from_JSON_dict(json_dict=metadata_dict)
     assert d1.metadata.bam_index
     assert d1.metadata.bam_csi_index
     assert isinstance(d1.metadata.bam_index, model.MetadataFile)
     assert isinstance(d1.metadata.bam_csi_index, model.MetadataFile)
     d2 = model.HistoryDatasetAssociation(extension="txt", history=h1, create_dataset=True, sa_session=model.session)
     # c1 (pair of d1/d2) nests inside c2 (list:paired); c3 stays empty;
     # c4 (list:list:paired) nests c2 one level deeper.
     c1 = model.DatasetCollection(collection_type='paired')
     dce1 = model.DatasetCollectionElement(collection=c1, element=d1, element_identifier="forward", element_index=0)
     dce2 = model.DatasetCollectionElement(collection=c1, element=d2, element_identifier="reverse", element_index=1)
     c2 = model.DatasetCollection(collection_type="list:paired")
     dce3 = model.DatasetCollectionElement(collection=c2, element=c1, element_identifier="inner_list", element_index=0)
     c3 = model.DatasetCollection(collection_type="list:list")
     c4 = model.DatasetCollection(collection_type="list:list:paired")
     dce4 = model.DatasetCollectionElement(collection=c4, element=c2, element_identifier="outer_list", element_index=0)
     model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4])
     model.session.flush()
     # One result row per leaf dataset; identifier columns are suffixed per
     # nesting level.
     q = c2._get_nested_collection_attributes(element_attributes=('element_identifier',), hda_attributes=('extension',), dataset_attributes=('state',))
     assert [(r.keys()) for r in q] == [['element_identifier_0', 'element_identifier_1', 'extension', 'state'], ['element_identifier_0', 'element_identifier_1', 'extension', 'state']]
     assert q.all() == [('inner_list', 'forward', 'bam', 'new'), ('inner_list', 'reverse', 'txt', 'new')]
     q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation,))
     assert q.all() == [d1, d2]
     q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation, model.Dataset))
     assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)]
     # Assert properties that use _get_nested_collection_attributes return correct content
     assert c2.dataset_instances == [d1, d2]
     assert c2.dataset_elements == [dce1, dce2]
     assert c2.dataset_action_tuples == []
     assert c2.populated_optimized
     assert c2.dataset_states_and_extensions_summary == ({'new'}, {'txt', 'bam'})
     # NOTE(review): 'mock_dataset_14.dat' appears to come from the suite's
     # mocked object store path - confirm against the test fixture.
     assert c2.element_identifiers_extensions_paths_and_metadata_files == [[('inner_list', 'forward'), 'bam', 'mock_dataset_14.dat', [('bai', 'mock_dataset_14.dat'), ('bam.csi', 'mock_dataset_14.dat')]], [('inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat', []]]
     assert c3.dataset_instances == []
     assert c3.dataset_elements == []
     assert c3.dataset_states_and_extensions_summary == (set(), set())
     q = c4._get_nested_collection_attributes(element_attributes=('element_identifier',))
     assert q.all() == [('outer_list', 'inner_list', 'forward'), ('outer_list', 'inner_list', 'reverse')]
     assert c4.dataset_elements == [dce1, dce2]
     assert c4.element_identifiers_extensions_and_paths == [(('outer_list', 'inner_list', 'forward'), 'bam', 'mock_dataset_14.dat'), (('outer_list', 'inner_list', 'reverse'), 'txt', 'mock_dataset_14.dat')]
Example #18
0
    def precreate_dataset_collection(self,
                                     structure,
                                     allow_unitialized_element=True,
                                     completed_collection=None,
                                     implicit_output_name=None):
        """Pre-create an output collection skeleton for ``structure``.

        When the structure's children are unknown, returns either the
        ``UNINITIALIZED_ELEMENT`` sentinel (if allowed) or an unpopulated
        ``DatasetCollection``.  Otherwise builds an unpopulated collection
        with one ``DatasetCollectionElement`` per child, reusing output
        collections from ``completed_collection`` jobs when
        ``implicit_output_name`` is given.  (``allow_unitialized_element``
        keeps its historical misspelling for API compatibility.)
        """
        has_structure = not structure.is_leaf and structure.children_known
        if not has_structure and allow_unitialized_element:
            # Children unknown - defer by returning the shared sentinel.
            dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
        elif not has_structure:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
        else:
            collection_type_description = structure.collection_type_description
            dataset_collection = model.DatasetCollection(populated=False)
            dataset_collection.collection_type = collection_type_description.collection_type
            elements = []
            for index, (identifier,
                        substructure) in enumerate(structure.children):
                # TODO: Open question - populate these now or later?
                element = None
                if completed_collection and implicit_output_name:
                    # Reuse the already-completed job's implicit output
                    # collection for this position, if one exists.
                    job = completed_collection[index]
                    if job:
                        it = (jtiodca.dataset_collection
                              for jtiodca in job.output_dataset_collections
                              if jtiodca.name == implicit_output_name)
                        element = next(it, None)
                if element is None:
                    if substructure.is_leaf:
                        element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
                    else:
                        # Non-leaf child: recurse to build its nested skeleton.
                        element = self.precreate_dataset_collection(
                            substructure,
                            allow_unitialized_element=allow_unitialized_element
                        )

                # ``collection=`` presumably attaches the element to
                # dataset_collection through the ORM relationship; the local
                # ``elements`` list is only used for the count below - verify.
                element = model.DatasetCollectionElement(
                    collection=dataset_collection,
                    element=element,
                    element_identifier=identifier,
                    element_index=index,
                )
                elements.append(element)
            dataset_collection.element_count = len(elements)

        return dataset_collection
Example #19
0
    def test_annotations(self):
        """Every annotatable model class persists and round-trips an annotation."""
        model = self.model

        user = model.User(email="*****@*****.**", password="******")
        self.persist(user)

        def persist_and_check_annotation(annotation_class, **kwds):
            # Annotate a fresh association, persist it, then reload and verify.
            association = annotation_class()
            association.annotation = "Test Annotation"
            association.user = user
            for attribute, value in kwds.items():
                setattr(association, attribute, value)
            self.persist(association)
            self.expunge()
            stored = self.query(annotation_class).all()[0]
            assert stored.annotation == "Test Annotation"
            assert stored.user.email == "*****@*****.**"

        stored_workflow = model.StoredWorkflow()
        stored_workflow.user = user
        self.persist(stored_workflow)
        persist_and_check_annotation(model.StoredWorkflowAnnotationAssociation, stored_workflow=stored_workflow)

        workflow = model.Workflow()
        workflow.stored_workflow = stored_workflow
        self.persist(workflow)

        workflow_step = model.WorkflowStep()
        workflow_step.workflow = workflow
        self.persist(workflow_step)
        persist_and_check_annotation(model.WorkflowStepAnnotationAssociation, workflow_step=workflow_step)

        history = model.History(name="History for Annotation", user=user)
        self.persist(history)
        persist_and_check_annotation(model.HistoryAnnotationAssociation, history=history)

        hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
        self.persist(hda)
        persist_and_check_annotation(model.HistoryDatasetAssociationAnnotationAssociation, hda=hda)

        page = model.Page()
        page.user = user
        self.persist(page)
        persist_and_check_annotation(model.PageAnnotationAssociation, page=page)

        visualization = model.Visualization()
        visualization.user = user
        self.persist(visualization)
        persist_and_check_annotation(model.VisualizationAnnotationAssociation, visualization=visualization)

        collection = model.DatasetCollection(collection_type="paired")
        history_collection = model.HistoryDatasetCollectionAssociation(collection=collection)
        self.persist(history_collection)
        persist_and_check_annotation(model.HistoryDatasetCollectionAssociationAnnotationAssociation, history_dataset_collection=history_collection)

        library_collection = model.LibraryDatasetCollectionAssociation(collection=collection)
        self.persist(library_collection)
        persist_and_check_annotation(model.LibraryDatasetCollectionAnnotationAssociation, library_dataset_collection=library_collection)
Example #20
0
def build_collection(type, dataset_instances):
    """Create a DatasetCollection populated with DatasetCollectionElement objects.

    The elements correspond to the supplied dataset instances; an exception is
    raised if the instances do not form a valid collection of the given type.
    """
    new_collection = model.DatasetCollection()
    set_collection_elements(new_collection, type, dataset_instances)
    return new_collection
Example #21
0
def test_export_copied_objects_copied_outside_history():
    """Export/import preserves copied_from links, but only for copies made
    within the exported history itself.
    """
    app, sa_session, history = _setup_history_for_export("Collection History with copied objects")

    hda1, hda2 = _create_datasets(sa_session, history, 2)

    pair = model.DatasetCollection(collection_type="paired")
    hdca1 = model.HistoryDatasetCollectionAssociation(history=history, hid=3, collection=pair, name="HistoryCollectionTest1")
    history.hid_counter = 4
    forward_element = model.DatasetCollectionElement(collection=pair, element=hda1, element_identifier="forward", element_index=0)
    reverse_element = model.DatasetCollectionElement(collection=pair, element=hda2, element_identifier="reverse", element_index=1)

    sa_session.add_all((forward_element, reverse_element, hda1, hda2, hdca1))
    sa_session.flush()

    # First copy stays inside the exported history.
    hdca2 = hdca1.copy(element_destination=history)
    history.add_dataset_collection(hdca2)

    sa_session.add(hdca2)

    # Second copy goes into a different history entirely...
    second_history = model.History(name=history.name + "-other", user=history.user)
    sa_session.add(second_history)

    hdca3 = hdca2.copy(element_destination=second_history)
    second_history.add_dataset_collection(hdca3)
    sa_session.add(hdca3)
    sa_session.flush()

    # ...and is then copied back into the original history.
    hdca4 = hdca3.copy(element_destination=history)
    history.add_dataset_collection(hdca4)
    sa_session.add(hdca4)
    sa_session.flush()

    assert history.hid_counter == 10

    original_by_hid = _hid_dict(history)
    # The round-tripped copies point at the intermediate copy in the other
    # history, not at the in-history copies.
    assert original_by_hid[7].copied_from_history_dataset_association != original_by_hid[4]
    assert original_by_hid[8].copied_from_history_dataset_association != original_by_hid[5]
    assert original_by_hid[9].copied_from_history_dataset_collection_association != original_by_hid[6]

    imported_history = _import_export(app, history)

    assert imported_history.hid_counter == 10
    assert len(imported_history.dataset_collections) == 3
    assert len(imported_history.datasets) == 6

    _assert_distinct_hids(imported_history)
    imported_by_hid = _hid_dict(imported_history)
    # In-history copy links survive the round trip...
    assert imported_by_hid[4].copied_from_history_dataset_association == imported_by_hid[1]
    assert imported_by_hid[5].copied_from_history_dataset_association == imported_by_hid[2]
    assert imported_by_hid[6].copied_from_history_dataset_collection_association == imported_by_hid[3]

    # ...and the external round trip is re-linked to the nearest in-history source.
    assert imported_by_hid[7].copied_from_history_dataset_association == imported_by_hid[4]
    assert imported_by_hid[8].copied_from_history_dataset_association == imported_by_hid[5]
    assert imported_by_hid[9].copied_from_history_dataset_collection_association == imported_by_hid[6]
Example #22
0
    def prototype(self, plugin_type):
        """Return an unpersisted DatasetCollection whose elements mirror the
        prototype structure declared by the ``plugin_type`` plugin.
        """
        type_plugin = self.get(plugin_type)
        # Plugins without prototype_elements cannot describe their structure
        # ahead of time.
        if not hasattr(type_plugin, 'prototype_elements'):
            raise Exception(
                f"Cannot pre-determine structure for collection of type {plugin_type}"
            )

        prototype_collection = model.DatasetCollection()
        for prototype_element in type_plugin.prototype_elements():
            prototype_element.collection = prototype_collection
        return prototype_collection
Example #23
0
    def prototype(self, plugin_type):
        """Return an unpersisted DatasetCollection pre-populated with the
        prototype elements declared by the ``plugin_type`` plugin.

        Raises an Exception when the plugin cannot pre-declare its structure.
        """
        plugin_type_object = self.get(plugin_type)
        if not hasattr(plugin_type_object, 'prototype_elements'):
            raise Exception(
                "Cannot pre-determine structure for collection of type %s" %
                plugin_type)

        dataset_collection = model.DatasetCollection()
        # list() instead of a pass-through comprehension (ruff PERF402).
        dataset_collection.elements = list(plugin_type_object.prototype_elements())
        return dataset_collection
Example #24
0
 def test_collection_get_interface(self):
     """Indexing a DatasetCollection returns its elements in order."""
     model = self.model
     user = model.User(email="*****@*****.**", password="******")
     history = model.History(name="History 1", user=user)
     hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
     collection = model.DatasetCollection(collection_type="list")
     element_count = 100
     element_list = [
         model.DatasetCollectionElement(collection=collection, element=hda, element_identifier=f"{index}", element_index=index)
         for index in range(element_count)
     ]
     self.persist(user, history, hda, collection, *element_list, flush=False, expunge=False)
     model.session.flush()
     for index, expected_element in enumerate(element_list):
         assert collection[index] == expected_element
Example #25
0
 def test_populated_optimized_ok(self):
     """A fully persisted pair collection reports both populated flags True."""
     model = self.model
     user = model.User(email="*****@*****.**", password="******")
     history = model.History(name="History 1", user=user)
     forward_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
     reverse_hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
     pair = model.DatasetCollection(collection_type='paired')
     forward_element = model.DatasetCollectionElement(collection=pair, element=forward_hda, element_identifier="forward", element_index=0)
     reverse_element = model.DatasetCollectionElement(collection=pair, element=reverse_hda, element_identifier="reverse", element_index=1)
     model.session.add_all([forward_hda, reverse_hda, pair, forward_element, reverse_element])
     model.session.flush()
     assert pair.populated
     assert pair.populated_optimized
Example #26
0
def test_job_context_discover_outputs_flushes_once(mocker):
    """Populating collection elements from discovered output files must not
    flush the SQLAlchemy session implicitly; the one flush is explicit below.
    """
    app = _mock_app()
    sa_session = app.model.context
    # mocker is a pytest-mock fixture

    u = model.User(email="*****@*****.**", password="******")
    h = model.History(name="Test History", user=u)

    tool = Tool(app)
    tool_provided_metadata = None
    job = model.Job()
    job.history = h
    sa_session.add(job)
    sa_session.flush()
    # Stage a working directory containing files for output discovery.
    job_working_directory = tempfile.mkdtemp()
    setup_data(job_working_directory)
    permission_provider = PermissionProvider()
    metadata_source_provider = MetadataSourceProvider()
    object_store = app.object_store
    input_dbkey = '?'
    final_job_state = 'ok'
    # Element identifiers come from the discovered file names (__name__ pattern).
    collection_description = FilePatternDatasetCollectionDescription(
        pattern="__name__")
    collection = model.DatasetCollection(collection_type='list',
                                         populated=False)
    sa_session.add(collection)
    job_context = JobContext(tool, tool_provided_metadata, job,
                             job_working_directory, permission_provider,
                             metadata_source_provider, input_dbkey,
                             object_store, final_job_state)
    collection_builder = builder.BoundCollectionBuilder(collection)
    dataset_collectors = [dataset_collector(collection_description)]
    output_name = 'output'
    filenames = job_context.find_files(output_name, collection,
                                       dataset_collectors)
    assert len(filenames) == 10
    # Spy is installed only now, so the setup flushes above are not counted.
    spy = mocker.spy(sa_session, 'flush')
    job_context.populate_collection_elements(
        collection,
        collection_builder,
        filenames,
        name=output_name,
        metadata_source_name='',
        final_job_state=job_context.final_job_state,
    )
    collection_builder.populate()
    # The behavior under test: no implicit flush occurred during population.
    assert spy.call_count == 0
    sa_session.flush()
    assert len(collection.dataset_instances) == 10
    assert collection.dataset_instances[0].dataset.file_size == 1
Example #27
0
def build_collection(type,
                     dataset_instances,
                     collection=None,
                     associated_identifiers=None):
    """
    Build DatasetCollection with populated DatasetCollectionElement objects
    corresponding to the supplied dataset instances or throw exception if
    this is not a valid collection of the specified type.

    :param type: collection type plugin describing the target structure
    :param dataset_instances: dataset instances to wrap as elements
    :param collection: optional existing DatasetCollection to populate in
        place; a new one is created when omitted
    :param associated_identifiers: optional set tracking already-associated
        identifiers; mutated in place so callers can observe additions
    """
    # Explicit `is None` checks: with `x or default`, a caller-supplied empty
    # set (falsy) would be silently replaced, losing in-place updates.
    if collection is None:
        collection = model.DatasetCollection()
    if associated_identifiers is None:
        associated_identifiers = set()
    set_collection_elements(collection, type, dataset_instances,
                            associated_identifiers)
    return collection
Example #28
0
 def test_dataset_dbkeys_and_extensions_summary(self):
     """The HDCA summary exposes the distinct dbkeys and extensions of its datasets."""
     model = self.model
     user = model.User(email="*****@*****.**", password="******")
     history = model.History(name="History 1", user=user)
     bam_hda = model.HistoryDatasetAssociation(extension="bam", dbkey="hg19", history=history, create_dataset=True, sa_session=model.session)
     txt_hda = model.HistoryDatasetAssociation(extension="txt", dbkey="hg19", history=history, create_dataset=True, sa_session=model.session)
     pair = model.DatasetCollection(collection_type='paired')
     forward_element = model.DatasetCollectionElement(collection=pair, element=bam_hda, element_identifier="forward", element_index=0)
     reverse_element = model.DatasetCollectionElement(collection=pair, element=txt_hda, element_identifier="reverse", element_index=1)
     hdca = model.HistoryDatasetCollectionAssociation(collection=pair, history=history)
     model.session.add_all([bam_hda, txt_hda, pair, forward_element, reverse_element, hdca])
     model.session.flush()
     summary = hdca.dataset_dbkeys_and_extensions_summary
     assert summary[0] == {"hg19"}
     assert summary[1] == {"bam", "txt"}
Example #29
0
    def test_ratings(self):
        """Every rateable model type persists a five-star user rating intact."""
        model = self.model

        user = model.User(email="*****@*****.**", password="******")
        self.persist(user)

        def assert_rating_round_trips(rating_class, **kwds):
            # Persist a rating of 5 and verify it reads back with its user.
            association = rating_class()
            association.rating = 5
            association.user = user
            for attr_name, attr_value in kwds.items():
                setattr(association, attr_name, attr_value)
            self.persist(association)
            self.expunge()
            stored = self.query(rating_class).all()[0]
            assert stored.rating == 5
            assert stored.user.email == "*****@*****.**"

        stored_workflow = model.StoredWorkflow()
        stored_workflow.user = user
        self.persist(stored_workflow)
        assert_rating_round_trips(model.StoredWorkflowRatingAssociation, stored_workflow=stored_workflow)

        history = model.History(name="History for Rating", user=user)
        self.persist(history)
        assert_rating_round_trips(model.HistoryRatingAssociation, history=history)

        hda = model.HistoryDatasetAssociation(extension="txt", history=history, create_dataset=True, sa_session=model.session)
        self.persist(hda)
        assert_rating_round_trips(model.HistoryDatasetAssociationRatingAssociation, hda=hda)

        page = model.Page()
        page.user = user
        self.persist(page)
        assert_rating_round_trips(model.PageRatingAssociation, page=page)

        visualization = model.Visualization()
        visualization.user = user
        self.persist(visualization)
        assert_rating_round_trips(model.VisualizationRatingAssociation, visualization=visualization)

        dataset_collection = model.DatasetCollection(collection_type="paired")
        history_dataset_collection = model.HistoryDatasetCollectionAssociation(collection=dataset_collection)
        self.persist(history_dataset_collection)
        assert_rating_round_trips(model.HistoryDatasetCollectionRatingAssociation, history_dataset_collection=history_dataset_collection)

        library_dataset_collection = model.LibraryDatasetCollectionAssociation(collection=dataset_collection)
        self.persist(library_dataset_collection)
        assert_rating_round_trips(model.LibraryDatasetCollectionRatingAssociation, library_dataset_collection=library_dataset_collection)
Example #30
0
    def test_collections_in_library_folders(self):
        model = self.model

        u = model.User(email="*****@*****.**", password="******")
        lf = model.LibraryFolder(name="RootFolder")
        l = model.Library(name="Library1", root_folder=lf)
        ld1 = model.LibraryDataset()
        ld2 = model.LibraryDataset()

        ldda1 = model.LibraryDatasetDatasetAssociation(extension="txt", library_dataset=ld1)
        ldda2 = model.LibraryDatasetDatasetAssociation(extension="txt", library_dataset=ld1)

        c1 = model.DatasetCollection(collection_type="pair")
        dce1 = model.DatasetCollectionElement(collection=c1, element=ldda1)
        dce2 = model.DatasetCollectionElement(collection=c1, element=ldda2)
        self.persist(u, l, lf, ld1, ld2, c1, ldda1, ldda2, dce1, dce2)