Example #1
0
 def __init__( self, app ):
     """
     Set up the type registry, model/security handles and helper managers.

     :param app: application object; ``app.model`` and ``app.security`` are
         stored on the instance.
     """
     # Collection type registry and the description factory built on top of it.
     self.type_registry = DatasetCollectionTypesRegistry( app )
     self.collection_type_descriptions = CollectionTypeDescriptionFactory( self.type_registry )
     self.model = app.model
     self.security = app.security
     # Helper managers; in this version only the tag manager receives ``app``.
     self.hda_manager = hdas.HDAManager()
     self.history_manager = histories.HistoryManager()
     self.tag_manager = tags.TagsManager( app )
     self.ldda_manager = lddas.LDDAManager( )
Example #2
0
    def __init__( self, app ):
        """
        Set up the type registry, model/security handles and helper managers.

        :param app: application object; ``app.model`` and ``app.security`` are
            stored on the instance and ``app`` is passed to every manager.
        """
        # Collection type registry and the description factory built on top of it.
        self.type_registry = DatasetCollectionTypesRegistry( app )
        self.collection_type_descriptions = CollectionTypeDescriptionFactory( self.type_registry )
        self.model = app.model
        self.security = app.security

        # Helper managers, each constructed with ``app``.
        self.hda_manager = hdas.HDAManager( app )
        self.history_manager = histories.HistoryManager( app )
        self.tag_manager = tags.TagManager( app )
        self.ldda_manager = lddas.LDDAManager( app )
Example #3
0
class DatasetCollectionManager( object ):
    """
    Abstraction for interfacing with dataset collection instances - ideally
    abstracts out model and plugin details.
    """
    # Sentinel for ``elements``: build an unpopulated collection whose
    # elements are supplied later through ``set_collection_elements``.
    ELEMENTS_UNINITIALIZED = object()

    def __init__( self, app ):
        """
        :param app: application object; ``app.model`` and ``app.security`` are
            stored on the instance and ``app`` is passed to every manager.
        """
        self.type_registry = DatasetCollectionTypesRegistry( app )
        self.collection_type_descriptions = CollectionTypeDescriptionFactory( self.type_registry )
        self.model = app.model
        self.security = app.security

        self.hda_manager = hdas.HDAManager( app )
        self.history_manager = histories.HistoryManager( app )
        self.tag_manager = tags.TagManager( app )
        self.ldda_manager = lddas.LDDAManager( app )

    def create(
        self,
        trans,
        parent,
        # PRECONDITION: security checks on ability to add to parent
        # occurred during load.
        name,
        collection_type,
        element_identifiers=None,
        elements=None,
        implicit_collection_info=None,
        trusted_identifiers=None,  # Trust preloaded element objects
    ):
        """
        Build a dataset collection, wrap it in an instance attached to
        ``parent`` and persist that instance.

        :param trans: transaction; its ``sa_session`` receives implicit outputs.
        :param parent: a ``model.History`` or a ``model.LibraryFolder``.
        :param name: name given to the new collection instance.
        :param collection_type: collection type, forwarded to
            ``create_dataset_collection``.
        :param element_identifiers: identifiers to resolve into elements.
        :param elements: already loaded elements (or ``ELEMENTS_UNINITIALIZED``).
        :param implicit_collection_info: optional dict with the keys
            ``"implicit_inputs"``, ``"implicit_output_name"`` and, optionally,
            ``"outputs"``; only used for history parents.
        :param trusted_identifiers: skip identifier validation; when ``None``
            identifiers are trusted exactly if ``implicit_collection_info``
            was supplied.
        :returns: the persisted collection instance.
        :raises MessageException: if ``parent`` is neither a history nor a
            library folder.
        """
        # Trust embedded, newly created objects created by tool subsystem.
        if trusted_identifiers is None:
            trusted_identifiers = implicit_collection_info is not None

        if element_identifiers and not trusted_identifiers:
            validate_input_element_identifiers( element_identifiers )

        dataset_collection = self.create_dataset_collection(
            trans=trans,
            collection_type=collection_type,
            element_identifiers=element_identifiers,
            elements=elements,
        )

        if isinstance( parent, model.History ):
            dataset_collection_instance = self.model.HistoryDatasetCollectionAssociation(
                collection=dataset_collection,
                name=name,
            )
            if implicit_collection_info:
                for input_name, input_collection in implicit_collection_info[ "implicit_inputs" ]:
                    dataset_collection_instance.add_implicit_input_collection( input_name, input_collection )
                # "outputs" is optional - treat a missing entry as "nothing to hide"
                # instead of failing while iterating over None.
                for output_dataset in implicit_collection_info.get( "outputs" ) or []:
                    if isinstance( output_dataset, model.HistoryDatasetAssociation ):
                        output_dataset.hidden_beneath_collection_instance = dataset_collection_instance
                    elif isinstance( output_dataset, model.HistoryDatasetCollectionAssociation ):
                        # NOTE(review): input_name / input_collection are leftovers from the
                        # loop above (last implicit input; undefined when there were none).
                        # Behavior kept as-is - confirm what was intended here.
                        dataset_collection_instance.add_implicit_input_collection( input_name, input_collection )
                    else:
                        # dataset collection, don't need to do anything...
                        pass
                    trans.sa_session.add( output_dataset )

                dataset_collection_instance.implicit_output_name = implicit_collection_info[ "implicit_output_name" ]

            log.debug( "Created collection with %d elements", len( dataset_collection_instance.collection.elements ) )
            # Handle setting hid
            parent.add_dataset_collection( dataset_collection_instance )

        elif isinstance( parent, model.LibraryFolder ):
            dataset_collection_instance = self.model.LibraryDatasetCollectionAssociation(
                collection=dataset_collection,
                folder=parent,
                name=name,
            )

        else:
            message = "Internal logic error - create called with unknown parent type %s" % type( parent )
            # No exception is being handled here, so log.exception would only
            # append a meaningless empty traceback.
            log.error( message )
            raise MessageException( message )

        return self.__persist( dataset_collection_instance )

    def create_dataset_collection(
        self,
        trans,
        collection_type,
        element_identifiers=None,
        elements=None,
    ):
        """
        Build (but do not persist) a dataset collection of ``collection_type``.

        :raises RequestParameterInvalidException: if neither
            ``element_identifiers`` nor ``elements`` is given, or if
            ``collection_type`` is empty.
        """
        if element_identifiers is None and elements is None:
            raise RequestParameterInvalidException( ERROR_INVALID_ELEMENTS_SPECIFICATION )
        if not collection_type:
            raise RequestParameterInvalidException( ERROR_NO_COLLECTION_TYPE )
        collection_type_description = self.collection_type_descriptions.for_collection_type( collection_type )
        # If we have elements, this is an internal request, don't need to load
        # objects from identifiers.
        if elements is None:
            if collection_type_description.has_subcollections( ):
                # Nested collection - recursively create collections and update identifiers.
                self.__recursively_create_collections( trans, element_identifiers )
            elements = self.__load_elements( trans, element_identifiers )
        # else if elements is set, it better be an ordered dict!

        if elements is not self.ELEMENTS_UNINITIALIZED:
            type_plugin = collection_type_description.rank_type_plugin()
            dataset_collection = builder.build_collection( type_plugin, elements )
        else:
            # Elements are filled in later, see set_collection_elements.
            dataset_collection = model.DatasetCollection( populated=False )
        dataset_collection.collection_type = collection_type
        return dataset_collection

    def set_collection_elements( self, dataset_collection, dataset_instances ):
        """
        Fill a not yet populated collection with ``dataset_instances`` and
        mark it as populated.

        :raises Exception: if the collection is already populated.
        """
        if dataset_collection.populated:
            raise Exception("Cannot reset elements of an already populated dataset collection.")

        collection_type = dataset_collection.collection_type
        collection_type_description = self.collection_type_descriptions.for_collection_type( collection_type )
        type_plugin = collection_type_description.rank_type_plugin()
        builder.set_collection_elements( dataset_collection, type_plugin, dataset_instances )
        dataset_collection.mark_as_populated()

        return dataset_collection

    def collection_builder_for( self, dataset_collection ):
        """
        Return a ``builder.BoundCollectionBuilder`` for ``dataset_collection``
        and the description of its collection type.
        """
        collection_type = dataset_collection.collection_type
        collection_type_description = self.collection_type_descriptions.for_collection_type( collection_type )
        return builder.BoundCollectionBuilder( dataset_collection, collection_type_description )

    def delete( self, trans, instance_type, id ):
        """
        Flag the collection instance as deleted and flush the session.
        The instance is loaded with an ownership check.
        """
        dataset_collection_instance = self.get_dataset_collection_instance( trans, instance_type, id, check_ownership=True )
        dataset_collection_instance.deleted = True
        trans.sa_session.add( dataset_collection_instance )
        trans.sa_session.flush( )

    def update( self, trans, instance_type, id, payload ):
        """
        Apply ``payload`` to the collection instance (loaded with an
        ownership check) and return what ``_set_from_dict`` reports as changed.

        When ``trans.user`` is ``None`` only the ``deleted`` and ``visible``
        keys of the payload are considered.
        """
        dataset_collection_instance = self.get_dataset_collection_instance( trans, instance_type, id, check_ownership=True )
        if trans.user is None:
            anon_allowed_payload = {}
            if 'deleted' in payload:
                anon_allowed_payload[ 'deleted' ] = payload[ 'deleted' ]
            if 'visible' in payload:
                anon_allowed_payload[ 'visible' ] = payload[ 'visible' ]
            payload = self._validate_and_parse_update_payload( anon_allowed_payload )
        else:
            payload = self._validate_and_parse_update_payload( payload )
        changed = self._set_from_dict( trans, dataset_collection_instance, payload )
        return changed

    def copy(
        self,
        trans,
        parent,
        # PRECONDITION: security checks on ability to add to parent
        # occurred during load.
        source,
        encoded_source_id,
    ):
        """
        Copy the history collection instance identified by
        ``encoded_source_id`` into ``parent`` and return the new copy.

        :param source: must be ``"hdca"`` (enforced with an ``assert``).
        """
        assert source == "hdca"  # for now
        source_hdca = self.__get_history_collection_instance( trans, encoded_source_id )
        new_hdca = source_hdca.copy()
        parent.add_dataset_collection( new_hdca )
        trans.sa_session.add( new_hdca )
        trans.sa_session.flush()
        return new_hdca

    def _set_from_dict( self, trans, dataset_collection_instance, new_data ):
        """
        Apply validated ``new_data`` to the instance and return the mapping of
        changed values. The session is flushed only if that mapping is
        non-empty.
        """
        # send what we can down into the model
        changed = dataset_collection_instance.set_from_dict( new_data )
        # the rest (often involving the trans) - do here
        if 'annotation' in new_data and trans.get_user():
            dataset_collection_instance.add_item_annotation( trans.sa_session, trans.get_user(), dataset_collection_instance, new_data[ 'annotation' ] )
            changed[ 'annotation' ] = new_data[ 'annotation' ]
        if 'tags' in new_data and trans.get_user():
            # Tags are not recorded in ``changed``.
            self.tag_manager.set_tags_from_list( trans.get_user(), dataset_collection_instance, new_data[ 'tags' ] )

        if changed:
            trans.sa_session.flush()

        return changed

    def _validate_and_parse_update_payload( self, payload ):
        """
        Return a new dict with the validated ``name``, ``deleted``,
        ``visible`` and ``tags`` entries of ``payload``.

        Entries whose value is ``None`` and entries with any other key are
        dropped.
        """
        validated_payload = {}
        for key, val in payload.items():
            if val is None:
                continue
            # Compare with ==: ``key in ( 'name' )`` is a substring test against
            # the string 'name' and would also accept keys such as 'a' or 'me'.
            if key == 'name':
                validated_payload[ key ] = validation.validate_and_sanitize_basestring( key, val )
            elif key in ( 'deleted', 'visible' ):
                validated_payload[ key ] = validation.validate_boolean( key, val )
            elif key == 'tags':
                validated_payload[ key ] = validation.validate_and_sanitize_basestring_list( key, val )
        return validated_payload

    def history_dataset_collections(self, history, query):
        """
        Return the active dataset collections of ``history`` for which
        ``query.direct_match`` is true.
        """
        collections = history.active_dataset_collections
        collections = filter( query.direct_match, collections )
        return collections

    def __persist( self, dataset_collection_instance ):
        """Add the instance to the model context, flush, and return it."""
        context = self.model.context
        context.add( dataset_collection_instance )
        context.flush()
        return dataset_collection_instance

    def __recursively_create_collections( self, trans, element_identifiers ):
        """
        Create a collection for every identifier whose ``src`` is
        ``"new_collection"`` and store it under the identifier's
        ``"__object__"`` key. The identifiers are modified in place and
        returned.
        """
        for element_identifier in element_identifiers:
            try:
                if not element_identifier[ "src" ] == "new_collection":
                    # not a new collection, keep moving...
                    continue
            except KeyError:
                # No "src" entry at all - nothing to create, move along.
                continue

            # element identifier is a dict with src new_collection...
            collection_type = element_identifier.get( "collection_type", None )
            collection = self.create_dataset_collection(
                trans=trans,
                collection_type=collection_type,
                element_identifiers=element_identifier[ "element_identifiers" ],
            )
            element_identifier[ "__object__" ] = collection

        return element_identifiers

    def __load_elements( self, trans, element_identifiers ):
        """
        Return an ordered dict mapping each identifier's ``"name"`` to the
        element loaded for it.
        """
        elements = odict.odict()
        for element_identifier in element_identifiers:
            elements[ element_identifier[ "name" ] ] = self.__load_element( trans, element_identifier )
        return elements

    def __load_element( self, trans, element_identifier ):
        """
        Resolve one element identifier into the object it refers to.

        :raises MessageException: if the identifier has no ``get`` method.
        :raises RequestParameterInvalidException: if ``src`` or ``id`` is
            empty or ``src`` is not one of ``hda``, ``ldda``, ``hdca``.
        """
        # Previously created collection already found in request, just pass
        # through as is.
        if "__object__" in element_identifier:
            return element_identifier[ "__object__" ]

        # element_identifier is dict {src=hda|ldda|hdca|new_collection, id=<encoded_id>}
        try:
            src_type = element_identifier.get( 'src', 'hda' )
        except AttributeError:
            raise MessageException( "Dataset collection element definition (%s) not dictionary-like." % element_identifier )
        encoded_id = element_identifier.get( 'id', None )
        if not src_type or not encoded_id:
            message_template = "Problem decoding element identifier %s - must contain a 'src' and a 'id'."
            message = message_template % element_identifier
            raise RequestParameterInvalidException( message )

        if src_type == 'hda':
            decoded_id = int( trans.app.security.decode_id( encoded_id ) )
            element = self.hda_manager.get_accessible( decoded_id, trans.user )
        elif src_type == 'ldda':
            element = self.ldda_manager.get( trans, encoded_id )
        elif src_type == 'hdca':
            # TODO: Option to copy? Force copy? Copy or allow if not owned?
            element = self.__get_history_collection_instance( trans, encoded_id ).collection
        # TODO: ldca.
        else:
            raise RequestParameterInvalidException( "Unknown src_type parameter supplied '%s'." % src_type )
        return element

    def match_collections( self, collections_to_match ):
        """
        May seem odd to place it here, but planning to grow sophistication and
        get plugin types involved so it will likely make sense in the future.
        """
        return MatchingCollections.for_collections( collections_to_match, self.collection_type_descriptions )

    def get_dataset_collection_instance( self, trans, instance_type, id, **kwds ):
        """
        Load a history (``instance_type == "history"``) or library
        (``instance_type == "library"``) collection instance by encoded id.

        ``kwds`` are forwarded to the type specific loader. Any other
        ``instance_type`` falls through and yields ``None``.
        """
        if instance_type == "history":
            return self.__get_history_collection_instance( trans, id, **kwds )
        elif instance_type == "library":
            return self.__get_library_collection_instance( trans, id, **kwds )

    def get_dataset_collection( self, trans, encoded_id ):
        """Return the ``DatasetCollection`` with the given encoded id."""
        collection_id = int( trans.app.security.decode_id( encoded_id ) )
        collection = trans.sa_session.query( trans.app.model.DatasetCollection ).get( collection_id )
        return collection

    def __get_history_collection_instance( self, trans, id, check_ownership=False, check_accessible=True ):
        """
        Load a history collection instance by encoded id, running the
        requested ownership / accessibility checks against its history.
        """
        instance_id = int( trans.app.security.decode_id( id ) )
        collection_instance = trans.sa_session.query( trans.app.model.HistoryDatasetCollectionAssociation ).get( instance_id )
        if check_ownership:
            self.history_manager.error_unless_owner( collection_instance.history, trans.user, current_history=trans.history )
        if check_accessible:
            self.history_manager.error_unless_accessible( collection_instance.history, trans.user, current_history=trans.history )
        return collection_instance

    def __get_library_collection_instance( self, trans, id, check_ownership=False, check_accessible=True ):
        """
        Load a library collection instance by encoded id.

        :raises NotImplementedError: if ``check_ownership`` is requested.
        :raises ItemAccessibilityException: if ``check_accessible`` is set and
            the security agent denies access to the item.
        """
        if check_ownership:
            # NotImplemented is a comparison constant, not an exception class;
            # calling it fails with an unrelated TypeError.
            raise NotImplementedError( "Functionality (getting library dataset collection with ownership check) unimplemented." )
        instance_id = int( trans.security.decode_id( id ) )
        collection_instance = trans.sa_session.query( trans.app.model.LibraryDatasetCollectionAssociation ).get( instance_id )
        if check_accessible:
            if not trans.app.security_agent.can_access_library_item( trans.get_current_user_roles(), collection_instance, trans.user ):
                raise ItemAccessibilityException( "LibraryDatasetCollectionAssociation is not accessible to the current user", type='error' )
        return collection_instance
Example #4
0
class DatasetCollectionManager( object ):
    """
    Abstraction for interfacing with dataset collection instances - ideally
    abstracts out model and plugin details.
    """

    def __init__( self, app ):
        """
        :param app: application object; ``app.model`` and ``app.security`` are
            stored on the instance.
        """
        self.type_registry = DatasetCollectionTypesRegistry( app )
        self.collection_type_descriptions = CollectionTypeDescriptionFactory( self.type_registry )
        self.model = app.model
        self.security = app.security
        self.hda_manager = hdas.HDAManager()
        self.history_manager = histories.HistoryManager()
        self.tag_manager = tags.TagsManager( app )
        self.ldda_manager = lddas.LDDAManager( )

    def create(
        self,
        trans,
        parent,  # PRECONDITION: security checks on ability to add to parent occurred during load.
        name,
        collection_type,
        element_identifiers=None,
        elements=None,
        implicit_collection_info=None,
    ):
        """
        Build a dataset collection, wrap it in an instance attached to
        ``parent`` and persist that instance.

        :param trans: transaction; its ``sa_session`` receives implicit outputs.
        :param parent: a ``model.History`` or a ``model.LibraryFolder``.
        :param name: name given to the new collection instance.
        :param collection_type: collection type of the new collection.
        :param element_identifiers: identifiers to resolve into elements;
            validated unless ``implicit_collection_info`` is supplied.
        :param elements: already loaded elements.
        :param implicit_collection_info: optional dict with the keys
            ``"implicit_inputs"``, ``"implicit_output_name"`` and, optionally,
            ``"outputs"``; only used for history parents.
        :returns: the persisted collection instance.
        :raises MessageException: if ``parent`` is neither a history nor a
            library folder.
        """
        # Trust embedded, newly created objects created by tool subsystem.
        trusted_identifiers = implicit_collection_info is not None

        if element_identifiers and not trusted_identifiers:
            validate_input_element_identifiers( element_identifiers )

        dataset_collection = self.__create_dataset_collection(
            trans=trans,
            collection_type=collection_type,
            element_identifiers=element_identifiers,
            elements=elements,
        )
        if isinstance( parent, model.History ):
            dataset_collection_instance = self.model.HistoryDatasetCollectionAssociation(
                collection=dataset_collection,
                name=name,
            )
            if implicit_collection_info:
                for input_name, input_collection in implicit_collection_info[ "implicit_inputs" ]:
                    dataset_collection_instance.add_implicit_input_collection( input_name, input_collection )
                # "outputs" is optional - treat a missing entry as "nothing to hide"
                # instead of failing while iterating over None.
                for output_dataset in implicit_collection_info.get( "outputs" ) or []:
                    output_dataset.hidden_beneath_collection_instance = dataset_collection_instance
                    trans.sa_session.add( output_dataset )

                dataset_collection_instance.implicit_output_name = implicit_collection_info[ "implicit_output_name" ]
            log.debug( "Created collection with %d elements", len( dataset_collection_instance.collection.elements ) )
            # Handle setting hid
            parent.add_dataset_collection( dataset_collection_instance )
        elif isinstance( parent, model.LibraryFolder ):
            dataset_collection_instance = self.model.LibraryDatasetCollectionAssociation(
                collection=dataset_collection,
                folder=parent,
                name=name,
            )
        else:
            message = "Internal logic error - create called with unknown parent type %s" % type( parent )
            # No exception is being handled here, so log.exception would only
            # append a meaningless empty traceback.
            log.error( message )
            raise MessageException( message )

        return self.__persist( dataset_collection_instance )

    def __create_dataset_collection(
        self,
        trans,
        collection_type,
        element_identifiers=None,
        elements=None,
    ):
        """
        Build (but do not persist) a dataset collection of ``collection_type``.

        :raises RequestParameterInvalidException: if neither
            ``element_identifiers`` nor ``elements`` is given, or if
            ``collection_type`` is empty.
        """
        if element_identifiers is None and elements is None:
            raise RequestParameterInvalidException( ERROR_INVALID_ELEMENTS_SPECIFICATION )
        if not collection_type:
            raise RequestParameterInvalidException( ERROR_NO_COLLECTION_TYPE )
        collection_type_description = self.collection_type_descriptions.for_collection_type( collection_type )
        # If we have elements, this is an internal request, don't need to load
        # objects from identifiers.
        if elements is None:
            if collection_type_description.has_subcollections( ):
                # Nested collection - recursively create collections and update identifiers.
                self.__recursively_create_collections( trans, element_identifiers )
            elements = self.__load_elements( trans, element_identifiers )
        # else if elements is set, it better be an ordered dict!

        type_plugin = collection_type_description.rank_type_plugin()
        dataset_collection = type_plugin.build_collection( elements )
        dataset_collection.collection_type = collection_type
        return dataset_collection

    def delete( self, trans, instance_type, id ):
        """
        Flag the collection instance as deleted and flush the session.
        The instance is loaded with an ownership check.
        """
        dataset_collection_instance = self.get_dataset_collection_instance( trans, instance_type, id, check_ownership=True )
        dataset_collection_instance.deleted = True
        trans.sa_session.add( dataset_collection_instance )
        trans.sa_session.flush( )

    def update( self, trans, instance_type, id, payload ):
        """
        Apply ``payload`` to the collection instance (loaded with an
        ownership check) and return what ``_set_from_dict`` reports as changed.

        When ``trans.user`` is ``None`` only the ``deleted`` and ``visible``
        keys of the payload are considered.
        """
        dataset_collection_instance = self.get_dataset_collection_instance( trans, instance_type, id, check_ownership=True )
        if trans.user is None:
            anon_allowed_payload = {}
            if 'deleted' in payload:
                anon_allowed_payload[ 'deleted' ] = payload[ 'deleted' ]
            if 'visible' in payload:
                anon_allowed_payload[ 'visible' ] = payload[ 'visible' ]
            payload = self._validate_and_parse_update_payload( anon_allowed_payload )
        else:
            payload = self._validate_and_parse_update_payload( payload )
        changed = self._set_from_dict( trans, dataset_collection_instance, payload )
        return changed

    def copy(
        self,
        trans,
        parent,  # PRECONDITION: security checks on ability to add to parent occurred during load.
        source,
        encoded_source_id,
    ):
        """
        Copy the history collection instance identified by
        ``encoded_source_id`` into ``parent`` and return the new copy.

        :param source: must be ``"hdca"`` (enforced with an ``assert``).
        """
        assert source == "hdca"  # for now
        source_hdca = self.__get_history_collection_instance( trans, encoded_source_id )
        new_hdca = source_hdca.copy()
        parent.add_dataset_collection( new_hdca )
        trans.sa_session.add( new_hdca )
        trans.sa_session.flush()
        # Hand back the freshly created copy, not the instance it was copied from.
        return new_hdca

    def _set_from_dict( self, trans, dataset_collection_instance, new_data ):
        """
        Apply validated ``new_data`` to the instance and return the mapping of
        changed values. The session is flushed only if that mapping is
        non-empty.
        """
        # Blatantly stolen from UsesHistoryDatasetAssociationMixin.set_hda_from_dict.

        # send what we can down into the model
        changed = dataset_collection_instance.set_from_dict( new_data )
        # the rest (often involving the trans) - do here
        if 'annotation' in new_data and trans.get_user():
            dataset_collection_instance.add_item_annotation( trans.sa_session, trans.get_user(), dataset_collection_instance, new_data[ 'annotation' ] )
            changed[ 'annotation' ] = new_data[ 'annotation' ]
        if 'tags' in new_data and trans.get_user():
            # Tags are not recorded in ``changed``.
            self.tag_manager.set_tags_from_list( trans, dataset_collection_instance, new_data[ 'tags' ], user=trans.user )

        if changed:
            trans.sa_session.flush()

        return changed

    def _validate_and_parse_update_payload( self, payload ):
        """
        Return a new dict with the validated ``name``, ``deleted``,
        ``visible`` and ``tags`` entries of ``payload``.

        Entries whose value is ``None`` and entries with any other key are
        dropped.
        """
        validated_payload = {}
        for key, val in payload.items():
            if val is None:
                continue
            # Compare with ==: ``key in ( 'name' )`` is a substring test against
            # the string 'name' and would also accept keys such as 'a' or 'me'.
            if key == 'name':
                validated_payload[ key ] = validation.validate_and_sanitize_basestring( key, val )
            elif key in ( 'deleted', 'visible' ):
                validated_payload[ key ] = validation.validate_boolean( key, val )
            elif key == 'tags':
                validated_payload[ key ] = validation.validate_and_sanitize_basestring_list( key, val )
        return validated_payload

    def history_dataset_collections(self, history, query):
        """
        Return the active dataset collections of ``history`` for which
        ``query.direct_match`` is true.
        """
        collections = history.active_dataset_collections
        collections = filter( query.direct_match, collections )
        return collections

    def __persist( self, dataset_collection_instance ):
        """Add the instance to the model context, flush, and return it."""
        context = self.model.context
        context.add( dataset_collection_instance )
        context.flush()
        return dataset_collection_instance

    def __recursively_create_collections( self, trans, element_identifiers ):
        """
        Create a collection for every identifier whose ``src`` is
        ``"new_collection"`` and store it under the identifier's
        ``"__object__"`` key. The identifiers are modified in place and
        returned.
        """
        for element_identifier in element_identifiers:
            try:
                if not element_identifier[ "src" ] == "new_collection":
                    # not a new collection, keep moving...
                    continue
            except KeyError:
                # No "src" entry at all - nothing to create, move along.
                continue

            # element identifier is a dict with src new_collection...
            collection_type = element_identifier.get( "collection_type", None )
            collection = self.__create_dataset_collection(
                trans=trans,
                collection_type=collection_type,
                element_identifiers=element_identifier[ "element_identifiers" ],
            )
            element_identifier[ "__object__" ] = collection

        return element_identifiers

    def __load_elements( self, trans, element_identifiers ):
        """
        Return an ordered dict mapping each identifier's ``"name"`` to the
        element loaded for it.
        """
        elements = odict.odict()
        for element_identifier in element_identifiers:
            elements[ element_identifier[ "name" ] ] = self.__load_element( trans, element_identifier )
        return elements

    def __load_element( self, trans, element_identifier ):
        """
        Resolve one element identifier into the object it refers to.

        :raises MessageException: if the identifier has no ``get`` method.
        :raises RequestParameterInvalidException: if ``src`` or ``id`` is
            empty or ``src`` is not one of ``hda``, ``ldda``, ``hdca``.
        """
        # Previously created collection already found in request, just pass
        # through as is.
        if "__object__" in element_identifier:
            return element_identifier[ "__object__" ]

        # element_identifier is dict {src=hda|ldda|hdca|new_collection, id=<encoded_id>}
        try:
            src_type = element_identifier.get( 'src', 'hda' )
        except AttributeError:
            raise MessageException( "Dataset collection element definition (%s) not dictionary-like." % element_identifier )
        encoded_id = element_identifier.get( 'id', None )
        if not src_type or not encoded_id:
            raise RequestParameterInvalidException( "Problem decoding element identifier %s" % element_identifier )

        if src_type == 'hda':
            decoded_id = int( trans.app.security.decode_id( encoded_id ) )
            element = self.hda_manager.get( trans, decoded_id, check_ownership=False )
        elif src_type == 'ldda':
            element = self.ldda_manager.get( trans, encoded_id )
        elif src_type == 'hdca':
            # TODO: Option to copy? Force copy? Copy or allow if not owned?
            element = self.__get_history_collection_instance( trans, encoded_id ).collection
        # TODO: ldca.
        else:
            raise RequestParameterInvalidException( "Unknown src_type parameter supplied '%s'." % src_type )
        return element

    def match_collections( self, collections_to_match ):
        """
        May seem odd to place it here, but planning to grow sophistication and
        get plugin types involved so it will likely make sense in the future.
        """
        return MatchingCollections.for_collections( collections_to_match, self.collection_type_descriptions )

    def get_dataset_collection_instance( self, trans, instance_type, id, **kwds ):
        """
        Load a history (``instance_type == "history"``) or library
        (``instance_type == "library"``) collection instance by encoded id.

        ``kwds`` are forwarded to the type specific loader. Any other
        ``instance_type`` falls through and yields ``None``.
        """
        if instance_type == "history":
            return self.__get_history_collection_instance( trans, id, **kwds )
        elif instance_type == "library":
            return self.__get_library_collection_instance( trans, id, **kwds )

    def get_dataset_collection( self, trans, encoded_id ):
        """Return the ``DatasetCollection`` with the given encoded id."""
        collection_id = int( trans.app.security.decode_id( encoded_id ) )
        collection = trans.sa_session.query( trans.app.model.DatasetCollection ).get( collection_id )
        return collection

    def __get_history_collection_instance( self, trans, id, check_ownership=False, check_accessible=True ):
        """
        Load a history collection instance by encoded id and pass its history
        to ``history_manager.secure`` with the requested checks.
        """
        instance_id = int( trans.app.security.decode_id( id ) )
        collection_instance = trans.sa_session.query( trans.app.model.HistoryDatasetCollectionAssociation ).get( instance_id )
        self.history_manager.secure( trans, collection_instance.history, check_ownership=check_ownership, check_accessible=check_accessible )
        return collection_instance

    def __get_library_collection_instance( self, trans, id, check_ownership=False, check_accessible=True ):
        """
        Load a library collection instance by encoded id.

        :raises NotImplementedError: if ``check_ownership`` is requested.
        :raises ItemAccessibilityException: if ``check_accessible`` is set and
            the security agent denies access to the item.
        """
        if check_ownership:
            # NotImplemented is a comparison constant, not an exception class;
            # calling it fails with an unrelated TypeError.
            raise NotImplementedError( "Functionality (getting library dataset collection with ownership check) unimplemented." )
        instance_id = int( trans.security.decode_id( id ) )
        collection_instance = trans.sa_session.query( trans.app.model.LibraryDatasetCollectionAssociation ).get( instance_id )
        if check_accessible:
            if not trans.app.security_agent.can_access_library_item( trans.get_current_user_roles(), collection_instance, trans.user ):
                raise ItemAccessibilityException( "LibraryDatasetCollectionAssociation is not accessible to the current user", type='error' )
        return collection_instance
class DatasetCollectionManager(object):
    """
    Abstraction for interfacing with dataset collections instance - ideally abstracts
    out model and plugin details.
    """
    ELEMENTS_UNINITIALIZED = object()

    def __init__(self, app):
        """Set up the type registry, description factory and helper managers for ``app``."""
        registry = DatasetCollectionTypesRegistry(app)
        self.type_registry = registry
        self.collection_type_descriptions = CollectionTypeDescriptionFactory(registry)
        self.model, self.security = app.model, app.security

        # Collaborating managers this class delegates to.
        self.hda_manager = hdas.HDAManager(app)
        self.history_manager = histories.HistoryManager(app)
        self.tag_manager = tags.GalaxyTagManager(app.model.context)
        self.ldda_manager = lddas.LDDAManager(app)

    def precreate_dataset_collection_instance(self,
                                              trans,
                                              parent,
                                              name,
                                              structure,
                                              implicit_inputs=None,
                                              implicit_output_name=None,
                                              tags=None):
        # TODO: prebuild all required HIDs and send them in so no need to flush in between.
        dataset_collection = self.precreate_dataset_collection(
            structure,
            allow_unitialized_element=implicit_output_name is not None)
        instance = self._create_instance_for_collection(
            trans,
            parent,
            name,
            dataset_collection,
            implicit_inputs=implicit_inputs,
            implicit_output_name=implicit_output_name,
            flush=False,
            tags=tags)
        return instance

    def precreate_dataset_collection(self, structure, allow_unitialized_element=True):
        """
        Build an unpopulated ``DatasetCollection`` mirroring ``structure``.

        When the structure is a leaf or its children are unknown, the result
        is either the uninitialized-element sentinel (if allowed) or an empty
        unpopulated collection. Otherwise one element is created per child,
        recursing into non-leaf children.
        """
        has_structure = not structure.is_leaf and structure.children_known
        if not has_structure and allow_unitialized_element:
            return model.DatasetCollectionElement.UNINITIALIZED_ELEMENT

        type_description = structure.collection_type_description
        precreated = model.DatasetCollection(populated=False)
        precreated.collection_type = type_description.collection_type
        if not has_structure:
            return precreated

        children = []
        for position, (identifier, substructure) in enumerate(structure.children):
            # TODO: Open question - populate these now or later?
            if substructure.is_leaf:
                child = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
            else:
                child = self.precreate_dataset_collection(
                    substructure, allow_unitialized_element=allow_unitialized_element)
            children.append(model.DatasetCollectionElement(
                element=child,
                element_identifier=identifier,
                element_index=position,
            ))
        precreated.elements = children
        precreated.element_count = len(children)
        return precreated

    def create(self,
               trans,
               parent,
               name,
               collection_type,
               element_identifiers=None,
               elements=None,
               implicit_collection_info=None,
               trusted_identifiers=None,
               hide_source_items=False,
               tags=None,
               copy_elements=False):
        """
        PRECONDITION: security checks on ability to add to parent
        occurred during load.
        """
        # Trust embedded, newly created objects created by tool subsystem.
        if trusted_identifiers is None:
            trusted_identifiers = implicit_collection_info is not None

        if element_identifiers and not trusted_identifiers:
            validate_input_element_identifiers(element_identifiers)

        dataset_collection = self.create_dataset_collection(
            trans=trans,
            collection_type=collection_type,
            element_identifiers=element_identifiers,
            elements=elements,
            hide_source_items=hide_source_items,
            copy_elements=copy_elements,
        )

        implicit_inputs = []
        if implicit_collection_info:
            implicit_inputs = implicit_collection_info.get(
                'implicit_inputs', [])

        implicit_output_name = None
        if implicit_collection_info:
            implicit_output_name = implicit_collection_info[
                "implicit_output_name"]

        return self._create_instance_for_collection(
            trans,
            parent,
            name,
            dataset_collection,
            implicit_inputs=implicit_inputs,
            implicit_output_name=implicit_output_name,
            tags=tags)

    def _create_instance_for_collection(self,
                                        trans,
                                        parent,
                                        name,
                                        dataset_collection,
                                        implicit_output_name=None,
                                        implicit_inputs=None,
                                        tags=None,
                                        flush=True):
        """
        Wrap ``dataset_collection`` in an association owned by ``parent`` and persist it.

        :param trans: transaction; ``trans.user`` is used when tags arrive as a list.
        :param parent: a ``model.History`` or a ``model.LibraryFolder``.
        :param name: name given to the new association.
        :param dataset_collection: the collection the association points at.
        :param implicit_output_name: stored on history associations when truthy.
        :param implicit_inputs: iterable of ``(input_name, input_collection)``
            pairs recorded on history associations.
        :param tags: either a list of tag strings or a dict of tag model objects.
        :param flush: forwarded to the persistence step.
        :returns: the newly created association.
        :raises MessageException: if ``parent`` is neither supported type.
        """
        if isinstance(parent, model.History):
            dataset_collection_instance = self.model.HistoryDatasetCollectionAssociation(
                collection=dataset_collection,
                name=name,
            )
            if implicit_inputs:
                for input_name, input_collection in implicit_inputs:
                    dataset_collection_instance.add_implicit_input_collection(
                        input_name, input_collection)

            if implicit_output_name:
                dataset_collection_instance.implicit_output_name = implicit_output_name

            log.debug("Created collection with %d elements",
                      len(dataset_collection_instance.collection.elements))
            # Handle setting hid
            parent.add_dataset_collection(dataset_collection_instance)

        elif isinstance(parent, model.LibraryFolder):
            dataset_collection_instance = self.model.LibraryDatasetCollectionAssociation(
                collection=dataset_collection,
                folder=parent,
                name=name,
            )

        else:
            message = "Internal logic error - create called with unknown parent type %s" % type(
                parent)
            # No exception is being handled here, so log.exception would only
            # attach an empty traceback; log a plain error instead.
            log.error(message)
            raise MessageException(message)

        # Tags may be coming in as a dictionary of tag model objects if copying them from other
        # existing Galaxy objects or as a list of strings if the tags are coming from user supplied
        # values.
        if isinstance(tags, list):
            # create() hands over an empty list when there are no implicit
            # inputs, so only a non-empty value is treated as a conflict.
            assert not implicit_inputs, implicit_inputs
            self.tag_manager.add_tags_from_list(
                trans.user, dataset_collection_instance, tags)
        else:
            self._append_tags(dataset_collection_instance, implicit_inputs, tags)
        return self.__persist(dataset_collection_instance, flush=flush)

    def create_dataset_collection(
            self, trans, collection_type,
            element_identifiers=None, elements=None,
            hide_source_items=None, copy_elements=False):
        """
        Build a ``DatasetCollection`` of ``collection_type``.

        Exactly one of ``element_identifiers`` (external request, objects are
        loaded) or ``elements`` (internal request, already objects) must be
        given. When ``elements`` is the ``ELEMENTS_UNINITIALIZED`` sentinel an
        unpopulated collection is returned instead of a built one.
        """
        # Make sure at least one of these is None.
        assert element_identifiers is None or elements is None

        nothing_supplied = element_identifiers is None and elements is None
        if nothing_supplied:
            raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION)
        if not collection_type:
            raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)

        type_description = self.collection_type_descriptions.for_collection_type(collection_type)
        is_nested = type_description.has_subcollections()
        if elements is None:
            # External request - load objects from the identifiers.
            elements = self._element_identifiers_to_elements(
                trans,
                collection_type_description=type_description,
                element_identifiers=element_identifiers,
                hide_source_items=hide_source_items,
                copy_elements=copy_elements)
        elif is_nested:
            # Nested collection - recursively create collections as needed.
            self.__recursively_create_collections_for_elements(
                trans, elements, hide_source_items, copy_elements=copy_elements)
        # else if elements is set, it better be an ordered dict!

        if elements is self.ELEMENTS_UNINITIALIZED:
            new_collection = model.DatasetCollection(populated=False)
        else:
            new_collection = builder.build_collection(
                type_description.rank_type_plugin(), elements)
        new_collection.collection_type = collection_type
        return new_collection

    def _element_identifiers_to_elements(self,
                                         trans,
                                         collection_type_description,
                                         element_identifiers,
                                         hide_source_items=False,
                                         copy_elements=False):
        if collection_type_description.has_subcollections():
            # Nested collection - recursively create collections and update identifiers.
            self.__recursively_create_collections_for_identifiers(
                trans, element_identifiers, hide_source_items, copy_elements)
        new_collection = False
        for element_identifier in element_identifiers:
            if element_identifier.get(
                    "src") == "new_collection" and element_identifier.get(
                        'collection_type') == '':
                new_collection = True
                elements = self.__load_elements(
                    trans=trans,
                    element_identifiers=element_identifier[
                        'element_identifiers'],
                    hide_source_items=hide_source_items,
                    copy_elements=copy_elements)
        if not new_collection:
            elements = self.__load_elements(
                trans=trans,
                element_identifiers=element_identifiers,
                hide_source_items=hide_source_items,
                copy_elements=copy_elements)
        return elements

    def _append_tags(self,
                     dataset_collection_instance,
                     implicit_inputs=None,
                     tags=None):
        tags = tags or {}
        implicit_inputs = implicit_inputs or []
        for _, v in implicit_inputs:
            for tag in v.auto_propagated_tags:
                tags[tag.value] = tag
        for _, tag in tags.items():
            dataset_collection_instance.tags.append(
                tag.copy(cls=model.HistoryDatasetCollectionTagAssociation))

    def set_collection_elements(self, dataset_collection, dataset_instances):
        if dataset_collection.populated:
            raise Exception(
                "Cannot reset elements of an already populated dataset collection."
            )

        collection_type = dataset_collection.collection_type
        collection_type_description = self.collection_type_descriptions.for_collection_type(
            collection_type)
        type_plugin = collection_type_description.rank_type_plugin()
        builder.set_collection_elements(dataset_collection, type_plugin,
                                        dataset_instances)
        dataset_collection.mark_as_populated()

        return dataset_collection

    def collection_builder_for(self, dataset_collection):
        """Return a ``BoundCollectionBuilder`` for ``dataset_collection`` and its type description."""
        describe = self.collection_type_descriptions.for_collection_type
        type_description = describe(dataset_collection.collection_type)
        return builder.BoundCollectionBuilder(dataset_collection, type_description)

    def delete(self, trans, instance_type, id, recursive=False, purge=False):
        dataset_collection_instance = self.get_dataset_collection_instance(
            trans, instance_type, id, check_ownership=True)
        dataset_collection_instance.deleted = True
        trans.sa_session.add(dataset_collection_instance)

        if recursive:
            for dataset in dataset_collection_instance.collection.dataset_instances:
                self.hda_manager.error_unless_owner(
                    dataset,
                    user=trans.get_user(),
                    current_history=trans.history)
                if not dataset.deleted:
                    dataset.deleted = True

                if purge and not dataset.purged:
                    self.hda_manager.purge(dataset)

        trans.sa_session.flush()

    def update(self, trans, instance_type, id, payload):
        dataset_collection_instance = self.get_dataset_collection_instance(
            trans, instance_type, id, check_ownership=True)
        if trans.user is None:
            anon_allowed_payload = {}
            if 'deleted' in payload:
                anon_allowed_payload['deleted'] = payload['deleted']
            if 'visible' in payload:
                anon_allowed_payload['visible'] = payload['visible']
            payload = self._validate_and_parse_update_payload(
                anon_allowed_payload)
        else:
            payload = self._validate_and_parse_update_payload(payload)
        changed = self._set_from_dict(trans, dataset_collection_instance,
                                      payload)
        return changed

    def copy(self,
             trans,
             parent,
             source,
             encoded_source_id,
             copy_elements=False):
        """
        PRECONDITION: security checks on ability to add to parent occurred
        during load.
        """
        assert source == "hdca"  # for now
        source_hdca = self.__get_history_collection_instance(
            trans, encoded_source_id)
        copy_kwds = {}
        if copy_elements:
            copy_kwds["element_destination"] = parent
        new_hdca = source_hdca.copy(**copy_kwds)
        tags_str = self.tag_manager.get_tags_str(source_hdca.tags)
        self.tag_manager.apply_item_tags(trans.get_user(), new_hdca, tags_str)
        parent.add_dataset_collection(new_hdca)
        trans.sa_session.add(new_hdca)
        trans.sa_session.flush()
        return new_hdca

    def _set_from_dict(self, trans, dataset_collection_instance, new_data):
        # send what we can down into the model
        changed = dataset_collection_instance.set_from_dict(new_data)
        # the rest (often involving the trans) - do here
        if 'annotation' in new_data.keys() and trans.get_user():
            dataset_collection_instance.add_item_annotation(
                trans.sa_session, trans.get_user(),
                dataset_collection_instance, new_data['annotation'])
            changed['annotation'] = new_data['annotation']
        if 'tags' in new_data.keys() and trans.get_user():
            # set_tags_from_list will flush on its own, no need to add to 'changed' here and incur a second flush.
            self.tag_manager.set_tags_from_list(trans.get_user(),
                                                dataset_collection_instance,
                                                new_data['tags'])

        if changed.keys():
            trans.sa_session.flush()

        return changed

    def _validate_and_parse_update_payload(self, payload):
        validated_payload = {}
        for key, val in payload.items():
            if val is None:
                continue
            if key in ('name'):
                val = validation.validate_and_sanitize_basestring(key, val)
                validated_payload[key] = val
            if key in ('deleted', 'visible'):
                validated_payload[key] = validation.validate_boolean(key, val)
            elif key == 'tags':
                validated_payload[
                    key] = validation.validate_and_sanitize_basestring_list(
                        key, val)
        return validated_payload

    def history_dataset_collections(self, history, query):
        collections = history.active_dataset_collections
        collections = list(filter(query.direct_match, collections))
        return collections

    def __persist(self, dataset_collection_instance, flush=True):
        context = self.model.context
        context.add(dataset_collection_instance)
        if flush:
            context.flush()
        return dataset_collection_instance

    def __recursively_create_collections_for_identifiers(
            self, trans, element_identifiers, hide_source_items,
            copy_elements):
        for index, element_identifier in enumerate(element_identifiers):
            try:
                if element_identifier.get("src", None) != "new_collection":
                    # not a new collection, keep moving...
                    continue
            except KeyError:
                # Not a dictionary, just an id of an HDA - move along.
                continue

            # element identifier is a dict with src new_collection...
            collection_type = element_identifier.get("collection_type", None)
            collection = self.create_dataset_collection(
                trans=trans,
                collection_type=collection_type,
                element_identifiers=element_identifier["element_identifiers"],
                hide_source_items=hide_source_items,
                copy_elements=copy_elements,
            )
            element_identifier["__object__"] = collection

        return element_identifiers

    def __recursively_create_collections_for_elements(self, trans, elements,
                                                      hide_source_items,
                                                      copy_elements):
        if elements is self.ELEMENTS_UNINITIALIZED:
            return

        new_elements = odict.odict()
        for key, element in elements.items():
            if isinstance(element, model.DatasetCollection):
                continue

            if element.get("src", None) != "new_collection":
                continue

            # element is a dict with src new_collection and
            # and odict of named elements
            collection_type = element.get("collection_type", None)
            sub_elements = element["elements"]
            collection = self.create_dataset_collection(
                trans=trans,
                collection_type=collection_type,
                elements=sub_elements,
                hide_source_items=hide_source_items,
                copy_elements=copy_elements)
            new_elements[key] = collection
        elements.update(new_elements)

    def __load_elements(
            self, trans, element_identifiers,
            hide_source_items=False, copy_elements=False):
        """Load each identifier and return an ordered mapping from its ``name`` to the loaded element."""
        loaded = odict.odict()
        for identifier in element_identifiers:
            element = self.__load_element(
                trans,
                element_identifier=identifier,
                hide_source_items=hide_source_items,
                copy_elements=copy_elements)
            loaded[identifier["name"]] = element
        return loaded

    def __load_element(self, trans, element_identifier, hide_source_items,
                       copy_elements):
        # if not isinstance( element_identifier, dict ):
        #    # Is allowing this to just be the id of an hda too clever? Somewhat
        #    # consistent with other API methods though.
        #    element_identifier = dict( src='hda', id=str( element_identifier ) )

        # Previously created collection already found in request, just pass
        # through as is.
        if "__object__" in element_identifier:
            the_object = element_identifier["__object__"]
            if the_object is not None and the_object.id:
                context = self.model.context
                if the_object not in context:
                    the_object = context.query(type(the_object)).get(
                        the_object.id)
            return the_object

        # dataset_identifier is dict {src=hda|ldda|hdca|new_collection, id=<encoded_id>}
        try:
            src_type = element_identifier.get('src', 'hda')
        except AttributeError:
            raise MessageException(
                "Dataset collection element definition (%s) not dictionary-like."
                % element_identifier)
        encoded_id = element_identifier.get('id', None)
        if not src_type or not encoded_id:
            message_template = "Problem decoding element identifier %s - must contain a 'src' and a 'id'."
            message = message_template % element_identifier
            raise RequestParameterInvalidException(message)

        tags = element_identifier.pop('tags', None)
        tag_str = ''
        if tags:
            tag_str = ",".join(str(_) for _ in tags)
        if src_type == 'hda':
            decoded_id = int(trans.app.security.decode_id(encoded_id))
            hda = self.hda_manager.get_accessible(decoded_id, trans.user)
            if copy_elements:
                element = self.hda_manager.copy(hda,
                                                history=trans.history,
                                                hide_copy=True)
            else:
                element = hda
            if hide_source_items and self.hda_manager.get_owned(
                    hda.id, user=trans.user, current_history=trans.history):
                hda.visible = False
            self.tag_manager.apply_item_tags(user=trans.user,
                                             item=element,
                                             tags_str=tag_str)
        elif src_type == 'ldda':
            element = self.ldda_manager.get(trans,
                                            encoded_id,
                                            check_accessible=True)
            element = element.to_history_dataset_association(
                trans.history, add_to_history=True)
            self.tag_manager.apply_item_tags(user=trans.user,
                                             item=element,
                                             tags_str=tag_str)
        elif src_type == 'hdca':
            # TODO: Option to copy? Force copy? Copy or allow if not owned?
            element = self.__get_history_collection_instance(
                trans, encoded_id).collection
        # TODO: ldca.
        else:
            raise RequestParameterInvalidException(
                "Unknown src_type parameter supplied '%s'." % src_type)
        return element

    def match_collections(self, collections_to_match):
        """
        May seem odd to place it here, but planning to grow sophistication and
        get plugin types involved so it will likely make sense in the future.

        Delegates to ``MatchingCollections.for_collections`` with this
        manager's collection type descriptions.
        """
        type_descriptions = self.collection_type_descriptions
        return MatchingCollections.for_collections(collections_to_match, type_descriptions)

    def get_dataset_collection_instance(self, trans, instance_type, id,
                                        **kwds):
        """
        """
        if instance_type == "history":
            return self.__get_history_collection_instance(trans, id, **kwds)
        elif instance_type == "library":
            return self.__get_library_collection_instance(trans, id, **kwds)

    def get_dataset_collection(self, trans, encoded_id):
        collection_id = int(trans.app.security.decode_id(encoded_id))
        collection = trans.sa_session.query(
            trans.app.model.DatasetCollection).get(collection_id)
        return collection

    def apply_rules(self, hdca, rule_set, handle_dataset):
        hdca_collection = hdca.collection
        collection_type = hdca_collection.collection_type
        elements = hdca_collection.elements
        collection_type_description = self.collection_type_descriptions.for_collection_type(
            collection_type)
        initial_data, initial_sources = self.__init_rule_data(
            elements, collection_type_description)
        data, sources = rule_set.apply(initial_data, initial_sources)

        collection_type = rule_set.collection_type
        collection_type_description = self.collection_type_descriptions.for_collection_type(
            collection_type)
        elements = self._build_elements_from_rule_data(
            collection_type_description, rule_set, data, sources,
            handle_dataset)
        return elements

    def _build_elements_from_rule_data(self, collection_type_description,
                                       rule_set, data, sources,
                                       handle_dataset):
        """
        Arrange rule output rows into a nested, ordered element tree.

        Each row is placed by following ``rule_set.identifier_columns``: every
        column but the last selects (or creates) a ``new_collection`` level,
        and the last column names the dataset, which is produced by calling
        ``handle_dataset`` on the row's source dataset. At a ``paired`` leaf
        level the identifier is normalised to ``forward``/``reverse``.
        """
        identifier_columns = rule_set.identifier_columns
        final_position = len(identifier_columns) - 1
        tree = odict.odict()
        for row_number, row in enumerate(data):
            # For each row, find place in depth for this element.
            type_here = collection_type_description
            container = tree

            for position, column in enumerate(identifier_columns):
                label = row[column]

                if position != final_position:
                    # Descend one level, creating the sub-collection if needed.
                    type_here = type_here.child_collection_type_description()
                    if label in container:
                        container = container[label]["elements"]
                    else:
                        nested = {
                            "src": "new_collection",
                            "collection_type": type_here.collection_type,
                            "elements": odict.odict(),
                        }
                        container[label] = nested
                        container = nested["elements"]
                    continue

                # At correct final position in nested structure for this dataset.
                if type_here.collection_type == "paired":
                    if label.lower() in ["f", "1", "r1", "forward"]:
                        label = "forward"
                    elif label.lower() in ["r", "2", "r2", "reverse"]:
                        label = "reverse"
                    else:
                        raise Exception(
                            "Unknown indicator of paired status encountered - only values of F, R, 1, 2, R1, R2, forward, or reverse are allowed."
                        )
                container[label] = handle_dataset(sources[row_number]["dataset"])

        return tree

    def __init_rule_data(self,
                         elements,
                         collection_type_description,
                         parent_identifiers=None):
        parent_identifiers = parent_identifiers or []
        data, sources = [], []
        for element in elements:
            element_object = element.element_object
            identifiers = parent_identifiers + [element.element_identifier]
            if not element.is_collection:
                data.append([])
                source = {
                    "identifiers": identifiers,
                    "dataset": element_object,
                    "tags": element_object.make_tag_string_list()
                }
                sources.append(source)
            else:
                child_collection_type_description = collection_type_description.child_collection_type_description(
                )
                element_data, element_sources = self.__init_rule_data(
                    element_object.elements, child_collection_type_description,
                    identifiers)
                data.extend(element_data)
                sources.extend(element_sources)

        return data, sources

    def __get_history_collection_instance(self,
                                          trans,
                                          id,
                                          check_ownership=False,
                                          check_accessible=True):
        instance_id = int(trans.app.security.decode_id(id))
        collection_instance = trans.sa_session.query(
            trans.app.model.HistoryDatasetCollectionAssociation).get(
                instance_id)
        history = getattr(trans, 'history', collection_instance.history)
        if check_ownership:
            self.history_manager.error_unless_owner(
                collection_instance.history,
                trans.user,
                current_history=history)
        if check_accessible:
            self.history_manager.error_unless_accessible(
                collection_instance.history,
                trans.user,
                current_history=history)
        return collection_instance

    def __get_library_collection_instance(self,
                                          trans,
                                          id,
                                          check_ownership=False,
                                          check_accessible=True):
        if check_ownership:
            raise NotImplemented(
                "Functionality (getting library dataset collection with ownership check) unimplemented."
            )
        instance_id = int(trans.security.decode_id(id))
        collection_instance = trans.sa_session.query(
            trans.app.model.LibraryDatasetCollectionAssociation).get(
                instance_id)
        if check_accessible:
            if not trans.app.security_agent.can_access_library_item(
                    trans.get_current_user_roles(), collection_instance,
                    trans.user):
                raise ItemAccessibilityException(
                    "LibraryDatasetCollectionAssociation is not accessible to the current user",
                    type='error')
        return collection_instance
Example #6
0
class DatasetCollectionManager(object):
    """
    Abstraction for interfacing with dataset collections instance - ideally abstracts
    out model and plugin details.
    """
    ELEMENTS_UNINITIALIZED = object()

    def __init__(self, app):
        """
        Build the collection type registry/description factory and the helper
        managers (HDA, history, tag, LDDA) this manager delegates to.
        """
        registry = DatasetCollectionTypesRegistry(app)
        self.type_registry = registry
        self.collection_type_descriptions = CollectionTypeDescriptionFactory(registry)
        self.model, self.security = app.model, app.security

        # Managers are constructed in the same order as before.
        self.hda_manager = hdas.HDAManager(app)
        self.history_manager = histories.HistoryManager(app)
        # The tag manager is bound to the model's session context, not the app.
        self.tag_manager = tags.GalaxyTagManager(app.model.context)
        self.ldda_manager = lddas.LDDAManager(app)

    def precreate_dataset_collection_instance(self, trans, parent, name, structure, implicit_inputs=None, implicit_output_name=None, tags=None):
        # TODO: prebuild all required HIDs and send them in so no need to flush in between.
        dataset_collection = self.precreate_dataset_collection(structure, allow_unitialized_element=implicit_output_name is not None)
        instance = self._create_instance_for_collection(
            trans, parent, name, dataset_collection, implicit_inputs=implicit_inputs, implicit_output_name=implicit_output_name, flush=False, tags=tags
        )
        return instance

    def precreate_dataset_collection(self, structure, allow_unitialized_element=True):
        """
        Build an unpopulated DatasetCollection mirroring ``structure``.

        When the structure is a leaf or its children are not known, the result
        is either the UNINITIALIZED_ELEMENT marker (if
        ``allow_unitialized_element``) or a bare unpopulated collection carrying
        only its collection type. Otherwise one element is created per child,
        recursing into non-leaf children.
        """
        structure_known = not structure.is_leaf and structure.children_known
        if not structure_known and allow_unitialized_element:
            return model.DatasetCollectionElement.UNINITIALIZED_ELEMENT

        description = structure.collection_type_description
        dataset_collection = model.DatasetCollection(populated=False)
        dataset_collection.collection_type = description.collection_type
        if not structure_known:
            return dataset_collection

        child_elements = []
        for position, (identifier, substructure) in enumerate(structure.children):
            # TODO: Open question - populate these now or later?
            if substructure.is_leaf:
                child = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
            else:
                child = self.precreate_dataset_collection(substructure, allow_unitialized_element=allow_unitialized_element)
            child_elements.append(
                model.DatasetCollectionElement(
                    element=child,
                    element_identifier=identifier,
                    element_index=position,
                )
            )
        dataset_collection.elements = child_elements
        dataset_collection.element_count = len(child_elements)
        return dataset_collection

    def create(self, trans, parent, name, collection_type, element_identifiers=None,
               elements=None, implicit_collection_info=None, trusted_identifiers=None,
               hide_source_items=False, tags=None, copy_elements=False):
        """
        PRECONDITION: security checks on ability to add to parent
        occurred during load.
        """
        # Trust embedded, newly created objects created by tool subsystem.
        if trusted_identifiers is None:
            trusted_identifiers = implicit_collection_info is not None

        if element_identifiers and not trusted_identifiers:
            validate_input_element_identifiers(element_identifiers)

        dataset_collection = self.create_dataset_collection(
            trans=trans,
            collection_type=collection_type,
            element_identifiers=element_identifiers,
            elements=elements,
            hide_source_items=hide_source_items,
            copy_elements=copy_elements,
        )

        implicit_inputs = []
        if implicit_collection_info:
            implicit_inputs = implicit_collection_info.get('implicit_inputs', [])

        implicit_output_name = None
        if implicit_collection_info:
            implicit_output_name = implicit_collection_info["implicit_output_name"]

        return self._create_instance_for_collection(
            trans, parent, name, dataset_collection, implicit_inputs=implicit_inputs, implicit_output_name=implicit_output_name, tags=tags
        )

    def _create_instance_for_collection(self, trans, parent, name, dataset_collection, implicit_output_name=None, implicit_inputs=None, tags=None, flush=True):
        """
        Wrap ``dataset_collection`` in an instance bound to ``parent``, attach
        tags and persist it.

        :param parent: a ``model.History`` (yields a
            HistoryDatasetCollectionAssociation, added to the history) or a
            ``model.LibraryFolder`` (yields a LibraryDatasetCollectionAssociation).
        :param implicit_inputs: iterable of ``(input_name, input_collection)``
            pairs recorded on history instances.
        :param tags: either a list (handed to
            ``tag_manager.add_tags_from_list``) or anything else, which is
            handed to ``_append_tags`` together with ``implicit_inputs``.
        :param flush: whether the session is flushed after adding the instance.
        :returns: the new collection instance.
        :raises MessageException: if ``parent`` is neither of the known types.
        """
        if isinstance(parent, model.History):
            dataset_collection_instance = self.model.HistoryDatasetCollectionAssociation(
                collection=dataset_collection,
                name=name,
            )
            if implicit_inputs:
                for input_name, input_collection in implicit_inputs:
                    dataset_collection_instance.add_implicit_input_collection(input_name, input_collection)

            if implicit_output_name:
                dataset_collection_instance.implicit_output_name = implicit_output_name

            log.debug("Created collection with %d elements", len(dataset_collection_instance.collection.elements))
            # Handle setting hid
            parent.add_dataset_collection(dataset_collection_instance)

        elif isinstance(parent, model.LibraryFolder):
            dataset_collection_instance = self.model.LibraryDatasetCollectionAssociation(
                collection=dataset_collection,
                folder=parent,
                name=name,
            )

        else:
            message = "Internal logic error - create called with unknown parent type %s" % type(parent)
            # No exception is being handled here, so log.exception would attach
            # a meaningless empty traceback; log a plain error instead.
            log.error(message)
            raise MessageException(message)

        # Tags may be coming in as a dictionary of tag model objects if copying them from other
        # existing Galaxy objects or as a list of strings if the tags are coming from user supplied
        # values.
        if isinstance(tags, list):
            # create() passes an empty list when there is no implicit collection
            # info, so only *non-empty* implicit inputs conflict with list tags.
            assert not implicit_inputs, implicit_inputs
            self.tag_manager.add_tags_from_list(trans.user, dataset_collection_instance, tags)
        else:
            self._append_tags(dataset_collection_instance, implicit_inputs, tags)
        return self.__persist(dataset_collection_instance, flush=flush)

    def create_dataset_collection(self, trans, collection_type, element_identifiers=None, elements=None,
                                  hide_source_items=None, copy_elements=False):
        # Make sure at least one of these is None.
        assert element_identifiers is None or elements is None

        if element_identifiers is None and elements is None:
            raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION)
        if not collection_type:
            raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)

        collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
        has_subcollections = collection_type_description.has_subcollections()
        # If we have elements, this is an internal request, don't need to load
        # objects from identifiers.
        if elements is None:
            elements = self._element_identifiers_to_elements(trans,
                                                             collection_type_description=collection_type_description,
                                                             element_identifiers=element_identifiers,
                                                             hide_source_items=hide_source_items,
                                                             copy_elements=copy_elements)
        else:
            if has_subcollections:
                # Nested collection - recursively create collections as needed.
                self.__recursively_create_collections_for_elements(trans, elements, hide_source_items, copy_elements=copy_elements)
        # else if elements is set, it better be an ordered dict!

        if elements is not self.ELEMENTS_UNINITIALIZED:
            type_plugin = collection_type_description.rank_type_plugin()
            dataset_collection = builder.build_collection(type_plugin, elements)
        else:
            dataset_collection = model.DatasetCollection(populated=False)
        dataset_collection.collection_type = collection_type
        return dataset_collection

    def _element_identifiers_to_elements(self,
                                         trans,
                                         collection_type_description,
                                         element_identifiers,
                                         hide_source_items=False,
                                         copy_elements=False):
        if collection_type_description.has_subcollections():
            # Nested collection - recursively create collections and update identifiers.
            self.__recursively_create_collections_for_identifiers(trans, element_identifiers, hide_source_items, copy_elements)
        new_collection = False
        for element_identifier in element_identifiers:
            if element_identifier.get("src") == "new_collection" and element_identifier.get('collection_type') == '':
                new_collection = True
                elements = self.__load_elements(trans=trans,
                                                element_identifiers=element_identifier['element_identifiers'],
                                                hide_source_items=hide_source_items,
                                                copy_elements=copy_elements)
        if not new_collection:
            elements = self.__load_elements(trans=trans,
                                            element_identifiers=element_identifiers,
                                            hide_source_items=hide_source_items,
                                            copy_elements=copy_elements)
        return elements

    def _append_tags(self, dataset_collection_instance, implicit_inputs=None, tags=None):
        tags = tags or {}
        implicit_inputs = implicit_inputs or []
        for _, v in implicit_inputs:
            for tag in v.auto_propagated_tags:
                tags[tag.value] = tag
        for _, tag in tags.items():
            dataset_collection_instance.tags.append(tag.copy(cls=model.HistoryDatasetCollectionTagAssociation))

    def set_collection_elements(self, dataset_collection, dataset_instances):
        if dataset_collection.populated:
            raise Exception("Cannot reset elements of an already populated dataset collection.")

        collection_type = dataset_collection.collection_type
        collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
        type_plugin = collection_type_description.rank_type_plugin()
        builder.set_collection_elements(dataset_collection, type_plugin, dataset_instances)
        dataset_collection.mark_as_populated()

        return dataset_collection

    def collection_builder_for(self, dataset_collection):
        """
        Return a ``builder.BoundCollectionBuilder`` for ``dataset_collection``,
        bound to the description of its collection type.
        """
        description = self.collection_type_descriptions.for_collection_type(
            dataset_collection.collection_type
        )
        return builder.BoundCollectionBuilder(dataset_collection, description)

    def delete(self, trans, instance_type, id, recursive=False, purge=False):
        dataset_collection_instance = self.get_dataset_collection_instance(trans, instance_type, id, check_ownership=True)
        dataset_collection_instance.deleted = True
        trans.sa_session.add(dataset_collection_instance)

        if recursive:
            for dataset in dataset_collection_instance.collection.dataset_instances:
                self.hda_manager.error_unless_owner(dataset, user=trans.get_user(), current_history=trans.history)
                if not dataset.deleted:
                    dataset.deleted = True

                if purge and not dataset.purged:
                    self.hda_manager.purge(dataset)

        trans.sa_session.flush()

    def update(self, trans, instance_type, id, payload):
        dataset_collection_instance = self.get_dataset_collection_instance(trans, instance_type, id, check_ownership=True)
        if trans.user is None:
            anon_allowed_payload = {}
            if 'deleted' in payload:
                anon_allowed_payload['deleted'] = payload['deleted']
            if 'visible' in payload:
                anon_allowed_payload['visible'] = payload['visible']
            payload = self._validate_and_parse_update_payload(anon_allowed_payload)
        else:
            payload = self._validate_and_parse_update_payload(payload)
        changed = self._set_from_dict(trans, dataset_collection_instance, payload)
        return changed

    def copy(self, trans, parent, source, encoded_source_id, copy_elements=False):
        """
        PRECONDITION: security checks on ability to add to parent occurred
        during load.
        """
        assert source == "hdca"  # for now
        source_hdca = self.__get_history_collection_instance(trans, encoded_source_id)
        copy_kwds = {}
        if copy_elements:
            copy_kwds["element_destination"] = parent
        new_hdca = source_hdca.copy(**copy_kwds)
        tags_str = self.tag_manager.get_tags_str(source_hdca.tags)
        self.tag_manager.apply_item_tags(trans.get_user(), new_hdca, tags_str)
        parent.add_dataset_collection(new_hdca)
        trans.sa_session.add(new_hdca)
        trans.sa_session.flush()
        return new_hdca

    def _set_from_dict(self, trans, dataset_collection_instance, new_data):
        # send what we can down into the model
        changed = dataset_collection_instance.set_from_dict(new_data)
        # the rest (often involving the trans) - do here
        if 'annotation' in new_data.keys() and trans.get_user():
            dataset_collection_instance.add_item_annotation(trans.sa_session, trans.get_user(), dataset_collection_instance, new_data['annotation'])
            changed['annotation'] = new_data['annotation']
        if 'tags' in new_data.keys() and trans.get_user():
            # set_tags_from_list will flush on its own, no need to add to 'changed' here and incur a second flush.
            self.tag_manager.set_tags_from_list(trans.get_user(), dataset_collection_instance, new_data['tags'])

        if changed.keys():
            trans.sa_session.flush()

        return changed

    def _validate_and_parse_update_payload(self, payload):
        validated_payload = {}
        for key, val in payload.items():
            if val is None:
                continue
            if key in ('name'):
                val = validation.validate_and_sanitize_basestring(key, val)
                validated_payload[key] = val
            if key in ('deleted', 'visible'):
                validated_payload[key] = validation.validate_boolean(key, val)
            elif key == 'tags':
                validated_payload[key] = validation.validate_and_sanitize_basestring_list(key, val)
        return validated_payload

    def history_dataset_collections(self, history, query):
        collections = history.active_dataset_collections
        collections = list(filter(query.direct_match, collections))
        return collections

    def __persist(self, dataset_collection_instance, flush=True):
        context = self.model.context
        context.add(dataset_collection_instance)
        if flush:
            context.flush()
        return dataset_collection_instance

    def __recursively_create_collections_for_identifiers(self, trans, element_identifiers, hide_source_items, copy_elements):
        for index, element_identifier in enumerate(element_identifiers):
            try:
                if element_identifier.get("src", None) != "new_collection":
                    # not a new collection, keep moving...
                    continue
            except KeyError:
                # Not a dictionary, just an id of an HDA - move along.
                continue

            # element identifier is a dict with src new_collection...
            collection_type = element_identifier.get("collection_type", None)
            collection = self.create_dataset_collection(
                trans=trans,
                collection_type=collection_type,
                element_identifiers=element_identifier["element_identifiers"],
                hide_source_items=hide_source_items,
                copy_elements=copy_elements,
            )
            element_identifier["__object__"] = collection

        return element_identifiers

    def __recursively_create_collections_for_elements(self, trans, elements, hide_source_items, copy_elements):
        if elements is self.ELEMENTS_UNINITIALIZED:
            return

        new_elements = odict.odict()
        for key, element in elements.items():
            if isinstance(element, model.DatasetCollection):
                continue

            if element.get("src", None) != "new_collection":
                continue

            # element is a dict with src new_collection and
            # and odict of named elements
            collection_type = element.get("collection_type", None)
            sub_elements = element["elements"]
            collection = self.create_dataset_collection(
                trans=trans,
                collection_type=collection_type,
                elements=sub_elements,
                hide_source_items=hide_source_items,
                copy_elements=copy_elements
            )
            new_elements[key] = collection
        elements.update(new_elements)

    def __load_elements(self, trans, element_identifiers, hide_source_items=False, copy_elements=False):
        """
        Load every element identifier and return an ordered dict mapping each
        identifier's "name" to the loaded element.
        """
        loaded = odict.odict()
        for identifier in element_identifiers:
            element = self.__load_element(
                trans,
                element_identifier=identifier,
                hide_source_items=hide_source_items,
                copy_elements=copy_elements,
            )
            loaded[identifier["name"]] = element
        return loaded

    def __load_element(self, trans, element_identifier, hide_source_items, copy_elements):
        # if not isinstance( element_identifier, dict ):
        #    # Is allowing this to just be the id of an hda too clever? Somewhat
        #    # consistent with other API methods though.
        #    element_identifier = dict( src='hda', id=str( element_identifier ) )

        # Previously created collection already found in request, just pass
        # through as is.
        if "__object__" in element_identifier:
            the_object = element_identifier["__object__"]
            if the_object is not None and the_object.id:
                context = self.model.context
                if the_object not in context:
                    the_object = context.query(type(the_object)).get(the_object.id)
            return the_object

        # dataset_identifier is dict {src=hda|ldda|hdca|new_collection, id=<encoded_id>}
        try:
            src_type = element_identifier.get('src', 'hda')
        except AttributeError:
            raise MessageException("Dataset collection element definition (%s) not dictionary-like." % element_identifier)
        encoded_id = element_identifier.get('id', None)
        if not src_type or not encoded_id:
            message_template = "Problem decoding element identifier %s - must contain a 'src' and a 'id'."
            message = message_template % element_identifier
            raise RequestParameterInvalidException(message)

        tags = element_identifier.pop('tags', None)
        tag_str = ''
        if tags:
            tag_str = ",".join(str(_) for _ in tags)
        if src_type == 'hda':
            decoded_id = int(trans.app.security.decode_id(encoded_id))
            hda = self.hda_manager.get_accessible(decoded_id, trans.user)
            if copy_elements:
                element = self.hda_manager.copy(hda, history=trans.history, hide_copy=True)
            else:
                element = hda
            if hide_source_items and self.hda_manager.get_owned(hda.id, user=trans.user, current_history=trans.history):
                hda.visible = False
            self.tag_manager.apply_item_tags(user=trans.user, item=element, tags_str=tag_str)
        elif src_type == 'ldda':
            element = self.ldda_manager.get(trans, encoded_id, check_accessible=True)
            element = element.to_history_dataset_association(trans.history, add_to_history=True)
            self.tag_manager.apply_item_tags(user=trans.user, item=element, tags_str=tag_str)
        elif src_type == 'hdca':
            # TODO: Option to copy? Force copy? Copy or allow if not owned?
            element = self.__get_history_collection_instance(trans, encoded_id).collection
        # TODO: ldca.
        else:
            raise RequestParameterInvalidException("Unknown src_type parameter supplied '%s'." % src_type)
        return element

    def match_collections(self, collections_to_match):
        """
        Delegate to ``MatchingCollections.for_collections`` with this manager's
        collection type descriptions.

        May seem odd to place it here, but planning to grow sophistication and
        get plugin types involved so it will likely make sense in the future.
        """
        descriptions = self.collection_type_descriptions
        return MatchingCollections.for_collections(collections_to_match, descriptions)

    def get_dataset_collection_instance(self, trans, instance_type, id, **kwds):
        """
        """
        if instance_type == "history":
            return self.__get_history_collection_instance(trans, id, **kwds)
        elif instance_type == "library":
            return self.__get_library_collection_instance(trans, id, **kwds)

    def get_dataset_collection(self, trans, encoded_id):
        collection_id = int(trans.app.security.decode_id(encoded_id))
        collection = trans.sa_session.query(trans.app.model.DatasetCollection).get(collection_id)
        return collection

    def apply_rules(self, hdca, rule_set, handle_dataset):
        hdca_collection = hdca.collection
        collection_type = hdca_collection.collection_type
        elements = hdca_collection.elements
        collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
        initial_data, initial_sources = self.__init_rule_data(elements, collection_type_description)
        data, sources = rule_set.apply(initial_data, initial_sources)

        collection_type = rule_set.collection_type
        collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
        elements = self._build_elements_from_rule_data(collection_type_description, rule_set, data, sources, handle_dataset)
        return elements

    def _build_elements_from_rule_data(self, collection_type_description, rule_set, data, sources, handle_dataset):
        """
        Arrange the datasets of ``sources`` into a nested ordered dict of
        elements, one nesting level per identifier column of ``rule_set``.

        Intermediate levels are "new_collection" dicts holding an ordered dict
        of "elements"; at the last level the value is
        ``handle_dataset(source["dataset"])``. For a "paired" level the
        identifier is normalised to "forward" or "reverse"; an unrecognised
        value raises Exception.
        """
        identifier_columns = rule_set.identifier_columns
        elements = odict.odict()
        for row_index, row in enumerate(data):
            # For each row, find place in depth for this element.
            description_at_depth = collection_type_description
            container = elements

            for depth, column in enumerate(identifier_columns):
                identifier = row[column]

                if depth + 1 != len(identifier_columns):
                    # Descend one level, creating the sub-collection on first use.
                    description_at_depth = description_at_depth.child_collection_type_description()
                    if identifier not in container:
                        container[identifier] = {
                            "src": "new_collection",
                            "collection_type": description_at_depth.collection_type,
                            "elements": odict.odict(),
                        }
                    container = container[identifier]["elements"]
                    continue

                # At correct final position in nested structure for this dataset.
                if description_at_depth.collection_type == "paired":
                    lowered = identifier.lower()
                    if lowered in ["f", "1", "r1", "forward"]:
                        identifier = "forward"
                    elif lowered in ["r", "2", "r2", "reverse"]:
                        identifier = "reverse"
                    else:
                        raise Exception("Unknown indicator of paired status encountered - only values of F, R, 1, 2, R1, R2, forward, or reverse are allowed.")

                container[identifier] = handle_dataset(sources[row_index]["dataset"])

        return elements

    def __init_rule_data(self, elements, collection_type_description, parent_identifiers=None):
        parent_identifiers = parent_identifiers or []
        data, sources = [], []
        for element in elements:
            element_object = element.element_object
            identifiers = parent_identifiers + [element.element_identifier]
            if not element.is_collection:
                data.append([])
                source = {"identifiers": identifiers, "dataset": element_object, "tags": element_object.make_tag_string_list()}
                sources.append(source)
            else:
                child_collection_type_description = collection_type_description.child_collection_type_description()
                element_data, element_sources = self.__init_rule_data(
                    element_object.elements, child_collection_type_description, identifiers
                )
                data.extend(element_data)
                sources.extend(element_sources)

        return data, sources

    def __get_history_collection_instance(self, trans, id, check_ownership=False, check_accessible=True):
        instance_id = int(trans.app.security.decode_id(id))
        collection_instance = trans.sa_session.query(trans.app.model.HistoryDatasetCollectionAssociation).get(instance_id)
        history = getattr(trans, 'history', collection_instance.history)
        if check_ownership:
            self.history_manager.error_unless_owner(collection_instance.history, trans.user, current_history=history)
        if check_accessible:
            self.history_manager.error_unless_accessible(collection_instance.history, trans.user, current_history=history)
        return collection_instance

    def __get_library_collection_instance(self, trans, id, check_ownership=False, check_accessible=True):
        if check_ownership:
            raise NotImplementedError("Functionality (getting library dataset collection with ownership check) unimplemented.")
        instance_id = int(trans.security.decode_id(id))
        collection_instance = trans.sa_session.query(trans.app.model.LibraryDatasetCollectionAssociation).get(instance_id)
        if check_accessible:
            if not trans.app.security_agent.can_access_library_item(trans.get_current_user_roles(), collection_instance, trans.user):
                raise ItemAccessibilityException("LibraryDatasetCollectionAssociation is not accessible to the current user", type='error')
        return collection_instance