Example #1
        def _remap(container, line):
            id_match = re.search(UNENCODED_ID_PATTERN, line)
            object_id = None
            encoded_id = None
            if id_match:
                object_id = int(id_match.group(2))
                encoded_id = trans.security.encode_id(object_id)
                line = line.replace(id_match.group(),
                                    "%s=%s" % (id_match.group(1), encoded_id))

            if container == "history_dataset_display":
                assert object_id is not None
                hda = hda_manager.get_accessible(object_id, trans.user)
                rval = self.handle_dataset_display(line, hda)
            elif container == "history_dataset_embedded":
                assert object_id is not None
                hda = hda_manager.get_accessible(object_id, trans.user)
                rval = self.handle_dataset_embedded(line, hda)
            elif container == "history_dataset_as_image":
                assert object_id is not None
                hda = hda_manager.get_accessible(object_id, trans.user)
                rval = self.handle_dataset_as_image(line, hda)
            elif container == "history_dataset_peek":
                assert object_id is not None
                hda = hda_manager.get_accessible(object_id, trans.user)
                rval = self.handle_dataset_peek(line, hda)
            elif container == "history_dataset_info":
                assert object_id is not None
                hda = hda_manager.get_accessible(object_id, trans.user)
                rval = self.handle_dataset_info(line, hda)
            elif container == "workflow_display":
                stored_workflow = workflow_manager.get_stored_accessible_workflow(
                    trans, encoded_id)
                # TODO: should be workflow id...
                rval = self.handle_workflow_display(line, stored_workflow)
            elif container == "history_dataset_collection_display":
                hdca = collection_manager.get_dataset_collection_instance(
                    trans, "history", encoded_id)
                rval = self.handle_dataset_collection_display(line, hdca)
            elif container == "tool_stdout":
                job = job_manager.get_accessible_job(trans, object_id)
                rval = self.handle_tool_stdout(line, job)
            elif container == "tool_stderr":
                job = job_manager.get_accessible_job(trans, object_id)
                rval = self.handle_tool_stderr(line, job)
            elif container == "job_parameters":
                job = job_manager.get_accessible_job(trans, object_id)
                rval = self.handle_job_parameters(line, job)
            elif container == "job_metrics":
                job = job_manager.get_accessible_job(trans, object_id)
                rval = self.handle_job_metrics(line, job)
            else:
                raise MalformedContents(
                    "Unknown Galaxy Markdown directive encountered [%s]" %
                    container)
            if rval is not None:
                return rval
            else:
                return (line, False)
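
A note on the id-rewriting step at the top of `_remap`: before dispatching on the directive name, the plain integer object id on the line is swapped for its encoded form. Below is a minimal, self-contained sketch of that substitution; the pattern and `encode_id` here are simplified stand-ins for Galaxy's `UNENCODED_ID_PATTERN` and `trans.security.encode_id`, not the real implementations.

    import re

    # Hypothetical stand-in for UNENCODED_ID_PATTERN: a directive argument
    # such as `history_dataset_id=42` carrying a plain integer id.
    UNENCODED_ID_PATTERN = r"(history_dataset_id|workflow_id|job_id)=(\d+)"

    def encode_id(object_id):
        # Toy encoder; Galaxy uses trans.security.encode_id() instead.
        return format(object_id, "016x")

    def remap_ids(line):
        id_match = re.search(UNENCODED_ID_PATTERN, line)
        if id_match:
            object_id = int(id_match.group(2))
            line = line.replace(id_match.group(), f"{id_match.group(1)}={encode_id(object_id)}")
        return line

    print(remap_ids("history_dataset_display(history_dataset_id=42)"))
    # -> history_dataset_display(history_dataset_id=000000000000002a)
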
Example #2
def contains_required_data(response=None, is_new=False, **kwargs):
    """
    This function is called as part of the authentication and authorization
    pipeline, before a user is authenticated or authorized (see AUTH_PIPELINE).

    This function asserts that all the data Galaxy requires for a user
    is provided. It raises an exception if any of the required data is
    missing, and returns None otherwise.

    :type  response: dict
    :param response:  a dictionary containing the decoded response from
                      the OIDC backend, which contains the following keys
                      among others:

                        -   id_token;       see: http://openid.net/specs/openid-connect-core-1_0.html#IDToken
                        -   access_token;   see: https://tools.ietf.org/html/rfc6749#section-1.4
                        -   refresh_token;  see: https://tools.ietf.org/html/rfc6749#section-1.5
                        -   token_type;     see: https://tools.ietf.org/html/rfc6750#section-6.1.1
                        -   scope;          see: http://openid.net/specs/openid-connect-core-1_0.html#AuthRequest
                        -   expires_in;     is the expiration time of the access and ID tokens in seconds since
                                            the response was generated.

    :type  is_new: bool
    :param is_new: has the user been authenticated?

    :param kwargs:      may contain the following keys among others:

                        -   uid:        user ID
                        -   user:       Galaxy user; if user is already authenticated
                        -   backend:    the backend that is used for user authentication.
                        -   storage:    an instance of Storage class.
                        -   strategy:   an instance of the Strategy class.
                        -   state:      the state code received from identity provider.
                        -   details:    details about the user's third-party identity as requested in `scope`.

    :rtype:  None
    :return: Raises an exception if any of the required arguments is missing; passes if all are given.
    """
    hint_msg = "Visit the identity provider's permitted applications page " \
               "(e.g., visit `https://myaccount.google.com/u/0/permissions` " \
               "for Google), revoke the access of this Galaxy instance, " \
               "and then retry logging in. If the problem persists, contact " \
               "the admin of this Galaxy instance."
    if response is None or not isinstance(response, dict):
        # This can happen only if PSA is not able to decode the `authnz code`
        # sent back from the identity provider. PSA internally handles such
        # scenarios; however, this case is implemented to prevent uncaught
        # server-side errors.
        raise MalformedContents(err_msg=f"`response` not found. {hint_msg}")
    if not response.get("id_token"):
        # This can happen if a non-OIDC compliant backend is used;
        # e.g., an OAuth2.0-based backend that only generates access token.
        raise MalformedContents(err_msg=f"Missing identity token. {hint_msg}")
    if is_new and not response.get("refresh_token"):
        # An identity provider (e.g., Google) sends a refresh token the first
        # time the user consents to Galaxy's access (i.e., the first time the
        # user logs in to a Galaxy instance using their credentials with the
        # identity provider). There are a variety of scenarios under which a
        # refresh token might be missing; e.g., a manipulated Galaxy database,
        # where a user's records in the galaxy_user and oidc_user_authnz_tokens
        # tables were deleted after the user had provided consent. This can
        # also happen during development. The solution is to revoke the consent
        # by visiting the identity provider's website, and then retry the login
        # process.
        raise MalformedContents(err_msg=f"Missing refresh token. {hint_msg}")
Example #3
 def _remap(container, line):
     id_match = re.search(UNENCODED_ID_PATTERN, line)
     object_id = None
     encoded_id = None
     if id_match:
         object_id = int(id_match.group(2))
         encoded_id = trans.security.encode_id(object_id)
         line = line.replace(id_match.group(), f"{id_match.group(1)}={encoded_id}")
     if container == "history_link":
         _check_object(object_id, line)
         history = history_manager.get_accessible(object_id, trans.user)
         rval = self.handle_history_link(line, history)
     elif container == "history_dataset_display":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_display(line, hda)
     elif container == "history_dataset_link":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_display(line, hda)
     elif container == "history_dataset_index":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_display(line, hda)
     elif container == "history_dataset_embedded":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_embedded(line, hda)
     elif container == "history_dataset_as_image":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_as_image(line, hda)
     elif container == "history_dataset_peek":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_peek(line, hda)
     elif container == "history_dataset_info":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_info(line, hda)
     elif container == "history_dataset_type":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_type(line, hda)
     elif container == "history_dataset_name":
         _check_object(object_id, line)
         hda = hda_manager.get_accessible(object_id, trans.user)
         rval = self.handle_dataset_name(line, hda)
     elif container == "workflow_display":
         stored_workflow = workflow_manager.get_stored_accessible_workflow(trans, encoded_id)
         rval = self.handle_workflow_display(line, stored_workflow)
     elif container == "history_dataset_collection_display":
         hdca = collection_manager.get_dataset_collection_instance(trans, "history", encoded_id)
         rval = self.handle_dataset_collection_display(line, hdca)
     elif container == "tool_stdout":
         job = job_manager.get_accessible_job(trans, object_id)
         rval = self.handle_tool_stdout(line, job)
     elif container == "tool_stderr":
         job = job_manager.get_accessible_job(trans, object_id)
         rval = self.handle_tool_stderr(line, job)
     elif container == "job_parameters":
         job = job_manager.get_accessible_job(trans, object_id)
         rval = self.handle_job_parameters(line, job)
     elif container == "job_metrics":
         job = job_manager.get_accessible_job(trans, object_id)
         rval = self.handle_job_metrics(line, job)
     elif container == "generate_galaxy_version":
         version = trans.app.config.version_major
         rval = self.handle_generate_galaxy_version(line, version)
     elif container == "generate_time":
         rval = self.handle_generate_time(line, now())
     elif container == "invocation_time":
         invocation = workflow_manager.get_invocation(trans, object_id)
         rval = self.handle_invocation_time(line, invocation)
     elif container == "visualization":
         rval = None
     else:
         raise MalformedContents(f"Unknown Galaxy Markdown directive encountered [{container}].")
     if rval is not None:
         return rval
     else:
         return (line, False)
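
Worth noting is the return contract at the end of `_remap`: a handler may return a replacement value, or None to leave the directive alone (as the `visualization` branch does), in which case the tuple `(line, False)` is returned. A toy sketch of that fallthrough, using hypothetical handler names rather than the real Galaxy ones:

    def handle_generate_time(line, when):
        # Hypothetical handler that rewrites the directive line.
        return (f"{line} <!-- rendered at {when} -->", True)

    def handle_visualization(line):
        # Hypothetical handler that declines to rewrite anything.
        return None

    def remap(container, line):
        if container == "generate_time":
            rval = handle_generate_time(line, "2024-01-01T00:00:00")
        elif container == "visualization":
            rval = handle_visualization(line)
        else:
            raise ValueError(f"Unknown directive [{container}].")
        # Fall through to the unmodified line when the handler returns None.
        return rval if rval is not None else (line, False)

    print(remap("generate_time", "generate_time()"))
    print(remap("visualization", "visualization(viz_id=1)"))  # -> ('visualization(viz_id=1)', False)
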
Example #4
 def _check_object(object_id, line):
     if object_id is None:
         raise MalformedContents(f"Missing object identifier [{line}].")
Example #5
def _validate(*args, **kwds):
    """Light wrapper around validate_galaxy_markdown to throw galaxy exceptions instead of ValueError."""
    try:
        return validate_galaxy_markdown(*args, **kwds)
    except ValueError as e:
        raise MalformedContents(str(e))
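
This wrapper is a small example of translating a library-level ValueError into a domain exception at the API boundary. A minimal sketch of the same idea, with a stand-in validator and exception (the real `validate_galaxy_markdown` and `MalformedContents` live in Galaxy and are not reproduced here):

    class MalformedContents(Exception):
        """Stand-in for galaxy.exceptions.MalformedContents."""

    def validate_galaxy_markdown(markdown):
        # Stand-in validator; the real one checks Galaxy Markdown directives.
        if "```galaxy" not in markdown:
            raise ValueError("markdown contains no Galaxy directive")

    def _validate(*args, **kwds):
        """Light wrapper that converts ValueError into the domain exception."""
        try:
            return validate_galaxy_markdown(*args, **kwds)
        except ValueError as e:
            raise MalformedContents(str(e)) from e

    try:
        _validate("plain markdown without directives")
    except MalformedContents as e:
        print(e)  # -> markdown contains no Galaxy directive

Using `raise ... from e` marks the ValueError as the explicit cause of the new exception; the Galaxy snippet above omits `from e`, which still records the original exception as implicit context.
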
Example #6
    def cleanup_after_job(self):
        """ Set history, datasets, and jobs' attributes and clean up archive directory. """

        #
        # Helper methods.
        #

        def file_in_dir(file_path, a_dir):
            """ Returns true if file is in directory. """
            abs_file_path = os.path.abspath(file_path)
            return os.path.split(abs_file_path)[0] == a_dir

        def get_tag_str(tag, value):
            """ Builds a tag string for a tag, value pair. """
            if not value:
                return tag
            else:
                return tag + ":" + value

        #
        # Import history.
        #

        jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by(
            job_id=self.job_id).first()
        if jiha:
            try:
                archive_dir = jiha.archive_dir
                archive_dir = os.path.realpath(archive_dir)
                user = jiha.job.user

                # Bioblend versions prior to 17.01 exported histories with an extra subdir.
                if not os.path.exists(
                        os.path.join(archive_dir, 'history_attrs.txt')):
                    for d in os.listdir(archive_dir):
                        if os.path.isdir(os.path.join(archive_dir, d)):
                            archive_dir = os.path.join(archive_dir, d)
                            break

                #
                # Create history.
                #
                history_attr_file_name = os.path.join(archive_dir,
                                                      'history_attrs.txt')
                history_attrs = load(open(history_attr_file_name))

                # Create history.
                new_history = model.History(name='imported from archive: %s' %
                                            history_attrs['name'],
                                            user=user)
                new_history.importing = True
                new_history.hid_counter = history_attrs['hid_counter']
                new_history.genome_build = history_attrs['genome_build']
                self.sa_session.add(new_history)
                jiha.history = new_history
                self.sa_session.flush()

                # Add annotation, tags.
                if user:
                    self.add_item_annotation(self.sa_session, user,
                                             new_history,
                                             history_attrs['annotation'])
                    """
                    TODO: figure out how to add tags to an item.
                    for tag, value in history_attrs[ 'tags' ].items():
                        trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) )
                    """

                #
                # Create datasets.
                #
                datasets_attrs_file_name = os.path.join(
                    archive_dir, 'datasets_attrs.txt')
                datasets_attrs = load(open(datasets_attrs_file_name))
                provenance_file_name = datasets_attrs_file_name + ".provenance"

                if os.path.exists(provenance_file_name):
                    provenance_attrs = load(open(provenance_file_name))
                    datasets_attrs += provenance_attrs

                # Get counts of how often each dataset file is used; a file can
                # be linked to multiple dataset objects (HDAs).
                datasets_usage_counts = {}
                for dataset_attrs in datasets_attrs:
                    temp_dataset_file_name = \
                        os.path.realpath(os.path.join(archive_dir, dataset_attrs['file_name']))
                    if (temp_dataset_file_name not in datasets_usage_counts):
                        datasets_usage_counts[temp_dataset_file_name] = 0
                    datasets_usage_counts[temp_dataset_file_name] += 1

                # Create datasets.
                for dataset_attrs in datasets_attrs:
                    metadata = dataset_attrs['metadata']

                    # Create dataset and HDA.
                    hda = model.HistoryDatasetAssociation(
                        name=dataset_attrs['name'],
                        extension=dataset_attrs['extension'],
                        info=dataset_attrs['info'],
                        blurb=dataset_attrs['blurb'],
                        peek=dataset_attrs['peek'],
                        designation=dataset_attrs['designation'],
                        visible=dataset_attrs['visible'],
                        dbkey=metadata['dbkey'],
                        metadata=metadata,
                        history=new_history,
                        create_dataset=True,
                        sa_session=self.sa_session)
                    if 'uuid' in dataset_attrs:
                        hda.dataset.uuid = dataset_attrs["uuid"]
                    if dataset_attrs.get('exported', True) is False:
                        hda.state = hda.states.DISCARDED
                        hda.deleted = True
                        hda.purged = True
                    else:
                        hda.state = hda.states.OK
                    self.sa_session.add(hda)
                    self.sa_session.flush()
                    new_history.add_dataset(hda, genome_build=None)
                    hda.hid = dataset_attrs['hid']  # Overwrite default hid set when HDA added to history.
                    # TODO: Is there a way to recover permissions? Is this needed?
                    # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                    # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                    self.sa_session.flush()
                    if dataset_attrs.get('exported', True) is True:
                        # Do security check and move/copy dataset data.
                        temp_dataset_file_name = \
                            os.path.realpath(os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name'])))
                        if not file_in_dir(
                                temp_dataset_file_name,
                                os.path.join(archive_dir, "datasets")):
                            raise MalformedContents(
                                "Invalid dataset path: %s" %
                                temp_dataset_file_name)
                        if datasets_usage_counts[temp_dataset_file_name] == 1:
                            self.app.object_store.update_from_file(
                                hda.dataset,
                                file_name=temp_dataset_file_name,
                                create=True)

                            # Import additional files if present. Histories exported previously might not have this attribute set.
                            dataset_extra_files_path = dataset_attrs.get(
                                'extra_files_path', None)
                            if dataset_extra_files_path:
                                try:
                                    file_list = os.listdir(
                                        os.path.join(archive_dir,
                                                     dataset_extra_files_path))
                                except OSError:
                                    file_list = []

                                if file_list:
                                    for extra_file in file_list:
                                        self.app.object_store.update_from_file(
                                            hda.dataset,
                                            extra_dir='dataset_%s_files' %
                                            hda.dataset.id,
                                            alt_name=extra_file,
                                            file_name=os.path.join(
                                                archive_dir,
                                                dataset_extra_files_path,
                                                extra_file),
                                            create=True)
                        else:
                            datasets_usage_counts[temp_dataset_file_name] -= 1
                            shutil.copyfile(temp_dataset_file_name,
                                            hda.file_name)
                        hda.dataset.set_total_size()  # update the filesize record in the database

                    # Set tags, annotations.
                    if user:
                        self.add_item_annotation(self.sa_session, user, hda,
                                                 dataset_attrs['annotation'])
                        # TODO: Set tags.
                        """
                        for tag, value in dataset_attrs[ 'tags' ].items():
                            trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) )
                            self.sa_session.flush()
                        """

                    # Although metadata is set above, need to set metadata to recover BAI for BAMs.
                    if hda.extension == 'bam':
                        self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app(
                            self.app.datatypes_registry.
                            set_external_metadata_tool,
                            self.app,
                            jiha.job.session_id,
                            new_history.id,
                            jiha.job.user,
                            incoming={'input1': hda},
                            overwrite=False)

                #
                # Create jobs.
                #

                # Decode jobs attributes.
                def as_hda(obj_dct):
                    """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by
                        the encoded object. This only works because HDAs are created above. """
                    if obj_dct.get('__HistoryDatasetAssociation__', False):
                        return self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=obj_dct['hid']).first()
                    return obj_dct

                jobs_attr_file_name = os.path.join(archive_dir,
                                                   'jobs_attrs.txt')
                jobs_attrs = load(open(jobs_attr_file_name),
                                  object_hook=as_hda)

                # Create each job.
                for job_attrs in jobs_attrs:
                    imported_job = model.Job()
                    imported_job.user = user
                    # TODO: set session?
                    # imported_job.session = trans.get_galaxy_session().id
                    imported_job.history = new_history
                    imported_job.imported = True
                    imported_job.tool_id = job_attrs['tool_id']
                    imported_job.tool_version = job_attrs['tool_version']
                    imported_job.set_state(job_attrs['state'])
                    imported_job.info = job_attrs.get('info', None)
                    imported_job.exit_code = job_attrs.get('exit_code', None)
                    imported_job.traceback = job_attrs.get('traceback', None)
                    imported_job.stdout = job_attrs.get('stdout', None)
                    imported_job.stderr = job_attrs.get('stderr', None)
                    imported_job.command_line = job_attrs.get(
                        'command_line', None)
                    try:
                        imported_job.create_time = datetime.datetime.strptime(
                            job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
                        imported_job.update_time = datetime.datetime.strptime(
                            job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
                    except Exception:
                        pass
                    self.sa_session.add(imported_job)
                    self.sa_session.flush()

                    class HistoryDatasetAssociationIDEncoder(json.JSONEncoder):
                        """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """
                        def default(self, obj):
                            """ Encode an HDA, default encoding for everything else. """
                            if isinstance(obj,
                                          model.HistoryDatasetAssociation):
                                return obj.id
                            return json.JSONEncoder.default(self, obj)

                    # Set parameters. May be useful to look at metadata.py for creating parameters.
                    # TODO: there may be a better way to set parameters, e.g.:
                    #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                    #       job.add_parameter( name, value )
                    # to make this work, we'd need to flesh out the HDA objects. The code below is
                    # relatively similar.
                    for name, value in job_attrs['params'].items():
                        # Transform parameter values when necessary.
                        if isinstance(value, model.HistoryDatasetAssociation):
                            # HDA input: use hid to find input.
                            input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                            .filter_by(history=new_history, hid=value.hid).first()
                            value = input_hda.id
                        # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                        imported_job.add_parameter(
                            name,
                            dumps(value,
                                  cls=HistoryDatasetAssociationIDEncoder))

                    # TODO: Connect jobs to input datasets.

                    # Connect jobs to output datasets.
                    for output_hid in job_attrs['output_datasets']:
                        # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                        output_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                            .filter_by(history=new_history, hid=output_hid).first()
                        if output_hda:
                            imported_job.add_output_dataset(
                                output_hda.name, output_hda)

                    # Connect jobs to input datasets.
                    if 'input_mapping' in job_attrs:
                        for input_name, input_hid in job_attrs[
                                'input_mapping'].items():
                            input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                            .filter_by(history=new_history, hid=input_hid).first()
                            if input_hda:
                                imported_job.add_input_dataset(
                                    input_name, input_hda)

                    self.sa_session.flush()

                # Done importing.
                new_history.importing = False
                self.sa_session.flush()

                # Cleanup.
                if os.path.exists(archive_dir):
                    shutil.rmtree(archive_dir)
            except Exception as e:
                jiha.job.stderr += "Error cleaning up history import job: %s" % e
                self.sa_session.flush()
                raise
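
Two JSON details in the job-import code above are easy to miss: `object_hook=as_hda` rebuilds HDA objects while decoding `jobs_attrs.txt`, and the nested `HistoryDatasetAssociationIDEncoder` collapses HDAs back to plain ids when job parameters are serialized. Below is a minimal, self-contained sketch of both mechanisms, with a toy `HDA` class standing in for `model.HistoryDatasetAssociation`.

    import json

    class HDA:
        """Toy stand-in for model.HistoryDatasetAssociation."""
        def __init__(self, id, hid):
            self.id = id
            self.hid = hid

    # Pretend these HDAs were created earlier during the history import.
    hdas_by_hid = {1: HDA(id=101, hid=1), 2: HDA(id=102, hid=2)}

    def as_hda(obj_dct):
        # object_hook: turn a tagged dict back into the matching HDA.
        if obj_dct.get('__HistoryDatasetAssociation__', False):
            return hdas_by_hid[obj_dct['hid']]
        return obj_dct

    class HDAIDEncoder(json.JSONEncoder):
        # Custom encoder: serialize an HDA as its database id.
        def default(self, obj):
            if isinstance(obj, HDA):
                return obj.id
            return json.JSONEncoder.default(self, obj)

    params = json.loads(
        '{"input1": {"__HistoryDatasetAssociation__": true, "hid": 2}}',
        object_hook=as_hda)
    print(params["input1"].id)                   # -> 102
    print(json.dumps(params, cls=HDAIDEncoder))  # -> {"input1": 102}
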