def _remap(container, line):
    id_match = re.search(UNENCODED_ID_PATTERN, line)
    object_id = None
    encoded_id = None
    if id_match:
        object_id = int(id_match.group(2))
        encoded_id = trans.security.encode_id(object_id)
        line = line.replace(id_match.group(), "%s=%s" % (id_match.group(1), encoded_id))
    if container == "history_dataset_display":
        assert object_id is not None
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_display(line, hda)
    elif container == "history_dataset_embedded":
        assert object_id is not None
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_embedded(line, hda)
    elif container == "history_dataset_as_image":
        assert object_id is not None
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_as_image(line, hda)
    elif container == "history_dataset_peek":
        assert object_id is not None
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_peek(line, hda)
    elif container == "history_dataset_info":
        assert object_id is not None
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_info(line, hda)
    elif container == "workflow_display":
        stored_workflow = workflow_manager.get_stored_accessible_workflow(trans, encoded_id)  # TODO: should be workflow id...
        rval = self.handle_workflow_display(line, stored_workflow)
    elif container == "history_dataset_collection_display":
        hdca = collection_manager.get_dataset_collection_instance(trans, "history", encoded_id)
        rval = self.handle_dataset_collection_display(line, hdca)
    elif container == "tool_stdout":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_tool_stdout(line, job)
    elif container == "tool_stderr":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_tool_stderr(line, job)
    elif container == "job_parameters":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_job_parameters(line, job)
    elif container == "job_metrics":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_job_metrics(line, job)
    else:
        raise MalformedContents("Unknown Galaxy Markdown directive encountered [%s]" % container)
    if rval is not None:
        return rval
    else:
        return (line, False)
def contains_required_data(response=None, is_new=False, **kwargs):
    """
    This function is called as part of the authentication and authorization
    pipeline before the user is authenticated or authorized (see AUTH_PIPELINE).

    This function asserts that all the data required by Galaxy for a user is
    provided. It raises an exception if any of the required data is missing,
    and returns void otherwise.

    :type  response: dict
    :param response: a dictionary containing the decoded response from the
                     OIDC backend that contains the following keys
                     among others:

                     -   id_token;       see: http://openid.net/specs/openid-connect-core-1_0.html#IDToken
                     -   access_token;   see: https://tools.ietf.org/html/rfc6749#section-1.4
                     -   refresh_token;  see: https://tools.ietf.org/html/rfc6749#section-1.5
                     -   token_type;     see: https://tools.ietf.org/html/rfc6750#section-6.1.1
                     -   scope;          see: http://openid.net/specs/openid-connect-core-1_0.html#AuthRequest
                     -   expires_in;     the expiration time of the access and ID tokens in seconds
                                         since the response was generated.

    :type  is_new: bool
    :param is_new: has the user been authenticated?

    :param kwargs: may contain the following keys among others:

                   -   uid:      user ID
                   -   user:     Galaxy user; if user is already authenticated
                   -   backend:  the backend that is used for user authentication.
                   -   storage:  an instance of Storage class.
                   -   strategy: an instance of the Strategy class.
                   -   state:    the state code received from the identity provider.
                   -   details:  details about the user's third-party identity as requested in `scope`.

    :rtype:  void
    :return: Raises an exception if any of the required arguments is missing,
             and passes if all are given.
    """
    hint_msg = (
        "Visit the identity provider's permitted applications page "
        "(e.g., visit `https://myaccount.google.com/u/0/permissions` "
        "for Google), then revoke the access of this Galaxy instance, "
        "and then retry to login. If the problem persists, contact "
        "the Admin of this Galaxy instance."
    )
    if response is None or not isinstance(response, dict):
        # This can happen only if PSA is not able to decode the `authnz code`
        # sent back from the identity provider. PSA internally handles such
        # scenarios; however, this case is implemented to prevent uncaught
        # server-side errors.
        raise MalformedContents(err_msg=f"`response` not found. {hint_msg}")
    if not response.get("id_token"):
        # This can happen if a non-OIDC compliant backend is used;
        # e.g., an OAuth2.0-based backend that only generates an access token.
        raise MalformedContents(err_msg=f"Missing identity token. {hint_msg}")
    if is_new and not response.get("refresh_token"):
        # An identity provider (e.g., Google) sends a refresh token the first
        # time the user consents to Galaxy's access (i.e., the first time the
        # user logs in to a Galaxy instance using their credentials with the
        # identity provider).
        # There is a variety of scenarios under which a refresh token might be
        # missing; e.g., a manipulated Galaxy database, where a user's records
        # in the galaxy_user and oidc_user_authnz_tokens tables were deleted
        # after the user had provided consent. This can also happen during dev
        # efforts.
        # The solution is to revoke the consent by visiting the identity
        # provider's website, and then retry the login process.
        raise MalformedContents(err_msg=f"Missing refresh token. {hint_msg}")
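# --- Illustrative sketch (not part of the authnz pipeline) --------------------
# Shows how `contains_required_data` above reacts to a hypothetical OIDC
# response dict. The token values are made up; only the key names come from
# the docstring above, and the helper name is hypothetical.
def _example_contains_required_data_usage():
    ok_response = {
        "id_token": "eyJhbGciOi...",      # hypothetical ID token
        "access_token": "ya29.a0Af...",   # hypothetical access token
        "refresh_token": "1//0gFake...",  # sent on first consent
        "token_type": "Bearer",
        "scope": "openid email profile",
        "expires_in": 3600,
    }
    contains_required_data(response=ok_response, is_new=True)  # passes silently

    try:
        # Missing `id_token`: e.g., a plain OAuth2.0 backend was used.
        contains_required_data(response={"access_token": "ya29.a0Af..."}, is_new=False)
    except MalformedContents:
        pass  # expected; the error message carries the revoke-consent hint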
def _remap(container, line):
    id_match = re.search(UNENCODED_ID_PATTERN, line)
    object_id = None
    encoded_id = None
    if id_match:
        object_id = int(id_match.group(2))
        encoded_id = trans.security.encode_id(object_id)
        line = line.replace(id_match.group(), f"{id_match.group(1)}={encoded_id}")
    if container == "history_link":
        _check_object(object_id, line)
        history = history_manager.get_accessible(object_id, trans.user)
        rval = self.handle_history_link(line, history)
    elif container == "history_dataset_display":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_display(line, hda)
    elif container == "history_dataset_link":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_display(line, hda)
    elif container == "history_dataset_index":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_display(line, hda)
    elif container == "history_dataset_embedded":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_embedded(line, hda)
    elif container == "history_dataset_as_image":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_as_image(line, hda)
    elif container == "history_dataset_peek":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_peek(line, hda)
    elif container == "history_dataset_info":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_info(line, hda)
    elif container == "history_dataset_type":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_type(line, hda)
    elif container == "history_dataset_name":
        _check_object(object_id, line)
        hda = hda_manager.get_accessible(object_id, trans.user)
        rval = self.handle_dataset_name(line, hda)
    elif container == "workflow_display":
        stored_workflow = workflow_manager.get_stored_accessible_workflow(trans, encoded_id)
        rval = self.handle_workflow_display(line, stored_workflow)
    elif container == "history_dataset_collection_display":
        hdca = collection_manager.get_dataset_collection_instance(trans, "history", encoded_id)
        rval = self.handle_dataset_collection_display(line, hdca)
    elif container == "tool_stdout":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_tool_stdout(line, job)
    elif container == "tool_stderr":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_tool_stderr(line, job)
    elif container == "job_parameters":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_job_parameters(line, job)
    elif container == "job_metrics":
        job = job_manager.get_accessible_job(trans, object_id)
        rval = self.handle_job_metrics(line, job)
    elif container == "generate_galaxy_version":
        version = trans.app.config.version_major
        rval = self.handle_generate_galaxy_version(line, version)
    elif container == "generate_time":
        rval = self.handle_generate_time(line, now())
    elif container == "invocation_time":
        invocation = workflow_manager.get_invocation(trans, object_id)
        rval = self.handle_invocation_time(line, invocation)
    elif container == "visualization":
        rval = None
    else:
        raise MalformedContents(f"Unknown Galaxy Markdown directive encountered [{container}].")
    if rval is not None:
        return rval
    else:
        return (line, False)
def _check_object(object_id, line):
    if object_id is None:
        raise MalformedContents(f"Missing object identifier [{line}].")
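# --- Illustrative sketch -------------------------------------------------------
# Demonstrates the id-rewriting step shared by both `_remap` variants above.
# UNENCODED_ID_PATTERN and trans.security.encode_id are defined elsewhere in
# Galaxy; the example pattern, directive text, and toy "encoder" below are
# assumptions used only to show the shape of the transformation.
def _example_remap_id_rewrite():
    import re

    example_pattern = r"(history_dataset_id)=(\d+)"  # hypothetical stand-in for UNENCODED_ID_PATTERN
    line = "history_dataset_display(history_dataset_id=42)"
    id_match = re.search(example_pattern, line)
    if id_match:
        object_id = int(id_match.group(2))
        encoded_id = format(object_id, "016x")  # stand-in for trans.security.encode_id(object_id)
        line = line.replace(id_match.group(), f"{id_match.group(1)}={encoded_id}")
    return line  # "history_dataset_display(history_dataset_id=000000000000002a)"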
def _validate(*args, **kwds):
    """Light wrapper around validate_galaxy_markdown to throw galaxy exceptions instead of ValueError."""
    try:
        return validate_galaxy_markdown(*args, **kwds)
    except ValueError as e:
        raise MalformedContents(str(e))
def cleanup_after_job(self):
    """ Set history, datasets, and jobs' attributes and clean up archive directory. """

    #
    # Helper methods.
    #

    def file_in_dir(file_path, a_dir):
        """ Returns true if file is in directory. """
        abs_file_path = os.path.abspath(file_path)
        return os.path.split(abs_file_path)[0] == a_dir

    def get_tag_str(tag, value):
        """ Builds a tag string for a tag, value pair. """
        if not value:
            return tag
        else:
            return tag + ":" + value

    #
    # Import history.
    #

    jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by(job_id=self.job_id).first()
    if jiha:
        try:
            archive_dir = jiha.archive_dir
            archive_dir = os.path.realpath(archive_dir)
            user = jiha.job.user

            # Bioblend previous to 17.01 exported histories with an extra subdir.
            if not os.path.exists(os.path.join(archive_dir, 'history_attrs.txt')):
                for d in os.listdir(archive_dir):
                    if os.path.isdir(os.path.join(archive_dir, d)):
                        archive_dir = os.path.join(archive_dir, d)
                        break

            #
            # Create history.
            #
            history_attr_file_name = os.path.join(archive_dir, 'history_attrs.txt')
            history_attrs = load(open(history_attr_file_name))

            # Create history.
            new_history = model.History(name='imported from archive: %s' % history_attrs['name'],
                                        user=user)
            new_history.importing = True
            new_history.hid_counter = history_attrs['hid_counter']
            new_history.genome_build = history_attrs['genome_build']
            self.sa_session.add(new_history)
            jiha.history = new_history
            self.sa_session.flush()

            # Add annotation, tags.
            if user:
                self.add_item_annotation(self.sa_session, user, new_history, history_attrs['annotation'])
                """
                TODO: figure out how to add tags to item.
                for tag, value in history_attrs['tags'].items():
                    trans.app.tag_handler.apply_item_tags(trans, trans.user, new_history, get_tag_str(tag, value))
                """

            #
            # Create datasets.
            #
            datasets_attrs_file_name = os.path.join(archive_dir, 'datasets_attrs.txt')
            datasets_attrs = load(open(datasets_attrs_file_name))
            provenance_file_name = datasets_attrs_file_name + ".provenance"

            if os.path.exists(provenance_file_name):
                provenance_attrs = load(open(provenance_file_name))
                datasets_attrs += provenance_attrs

            # Get counts of how often each dataset file is used; a file can
            # be linked to multiple dataset objects (HDAs).
            datasets_usage_counts = {}
            for dataset_attrs in datasets_attrs:
                temp_dataset_file_name = \
                    os.path.realpath(os.path.join(archive_dir, dataset_attrs['file_name']))
                if (temp_dataset_file_name not in datasets_usage_counts):
                    datasets_usage_counts[temp_dataset_file_name] = 0
                datasets_usage_counts[temp_dataset_file_name] += 1

            # Create datasets.
            for dataset_attrs in datasets_attrs:
                metadata = dataset_attrs['metadata']

                # Create dataset and HDA.
                hda = model.HistoryDatasetAssociation(name=dataset_attrs['name'],
                                                      extension=dataset_attrs['extension'],
                                                      info=dataset_attrs['info'],
                                                      blurb=dataset_attrs['blurb'],
                                                      peek=dataset_attrs['peek'],
                                                      designation=dataset_attrs['designation'],
                                                      visible=dataset_attrs['visible'],
                                                      dbkey=metadata['dbkey'],
                                                      metadata=metadata,
                                                      history=new_history,
                                                      create_dataset=True,
                                                      sa_session=self.sa_session)
                if 'uuid' in dataset_attrs:
                    hda.dataset.uuid = dataset_attrs["uuid"]
                if dataset_attrs.get('exported', True) is False:
                    hda.state = hda.states.DISCARDED
                    hda.deleted = True
                    hda.purged = True
                else:
                    hda.state = hda.states.OK
                self.sa_session.add(hda)
                self.sa_session.flush()
                new_history.add_dataset(hda, genome_build=None)
                hda.hid = dataset_attrs['hid']  # Overwrite default hid set when HDA added to history.
                # TODO: Is there a way to recover permissions? Is this needed?
                # permissions = trans.app.security_agent.history_get_default_permissions( new_history )
                # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions )
                self.sa_session.flush()

                if dataset_attrs.get('exported', True) is True:
                    # Do security check and move/copy dataset data.
                    temp_dataset_file_name = \
                        os.path.realpath(os.path.abspath(os.path.join(archive_dir, dataset_attrs['file_name'])))
                    if not file_in_dir(temp_dataset_file_name, os.path.join(archive_dir, "datasets")):
                        raise MalformedContents("Invalid dataset path: %s" % temp_dataset_file_name)
                    if datasets_usage_counts[temp_dataset_file_name] == 1:
                        self.app.object_store.update_from_file(hda.dataset, file_name=temp_dataset_file_name, create=True)

                        # Import additional files if present. Histories exported previously might not have this attribute set.
                        dataset_extra_files_path = dataset_attrs.get('extra_files_path', None)
                        if dataset_extra_files_path:
                            try:
                                file_list = os.listdir(os.path.join(archive_dir, dataset_extra_files_path))
                            except OSError:
                                file_list = []

                            if file_list:
                                for extra_file in file_list:
                                    self.app.object_store.update_from_file(
                                        hda.dataset,
                                        extra_dir='dataset_%s_files' % hda.dataset.id,
                                        alt_name=extra_file,
                                        file_name=os.path.join(archive_dir, dataset_extra_files_path, extra_file),
                                        create=True)
                    else:
                        datasets_usage_counts[temp_dataset_file_name] -= 1
                        shutil.copyfile(temp_dataset_file_name, hda.file_name)
                    hda.dataset.set_total_size()  # update the filesize record in the database

                # Set tags, annotations.
                if user:
                    self.add_item_annotation(self.sa_session, user, hda, dataset_attrs['annotation'])
                    # TODO: Set tags.
                    """
                    for tag, value in dataset_attrs['tags'].items():
                        trans.app.tag_handler.apply_item_tags(trans, trans.user, hda, get_tag_str(tag, value))
                    self.sa_session.flush()
                    """

                # Although metadata is set above, need to set metadata to recover BAI for BAMs.
                if hda.extension == 'bam':
                    self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app(
                        self.app.datatypes_registry.set_external_metadata_tool,
                        self.app,
                        jiha.job.session_id,
                        new_history.id,
                        jiha.job.user,
                        incoming={'input1': hda},
                        overwrite=False)

            #
            # Create jobs.
            #

            # Decode jobs attributes.
            def as_hda(obj_dct):
                """ Hook to 'decode' an HDA; method uses history and HID to get the HDA
                    represented by the encoded object. This only works because HDAs are
                    created above. """
                if obj_dct.get('__HistoryDatasetAssociation__', False):
                    return self.sa_session.query(model.HistoryDatasetAssociation) \
                               .filter_by(history=new_history, hid=obj_dct['hid']).first()
                return obj_dct

            jobs_attr_file_name = os.path.join(archive_dir, 'jobs_attrs.txt')
            jobs_attrs = load(open(jobs_attr_file_name), object_hook=as_hda)

            # Create each job.
            for job_attrs in jobs_attrs:
                imported_job = model.Job()
                imported_job.user = user
                # TODO: set session?
                # imported_job.session = trans.get_galaxy_session().id
                imported_job.history = new_history
                imported_job.imported = True
                imported_job.tool_id = job_attrs['tool_id']
                imported_job.tool_version = job_attrs['tool_version']
                imported_job.set_state(job_attrs['state'])
                imported_job.info = job_attrs.get('info', None)
                imported_job.exit_code = job_attrs.get('exit_code', None)
                imported_job.traceback = job_attrs.get('traceback', None)
                imported_job.stdout = job_attrs.get('stdout', None)
                imported_job.stderr = job_attrs.get('stderr', None)
                imported_job.command_line = job_attrs.get('command_line', None)
                try:
                    imported_job.create_time = datetime.datetime.strptime(job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f")
                    imported_job.update_time = datetime.datetime.strptime(job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f")
                except Exception:
                    pass
                self.sa_session.add(imported_job)
                self.sa_session.flush()

                class HistoryDatasetAssociationIDEncoder(json.JSONEncoder):
                    """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """

                    def default(self, obj):
                        """ Encode an HDA, default encoding for everything else. """
                        if isinstance(obj, model.HistoryDatasetAssociation):
                            return obj.id
                        return json.JSONEncoder.default(self, obj)

                # Set parameters. May be useful to look at metadata.py for creating parameters.
                # TODO: there may be a better way to set parameters, e.g.:
                #   for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
                #       job.add_parameter( name, value )
                # to make this work, we'd need to flesh out the HDA objects. The code below is
                # relatively similar.
                for name, value in job_attrs['params'].items():
                    # Transform parameter values when necessary.
                    if isinstance(value, model.HistoryDatasetAssociation):
                        # HDA input: use hid to find input.
                        input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                        .filter_by(history=new_history, hid=value.hid).first()
                        value = input_hda.id
                    # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id )
                    imported_job.add_parameter(name, dumps(value, cls=HistoryDatasetAssociationIDEncoder))

                # TODO: Connect jobs to input datasets.

                # Connect jobs to output datasets.
                for output_hid in job_attrs['output_datasets']:
                    # print "%s job has output dataset %i" % (imported_job.id, output_hid)
                    output_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                     .filter_by(history=new_history, hid=output_hid).first()
                    if output_hda:
                        imported_job.add_output_dataset(output_hda.name, output_hda)

                # Connect jobs to input datasets.
                if 'input_mapping' in job_attrs:
                    for input_name, input_hid in job_attrs['input_mapping'].items():
                        input_hda = self.sa_session.query(model.HistoryDatasetAssociation) \
                                        .filter_by(history=new_history, hid=input_hid).first()
                        if input_hda:
                            imported_job.add_input_dataset(input_name, input_hda)

                self.sa_session.flush()

            # Done importing.
            new_history.importing = False
            self.sa_session.flush()

            # Cleanup.
            if os.path.exists(archive_dir):
                shutil.rmtree(archive_dir)
        except Exception as e:
            jiha.job.stderr += "Error cleaning up history import job: %s" % e
            self.sa_session.flush()
            raise
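# --- Illustrative sketch -------------------------------------------------------
# Shows how the `as_hda` object hook used in `cleanup_after_job` above turns
# serialized HDA placeholders back into objects while decoding jobs_attrs.txt.
# The lookup below is faked with a dict; the real hook queries the database by
# history and hid, and the function name and sample JSON are hypothetical.
def _example_as_hda_object_hook():
    import json

    hdas_by_hid = {3: "<HDA hid=3>"}  # stand-in for the SQLAlchemy query

    def as_hda(obj_dct):
        if obj_dct.get("__HistoryDatasetAssociation__", False):
            return hdas_by_hid.get(obj_dct["hid"])
        return obj_dct

    job_attrs = json.loads(
        '{"tool_id": "cat1", "params": {"input1": {"__HistoryDatasetAssociation__": true, "hid": 3}}}',
        object_hook=as_hda,
    )
    return job_attrs["params"]["input1"]  # "<HDA hid=3>"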