def decorator( self, trans, *args, **kwargs ):
    def error( environ, start_response ):
        start_response( error_status, [ ( 'Content-type', 'text/plain' ) ] )
        return error_message
    error_status = '403 Forbidden'
    if trans.error_message:
        return trans.error_message
    if user_required and trans.anonymous:
        error_message = "API Authentication Required for this request"
        return error
    if trans.request.body:
        try:
            kwargs['payload'] = __extract_payload_from_request( trans, func, kwargs )
        except ValueError:
            error_status = '400 Bad Request'
            error_message = 'Your request did not appear to be valid JSON, please consult the API documentation'
            return error
    trans.response.set_content_type( "application/json" )
    # send 'do not cache' headers to handle IE's caching of ajax get responses
    trans.response.headers[ 'Cache-Control' ] = "max-age=0,no-cache,no-store"
    # Perform api_run_as processing, possibly changing identity
    if 'payload' in kwargs and 'run_as' in kwargs['payload']:
        if not trans.user_can_do_run_as():
            error_message = 'User does not have permissions to run jobs as another user'
            return error
        try:
            decoded_user_id = trans.security.decode_id( kwargs['payload']['run_as'] )
        except TypeError:
            trans.response.status = 400
            return "Malformed user id ( %s ) specified, unable to decode." % str( kwargs['payload']['run_as'] )
        try:
            user = trans.sa_session.query( trans.app.model.User ).get( decoded_user_id )
            trans.api_inherit_admin = trans.user_is_admin()
            trans.set_user( user )
        except:
            trans.response.status = 400
            return "That user does not exist."
    try:
        rval = func( self, trans, *args, **kwargs )
        if to_json and trans.debug:
            rval = dumps( rval, indent=4, sort_keys=True )
        elif to_json:
            rval = dumps( rval )
        return rval
    except paste.httpexceptions.HTTPException:
        raise  # handled
    except:
        log.exception( 'Uncaught exception in exposed API method:' )
        raise paste.httpexceptions.HTTPServerError()
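# A simplified, self-contained sketch (plain functools/json, no Galaxy
# transaction object) of the decorator-factory pattern the wrapper above comes
# from: an outer function captures options such as to_json, and the inner
# decorator JSON-encodes whatever the wrapped method returns. Names here are
# illustrative, not Galaxy's actual API.
import functools
from json import dumps

def expose_api_sketch( func, to_json=True ):
    @functools.wraps( func )
    def decorator( *args, **kwargs ):
        rval = func( *args, **kwargs )
        if to_json:
            rval = dumps( rval, indent=4, sort_keys=True )
        return rval
    return decorator

@expose_api_sketch
def index():
    return { 'histories': [ 'h1', 'h2' ] }

print( index() )  # JSON text, ready for an application/json response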
def add_sequencer( sequencer_index, sequencer_form_definition_id, sequencer_info ):
    '''Adds a new sequencer to the sequencer table along with its form values.'''
    # Create a new form values record with the supplied sequencer information
    values = dumps( { 'field_0': sequencer_info.get( 'host', '' ),
                      'field_1': sequencer_info.get( 'username', '' ),
                      'field_2': sequencer_info.get( 'password', '' ),
                      'field_3': sequencer_info.get( 'data_dir', '' ),
                      'field_4': sequencer_info.get( 'rename_dataset', '' ) } )
    cmd = "INSERT INTO form_values VALUES ( %s, %s, %s, %s, '%s' )" % ( nextval( 'form_values' ),
                                                                        localtimestamp(),
                                                                        localtimestamp(),
                                                                        sequencer_form_definition_id,
                                                                        values )
    migrate_engine.execute( cmd )
    sequencer_form_values_id = get_latest_id( 'form_values' )
    # Create a new sequencer record with reference to the form value created above.
    name = 'Sequencer_%i' % sequencer_index
    desc = ''
    version = ''
    result_datasets = dict()
    sequencer_type_id = 'simple_unknown_sequencer'
    cmd = "INSERT INTO sequencer VALUES ( %s, %s, %s, '%s', '%s', '%s', '%s', %s, %s, %s )"
    cmd = cmd % ( nextval( 'sequencer' ),
                  localtimestamp(),
                  localtimestamp(),
                  name,
                  desc,
                  sequencer_type_id,
                  version,
                  sequencer_form_definition_id,
                  sequencer_form_values_id,
                  boolean( 'false' ) )
    migrate_engine.execute( cmd )
    return get_latest_id( 'sequencer' )
def create_job( trans, params, tool, json_file_path, data_list, folder=None, history=None ):
    """
    Create the upload job.
    """
    job = trans.app.model.Job()
    galaxy_session = trans.get_galaxy_session()
    if type( galaxy_session ) == trans.model.GalaxySession:
        job.session_id = galaxy_session.id
    if trans.user is not None:
        job.user_id = trans.user.id
    if folder:
        job.library_folder_id = folder.id
    else:
        if not history:
            history = trans.history
        job.history_id = history.id
    job.tool_id = tool.id
    job.tool_version = tool.version
    job.state = job.states.UPLOAD
    trans.sa_session.add( job )
    trans.sa_session.flush()
    log.info( 'tool %s created job id %d' % ( tool.id, job.id ) )
    trans.log_event( 'created job id %d' % job.id, tool_id=tool.id )
    for name, value in tool.params_to_strings( params, trans.app ).iteritems():
        job.add_parameter( name, value )
    job.add_parameter( 'paramfile', dumps( json_file_path ) )
    object_store_id = None
    for i, dataset in enumerate( data_list ):
        if folder:
            job.add_output_library_dataset( 'output%i' % i, dataset )
        else:
            job.add_output_dataset( 'output%i' % i, dataset )
        # Create an empty file immediately
        if not dataset.dataset.external_filename:
            dataset.dataset.object_store_id = object_store_id
            try:
                trans.app.object_store.create( dataset.dataset )
            except ObjectInvalid:
                raise Exception( 'Unable to create output dataset: object store is full' )
            object_store_id = dataset.dataset.object_store_id
        trans.sa_session.add( dataset )
        # open( dataset.file_name, "w" ).close()
    job.object_store_id = object_store_id
    job.state = job.states.NEW
    job.set_handler( tool.get_job_handler( None ) )
    trans.sa_session.add( job )
    trans.sa_session.flush()
    # Queue the job for execution
    trans.app.job_queue.put( job.id, job.tool_id )
    trans.log_event( "Added job to the job queue, id: %s" % str( job.id ), tool_id=job.tool_id )
    output = odict()
    for i, v in enumerate( data_list ):
        output[ 'output%i' % i ] = v
    return job, output
def get_account_info(self, trans, key_id, secret):
    """
    Get EC2 Account Info
    """
    account_info = {}
    cml = cloudman.launch.CloudManLauncher(key_id, secret)
    ec2_conn = cml.connect_ec2(key_id, secret)
    kps = ec2_conn.get_all_key_pairs()
    account_info['clusters'] = cml.get_clusters_pd()
    account_info['keypairs'] = [akp.name for akp in kps]
    return dumps(account_info)
def job_param_filter(view, left, operator, right):
    view.do_query = True
    alias = aliased( JobParameter )
    param_name = re.sub(r'^param\.', '', left)
    view.query = view.query.filter( and_(
        Job.id == alias.job_id,
        alias.name == param_name,
        alias.value == dumps(right)
    ) )
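# A minimal, self-contained sketch (plain `json`, toy data) of why the filter
# above JSON-encodes its right-hand side: parameter values are persisted as
# JSON strings, so the string 'hg19' is stored as '"hg19"' and a raw equality
# comparison would never match.
from json import dumps

stored_value = dumps('hg19')           # what the job_parameter table holds
assert stored_value == '"hg19"'
assert stored_value != 'hg19'          # a raw comparison misses
assert stored_value == dumps('hg19')   # encode the query term to match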
def handle( self ):
    request = self.request.recv( 8192 )
    response = {}
    valid, request, response = json.validate_jsonrpc_request( request, ( 'get_state', ), () )
    if valid:
        self.request.send( json.dumps( json.jsonrpc_response( request=request, result=self.server.state_result.result ) ) )
    else:
        error_msg = 'Unable to serve request: %s' % response['error']['message']
        if 'data' in response['error']:
            error_msg += ': %s' % response['error']['data']
        log.error( error_msg )
        log.debug( 'Original request was: %s' % request )
def file_err( msg, dataset, json_file ):
    json_file.write( dumps( dict( type='dataset',
                                  ext='data',
                                  dataset_id=dataset.dataset_id,
                                  stderr=msg ) ) + "\n" )
    # never remove a server-side upload
    if dataset.type in ( 'server_dir', 'path_paste' ):
        return
    try:
        os.remove( dataset.path )
    except:
        pass
def create_archive( history_attrs_file, datasets_attrs_file, jobs_attrs_file, out_file, gzip=False ):
    """ Create archive from the given attribute/metadata files and save it to out_file. """
    tarfile_mode = "w"
    if gzip:
        tarfile_mode += ":gz"
    try:
        history_archive = tarfile.open( out_file, tarfile_mode )

        # Read datasets attributes from file.
        datasets_attr_in = open( datasets_attrs_file, 'rb' )
        datasets_attr_str = ''
        buffsize = 1048576
        try:
            while True:
                datasets_attr_str += datasets_attr_in.read( buffsize )
                if not datasets_attr_str or len( datasets_attr_str ) % buffsize != 0:
                    break
        except OverflowError:
            pass
        datasets_attr_in.close()
        datasets_attrs = loads( datasets_attr_str )

        # Add datasets to archive and update dataset attributes.
        # TODO: security check to ensure that files added are in Galaxy dataset directory?
        for dataset_attrs in datasets_attrs:
            if dataset_attrs['exported']:
                dataset_file_name = dataset_attrs[ 'file_name' ]  # Full file name.
                dataset_archive_name = os.path.join( 'datasets',
                                                     get_dataset_filename( dataset_attrs[ 'name' ], dataset_attrs[ 'extension' ] ) )
                history_archive.add( dataset_file_name, arcname=dataset_archive_name )
                # Update dataset filename to be archive name.
                dataset_attrs[ 'file_name' ] = dataset_archive_name

        # Rewrite dataset attributes file.
        datasets_attrs_out = open( datasets_attrs_file, 'w' )
        datasets_attrs_out.write( dumps( datasets_attrs ) )
        datasets_attrs_out.close()

        # Finish archive.
        history_archive.add( history_attrs_file, arcname="history_attrs.txt" )
        history_archive.add( datasets_attrs_file, arcname="datasets_attrs.txt" )
        if os.path.exists( datasets_attrs_file + ".provenance" ):
            history_archive.add( datasets_attrs_file + ".provenance", arcname="datasets_attrs.txt.provenance" )
        history_archive.add( jobs_attrs_file, arcname="jobs_attrs.txt" )
        history_archive.close()

        # Status.
        return 'Created history archive.'
    except Exception as e:
        return 'Error creating history archive: %s' % str( e ), sys.stderr
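# A minimal, standard-library illustration (toy file names under /tmp) of the
# tarfile pattern used above: `arcname` controls the path a member gets inside
# the archive, independent of where the file lives on disk.
import tarfile

with open('/tmp/history_attrs.txt', 'w') as f:
    f.write('{"name": "example history"}')
archive = tarfile.open('/tmp/example_archive.tar.gz', 'w:gz')
archive.add('/tmp/history_attrs.txt', arcname='history_attrs.txt')
archive.close()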
def params_to_strings(params, param_values, app):
    """
    Convert a dictionary of parameter values to a dictionary of strings
    suitable for persisting. The `value_to_basic` method of each parameter
    is called to convert its value to basic types, the result of which is
    then json encoded (thus allowing complex nested parameters and such).
    """
    rval = dict()
    for key, value in param_values.iteritems():
        if key in params:
            value = params[key].value_to_basic(value, app)
        rval[key] = str(dumps(value))
    return rval
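# A small, standalone sketch (plain `json`, no Galaxy objects) of the
# persistence round trip this function relies on: any basic, possibly nested,
# value survives dumps()/loads() intact, which is what makes a flat
# string-to-string table sufficient for storing structured parameters.
from json import dumps, loads

value = {'queries': [{'input2': 42}], 'dbkey': 'hg19'}
persisted = dumps(value)            # stored as a plain string
assert loads(persisted) == value    # recovered unchanged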
def get_chunk(self, trans, dataset, chunk):
    ck_index = int(chunk)
    f = open(dataset.file_name)
    f.seek(ck_index * trans.app.config.display_chunk_size)
    # If we aren't at the start of the file, seek to next newline. Do this better eventually.
    if f.tell() != 0:
        cursor = f.read(1)
        while cursor and cursor != '\n':
            cursor = f.read(1)
    ck_data = f.read(trans.app.config.display_chunk_size)
    cursor = f.read(1)
    while cursor and ck_data[-1] != '\n':
        ck_data += cursor
        cursor = f.read(1)
    return dumps( { 'ck_data': util.unicodify( ck_data ),
                    'ck_index': ck_index + 1 } )
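# A runnable, standard-library sketch (toy file under /tmp, 16-byte chunks) of
# the newline-alignment trick above: after seeking to a fixed offset, skip
# forward to the next newline so a chunk never starts mid-line, then extend
# the chunk until it also ends on a newline.
CHUNK = 16
with open('/tmp/chunk_demo.txt', 'w') as out:
    out.write('\n'.join('line number %d' % i for i in range(10)) + '\n')

f = open('/tmp/chunk_demo.txt')
f.seek(CHUNK)
cursor = f.read(1)
while cursor and cursor != '\n':
    cursor = f.read(1)      # skip the partial line we landed in
data = f.read(CHUNK)
cursor = f.read(1)
while cursor and data[-1] != '\n':
    data += cursor          # extend to the next line boundary
    cursor = f.read(1)
assert data.startswith('line') and data.endswith('\n')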
def get_account_info(self, trans, key_id, secret):
    """
    Get EC2 Account Info
    """
    try:
        account_info = {}
        cml = cloudman.launch.CloudManLauncher(key_id, secret)
        ec2_conn = cml.connect_ec2(key_id, secret)
        kps = ec2_conn.get_all_key_pairs()
        account_info['clusters'] = cml.get_clusters_pd()
        account_info['keypairs'] = [akp.name for akp in kps]
        return dumps(account_info)
    except EC2ResponseError as e:
        trans.response.status = 400
        return e.message
def launch_instance(self, trans, cluster_name, password, key_id, secret, instance_type,
                    share_string, keypair, ami=None, zone=None, bucket_default=None, **kwargs):
    ami = ami or trans.app.config.cloudlaunch_default_ami
    cfg = cloudman.CloudManConfig(key_id, secret, cluster_name, ami, instance_type, password, placement=zone)
    cml = cloudman.launch.CloudManLauncher(key_id, secret)
    # This should probably be handled better on the bioblend side, but until
    # an egg update can be made, this needs to conditionally include the
    # parameter or not, even if the value is None.
    if bucket_default:
        result = cml.launch(cluster_name, ami, instance_type, password,
                            cfg.kernel_id, cfg.ramdisk_id, cfg.key_name,
                            cfg.security_groups, cfg.placement,
                            bucket_default=bucket_default)
    else:
        result = cml.launch(cluster_name, ami, instance_type, password,
                            cfg.kernel_id, cfg.ramdisk_id, cfg.key_name,
                            cfg.security_groups, cfg.placement)
    # result is a dict with sg_names, kp_name, kp_material, rs, and instance_id
    if not result['rs']:
        trans.response.status = 400
        return "Instance failure, but no specific error was detected. Please check your AWS Console."
    instance = result['rs'].instances[0]
    while not instance.public_dns_name:
        try:
            instance.update()
        except EC2ResponseError:
            # This can happen when update is invoked before the instance is fully registered.
            pass
        time.sleep(1)
    if result['kp_material']:
        # We have created a keypair. Save to tempfile for one time retrieval.
        (fd, fname) = tempfile.mkstemp(prefix=PKEY_PREFIX, dir=trans.app.config.new_file_path)
        f = os.fdopen(fd, 'wt')
        f.write(result['kp_material'])
        f.close()
        kp_material_tag = fname[fname.rfind(PKEY_PREFIX) + len(PKEY_PREFIX):]
    else:
        kp_material_tag = None
    return dumps({'cluster_name': cluster_name,
                  'instance_id': result['rs'].instances[0].id,
                  'image_id': result['rs'].instances[0].image_id,
                  'public_dns_name': result['rs'].instances[0].public_dns_name,
                  'kp_name': result['kp_name'],
                  'kp_material_tag': kp_material_tag})
def __api_error_response( trans, **kwds ):
    error_dict = __api_error_message( trans, **kwds )
    exception = kwds.get( "exception", None )
    # If we are given a status code directly, use it; otherwise check
    # the exception for a status_code attribute.
    if "status_code" in kwds:
        status_code = int( kwds.get( "status_code" ) )
    elif hasattr( exception, "status_code" ):
        status_code = int( exception.status_code )
    else:
        status_code = 500
    response = trans.response
    if not response.status or str( response.status ).startswith( "20" ):
        # Unset status code appears to be the string '200 OK'; if anything
        # non-success (i.e. not 200 or 201) has been set, do not override
        # the underlying controller.
        response.status = status_code
    return dumps( error_dict )
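# A standalone sketch (plain Python, toy exception class) of the precedence
# rule above: an explicit keyword wins, then the exception's own status_code
# attribute, then a 500 fallback.
def resolve_status_code(kwds, exception):
    if 'status_code' in kwds:
        return int(kwds['status_code'])
    if hasattr(exception, 'status_code'):
        return int(exception.status_code)
    return 500

class NotFound(Exception):
    status_code = 404

assert resolve_status_code({}, NotFound()) == 404
assert resolve_status_code({'status_code': 400}, NotFound()) == 400
assert resolve_status_code({}, ValueError()) == 500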
def handle_incoming(cls, incoming):
    npd = {}
    for key, val in incoming.iteritems():
        if key.startswith("pja"):
            sp = key.split("__")
            ao_key = sp[2] + sp[1]
            # flag / output_name / pjatype / desc
            if ao_key not in npd:
                npd[ao_key] = {"action_type": sp[2],
                               "output_name": sp[1],
                               "action_arguments": {}}
            if len(sp) > 3:
                if sp[3] == "output_name":
                    npd[ao_key]["output_name"] = val
                else:
                    npd[ao_key]["action_arguments"][sp[3]] = val
        else:
            # Not pja stuff.
            pass
    return dumps(npd)
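# A runnable, Galaxy-free sketch (toy form keys) of the key scheme this parser
# expects: keys look like pja__<output_name>__<action_type>[__<argument>], and
# every key sharing the first three segments folds into one action dict.
incoming = {
    'pja__out_file1__RenameDatasetAction__newname': 'cleaned.bed',
    'pja__out_file1__RenameDatasetAction': '1',
    'unrelated_param': 'ignored',
}
actions = {}
for key, val in incoming.items():
    if not key.startswith('pja'):
        continue
    sp = key.split('__')
    ao_key = sp[2] + sp[1]
    actions.setdefault(ao_key, {'action_type': sp[2], 'output_name': sp[1],
                                'action_arguments': {}})
    if len(sp) > 3:
        actions[ao_key]['action_arguments'][sp[3]] = val
assert actions['RenameDatasetActionout_file1']['action_arguments']['newname'] == 'cleaned.bed'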
def get_html( self, prefix="", disabled=False ):
    primary_field = self.primary_field
    html = '<div class="switch-option">'
    html += primary_field.get_html( prefix=prefix, disabled=disabled )
    html += '<input name="__switch_default__" type="hidden" value="%s" />' % self.default_field
    options = []
    for name, delegate_field in self.delegate_fields.items():
        field = escape( dumps( delegate_field.to_dict() ) )
        option = " '%s': %s" % ( name, field )
        options.append( option )
    html += '<script>$(document).ready( function() {\nvar switchOptions = {\n'
    html += ','.join( options )
    html += '};\n'
    html += 'if ( window.enhanced_galaxy_tools ) {\n'
    html += 'require( [ "galaxy.tools" ], function( mod_tools ) { new mod_tools.SwitchSelectView({\n'
    html += 'el: $(\'[name="%s%s"]\').closest( "div.switch-option" ),' % ( prefix, primary_field.name )
    html += 'default_option: "%s",\n' % self.default_field
    html += 'prefix: "%s",\n' % prefix
    html += 'switch_options: switchOptions }); } )\n'
    html += "}"
    html += '});\n</script></div>'
    return html
def get_state(self, transfer_jobs, via_socket=False):
    transfer_jobs = listify(transfer_jobs)
    rval = []
    for tj in transfer_jobs:
        if via_socket and tj.state not in tj.terminal_states and tj.socket:
            try:
                request = json.jsonrpc_request(method="get_state", id=True)
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                sock.settimeout(5)
                sock.connect(("localhost", tj.socket))
                sock.send(json.dumps(request))
                response = sock.recv(8192)
                valid, response = json.validate_jsonrpc_response(response, id=request["id"])
                if not valid:
                    # No valid response received, make some pseudo-json-rpc
                    raise Exception(dict(code=128, message="Did not receive valid response from transfer daemon for state"))
                if "error" in response:
                    # Response was valid but the request resulted in an error
                    raise Exception(response["error"])
                else:
                    # Request was valid
                    response["result"]["transfer_job_id"] = tj.id
                    rval.append(response["result"])
            except Exception as e:
                # State checking via the transfer daemon failed, just
                # return the state from the database instead. Callers can
                # look for the 'error' member of the response to see why
                # the check failed.
                self.sa_session.refresh(tj)
                error = e.args
                if type(error) != dict:
                    error = dict(code=256, message="Error connecting to transfer daemon", data=str(e))
                rval.append(dict(transfer_job_id=tj.id, state=tj.state, error=error))
        else:
            self.sa_session.refresh(tj)
            rval.append(dict(transfer_job_id=tj.id, state=tj.state))
    return rval
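# A minimal, standard-library sketch (plain `json` and `socket`; Galaxy's
# jsonrpc helper functions are replaced by a hand-built request dict) of the
# client side of the exchange above: send one JSON-RPC request, read one
# response, and match it to the request by id. Port and error handling are
# illustrative assumptions.
import json
import socket

def query_daemon_state(port, request_id=1):
    request = {'jsonrpc': '2.0', 'method': 'get_state', 'id': request_id}
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(5)
    sock.connect(('localhost', port))
    sock.send(json.dumps(request))
    response = json.loads(sock.recv(8192))
    sock.close()
    if response.get('id') != request_id:
        raise Exception('Response id does not match request id')
    if 'error' in response:
        raise Exception(response['error'])
    return response['result']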
def get_chunk(self, trans, dataset, chunk):
    ck_index = int(chunk)
    f = open(dataset.file_name)
    f.seek(ck_index * trans.app.config.display_chunk_size)
    # If we aren't at the start of the file, seek to next newline. Do this better eventually.
    if f.tell() != 0:
        cursor = f.read(1)
        while cursor and cursor != '\n':
            cursor = f.read(1)
    ck_data = f.read(trans.app.config.display_chunk_size)
    cursor = f.read(1)
    while cursor and ck_data[-1] != '\n':
        ck_data += cursor
        cursor = f.read(1)
    # The ConnectivityTable format has several derivatives, one of which is delimited by (multiple) spaces.
    # By converting these spaces back to tabs, chunks can still be interpreted by tab-delimited file parsers.
    ck_data_header, ck_data_body = ck_data.split('\n', 1)
    ck_data_header = re.sub('^([0-9]+)[ ]+', r'\1\t', ck_data_header)
    ck_data_body = re.sub('\n[ \t]+', '\n', ck_data_body)
    ck_data_body = re.sub('[ ]+', '\t', ck_data_body)
    return dumps( { 'ck_data': util.unicodify( ck_data_header + "\n" + ck_data_body ),
                    'ck_index': ck_index + 1 } )
def load( self, trans, **kwd ):
    """
    Load dataset from the given source into the library.

    :param  encoded_folder_id:  the encoded id of the folder to import dataset to
    :type   encoded_folder_id:  an encoded id string
    :param  source:             source of the dataset to be loaded
    :type   source:             str
    :param  link_data:          flag whether to link the dataset to data or copy it to Galaxy
    :type   link_data:          bool
    :param  preserve_dirs:      flag whether to preserve directory structure when importing dir
    :type   preserve_dirs:      bool
    """
    kwd[ 'space_to_tab' ] = 'False'
    kwd[ 'to_posix_lines' ] = 'True'
    kwd[ 'dbkey' ] = kwd.get( 'dbkey', '?' )
    kwd[ 'file_type' ] = kwd.get( 'file_type', 'auto' )
    kwd[ 'link_data_only' ] = 'link_to_files' if util.string_as_bool( kwd.get( 'link_data', False ) ) else 'copy_files'
    encoded_folder_id = kwd.get( 'encoded_folder_id', None )
    if encoded_folder_id is not None:
        folder_id = self.folder_manager.cut_and_decode( trans, encoded_folder_id )
    else:
        raise exceptions.RequestParameterMissingException( 'The required attribute encoded_folder_id is missing.' )
    path = kwd.get( 'path', None )
    if path is None:
        raise exceptions.RequestParameterMissingException( 'The required attribute path is missing.' )
    folder = self.folder_manager.get( trans, folder_id )
    source = kwd.get( 'source', None )
    if source not in [ 'userdir_file', 'userdir_folder', 'admin_path' ]:
        raise exceptions.RequestParameterMissingException( 'You have to specify the "source" parameter. Possible values are "userdir_file", "userdir_folder" and "admin_path".' )
    if source in [ 'userdir_file', 'userdir_folder' ]:
        user_login = trans.user.email
        user_base_dir = trans.app.config.user_library_import_dir
        if user_base_dir is None:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow upload from user directories.' )
        full_dir = os.path.join( user_base_dir, user_login )
        # path_to_root_import_folder = None
        if not path.lower().startswith( full_dir.lower() ):
            # path_to_root_import_folder = path
            path = os.path.join( full_dir, path )
        if not os.path.exists( path ):
            raise exceptions.RequestParameterInvalidException( 'Given path does not exist on the host.' )
    if not self.folder_manager.can_add_item( trans, folder ):
        raise exceptions.InsufficientPermissionsException( 'You do not have proper permission to add items to the given folder.' )
    if source == 'admin_path':
        if not trans.app.config.allow_library_path_paste:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from path.' )
        if not trans.user_is_admin:
            raise exceptions.AdminRequiredException( 'Only admins can import from path.' )
    # Set up the traditional tool state/params
    tool_id = 'upload1'
    tool = trans.app.toolbox.get_tool( tool_id )
    state = tool.new_state( trans )
    tool.update_state( trans, tool.inputs_by_page[ 0 ], state.inputs, kwd )
    tool_params = state.inputs
    dataset_upload_inputs = []
    for input_name, input in tool.inputs.iteritems():
        if input.type == "upload_dataset":
            dataset_upload_inputs.append( input )
    library_bunch = upload_common.handle_library_params( trans, {}, trans.security.encode_id( folder.id ) )
    abspath_datasets = []
    kwd[ 'filesystem_paths' ] = path
    params = util.Params( kwd )
    # user wants to import one file only
    if source == "userdir_file":
        file = os.path.abspath( path )
        abspath_datasets.append( trans.webapp.controllers[ 'library_common' ].make_library_uploaded_dataset(
            trans, 'api', params, os.path.basename( file ), file, 'server_dir', library_bunch ) )
    # user wants to import whole folder
    if source == "userdir_folder":
        uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
            trans, 'api', params, library_bunch, 200, '' )
        uploaded_datasets = uploaded_datasets_bunch[ 0 ]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
        for ud in uploaded_datasets:
            ud.path = os.path.abspath( ud.path )
            abspath_datasets.append( ud )
    # user wants to import from path (admins only)
    if source == "admin_path":
        # validate the path is within root
        uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
            trans, 'api', params, library_bunch, 200, '' )
        uploaded_datasets = uploaded_datasets_bunch[ 0 ]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
        for ud in uploaded_datasets:
            ud.path = os.path.abspath( ud.path )
            abspath_datasets.append( ud )
    json_file_path = upload_common.create_paramfile( trans, abspath_datasets )
    data_list = [ ud.data for ud in abspath_datasets ]
    job, output = upload_common.create_job( trans, tool_params, tool, json_file_path, data_list, folder=folder )
    # HACK: Prevent outputs_to_working_directory from overwriting inputs when "linking"
    job.add_parameter( 'link_data_only', dumps( kwd.get( 'link_data_only', 'copy_files' ) ) )
    job.add_parameter( 'uuid', dumps( kwd.get( 'uuid', None ) ) )
    trans.sa_session.add( job )
    trans.sa_session.flush()
    job_dict = job.to_dict()
    job_dict[ 'id' ] = trans.security.encode_id( job_dict[ 'id' ] )
    return job_dict
def load( self, trans, payload=None, **kwd ):
    """
    * POST /api/libraries/datasets
        Load dataset from the given source into the library.
        Source can be:
            user directory - root folder specified in galaxy.ini as "$user_library_import_dir"
                example path: path/to/galaxy/$user_library_import_dir/[email protected]/{user can browse everything here}
                the folder with the user login has to be created beforehand
            (admin)import directory - root folder specified in galaxy.ini as "$library_import_dir"
                example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}
            (admin)any absolute or relative path - option allowed with "allow_library_path_paste" in galaxy.ini

    :param  payload: dictionary structure containing:
        :param  encoded_folder_id:  the encoded id of the folder to import dataset(s) to
        :type   encoded_folder_id:  an encoded id string
        :param  source:             source the datasets should be loaded from
        :type   source:             str
        :param  link_data:          flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
                                    while linking is set to True all symlinks will be resolved _once_
        :type   link_data:          bool
        :param  preserve_dirs:      flag whether to preserve the directory structure when importing dir
                                    if False only datasets will be imported
        :type   preserve_dirs:      bool
        :param  file_type:          file type of the loaded datasets, defaults to 'auto' (autodetect)
        :type   file_type:          str
        :param  dbkey:              dbkey of the loaded genome, defaults to '?' (unknown)
        :type   dbkey:              str
    :type   payload: dict

    :returns:   dict containing information about the created upload job
    :rtype:     dictionary

    :raises: RequestParameterMissingException, AdminRequiredException, ConfigDoesNotAllowException,
        RequestParameterInvalidException, InsufficientPermissionsException, ObjectNotFound
    """
    if payload:
        kwd.update( payload )
    kwd[ 'space_to_tab' ] = False
    kwd[ 'to_posix_lines' ] = True
    kwd[ 'dbkey' ] = kwd.get( 'dbkey', '?' )
    kwd[ 'file_type' ] = kwd.get( 'file_type', 'auto' )
    kwd[ 'link_data_only' ] = 'link_to_files' if util.string_as_bool( kwd.get( 'link_data', False ) ) else 'copy_files'
    encoded_folder_id = kwd.get( 'encoded_folder_id', None )
    if encoded_folder_id is not None:
        folder_id = self.folder_manager.cut_and_decode( trans, encoded_folder_id )
    else:
        raise exceptions.RequestParameterMissingException( 'The required attribute encoded_folder_id is missing.' )
    path = kwd.get( 'path', None )
    if path is None:
        raise exceptions.RequestParameterMissingException( 'The required attribute path is missing.' )
    folder = self.folder_manager.get( trans, folder_id )
    source = kwd.get( 'source', None )
    if source not in [ 'userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path' ]:
        raise exceptions.RequestParameterMissingException( 'You have to specify the "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder".' )
    if source in [ 'importdir_file', 'importdir_folder' ]:
        if not trans.user_is_admin:
            raise exceptions.AdminRequiredException( 'Only admins can import from importdir.' )
        if not trans.app.config.library_import_dir:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from importdir.' )
        import_base_dir = trans.app.config.library_import_dir
        path = os.path.join( import_base_dir, path )
    if source in [ 'userdir_file', 'userdir_folder' ]:
        user_login = trans.user.email
        user_base_dir = trans.app.config.user_library_import_dir
        if user_base_dir is None:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow upload from user directories.' )
        full_dir = os.path.join( user_base_dir, user_login )
        if not path.lower().startswith( full_dir.lower() ):
            path = os.path.join( full_dir, path )
        if not os.path.exists( path ):
            raise exceptions.RequestParameterInvalidException( 'Given path does not exist on the host.' )
    if not self.folder_manager.can_add_item( trans, folder ):
        raise exceptions.InsufficientPermissionsException( 'You do not have proper permission to add items to the given folder.' )
    if source == 'admin_path':
        if not trans.app.config.allow_library_path_paste:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from path.' )
        if not trans.user_is_admin:
            raise exceptions.AdminRequiredException( 'Only admins can import from path.' )
    # Set up the traditional tool state/params
    tool_id = 'upload1'
    tool = trans.app.toolbox.get_tool( tool_id )
    state = tool.new_state( trans )
    tool.populate_state( trans, tool.inputs, state.inputs, kwd )
    tool_params = state.inputs
    dataset_upload_inputs = []
    for input in tool.inputs.itervalues():
        if input.type == "upload_dataset":
            dataset_upload_inputs.append( input )
    library_bunch = upload_common.handle_library_params( trans, {}, trans.security.encode_id( folder.id ) )
    abspath_datasets = []
    kwd[ 'filesystem_paths' ] = path
    if source in [ 'importdir_folder' ]:
        kwd[ 'filesystem_paths' ] = os.path.join( import_base_dir, path )
    # user wants to import one file only
    if source in [ "userdir_file", "importdir_file" ]:
        file = os.path.abspath( path )
        abspath_datasets.append( trans.webapp.controllers[ 'library_common' ].make_library_uploaded_dataset(
            trans, 'api', kwd, os.path.basename( file ), file, 'server_dir', library_bunch ) )
    # user wants to import whole folder
    if source == "userdir_folder":
        uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
            trans, 'api', kwd, library_bunch, 200, '' )
        uploaded_datasets = uploaded_datasets_bunch[ 0 ]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
        for ud in uploaded_datasets:
            ud.path = os.path.abspath( ud.path )
            abspath_datasets.append( ud )
    # user wants to import from path
    if source in [ "admin_path", "importdir_folder" ]:
        # validate the path is within root
        uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
            trans, 'api', kwd, library_bunch, 200, '' )
        uploaded_datasets = uploaded_datasets_bunch[ 0 ]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
        for ud in uploaded_datasets:
            ud.path = os.path.abspath( ud.path )
            abspath_datasets.append( ud )
    json_file_path = upload_common.create_paramfile( trans, abspath_datasets )
    data_list = [ ud.data for ud in abspath_datasets ]
    job, output = upload_common.create_job( trans, tool_params, tool, json_file_path, data_list, folder=folder )
    # HACK: Prevent outputs_to_working_directory from overwriting inputs when "linking"
    job.add_parameter( 'link_data_only', dumps( kwd.get( 'link_data_only', 'copy_files' ) ) )
    job.add_parameter( 'uuid', dumps( kwd.get( 'uuid', None ) ) )
    trans.sa_session.add( job )
    trans.sa_session.flush()
    job_dict = job.to_dict()
    job_dict[ 'id' ] = trans.security.encode_id( job_dict[ 'id' ] )
    return job_dict
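# A hypothetical client-side sketch (the `requests` library; placeholder URL,
# API key, folder id, and file path) of calling the endpoint documented above.
# Parameter names mirror the docstring; everything else is illustrative.
import requests

payload = {
    'encoded_folder_id': 'F0123456789abcdef',    # placeholder encoded id
    'source': 'userdir_file',
    'path': 'subdir/my_reads.fastq',
    'file_type': 'fastqsanger',
    'dbkey': 'hg19',
}
response = requests.post(
    'https://galaxy.example.org/api/libraries/datasets',
    params={'key': 'YOUR_API_KEY'},              # placeholder API key
    json=payload,
)
print(response.json())   # dict describing the created upload job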
def load( self, trans, **kwd ):
    """
    load( self, trans, **kwd ):
    * POST /api/libraries/datasets
        Load dataset from the given source into the library.
        Source can be:
            user directory - root folder specified in galaxy.ini as "$user_library_import_dir"
                example path: path/to/galaxy/$user_library_import_dir/[email protected]/{user can browse everything here}
                the folder with the user login has to be created beforehand
            (admin)import directory - root folder specified in galaxy.ini as "$library_import_dir"
                example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}
            (admin)any absolute or relative path - option allowed with "allow_library_path_paste" in galaxy.ini

    :param  encoded_folder_id:  the encoded id of the folder to import dataset(s) to
    :type   encoded_folder_id:  an encoded id string
    :param  source:             source the datasets should be loaded from
    :type   source:             str
    :param  link_data:          flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
                                while linking is set to True all symlinks will be resolved _once_
    :type   link_data:          bool
    :param  preserve_dirs:      flag whether to preserve the directory structure when importing dir
                                if False only datasets will be imported
    :type   preserve_dirs:      bool
    :param  file_type:          file type of the loaded datasets, defaults to 'auto' (autodetect)
    :type   file_type:          str
    :param  dbkey:              dbkey of the loaded genome, defaults to '?' (unknown)
    :type   dbkey:              str

    :returns:   dict containing information about the created upload job
    :rtype:     dictionary
    """
    kwd[ 'space_to_tab' ] = 'False'
    kwd[ 'to_posix_lines' ] = 'True'
    kwd[ 'dbkey' ] = kwd.get( 'dbkey', '?' )
    kwd[ 'file_type' ] = kwd.get( 'file_type', 'auto' )
    kwd[ 'link_data_only' ] = 'link_to_files' if util.string_as_bool( kwd.get( 'link_data', False ) ) else 'copy_files'
    encoded_folder_id = kwd.get( 'encoded_folder_id', None )
    if encoded_folder_id is not None:
        folder_id = self.folder_manager.cut_and_decode( trans, encoded_folder_id )
    else:
        raise exceptions.RequestParameterMissingException( 'The required attribute encoded_folder_id is missing.' )
    path = kwd.get( 'path', None )
    if path is None:
        raise exceptions.RequestParameterMissingException( 'The required attribute path is missing.' )
    folder = self.folder_manager.get( trans, folder_id )
    source = kwd.get( 'source', None )
    if source not in [ 'userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path' ]:
        raise exceptions.RequestParameterMissingException( 'You have to specify the "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder".' )
    if source in [ 'importdir_file', 'importdir_folder' ]:
        if not trans.user_is_admin:
            raise exceptions.AdminRequiredException( 'Only admins can import from importdir.' )
        if not trans.app.config.library_import_dir:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from importdir.' )
        import_base_dir = trans.app.config.library_import_dir
        path = os.path.join( import_base_dir, path )
    if source in [ 'userdir_file', 'userdir_folder' ]:
        user_login = trans.user.email
        user_base_dir = trans.app.config.user_library_import_dir
        if user_base_dir is None:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow upload from user directories.' )
        full_dir = os.path.join( user_base_dir, user_login )
        if not path.lower().startswith( full_dir.lower() ):
            path = os.path.join( full_dir, path )
        if not os.path.exists( path ):
            raise exceptions.RequestParameterInvalidException( 'Given path does not exist on the host.' )
    if not self.folder_manager.can_add_item( trans, folder ):
        raise exceptions.InsufficientPermissionsException( 'You do not have proper permission to add items to the given folder.' )
    if source == 'admin_path':
        if not trans.app.config.allow_library_path_paste:
            raise exceptions.ConfigDoesNotAllowException( 'The configuration of this Galaxy instance does not allow admins to import into library from path.' )
        if not trans.user_is_admin:
            raise exceptions.AdminRequiredException( 'Only admins can import from path.' )
    # Set up the traditional tool state/params
    tool_id = 'upload1'
    tool = trans.app.toolbox.get_tool( tool_id )
    state = tool.new_state( trans )
    tool.update_state( trans, tool.inputs_by_page[ 0 ], state.inputs, kwd )
    tool_params = state.inputs
    dataset_upload_inputs = []
    for input in tool.inputs.itervalues():
        if input.type == "upload_dataset":
            dataset_upload_inputs.append( input )
    library_bunch = upload_common.handle_library_params( trans, {}, trans.security.encode_id( folder.id ) )
    abspath_datasets = []
    kwd[ 'filesystem_paths' ] = path
    if source in [ 'importdir_folder' ]:
        kwd[ 'filesystem_paths' ] = os.path.join( import_base_dir, path )
    params = util.Params( kwd )
    # user wants to import one file only
    if source in [ "userdir_file", "importdir_file" ]:
        file = os.path.abspath( path )
        abspath_datasets.append( trans.webapp.controllers[ 'library_common' ].make_library_uploaded_dataset(
            trans, 'api', params, os.path.basename( file ), file, 'server_dir', library_bunch ) )
    # user wants to import whole folder
    if source == "userdir_folder":
        uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
            trans, 'api', params, library_bunch, 200, '' )
        uploaded_datasets = uploaded_datasets_bunch[ 0 ]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
        for ud in uploaded_datasets:
            ud.path = os.path.abspath( ud.path )
            abspath_datasets.append( ud )
    # user wants to import from path
    if source in [ "admin_path", "importdir_folder" ]:
        # validate the path is within root
        uploaded_datasets_bunch = trans.webapp.controllers[ 'library_common' ].get_path_paste_uploaded_datasets(
            trans, 'api', params, library_bunch, 200, '' )
        uploaded_datasets = uploaded_datasets_bunch[ 0 ]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound( 'Given folder does not contain any datasets.' )
        for ud in uploaded_datasets:
            ud.path = os.path.abspath( ud.path )
            abspath_datasets.append( ud )
    json_file_path = upload_common.create_paramfile( trans, abspath_datasets )
    data_list = [ ud.data for ud in abspath_datasets ]
    job, output = upload_common.create_job( trans, tool_params, tool, json_file_path, data_list, folder=folder )
    # HACK: Prevent outputs_to_working_directory from overwriting inputs when "linking"
    job.add_parameter( 'link_data_only', dumps( kwd.get( 'link_data_only', 'copy_files' ) ) )
    job.add_parameter( 'uuid', dumps( kwd.get( 'uuid', None ) ) )
    trans.sa_session.add( job )
    trans.sa_session.flush()
    job_dict = job.to_dict()
    job_dict[ 'id' ] = trans.security.encode_id( job_dict[ 'id' ] )
    return job_dict
def __call__( self, trans, **kwargs ):
    # Get basics.
    # FIXME: pretty sure this is only here to pass along, can likely be eliminated
    status = kwargs.get( 'status', None )
    message = kwargs.get( 'message', None )
    # Build a base filter and sort key that is the combination of the saved state and defaults.
    # Saved state takes preference over defaults.
    base_filter = {}
    if self.default_filter:
        # default_filter is a dictionary that provides a default set of filters based on the grid's columns.
        base_filter = self.default_filter.copy()
    base_sort_key = self.default_sort_key
    if self.preserve_state:
        pref_name = unicode( self.__class__.__name__ + self.cur_filter_pref_name )
        if pref_name in trans.get_user().preferences:
            saved_filter = loads( trans.get_user().preferences[pref_name] )
            base_filter.update( saved_filter )
        pref_name = unicode( self.__class__.__name__ + self.cur_sort_key_pref_name )
        if pref_name in trans.get_user().preferences:
            base_sort_key = loads( trans.get_user().preferences[pref_name] )
    # Build initial query
    query = self.build_initial_query( trans, **kwargs )
    query = self.apply_query_filter( trans, query, **kwargs )
    # Maintain sort state in generated urls
    extra_url_args = {}
    # Determine whether use_default_filter flag is set.
    use_default_filter_str = kwargs.get( 'use_default_filter' )
    use_default_filter = False
    if use_default_filter_str:
        use_default_filter = ( use_default_filter_str.lower() == 'true' )
    # Process filtering arguments to (a) build a query that represents the filter and (b) build a
    # dictionary that denotes the current filter.
    cur_filter_dict = {}
    for column in self.columns:
        if column.key:
            # Get the filter criterion for the column. Precedence is (a) if using default filter, only look there; otherwise,
            # (b) look in kwargs; and (c) look in base filter.
            column_filter = None
            if use_default_filter:
                if self.default_filter:
                    column_filter = self.default_filter.get( column.key )
            elif "f-" + column.model_class.__name__ + ".%s" % column.key in kwargs:
                # Queries that include table joins cannot guarantee unique column names. This problem is
                # handled by setting the column_filter value to <TableName>.<ColumnName>.
                column_filter = kwargs.get( "f-" + column.model_class.__name__ + ".%s" % column.key )
            elif "f-" + column.key in kwargs:
                column_filter = kwargs.get( "f-" + column.key )
            elif column.key in base_filter:
                column_filter = base_filter.get( column.key )

            # Method (1) combines a mix of strings and lists of strings into a single string and
            # (2) attempts to de-jsonify all strings.
            def loads_recurse(item):
                decoded_list = []
                if isinstance( item, basestring ):
                    try:
                        # Not clear what we're decoding, so recurse to ensure that we catch everything.
                        decoded_item = loads( item )
                        if isinstance( decoded_item, list ):
                            decoded_list = loads_recurse( decoded_item )
                        else:
                            decoded_list = [ unicode( decoded_item ) ]
                    except ValueError:
                        decoded_list = [ unicode( item ) ]
                elif isinstance( item, list ):
                    for element in item:
                        a_list = loads_recurse( element )
                        decoded_list = decoded_list + a_list
                return decoded_list

            # If column filter found, apply it.
            if column_filter is not None:
                # TextColumns may have a mix of json and strings.
                if isinstance( column, TextColumn ):
                    column_filter = loads_recurse( column_filter )
                    if len( column_filter ) == 1:
                        column_filter = column_filter[0]
                # Interpret ',' as a separator for multiple terms.
                if isinstance( column_filter, basestring ) and column_filter.find(',') != -1:
                    column_filter = column_filter.split(',')
                # Check if filter is empty
                if isinstance( column_filter, list ):
                    # Remove empty strings from filter list
                    column_filter = [ x for x in column_filter if x != '' ]
                    if len( column_filter ) == 0:
                        continue
                elif isinstance( column_filter, basestring ):
                    # If filter criterion is empty, do nothing.
                    if column_filter == '':
                        continue
                # Update query.
                query = column.filter( trans, trans.user, query, column_filter )
                # Update current filter dict.
                # Column filters are rendered in various places, sanitize them all here.
                cur_filter_dict[ column.key ] = sanitize_text( column_filter )
                # Carry filter along to newly generated urls; make sure filter is a string so
                # that we can encode to UTF-8 and thus handle user input to filters.
                if isinstance( column_filter, list ):
                    # Filter is a list; process each item.
                    for filter in column_filter:
                        if not isinstance( filter, basestring ):
                            filter = unicode( filter ).encode( "utf-8" )
                    extra_url_args[ "f-" + column.key ] = dumps( column_filter )
                else:
                    # Process singleton filter.
                    if not isinstance( column_filter, basestring ):
                        column_filter = unicode( column_filter )
                    extra_url_args[ "f-" + column.key ] = column_filter.encode( "utf-8" )
    # Process sort arguments.
    sort_key = None
    if 'sort' in kwargs:
        sort_key = kwargs['sort']
    elif base_sort_key:
        sort_key = base_sort_key
    if sort_key:
        ascending = not sort_key.startswith( "-" )
        # Queries that include table joins cannot guarantee unique column names. This problem is
        # handled by setting the column_filter value to <TableName>.<ColumnName>.
        table_name = None
        if sort_key.find( '.' ) > -1:
            a_list = sort_key.split( '.' )
            if ascending:
                table_name = a_list[0]
            else:
                table_name = a_list[0][1:]
            column_name = a_list[1]
        elif ascending:
            column_name = sort_key
        else:
            column_name = sort_key[1:]
        # Sort key is a column key.
        for column in self.columns:
            if column.key and column.key.find( '.' ) > -1:
                column_key = column.key.split( '.' )[1]
            else:
                column_key = column.key
            if ( table_name is None or table_name == column.model_class.__name__ ) and column_key == column_name:
                query = column.sort( trans, query, ascending, column_name=column_name )
                break
        extra_url_args['sort'] = sort_key
    # There might be a current row
    current_item = self.get_current_item( trans, **kwargs )
    # Process page number.
    if self.use_paging:
        if 'page' in kwargs:
            if kwargs['page'] == 'all':
                page_num = 0
            else:
                page_num = int( kwargs['page'] )
        else:
            page_num = 1
        if page_num == 0:
            # Show all rows in page.
            total_num_rows = query.count()
            page_num = 1
            num_pages = 1
        else:
            # Show a limited number of rows. Before modifying query, get the total number of rows that query
            # returns so that the total number of pages can be computed.
            total_num_rows = query.count()
            query = query.limit( self.num_rows_per_page ).offset( ( page_num - 1 ) * self.num_rows_per_page )
            num_pages = int( math.ceil( float( total_num_rows ) / self.num_rows_per_page ) )
    else:
        # Defaults.
        page_num = 1
        num_pages = 1
    # There are some places in grid templates where it's useful for a grid
    # to have its current filter.
    self.cur_filter_dict = cur_filter_dict
    # Preserve grid state: save current filter and sort key.
    if self.preserve_state:
        pref_name = unicode( self.__class__.__name__ + self.cur_filter_pref_name )
        trans.get_user().preferences[pref_name] = unicode( dumps( cur_filter_dict ) )
        if sort_key:
            pref_name = unicode( self.__class__.__name__ + self.cur_sort_key_pref_name )
            trans.get_user().preferences[pref_name] = unicode( dumps( sort_key ) )
        trans.sa_session.flush()
    # Log grid view.
    context = unicode( self.__class__.__name__ )
    params = cur_filter_dict.copy()
    params['sort'] = sort_key
    params['async'] = ( 'async' in kwargs )
    # TODO:??
    # commenting this out; when this fn calls session.add( action ) and session.flush the query from this fn
    # is effectively 'wiped' out. Nate believes it has something to do with our use of session( autocommit=True )
    # in mapping.py. If you change that to False, the log_action doesn't affect the query
    # Below, I'm rendering the template first (that uses query), then calling log_action, then returning the page
    # trans.log_action( trans.get_user(), unicode( "grid.view" ), context, params )

    # Render grid.
    def url( *args, **kwargs ):
        # Only include sort/filter arguments if not linking to another
        # page. This is a bit of a hack.
        if 'action' in kwargs:
            new_kwargs = dict()
        else:
            new_kwargs = dict( extra_url_args )
        # Extend new_kwargs with first argument if found
        if len( args ) > 0:
            new_kwargs.update( args[0] )
        new_kwargs.update( kwargs )
        # We need to encode item ids
        if 'id' in new_kwargs:
            id = new_kwargs[ 'id' ]
            if isinstance( id, list ):
                new_kwargs[ 'id' ] = [ trans.security.encode_id( i ) for i in id ]
            else:
                new_kwargs[ 'id' ] = trans.security.encode_id( id )
        # The url_for invocation *must* include a controller and action.
        if 'controller' not in new_kwargs:
            new_kwargs['controller'] = trans.controller
        if 'action' not in new_kwargs:
            new_kwargs['action'] = trans.action
        return url_for( **new_kwargs )

    self.use_panels = ( kwargs.get( 'use_panels', False ) in [ True, 'True', 'true' ] )
    self.advanced_search = ( kwargs.get( 'advanced_search', False ) in [ True, 'True', 'true' ] )
    async_request = ( ( self.use_async ) and ( kwargs.get( 'async', False ) in [ True, 'True', 'true' ] ) )
    # Currently, filling the template returns a str object; this requires decoding the string into a
    # unicode object within mako templates. What probably should be done is to return the template as
    # utf-8 unicode; however, this would require encoding the object as utf-8 before returning the grid
    # results via a controller method, which would require substantial changes. Hence, for now, return grid
    # as str.
    page = trans.fill_template( iff( async_request, self.async_template, self.template ),
                                grid=self,
                                query=query,
                                cur_page_num=page_num,
                                num_pages=num_pages,
                                num_page_links=self.num_page_links,
                                default_filter_dict=self.default_filter,
                                cur_filter_dict=cur_filter_dict,
                                sort_key=sort_key,
                                current_item=current_item,
                                ids=kwargs.get( 'id', [] ),
                                url=url,
                                status=status,
                                message=message,
                                info_text=self.info_text,
                                use_panels=self.use_panels,
                                use_hide_message=self.use_hide_message,
                                advanced_search=self.advanced_search,
                                show_item_checkboxes=( self.show_item_checkboxes or
                                                       kwargs.get( 'show_item_checkboxes', '' ) in [ 'True', 'true' ] ),
                                # Pass back kwargs so that grid template can set and use args without
                                # grid explicitly having to pass them.
                                kwargs=kwargs )
    trans.log_action( trans.get_user(), unicode( "grid.view" ), context, params )
    return page
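# A minimal illustration (plain `json`, toy filter values) of the URL round
# trip the grid relies on: list-valued filters are JSON-encoded into "f-" URL
# arguments, then decoded back into terms on the next request.
from json import dumps, loads

terms = ['tag:rna', 'name:foo']
url_arg = dumps(terms)              # what lands in "f-tags=..." in the URL
assert loads(url_arg) == terms      # what the next request recovers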
        else:
            # This should not happen, but it's here just in case
            shutil.copy( dataset.path, output_path )
    elif link_data_only == 'copy_files':
        shutil.move( dataset.path, output_path )
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict( type='dataset',
                 dataset_id=dataset.dataset_id,
                 ext=ext,
                 stdout=stdout,
                 name=dataset.name,
                 line_count=line_count )
    if dataset.get( 'uuid', None ) is not None:
        info['uuid'] = dataset.get( 'uuid' )
    json_file.write( dumps( info ) + "\n" )
    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path ):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content( output_path )


def add_composite_file( dataset, json_file, output_path, files_path ):
    if dataset.composite_files:
        os.mkdir( files_path )
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch( **value )
            if dataset.composite_file_paths[ value.name ] is None and not value.optional:
                file_err( 'A required composite data file was not provided (%s)' % name, dataset, json_file )
                break
            elif dataset.composite_file_paths[value.name] is not None:
def execute_via_app( self, tool, app, session_id, history_id, user=None,
                     incoming={}, set_output_hid=False, overwrite=True,
                     history=None, job_params=None ):
    """
    Execute using application.
    """
    for name, value in incoming.iteritems():
        if isinstance( value, app.model.HistoryDatasetAssociation ):
            dataset = value
            dataset_name = name
            type = 'hda'
            break
        elif isinstance( value, app.model.LibraryDatasetDatasetAssociation ):
            dataset = value
            dataset_name = name
            type = 'ldda'
            break
    else:
        raise Exception( 'The dataset to set metadata on could not be determined.' )

    sa_session = app.model.context

    # Create the job object
    job = app.model.Job()
    job.session_id = session_id
    job.history_id = history_id
    job.tool_id = tool.id
    if user:
        job.user_id = user.id
    if job_params:
        job.params = dumps( job_params )
    start_job_state = job.state  # should be job.states.NEW
    try:
        # For backward compatibility, some tools may not have versions yet.
        job.tool_version = tool.version
    except:
        job.tool_version = "1.0.1"
    # We need to set job state to something other than NEW, or else when tracking jobs in db
    # it will be picked up before we have added input / output parameters
    job.state = job.states.WAITING
    job.set_handler( tool.get_job_handler( job_params ) )
    sa_session.add( job )
    sa_session.flush()  # ensure job.id is available

    # Add parameters to the job_parameter table
    # Store original dataset state, so we can restore it. A separate table might be better
    # (no chance of 'losing' the original state)?
    incoming[ '__ORIGINAL_DATASET_STATE__' ] = dataset.state
    input_paths = [ DatasetPath( dataset.id, real_path=dataset.file_name, mutable=False ) ]
    app.object_store.create( job, base_dir='job_work', dir_only=True, extra_dir=str( job.id ) )
    job_working_dir = app.object_store.get_filename( job, base_dir='job_work', dir_only=True, extra_dir=str( job.id ) )
    external_metadata_wrapper = JobExternalOutputMetadataWrapper( job )
    cmd_line = external_metadata_wrapper.setup_external_metadata( dataset,
                                                                  sa_session,
                                                                  exec_dir=None,
                                                                  tmp_dir=job_working_dir,
                                                                  dataset_files_path=app.model.Dataset.file_path,
                                                                  output_fnames=input_paths,
                                                                  config_root=app.config.root,
                                                                  config_file=app.config.config_file,
                                                                  datatypes_config=app.datatypes_registry.integrated_datatypes_configs,
                                                                  job_metadata=None,
                                                                  include_command=False,
                                                                  max_metadata_value_size=app.config.max_metadata_value_size,
                                                                  kwds={ 'overwrite': overwrite } )
    incoming[ '__SET_EXTERNAL_METADATA_COMMAND_LINE__' ] = cmd_line
    for name, value in tool.params_to_strings( incoming, app ).iteritems():
        job.add_parameter( name, value )
    # Add the dataset to the job_to_input_dataset table
    if type == 'hda':
        job.add_input_dataset( dataset_name, dataset )
    elif type == 'ldda':
        job.add_input_library_dataset( dataset_name, dataset )
    # Need a special state here to show that metadata is being set and also allow the job to run;
    # i.e. if state was set to 'running' the set metadata job would never run, as it would wait
    # for input (the dataset to set metadata on) to be in a ready state
    dataset._state = dataset.states.SETTING_METADATA
    job.state = start_job_state  # job inputs have been configured, restore initial job state
    sa_session.flush()

    # Queue the job for execution
    app.job_queue.put( job.id, tool.id )
    # FIXME: need to add event logging to app and log events there rather than trans.
    # trans.log_event( "Added set external metadata job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )

    # Clear e.g. converted files
    dataset.datatype.before_setting_metadata( dataset )

    return job, odict()
def setup_job(self, trans, jeha, include_hidden=False, include_deleted=False):
    """
    Perform setup for job to export a history into an archive. Method generates
    attribute files for export, sets the corresponding attributes in the jeha
    object, and returns a command line for running the job. The command line
    includes the command, inputs, and options; it does not include the output
    file because it must be set at runtime.
    """

    #
    # Helper methods/classes.
    #

    def get_item_tag_dict(item):
        """ Create dictionary of an item's tags. """
        tags = {}
        for tag in item.tags:
            tag_user_tname = to_unicode(tag.user_tname)
            tag_user_value = to_unicode(tag.user_value)
            tags[tag_user_tname] = tag_user_value
        return tags

    def prepare_metadata(metadata):
        """ Prepare metadata for exporting. """
        for name, value in metadata.items():
            # Metadata files are not needed for export because they can be
            # regenerated.
            if isinstance(value, trans.app.model.MetadataFile):
                del metadata[name]
        return metadata

    class HistoryDatasetAssociationEncoder(json.JSONEncoder):
        """ Custom JSONEncoder for a HistoryDatasetAssociation. """
        def default(self, obj):
            """ Encode an HDA, default encoding for everything else. """
            if isinstance(obj, trans.app.model.HistoryDatasetAssociation):
                rval = {
                    "__HistoryDatasetAssociation__": True,
                    "create_time": obj.create_time.__str__(),
                    "update_time": obj.update_time.__str__(),
                    "hid": obj.hid,
                    "name": to_unicode(obj.name),
                    "info": to_unicode(obj.info),
                    "blurb": obj.blurb,
                    "peek": obj.peek,
                    "extension": obj.extension,
                    "metadata": prepare_metadata(dict(obj.metadata.items())),
                    "parent_id": obj.parent_id,
                    "designation": obj.designation,
                    "deleted": obj.deleted,
                    "visible": obj.visible,
                    "file_name": obj.file_name,
                    "uuid": (lambda uuid: str(uuid) if uuid else None)(obj.dataset.uuid),
                    "annotation": to_unicode(getattr(obj, 'annotation', '')),
                    "tags": get_item_tag_dict(obj),
                    "extra_files_path": obj.extra_files_path
                }
                if not obj.visible and not include_hidden:
                    rval['exported'] = False
                elif obj.deleted and not include_deleted:
                    rval['exported'] = False
                else:
                    rval['exported'] = True
                return rval
            if isinstance(obj, UnvalidatedValue):
                return obj.__str__()
            return json.JSONEncoder.default(self, obj)

    #
    # Create attributes/metadata files for export.
    #
    temp_output_dir = tempfile.mkdtemp()

    # Write history attributes to file.
    history = jeha.history
    history_attrs = {
        "create_time": history.create_time.__str__(),
        "update_time": history.update_time.__str__(),
        "name": to_unicode(history.name),
        "hid_counter": history.hid_counter,
        "genome_build": history.genome_build,
        "annotation": to_unicode(self.get_item_annotation_str(trans.sa_session, history.user, history)),
        "tags": get_item_tag_dict(history),
        "includes_hidden_datasets": include_hidden,
        "includes_deleted_datasets": include_deleted
    }
    history_attrs_filename = tempfile.NamedTemporaryFile(dir=temp_output_dir).name
    history_attrs_out = open(history_attrs_filename, 'w')
    history_attrs_out.write(dumps(history_attrs))
    history_attrs_out.close()
    jeha.history_attrs_filename = history_attrs_filename

    # Write datasets' attributes to file.
    datasets = self.get_history_datasets(trans, history)
    included_datasets = []
    datasets_attrs = []
    provenance_attrs = []
    for dataset in datasets:
        dataset.annotation = self.get_item_annotation_str(trans.sa_session, history.user, dataset)
        if (not dataset.visible and not include_hidden) or (dataset.deleted and not include_deleted):
            provenance_attrs.append(dataset)
        else:
            datasets_attrs.append(dataset)
            included_datasets.append(dataset)
    datasets_attrs_filename = tempfile.NamedTemporaryFile(dir=temp_output_dir).name
    datasets_attrs_out = open(datasets_attrs_filename, 'w')
    datasets_attrs_out.write(dumps(datasets_attrs, cls=HistoryDatasetAssociationEncoder))
    datasets_attrs_out.close()
    jeha.datasets_attrs_filename = datasets_attrs_filename

    provenance_attrs_out = open(datasets_attrs_filename + ".provenance", 'w')
    provenance_attrs_out.write(dumps(provenance_attrs, cls=HistoryDatasetAssociationEncoder))
    provenance_attrs_out.close()

    #
    # Write jobs attributes file.
    #

    # Get all jobs associated with included HDAs.
    jobs_dict = {}
    for hda in included_datasets:
        # Get the associated job, if any. If this hda was copied from another,
        # we need to find the job that created the original hda
        job_hda = hda
        while job_hda.copied_from_history_dataset_association:  # should this check library datasets as well?
            job_hda = job_hda.copied_from_history_dataset_association
        if not job_hda.creating_job_associations:
            # No viable HDA found.
            continue

        # Get the job object.
        job = None
        for assoc in job_hda.creating_job_associations:
            job = assoc.job
            break
        if not job:
            # No viable job.
            continue

        jobs_dict[job.id] = job

    # Get jobs' attributes.
    jobs_attrs = []
    for id, job in jobs_dict.items():
        job_attrs = {}
        job_attrs['tool_id'] = job.tool_id
        job_attrs['tool_version'] = job.tool_version
        job_attrs['state'] = job.state
        job_attrs['info'] = job.info
        job_attrs['traceback'] = job.traceback
        job_attrs['command_line'] = job.command_line
        job_attrs['stderr'] = job.stderr
        job_attrs['stdout'] = job.stdout
        job_attrs['exit_code'] = job.exit_code
        job_attrs['create_time'] = job.create_time.isoformat()
        job_attrs['update_time'] = job.update_time.isoformat()

        # Get the job's parameters
        try:
            params_objects = job.get_param_values(trans.app)
        except:
            # Could not get job params.
            continue

        params_dict = {}
        for name, value in params_objects.items():
            params_dict[name] = value
        job_attrs['params'] = params_dict

        # -- Get input, output datasets. --

        input_datasets = []
        input_mapping = {}
        for assoc in job.input_datasets:
            # Optional data inputs will not have a dataset.
            if assoc.dataset:
                input_datasets.append(assoc.dataset.hid)
                input_mapping[assoc.name] = assoc.dataset.hid
        job_attrs['input_datasets'] = input_datasets
        job_attrs['input_mapping'] = input_mapping
        output_datasets = [assoc.dataset.hid for assoc in job.output_datasets]
        job_attrs['output_datasets'] = output_datasets

        jobs_attrs.append(job_attrs)

    jobs_attrs_filename = tempfile.NamedTemporaryFile(dir=temp_output_dir).name
    jobs_attrs_out = open(jobs_attrs_filename, 'w')
    jobs_attrs_out.write(dumps(jobs_attrs, cls=HistoryDatasetAssociationEncoder))
    jobs_attrs_out.close()
    jeha.jobs_attrs_filename = jobs_attrs_filename

    #
    # Create and return command line for running tool.
    #
    options = ""
    if jeha.compressed:
        options = "-G"
    return "%s %s %s %s" % (options, history_attrs_filename, datasets_attrs_filename, jobs_attrs_filename)
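# A standalone sketch (standard `json`, toy class) of the custom-encoder
# pattern used above: override default() to turn one known type into a plain
# dict, tag it with a marker key, and defer everything else to the base class.
import json

class Dataset(object):
    def __init__(self, name, hid):
        self.name = name
        self.hid = hid

class DatasetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, Dataset):
            return {'__Dataset__': True, 'name': obj.name, 'hid': obj.hid}
        return json.JSONEncoder.default(self, obj)

print(json.dumps([Dataset('reads', 1), 'plain string'], cls=DatasetEncoder))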
def _rerun_tool( self, trans, payload, **kwargs ): """ Rerun a tool to produce a new output dataset that corresponds to a dataset that a user is currently viewing. """ # # TODO: refactor to use same code as run_tool. # # Run tool on region if region is specificied. run_on_regions = False regions = payload.get( 'regions', None ) if regions: if isinstance( regions, dict ): # Regions is a single region. regions = [ GenomeRegion.from_dict( regions ) ] elif isinstance( regions, list ): # There is a list of regions. regions = [ GenomeRegion.from_dict( r ) for r in regions ] if len( regions ) > 1: # Sort by chrom name, start so that data is not fetched out of order. regions = sorted(regions, key=lambda r: (r.chrom.lower(), r.start)) # Merge overlapping regions so that regions do not overlap # and hence data is not included multiple times. prev = regions[0] cur = regions[1] index = 1 while True: if cur.chrom == prev.chrom and cur.start <= prev.end: # Found overlapping regions, so join them into prev. prev.end = cur.end del regions[ index ] else: # No overlap, move to next region. prev = cur index += 1 # Get next region or exit. if index == len( regions ): # Done. break else: cur = regions[ index ] run_on_regions = True # Dataset check. decoded_dataset_id = self.decode_id( payload.get( 'target_dataset_id' ) ) original_dataset = self.hda_manager.get_accessible( trans, decoded_dataset_id, user=trans.user ) original_dataset = self.hda_manager.error_if_uploading( trans, original_dataset ) msg = self.hda_manager.data_conversion_status( trans, original_dataset ) if msg: return msg # Set tool parameters--except non-hidden dataset parameters--using combination of # job's previous parameters and incoming parameters. Incoming parameters # have priority. # original_job = self.hda_manager.creating_job( original_dataset ) tool = trans.app.toolbox.get_tool( original_job.tool_id ) if not tool or not tool.allow_user_access( trans.user ): return trans.app.model.Dataset.conversion_messages.NO_TOOL tool_params = dict( [ ( p.name, p.value ) for p in original_job.parameters ] ) # TODO: rather than set new inputs using dict of json'ed value, unpack parameters and set using set_param_value below. # TODO: need to handle updates to conditional parameters; conditional # params are stored in dicts (and dicts within dicts). new_inputs = payload[ 'inputs' ] tool_params.update( dict( [ ( key, dumps( value ) ) for key, value in new_inputs.items() if key in tool.inputs and new_inputs[ key ] is not None ] ) ) tool_params = tool.params_from_strings( tool_params, self.app ) # # If running tool on region, convert input datasets (create indices) so # that can regions of data can be quickly extracted. # data_provider_registry = trans.app.data_provider_registry messages_list = [] if run_on_regions: for jida in original_job.input_datasets: input_dataset = jida.dataset data_provider = data_provider_registry.get_data_provider( trans, original_dataset=input_dataset, source='data' ) if data_provider and ( not data_provider.converted_dataset or data_provider.converted_dataset.state != trans.app.model.Dataset.states.OK ): # Can convert but no converted dataset yet, so return message about why. data_sources = input_dataset.datatype.data_sources msg = input_dataset.convert_dataset( trans, data_sources[ 'data' ] ) if msg is not None: messages_list.append( msg ) # Return any messages generated during conversions. 
return_message = self._get_highest_priority_msg( messages_list ) if return_message: return return_message # # Set target history (the history that tool will use for inputs/outputs). # If user owns dataset, put new data in original dataset's history; if # user does not own dataset (and hence is accessing dataset via sharing), # put new data in user's current history. # if original_dataset.history.user == trans.user: target_history = original_dataset.history else: target_history = trans.get_history( create=True ) hda_permissions = trans.app.security_agent.history_get_default_permissions( target_history ) def set_param_value( param_dict, param_name, param_value ): """ Set new parameter value in a tool's parameter dictionary. """ # Recursive function to set param value. def set_value( param_dict, group_name, group_index, param_name, param_value ): if group_name in param_dict: param_dict[ group_name ][ group_index ][ param_name ] = param_value return True elif param_name in param_dict: param_dict[ param_name ] = param_value return True else: # Recursive search. return_val = False for value in param_dict.values(): if isinstance( value, dict ): return_val = set_value( value, group_name, group_index, param_name, param_value) if return_val: return return_val return False # Parse parameter name if necessary. if param_name.find( "|" ) == -1: # Non-grouping parameter. group_name = group_index = None else: # Grouping parameter. group, param_name = param_name.split( "|" ) index = group.rfind( "_" ) group_name = group[ :index ] group_index = int( group[ index + 1: ] ) return set_value( param_dict, group_name, group_index, param_name, param_value ) # Set parameters based tool's trackster config. params_set = {} for action in tool.trackster_conf.actions: success = False for joda in original_job.output_datasets: if joda.name == action.output_name: set_param_value( tool_params, action.name, joda.dataset ) params_set[ action.name ] = True success = True break if not success: return trans.app.model.Dataset.conversion_messages.ERROR # # Set input datasets for tool. If running on regions, extract and use subset # when possible. # if run_on_regions: regions_str = ",".join( [ str( r ) for r in regions ] ) for jida in original_job.input_datasets: # If param set previously by config actions, do nothing. if jida.name in params_set: continue input_dataset = jida.dataset if input_dataset is None: # optional dataset and dataset wasn't selected tool_params[ jida.name ] = None elif run_on_regions and 'data' in input_dataset.datatype.data_sources: # Dataset is indexed and hence a subset can be extracted and used # as input. # Look for subset. subset_dataset_association = trans.sa_session.query( trans.app.model.HistoryDatasetAssociationSubset ) \ .filter_by( hda=input_dataset, location=regions_str ) \ .first() if subset_dataset_association: # Data subset exists. subset_dataset = subset_dataset_association.subset else: # Need to create subset. data_source = input_dataset.datatype.data_sources[ 'data' ] converted_dataset = input_dataset.get_converted_dataset( trans, data_source ) deps = input_dataset.get_converted_dataset_deps( trans, data_source ) # Create new HDA for input dataset's subset. 
new_dataset = trans.app.model.HistoryDatasetAssociation( extension=input_dataset.ext, \ dbkey=input_dataset.dbkey, \ create_dataset=True, \ sa_session=trans.sa_session, name="Subset [%s] of data %i" % \ ( regions_str, input_dataset.hid ), visible=False ) target_history.add_dataset( new_dataset ) trans.sa_session.add( new_dataset ) trans.app.security_agent.set_all_dataset_permissions( new_dataset.dataset, hda_permissions ) # Write subset of data to new dataset data_provider = data_provider_registry.get_data_provider( trans, original_dataset=input_dataset, source='data' ) trans.app.object_store.create( new_dataset.dataset ) data_provider.write_data_to_file( regions, new_dataset.file_name ) # TODO: (a) size not working; (b) need to set peek. new_dataset.set_size() new_dataset.info = "Data subset for trackster" new_dataset.set_dataset_state( trans.app.model.Dataset.states.OK ) # Set metadata. # TODO: set meta internally if dataset is small enough? trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute( trans.app.datatypes_registry.set_external_metadata_tool, trans, incoming={ 'input1': new_dataset }, overwrite=False, job_params={ "source" : "trackster" } ) # Add HDA subset association. subset_association = trans.app.model.HistoryDatasetAssociationSubset( hda=input_dataset, subset=new_dataset, location=regions_str ) trans.sa_session.add( subset_association ) subset_dataset = new_dataset trans.sa_session.flush() # Add dataset to tool's parameters. if not set_param_value( tool_params, jida.name, subset_dataset ): return { "error" : True, "message" : "error setting parameter %s" % jida.name } # # Execute tool and handle outputs. # try: subset_job, subset_job_outputs = tool.execute( trans, incoming=tool_params, history=target_history, job_params={ "source" : "trackster" } ) except Exception, e: # Lots of things can go wrong when trying to execute tool. return { "error" : True, "message" : e.__class__.__name__ + ": " + str(e) }
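# The region-merging loop in _rerun_tool above mutates the sorted list in
# place. The same idea as a standalone sketch over (chrom, start, end)
# tuples; note that, unlike the loop above, this takes max() of the two end
# coordinates, so a region fully contained in the previous one cannot shrink
# the merged result:
def merge_regions(regions):
    """Sort regions by (chrom, start) and merge overlapping neighbors."""
    regions = sorted(regions, key=lambda r: (r[0].lower(), r[1]))
    merged = [regions[0]]
    for chrom, start, end in regions[1:]:
        prev_chrom, prev_start, prev_end = merged[-1]
        if chrom == prev_chrom and start <= prev_end:
            # Overlap: extend the previous region.
            merged[-1] = (prev_chrom, prev_start, max(prev_end, end))
        else:
            merged.append((chrom, start, end))
    return merged

# merge_regions([("chr1", 100, 200), ("chr1", 150, 300), ("chr2", 5, 10)])
# -> [('chr1', 100, 300), ('chr2', 5, 10)]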
def __call__(self, trans, **kwargs): # Get basics. # FIXME: pretty sure this is only here to pass along, can likely be eliminated status = kwargs.get('status', None) message = kwargs.get('message', None) # Build a base filter and sort key that is the combination of the saved state and defaults. # Saved state takes preference over defaults. base_filter = {} if self.default_filter: # default_filter is a dictionary that provides a default set of filters based on the grid's columns. base_filter = self.default_filter.copy() base_sort_key = self.default_sort_key if self.preserve_state: pref_name = unicode(self.__class__.__name__ + self.cur_filter_pref_name) if pref_name in trans.get_user().preferences: saved_filter = loads(trans.get_user().preferences[pref_name]) base_filter.update(saved_filter) pref_name = unicode(self.__class__.__name__ + self.cur_sort_key_pref_name) if pref_name in trans.get_user().preferences: base_sort_key = loads(trans.get_user().preferences[pref_name]) # Build initial query query = self.build_initial_query(trans, **kwargs) query = self.apply_query_filter(trans, query, **kwargs) # Maintain sort state in generated urls extra_url_args = {} # Determine whether use_default_filter flag is set. use_default_filter_str = kwargs.get('use_default_filter') use_default_filter = False if use_default_filter_str: use_default_filter = (use_default_filter_str.lower() == 'true') # Process filtering arguments to (a) build a query that represents the filter and (b) build a # dictionary that denotes the current filter. cur_filter_dict = {} for column in self.columns: if column.key: # Get the filter criterion for the column. Precedence is (a) if using default filter, only look there; otherwise, # (b) look in kwargs; and (c) look in base filter. column_filter = None if use_default_filter: if self.default_filter: column_filter = self.default_filter.get(column.key) elif "f-" + column.model_class.__name__ + ".%s" % column.key in kwargs: # Queries that include table joins cannot guarantee unique column names. This problem is # handled by setting the column_filter value to <TableName>.<ColumnName>. column_filter = kwargs.get("f-" + column.model_class.__name__ + ".%s" % column.key) elif "f-" + column.key in kwargs: column_filter = kwargs.get("f-" + column.key) elif column.key in base_filter: column_filter = base_filter.get(column.key) # Method (1) combines a mix of strings and lists of strings into a single string and (2) attempts to de-jsonify all strings. def loads_recurse(item): decoded_list = [] if isinstance(item, basestring): try: # Not clear what we're decoding, so recurse to ensure that we catch everything. decoded_item = loads(item) if isinstance(decoded_item, list): decoded_list = loads_recurse(decoded_item) else: decoded_list = [unicode(decoded_item)] except ValueError: decoded_list = [unicode(item)] elif isinstance(item, list): for element in item: a_list = loads_recurse(element) decoded_list = decoded_list + a_list return decoded_list # If column filter found, apply it. if column_filter is not None: # TextColumns may have a mix of json and strings. if isinstance(column, TextColumn): column_filter = loads_recurse(column_filter) if len(column_filter) == 1: column_filter = column_filter[0] # Interpret ',' as a separator for multiple terms. 
if isinstance( column_filter, basestring) and column_filter.find(',') != -1: column_filter = column_filter.split(',') # Check if filter is empty if isinstance(column_filter, list): # Remove empty strings from filter list column_filter = [x for x in column_filter if x != ''] if len(column_filter) == 0: continue elif isinstance(column_filter, basestring): # If filter criterion is empty, do nothing. if column_filter == '': continue # Update query. query = column.filter(trans, trans.user, query, column_filter) # Upate current filter dict. # Column filters are rendered in various places, sanitize them all here. cur_filter_dict[column.key] = sanitize_text(column_filter) # Carry filter along to newly generated urls; make sure filter is a string so # that we can encode to UTF-8 and thus handle user input to filters. if isinstance(column_filter, list): # Filter is a list; process each item. for filter in column_filter: if not isinstance(filter, basestring): filter = unicode(filter).encode("utf-8") extra_url_args["f-" + column.key] = dumps(column_filter) else: # Process singleton filter. if not isinstance(column_filter, basestring): column_filter = unicode(column_filter) extra_url_args[ "f-" + column.key] = column_filter.encode("utf-8") # Process sort arguments. sort_key = None if 'sort' in kwargs: sort_key = kwargs['sort'] elif base_sort_key: sort_key = base_sort_key if sort_key: ascending = not (sort_key.startswith("-")) # Queries that include table joins cannot guarantee unique column names. This problem is # handled by setting the column_filter value to <TableName>.<ColumnName>. table_name = None if sort_key.find('.') > -1: a_list = sort_key.split('.') if ascending: table_name = a_list[0] else: table_name = a_list[0][1:] column_name = a_list[1] elif ascending: column_name = sort_key else: column_name = sort_key[1:] # Sort key is a column key. for column in self.columns: if column.key and column.key.find('.') > -1: column_key = column.key.split('.')[1] else: column_key = column.key if (table_name is None or table_name == column.model_class.__name__ ) and column_key == column_name: query = column.sort(trans, query, ascending, column_name=column_name) break extra_url_args['sort'] = sort_key # There might be a current row current_item = self.get_current_item(trans, **kwargs) # Process page number. if self.use_paging: if 'page' in kwargs: if kwargs['page'] == 'all': page_num = 0 else: page_num = int(kwargs['page']) else: page_num = 1 if page_num == 0: # Show all rows in page. total_num_rows = query.count() page_num = 1 num_pages = 1 else: # Show a limited number of rows. Before modifying query, get the total number of rows that query # returns so that the total number of pages can be computed. total_num_rows = query.count() query = query.limit(self.num_rows_per_page).offset( (page_num - 1) * self.num_rows_per_page) num_pages = int( math.ceil(float(total_num_rows) / self.num_rows_per_page)) else: # Defaults. page_num = 1 num_pages = 1 # There are some places in grid templates where it's useful for a grid # to have its current filter. self.cur_filter_dict = cur_filter_dict # Preserve grid state: save current filter and sort key. if self.preserve_state: pref_name = unicode(self.__class__.__name__ + self.cur_filter_pref_name) trans.get_user().preferences[pref_name] = unicode( dumps(cur_filter_dict)) if sort_key: pref_name = unicode(self.__class__.__name__ + self.cur_sort_key_pref_name) trans.get_user().preferences[pref_name] = unicode( dumps(sort_key)) trans.sa_session.flush() # Log grid view. 
context = unicode(self.__class__.__name__) params = cur_filter_dict.copy() params['sort'] = sort_key params['async'] = ('async' in kwargs) #TODO:?? # commenting this out; when this fn calls session.add( action ) and session.flush the query from this fn # is effectively 'wiped' out. Nate believes it has something to do with our use of session( autocommit=True ) # in mapping.py. If you change that to False, the log_action doesn't affect the query # Below, I'm rendering the template first (that uses query), then calling log_action, then returning the page #trans.log_action( trans.get_user(), unicode( "grid.view" ), context, params ) # Render grid. def url(*args, **kwargs): # Only include sort/filter arguments if not linking to another # page. This is a bit of a hack. if 'action' in kwargs: new_kwargs = dict() else: new_kwargs = dict(extra_url_args) # Extend new_kwargs with first argument if found if len(args) > 0: new_kwargs.update(args[0]) new_kwargs.update(kwargs) # We need to encode item ids if 'id' in new_kwargs: id = new_kwargs['id'] if isinstance(id, list): new_kwargs['id'] = [ trans.security.encode_id(i) for i in id ] else: new_kwargs['id'] = trans.security.encode_id(id) #The url_for invocation *must* include a controller and action. if 'controller' not in new_kwargs: new_kwargs['controller'] = trans.controller if 'action' not in new_kwargs: new_kwargs['action'] = trans.action return url_for(**new_kwargs) self.use_panels = (kwargs.get('use_panels', False) in [True, 'True', 'true']) self.advanced_search = (kwargs.get('advanced_search', False) in [True, 'True', 'true']) async_request = ((self.use_async) and (kwargs.get('async', False) in [True, 'True', 'true'])) # Currently, filling the template returns a str object; this requires decoding the string into a # unicode object within mako templates. What probably should be done is to return the template as # utf-8 unicode; however, this would require encoding the object as utf-8 before returning the grid # results via a controller method, which is require substantial changes. Hence, for now, return grid # as str. page = trans.fill_template( iff(async_request, self.async_template, self.template), grid=self, query=query, cur_page_num=page_num, num_pages=num_pages, num_page_links=self.num_page_links, default_filter_dict=self.default_filter, cur_filter_dict=cur_filter_dict, sort_key=sort_key, current_item=current_item, ids=kwargs.get('id', []), url=url, status=status, message=message, info_text=self.info_text, use_panels=self.use_panels, use_hide_message=self.use_hide_message, advanced_search=self.advanced_search, show_item_checkboxes=(self.show_item_checkboxes or kwargs.get( 'show_item_checkboxes', '') in ['True', 'true']), # Pass back kwargs so that grid template can set and use args without # grid explicitly having to pass them. kwargs=kwargs) trans.log_action(trans.get_user(), unicode("grid.view"), context, params) return page
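# The paging arithmetic in __call__ above is easy to get wrong by one. A
# standalone sketch of the same logic (page_slice is a hypothetical helper;
# query is any SQLAlchemy-style query supporting count/limit/offset):
import math

def page_slice(query, page_num, num_rows_per_page):
    """Page 0 means 'show all rows'; otherwise apply LIMIT/OFFSET and
    report the total page count, with pages numbered from 1."""
    total_num_rows = query.count()
    if page_num == 0:
        return query, 1, 1
    paged = query.limit(num_rows_per_page).offset((page_num - 1) * num_rows_per_page)
    num_pages = int(math.ceil(float(total_num_rows) / num_rows_per_page))
    return paged, page_num, num_pages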
def pretty_print_json(json_data, is_json_string=False):
    if is_json_string:
        json_data = json.loads(json_data)
    return json.dumps(json_data, sort_keys=True, indent=4)
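# Usage: both calls below produce the same sorted, indented output.
print(pretty_print_json({'b': 1, 'a': [1, 2]}))
print(pretty_print_json('{"b": 1, "a": [1, 2]}', is_json_string=True))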
def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None, mapping_over_collection=False):
    """
    Executes a tool, creating job and tool outputs, associating them, and
    submitting the job to the job queue. If history is not specified, use
    trans.history as destination for tool's output datasets.
    """
    # Set history.
    if not history:
        history = tool.get_default_history_by_trans(trans, create=True)

    out_data = odict()
    out_collections = {}
    out_collection_instances = {}
    # Track input dataset collections - but replace them with simple lists so
    # collect_input_datasets can process these normally.
    inp_dataset_collections = self.collect_input_dataset_collections(tool, incoming)
    # Collect any input datasets from the incoming parameters
    inp_data = self.collect_input_datasets(tool, incoming, trans)

    # Deal with input dataset names, 'dbkey' and types
    input_names = []
    input_ext = 'data'
    input_dbkey = incoming.get("dbkey", "?")
    for name, data in inp_data.items():
        if not data:
            data = NoneDataset(datatypes_registry=trans.app.datatypes_registry)
            continue

        # Convert LDDA to an HDA.
        if isinstance(data, LibraryDatasetDatasetAssociation):
            data = data.to_history_dataset_association(None)
            inp_data[name] = data
        else:  # HDA
            if data.hid:
                input_names.append('data %s' % data.hid)
        input_ext = data.ext

        if data.dbkey not in [None, '?']:
            input_dbkey = data.dbkey

    # Collect chromInfo dataset and add as parameters to incoming
    (chrom_info, db_dataset) = trans.app.genome_builds.get_chrom_info(input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len')
    if db_dataset:
        inp_data.update({"chromInfo": db_dataset})
    incoming["chromInfo"] = chrom_info

    # Determine output dataset permission/roles list
    existing_datasets = [inp for inp in inp_data.values() if inp]
    if existing_datasets:
        output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets(existing_datasets)
    else:
        # No valid inputs, we will use history defaults
        output_permissions = trans.app.security_agent.history_get_default_permissions(history)

    # Build name for output datasets based on tool name and input names
    on_text = on_text_for_names(input_names)

    # Add the dbkey to the incoming parameters
    incoming["dbkey"] = input_dbkey
    # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
    wrapped_params = WrappedParameters(trans, tool, incoming)
    # Keep track of parent / child relationships, we'll create all the
    # datasets first, then create the associations
    parent_to_child_pairs = []
    child_dataset_names = set()
    object_store_populator = ObjectStorePopulator(trans.app)

    def handle_output(name, output):
        if output.parent:
            parent_to_child_pairs.append((output.parent, name))
            child_dataset_names.add(name)
        # What is the following hack for? Need to document under what
        # conditions can the following occur? ([email protected])
        # HACK: the output data has already been created;
        # this happens i.e. as a result of the async controller
        if name in incoming:
            dataid = incoming[name]
            data = trans.sa_session.query(trans.app.model.HistoryDatasetAssociation).get(dataid)
            assert data is not None
            out_data[name] = data
        else:
            ext = determine_output_format(output, wrapped_params.params, inp_data, input_ext)
            data = trans.app.model.HistoryDatasetAssociation(extension=ext, create_dataset=True, sa_session=trans.sa_session)
            if output.hidden:
                data.visible = False
            # Commit the dataset immediately so it gets database assigned unique id
            trans.sa_session.add(data)
            trans.sa_session.flush()
            trans.app.security_agent.set_all_dataset_permissions(data.dataset, output_permissions)

        object_store_populator.set_object_store_id(data)

        # This may not be necessary with the new parent/child associations
        data.designation = name
        # Copy metadata from one of the inputs if requested.
        # metadata source can be either a string referencing an input
        # or an actual object to copy.
        metadata_source = output.metadata_source
        if metadata_source:
            if isinstance(metadata_source, basestring):
                metadata_source = inp_data[metadata_source]

        if metadata_source is not None:
            data.init_meta(copy_from=metadata_source)
        else:
            data.init_meta()
        # Take dbkey from LAST input
        data.dbkey = str(input_dbkey)
        # Set state
        # FIXME: shouldn't this be NEW until the job runner changes it?
        data.state = data.states.QUEUED
        data.blurb = "queued"
        # Set output label
        data.name = self.get_output_name(output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params)
        # Store output
        out_data[name] = data
        if output.actions:
            # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
            output_action_params = dict(out_data)
            output_action_params.update(incoming)
            output.actions.apply_action(data, output_action_params)
        # Store all changes to database
        trans.sa_session.flush()
        return data

    for name, output in tool.outputs.items():
        if not filter_output(output, incoming):
            if output.collection:
                collections_manager = trans.app.dataset_collections_service

                # As far as I can tell - this is always true - but just verify
                assert set_output_history, "Cannot create dataset collection for this kind of tool."

                elements = odict()
                input_collections = dict([(k, v[0]) for k, v in inp_dataset_collections.iteritems()])
                known_outputs = output.known_outputs(input_collections, collections_manager.type_registry)
                # Just to echo TODO elsewhere - this should be restructured to allow
                # nested collections.
                for output_part_def in known_outputs:
                    effective_output_name = output_part_def.effective_output_name
                    element = handle_output(effective_output_name, output_part_def.output_def)
                    # Following hack causes dataset to not be added to history...
                    child_dataset_names.add(effective_output_name)

                    if set_output_history:
                        history.add_dataset(element, set_hid=set_output_hid)
                    trans.sa_session.add(element)
                    trans.sa_session.flush()

                    elements[output_part_def.element_identifier] = element

                if output.dynamic_structure:
                    assert not elements  # known_outputs must have been empty
                    elements = collections_manager.ELEMENTS_UNINITIALIZED

                if mapping_over_collection:
                    dc = collections_manager.create_dataset_collection(
                        trans,
                        collection_type=output.structure.collection_type,
                        elements=elements,
                    )
                    out_collections[name] = dc
                else:
                    hdca_name = self.get_output_name(output, None, tool, on_text, trans, incoming, history, wrapped_params.params, job_params)
                    hdca = collections_manager.create(
                        trans,
                        history,
                        name=hdca_name,
                        collection_type=output.structure.collection_type,
                        elements=elements,
                    )
                    # name here is name of the output element - not name
                    # of the hdca.
                    out_collection_instances[name] = hdca
            else:
                handle_output(name, output)

    # Add all the top-level (non-child) datasets to the history unless otherwise specified
    for name in out_data.keys():
        if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
            data = out_data[name]
            if set_output_history:
                history.add_dataset(data, set_hid=set_output_hid)
            trans.sa_session.add(data)
            trans.sa_session.flush()

    # Add all the children to their parents
    for parent_name, child_name in parent_to_child_pairs:
        parent_dataset = out_data[parent_name]
        child_dataset = out_data[child_name]
        parent_dataset.children.append(child_dataset)

    # Store data after custom code runs
    trans.sa_session.flush()

    # Create the job object
    job = trans.app.model.Job()

    if hasattr(trans, "get_galaxy_session"):
        galaxy_session = trans.get_galaxy_session()
        # If we're submitting from the API, there won't be a session.
        if type(galaxy_session) == trans.model.GalaxySession:
            job.session_id = galaxy_session.id
    if trans.user is not None:
        job.user_id = trans.user.id
    job.history_id = history.id
    job.tool_id = tool.id
    try:
        # For backward compatibility, some tools may not have versions yet.
        job.tool_version = tool.version
    except:
        job.tool_version = "1.0.0"
    # FIXME: Don't need all of incoming here, just the defined parameters
    #        from the tool. We need to deal with tools that pass all post
    #        parameters to the command as a special case.
    for name, (dataset_collection, reduced) in inp_dataset_collections.iteritems():
        # TODO: Does this work if nested in repeat/conditional?
        if reduced:
            incoming[name] = "__collection_reduce__|%s" % dataset_collection.id
        # Should verify security? We check security of individual
        # datasets below?
        job.add_input_dataset_collection(name, dataset_collection)
    for name, value in tool.params_to_strings(incoming, trans.app).iteritems():
        job.add_parameter(name, value)
    current_user_roles = trans.get_current_user_roles()
    for name, dataset in inp_data.iteritems():
        if dataset:
            if not trans.app.security_agent.can_access_dataset(current_user_roles, dataset.dataset):
                raise Exception("User does not have permission to use a dataset (%s) provided for input." % dataset.id)
            job.add_input_dataset(name, dataset)
        else:
            job.add_input_dataset(name, None)
    for name, dataset in out_data.iteritems():
        job.add_output_dataset(name, dataset)
    for name, dataset_collection in out_collections.iteritems():
        job.add_implicit_output_dataset_collection(name, dataset_collection)
    for name, dataset_collection_instance in out_collection_instances.iteritems():
        job.add_output_dataset_collection(name, dataset_collection_instance)
    job.object_store_id = object_store_populator.object_store_id
    if job_params:
        job.params = dumps(job_params)
    job.set_handler(tool.get_job_handler(job_params))
    trans.sa_session.add(job)
    # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs
    # This functionality requires tracking jobs in the database.
    if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
        try:
            old_job = trans.sa_session.query(trans.app.model.Job).get(rerun_remap_job_id)
            assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, job.id)
            assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, job.id, old_job.tool_id, job.tool_id)
            if trans.user is not None:
                assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, job.id, old_job.user_id, trans.user.id)
            elif trans.user is None and type(galaxy_session) == trans.model.GalaxySession:
                assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, job.id, old_job.session_id, galaxy_session.id)
            else:
                raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, job.id))
            for jtod in old_job.output_datasets:
                for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                    if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (trans.user is None and job_to_remap.session_id == galaxy_session.id):
                        if job_to_remap.state == job_to_remap.states.PAUSED:
                            job_to_remap.state = job_to_remap.states.NEW
                        for hda in [dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets]:
                            if hda.state == hda.states.PAUSED:
                                hda.state = hda.states.NEW
                                hda.info = None
                        for p in job_to_remap.parameters:
                            if p.name == jtid.name and p.value == str(jtod.dataset.id):
                                p.value = str(out_data[jtod.name].id)
                        jtid.dataset = out_data[jtod.name]
                        jtid.dataset.hid = jtod.dataset.hid
                        log.info('Job %s input HDA %s remapped to new HDA %s' % (job_to_remap.id, jtod.dataset.id, jtid.dataset.id))
                        trans.sa_session.add(job_to_remap)
                        trans.sa_session.add(jtid)
                jtod.dataset.visible = False
                trans.sa_session.add(jtod)
        except Exception, e:
            log.exception('Cannot remap rerun dependencies.')
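# When a collection input is reduced, the code above stores a string marker
# ("__collection_reduce__|<id>") in place of the parameter value. A sketch of
# that encoding plus a hypothetical inverse (the decode helper is illustrative,
# not a Galaxy API):
COLLECTION_REDUCE_PREFIX = "__collection_reduce__|"

def encode_reduced_collection(dataset_collection_id):
    # Mirrors the marker format written into incoming[name] above.
    return "%s%s" % (COLLECTION_REDUCE_PREFIX, dataset_collection_id)

def decode_reduced_collection(value):
    """Return the collection id if value carries the marker, else None."""
    if isinstance(value, basestring) and value.startswith(COLLECTION_REDUCE_PREFIX):
        return value[len(COLLECTION_REDUCE_PREFIX):]
    return None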
        test_config.plugins.addPlugin( StructuredTestDataPlugin() )
        test_config.configure( sys.argv )
        result = run_tests( test_config )
        success = result.wasSuccessful()
        return success

    if testing_migrated_tools or testing_installed_tools:
        shed_tools_dict = {}
        if testing_migrated_tools:
            has_test_data, shed_tools_dict = parse_tool_panel_config( migrated_tool_panel_config, shed_tools_dict )
        elif testing_installed_tools:
            for shed_tool_config in installed_tool_panel_configs:
                has_test_data, shed_tools_dict = parse_tool_panel_config( shed_tool_config, shed_tools_dict )
        # Persist the shed_tools_dict to the galaxy_tool_shed_test_file.
        shed_tools_file = open( galaxy_tool_shed_test_file, 'w' )
        shed_tools_file.write( dumps( shed_tools_dict ) )
        shed_tools_file.close()
        if not os.path.isabs( galaxy_tool_shed_test_file ):
            galaxy_tool_shed_test_file = os.path.join( os.getcwd(), galaxy_tool_shed_test_file )
        os.environ[ 'GALAXY_TOOL_SHED_TEST_FILE' ] = galaxy_tool_shed_test_file
        if testing_installed_tools:
            # Eliminate the migrated_tool_panel_config from the app's tool_configs, append the list
            # of installed_tool_panel_configs, and reload the app's toolbox.
            relative_migrated_tool_panel_config = os.path.join( app.config.root, migrated_tool_panel_config )
            if relative_migrated_tool_panel_config in tool_configs:
                tool_configs.remove( relative_migrated_tool_panel_config )
            for installed_tool_panel_config in installed_tool_panel_configs:
                tool_configs.append( installed_tool_panel_config )
            app.toolbox = tools.ToolBox( tool_configs, app.config.tool_path, app )
        success = _run_functional_test( testing_shed_tools=True )
    try:
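# The block above persists shed_tools_dict as JSON and points at it through an
# environment variable. The same round trip in miniature (paths and keys here
# are hypothetical):
import json
import os

shed_tools_dict = { 'toolshed/owner/repo/tool_id': '/tmp/shed_tools/repo' }
path = '/tmp/shed_tools_dict.json'
open( path, 'w' ).write( json.dumps( shed_tools_dict ) )
os.environ[ 'GALAXY_TOOL_SHED_TEST_FILE' ] = os.path.abspath( path )
assert json.loads( open( path ).read() ) == shed_tools_dict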
def get_state( self, secure=True ):
    return dumps( self.state )
                         dataset_id=data.dataset.id,
                         dbkey=uploaded_dataset.dbkey,
                         type=uploaded_dataset.type,
                         is_binary=is_binary,
                         link_data_only=link_data_only,
                         uuid=uuid_str,
                         to_posix_lines=getattr(uploaded_dataset, "to_posix_lines", True),
                         space_to_tab=uploaded_dataset.space_to_tab,
                         in_place=trans.app.config.external_chown_script is None,
                         path=uploaded_dataset.path )
            # TODO: This will have to change when we start bundling inputs.
            # Also, in_place above causes the file to be left behind since the
            # user cannot remove it unless the parent directory is writable.
            if link_data_only == 'copy_files' and trans.app.config.external_chown_script:
                _chown( uploaded_dataset.path )
        json_file.write( dumps( json ) + '\n' )
    json_file.close()
    if trans.app.config.external_chown_script:
        _chown( json_file_path )
    return json_file_path
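# The upload paramfile written above is newline-delimited JSON: one dumps()
# per dataset, each record on its own line. A self-contained sketch of the
# format (field names are illustrative, not the real upload schema):
from json import dumps, loads

records = [ { 'dataset_id': 1, 'path': '/tmp/upload_0.dat' },
            { 'dataset_id': 2, 'path': '/tmp/upload_1.dat' } ]
json_file = open( '/tmp/upload_params.json', 'w' )
for rec in records:
    json_file.write( dumps( rec ) + '\n' )
json_file.close()

# Consumers read it back one record per line:
parsed = [ loads( line ) for line in open( '/tmp/upload_params.json' ) ]
assert parsed == records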
def cleanup_after_job(self): """ Set history, datasets, and jobs' attributes and clean up archive directory. """ # # Helper methods. # def file_in_dir(file_path, a_dir): """ Returns true if file is in directory. """ abs_file_path = os.path.abspath(file_path) return os.path.split(abs_file_path)[0] == a_dir def read_file_contents(file_path): """ Read contents of a file. """ fp = open(file_path, 'rb') buffsize = 1048576 file_contents = '' try: while True: file_contents += fp.read(buffsize) if not file_contents or len(file_contents) % buffsize != 0: break except OverflowError: pass fp.close() return file_contents def get_tag_str(tag, value): """ Builds a tag string for a tag, value pair. """ if not value: return tag else: return tag + ":" + value # # Import history. # jiha = self.sa_session.query(model.JobImportHistoryArchive).filter_by( job_id=self.job_id).first() if jiha: try: archive_dir = jiha.archive_dir user = jiha.job.user # # Create history. # history_attr_file_name = os.path.join(archive_dir, 'history_attrs.txt') history_attr_str = read_file_contents(history_attr_file_name) history_attrs = loads(history_attr_str) # Create history. new_history = model.History( name='imported from archive: %s' % history_attrs['name'].encode('utf-8'), user=user) new_history.importing = True new_history.hid_counter = history_attrs['hid_counter'] new_history.genome_build = history_attrs['genome_build'] self.sa_session.add(new_history) jiha.history = new_history self.sa_session.flush() # Add annotation, tags. if user: self.add_item_annotation(self.sa_session, user, new_history, history_attrs['annotation']) """ TODO: figure out to how add tags to item. for tag, value in history_attrs[ 'tags' ].items(): trans.app.tag_handler.apply_item_tags( trans, trans.user, new_history, get_tag_str( tag, value ) ) """ # # Create datasets. # datasets_attrs_file_name = os.path.join( archive_dir, 'datasets_attrs.txt') datasets_attr_str = read_file_contents( datasets_attrs_file_name) datasets_attrs = loads(datasets_attr_str) if os.path.exists(datasets_attrs_file_name + ".provenance"): provenance_attr_str = read_file_contents( datasets_attrs_file_name + ".provenance") provenance_attrs = loads(provenance_attr_str) datasets_attrs += provenance_attrs # Get counts of how often each dataset file is used; a file can # be linked to multiple dataset objects (HDAs). datasets_usage_counts = {} for dataset_attrs in datasets_attrs: temp_dataset_file_name = \ os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) ) if (temp_dataset_file_name not in datasets_usage_counts): datasets_usage_counts[temp_dataset_file_name] = 0 datasets_usage_counts[temp_dataset_file_name] += 1 # Create datasets. for dataset_attrs in datasets_attrs: metadata = dataset_attrs['metadata'] # Create dataset and HDA. 
hda = model.HistoryDatasetAssociation( name=dataset_attrs['name'].encode('utf-8'), extension=dataset_attrs['extension'], info=dataset_attrs['info'].encode('utf-8'), blurb=dataset_attrs['blurb'], peek=dataset_attrs['peek'], designation=dataset_attrs['designation'], visible=dataset_attrs['visible'], dbkey=metadata['dbkey'], metadata=metadata, history=new_history, create_dataset=True, sa_session=self.sa_session) if 'uuid' in dataset_attrs: hda.dataset.uuid = dataset_attrs["uuid"] if dataset_attrs.get('exported', True) is False: hda.state = hda.states.DISCARDED hda.deleted = True hda.purged = True else: hda.state = hda.states.OK self.sa_session.add(hda) self.sa_session.flush() new_history.add_dataset(hda, genome_build=None) hda.hid = dataset_attrs[ 'hid'] # Overwrite default hid set when HDA added to history. # TODO: Is there a way to recover permissions? Is this needed? # permissions = trans.app.security_agent.history_get_default_permissions( new_history ) # trans.app.security_agent.set_all_dataset_permissions( hda.dataset, permissions ) self.sa_session.flush() if dataset_attrs.get('exported', True) is True: # Do security check and move/copy dataset data. temp_dataset_file_name = \ os.path.realpath( os.path.abspath( os.path.join( archive_dir, dataset_attrs['file_name'] ) ) ) if not file_in_dir( temp_dataset_file_name, os.path.join(archive_dir, "datasets")): raise MalformedContents( "Invalid dataset path: %s" % temp_dataset_file_name) if datasets_usage_counts[temp_dataset_file_name] == 1: self.app.object_store.update_from_file( hda.dataset, file_name=temp_dataset_file_name, create=True) # Import additional files if present. Histories exported previously might not have this attribute set. dataset_extra_files_path = dataset_attrs.get( 'extra_files_path', None) if dataset_extra_files_path: try: file_list = os.listdir( os.path.join(archive_dir, dataset_extra_files_path)) except OSError: file_list = [] if file_list: for extra_file in file_list: self.app.object_store.update_from_file( hda.dataset, extra_dir='dataset_%s_files' % hda.dataset.id, alt_name=extra_file, file_name=os.path.join( archive_dir, dataset_extra_files_path, extra_file), create=True) else: datasets_usage_counts[temp_dataset_file_name] -= 1 shutil.copyfile(temp_dataset_file_name, hda.file_name) hda.dataset.set_total_size( ) # update the filesize record in the database # Set tags, annotations. if user: self.add_item_annotation(self.sa_session, user, hda, dataset_attrs['annotation']) # TODO: Set tags. """ for tag, value in dataset_attrs[ 'tags' ].items(): trans.app.tag_handler.apply_item_tags( trans, trans.user, hda, get_tag_str( tag, value ) ) self.sa_session.flush() """ # Although metadata is set above, need to set metadata to recover BAI for BAMs. if hda.extension == 'bam': self.app.datatypes_registry.set_external_metadata_tool.tool_action.execute_via_app( self.app.datatypes_registry. set_external_metadata_tool, self.app, jiha.job.session_id, new_history.id, jiha.job.user, incoming={'input1': hda}, overwrite=False) # # Create jobs. # # Read jobs attributes. jobs_attr_file_name = os.path.join(archive_dir, 'jobs_attrs.txt') jobs_attr_str = read_file_contents(jobs_attr_file_name) # Decode jobs attributes. def as_hda(obj_dct): """ Hook to 'decode' an HDA; method uses history and HID to get the HDA represented by the encoded object. This only works because HDAs are created above. 
""" if obj_dct.get('__HistoryDatasetAssociation__', False): return self.sa_session.query( model.HistoryDatasetAssociation).filter_by( history=new_history, hid=obj_dct['hid']).first() return obj_dct jobs_attrs = loads(jobs_attr_str, object_hook=as_hda) # Create each job. for job_attrs in jobs_attrs: imported_job = model.Job() imported_job.user = user # TODO: set session? # imported_job.session = trans.get_galaxy_session().id imported_job.history = new_history imported_job.imported = True imported_job.tool_id = job_attrs['tool_id'] imported_job.tool_version = job_attrs['tool_version'] imported_job.set_state(job_attrs['state']) imported_job.info = job_attrs.get('info', None) imported_job.exit_code = job_attrs.get('exit_code', None) imported_job.traceback = job_attrs.get('traceback', None) imported_job.stdout = job_attrs.get('stdout', None) imported_job.stderr = job_attrs.get('stderr', None) imported_job.command_line = job_attrs.get( 'command_line', None) try: imported_job.create_time = datetime.datetime.strptime( job_attrs["create_time"], "%Y-%m-%dT%H:%M:%S.%f") imported_job.update_time = datetime.datetime.strptime( job_attrs["update_time"], "%Y-%m-%dT%H:%M:%S.%f") except: pass self.sa_session.add(imported_job) self.sa_session.flush() class HistoryDatasetAssociationIDEncoder(json.JSONEncoder): """ Custom JSONEncoder for a HistoryDatasetAssociation that encodes an HDA as its ID. """ def default(self, obj): """ Encode an HDA, default encoding for everything else. """ if isinstance(obj, model.HistoryDatasetAssociation): return obj.id return json.JSONEncoder.default(self, obj) # Set parameters. May be useful to look at metadata.py for creating parameters. # TODO: there may be a better way to set parameters, e.g.: # for name, value in tool.params_to_strings( incoming, trans.app ).iteritems(): # job.add_parameter( name, value ) # to make this work, we'd need to flesh out the HDA objects. The code below is # relatively similar. for name, value in job_attrs['params'].items(): # Transform parameter values when necessary. if isinstance(value, model.HistoryDatasetAssociation): # HDA input: use hid to find input. input_hda = self.sa_session.query( model.HistoryDatasetAssociation ) \ .filter_by( history=new_history, hid=value.hid ).first() value = input_hda.id # print "added parameter %s-->%s to job %i" % ( name, value, imported_job.id ) imported_job.add_parameter( name, dumps(value, cls=HistoryDatasetAssociationIDEncoder)) # TODO: Connect jobs to input datasets. # Connect jobs to output datasets. for output_hid in job_attrs['output_datasets']: # print "%s job has output dataset %i" % (imported_job.id, output_hid) output_hda = self.sa_session.query( model.HistoryDatasetAssociation).filter_by( history=new_history, hid=output_hid).first() if output_hda: imported_job.add_output_dataset( output_hda.name, output_hda) # Connect jobs to input datasets. if 'input_mapping' in job_attrs: for input_name, input_hid in job_attrs[ 'input_mapping'].items(): input_hda = self.sa_session.query( model.HistoryDatasetAssociation ) \ .filter_by( history=new_history, hid=input_hid ).first() if input_hda: imported_job.add_input_dataset( input_name, input_hda) self.sa_session.flush() # Done importing. new_history.importing = False self.sa_session.flush() # Cleanup. if os.path.exists(archive_dir): shutil.rmtree(archive_dir) except Exception, e: jiha.job.stderr += "Error cleaning up history import job: %s" % e self.sa_session.flush() raise
def log_action( self, user=None, action=None, context=None, params=None ):
    """
    Application-level logging of user actions.
    """
    if self.app.config.log_actions:
        action = self.app.model.UserAction( action=action, context=context, params=unicode( dumps( params ) ) )
        try:
            if user:
                action.user = user
            else:
                action.user = self.user
        except:
            action.user = None
        try:
            action.session_id = self.galaxy_session.id
        except:
            action.session_id = None
        self.sa_session.add( action )
        self.sa_session.flush()
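# Typical call site, mirroring the grid code earlier in this file; params is
# any JSON-serializable dict:
# trans.log_action( trans.get_user(), u"grid.view", context, { "sort": sort_key } )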
def execute_via_app(self, tool, app, session_id, history_id, user=None,
                    incoming={}, set_output_hid=False, overwrite=True,
                    history=None, job_params=None):
    """
    Execute using application.
    """
    for name, value in incoming.iteritems():
        if isinstance(value, app.model.HistoryDatasetAssociation):
            dataset = value
            dataset_name = name
            type = 'hda'
            break
        elif isinstance(value, app.model.LibraryDatasetDatasetAssociation):
            dataset = value
            dataset_name = name
            type = 'ldda'
            break
    else:
        raise Exception('The dataset to set metadata on could not be determined.')

    sa_session = app.model.context

    # Create the job object
    job = app.model.Job()
    job.session_id = session_id
    job.history_id = history_id
    job.tool_id = tool.id
    if user:
        job.user_id = user.id
    if job_params:
        job.params = dumps(job_params)
    start_job_state = job.state  # should be job.states.NEW
    try:
        # For backward compatibility, some tools may not have versions yet.
        job.tool_version = tool.version
    except:
        job.tool_version = "1.0.1"
    # We need to set job state to something other than NEW, or else when tracking jobs in db
    # it will be picked up before we have added input / output parameters.
    job.state = job.states.WAITING
    job.set_handler(tool.get_job_handler(job_params))
    sa_session.add(job)
    sa_session.flush()  # ensure job.id is available

    # add parameters to job_parameter table
    # Store original dataset state, so we can restore it. A separate table might
    # be better (no chance of 'losing' the original state)?
    incoming['__ORIGINAL_DATASET_STATE__'] = dataset.state
    input_paths = [DatasetPath(dataset.id, real_path=dataset.file_name, mutable=False)]
    app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(job.id))
    job_working_dir = app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(job.id))
    external_metadata_wrapper = JobExternalOutputMetadataWrapper(job)
    cmd_line = external_metadata_wrapper.setup_external_metadata(
        dataset, sa_session,
        exec_dir=None,
        tmp_dir=job_working_dir,
        dataset_files_path=app.model.Dataset.file_path,
        output_fnames=input_paths,
        config_root=app.config.root,
        config_file=app.config.config_file,
        datatypes_config=app.datatypes_registry.integrated_datatypes_configs,
        job_metadata=None,
        include_command=False,
        max_metadata_value_size=app.config.max_metadata_value_size,
        kwds={'overwrite': overwrite})
    incoming['__SET_EXTERNAL_METADATA_COMMAND_LINE__'] = cmd_line
    for name, value in tool.params_to_strings(incoming, app).iteritems():
        job.add_parameter(name, value)
    # add the dataset to job_to_input_dataset table
    if type == 'hda':
        job.add_input_dataset(dataset_name, dataset)
    elif type == 'ldda':
        job.add_input_library_dataset(dataset_name, dataset)
    # Need a special state here to show that metadata is being set and also allow the job to run;
    # i.e. if state was set to 'running' the set metadata job would never run, as it would wait
    # for input (the dataset to set metadata on) to be in a ready state
    dataset._state = dataset.states.SETTING_METADATA
    job.state = start_job_state  # job inputs have been configured, restore initial job state
    sa_session.flush()

    # Queue the job for execution
    app.job_queue.put(job.id, tool.id)
    # FIXME: need to add event logging to app and log events there rather than trans.
    # trans.log_event( "Added set external metadata job to the job queue, id: %s" % str( job.id ), tool_id=job.tool_id )

    # clear e.g. converted files
    dataset.datatype.before_setting_metadata(dataset)

    return job, odict()
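# execute_via_app starts by scanning incoming for the first HDA/LDDA value.
# The same scan as a standalone sketch (a hypothetical helper; the class
# objects are parameters here rather than pulled from app.model):
def find_metadata_target(incoming, hda_cls, ldda_cls):
    """Return (param_name, dataset, kind) for the first dataset parameter."""
    for name, value in incoming.items():
        if isinstance(value, hda_cls):
            return name, value, 'hda'
        elif isinstance(value, ldda_cls):
            return name, value, 'ldda'
    raise Exception('The dataset to set metadata on could not be determined.')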
def execute(self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None, mapping_over_collection=False):
    """
    Executes a tool, creating job and tool outputs, associating them, and
    submitting the job to the job queue. If history is not specified, use
    trans.history as destination for tool's output datasets.
    """
    assert tool.allow_user_access( trans.user ), "User (%s) is not allowed to access this tool." % ( trans.user )
    # Set history.
    if not history:
        history = tool.get_default_history_by_trans( trans, create=True )

    out_data = odict()
    out_collections = {}
    out_collection_instances = {}
    # Track input dataset collections - but replace with simply lists so collect
    # input datasets can process these normally.
    inp_dataset_collections = self.collect_input_dataset_collections( tool, incoming )
    # Collect any input datasets from the incoming parameters
    inp_data = self.collect_input_datasets( tool, incoming, trans )

    # Deal with input dataset names, 'dbkey' and types
    input_names = []
    input_ext = 'data'
    input_dbkey = incoming.get( "dbkey", "?" )
    inp_items = inp_data.items()
    inp_items.reverse()
    for name, data in inp_items:
        if not data:
            data = NoneDataset( datatypes_registry=trans.app.datatypes_registry )
            continue

        # Convert LDDA to an HDA.
        if isinstance( data, LibraryDatasetDatasetAssociation ):
            data = data.to_history_dataset_association( None )
            inp_data[ name ] = data
        else:  # HDA
            if data.hid:
                input_names.append( 'data %s' % data.hid )

        input_ext = data.ext

        if data.dbkey not in [ None, '?' ]:
            input_dbkey = data.dbkey

        identifier = getattr( data, "element_identifier", None )
        if identifier is not None:
            incoming[ "%s|__identifier__" % name ] = identifier

    # Collect chromInfo dataset and add as parameters to incoming
    ( chrom_info, db_dataset ) = trans.app.genome_builds.get_chrom_info( input_dbkey, trans=trans, custom_build_hack_get_len_from_fasta_conversion=tool.id != 'CONVERTER_fasta_to_len' )
    if db_dataset:
        inp_data.update( { "chromInfo": db_dataset } )
    incoming[ "chromInfo" ] = chrom_info

    # Determine output dataset permission/roles list
    existing_datasets = [ inp for inp in inp_data.values() if inp ]
    if existing_datasets:
        output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
    else:
        # No valid inputs, we will use history defaults
        output_permissions = trans.app.security_agent.history_get_default_permissions( history )

    # Build name for output datasets based on tool name and input names
    on_text = on_text_for_names( input_names )

    # Add the dbkey to the incoming parameters
    incoming[ "dbkey" ] = input_dbkey
    # Wrapped params are used by the change_format action and by output.label;
    # only perform this wrapping once, as needed.
    wrapped_params = WrappedParameters( trans, tool, incoming )
    # Keep track of parent / child relationships, we'll create all the
    # datasets first, then create the associations
    parent_to_child_pairs = []
    child_dataset_names = set()
    object_store_populator = ObjectStorePopulator( trans.app )

    def handle_output( name, output ):
        if output.parent:
            parent_to_child_pairs.append( ( output.parent, name ) )
            child_dataset_names.add( name )
        # What is the following hack for? Need to document under what
        # conditions can the following occur? ([email protected])
        # HACK: the output data has already been created; this happens
        # e.g. as a result of the async controller.
        if name in incoming:
            dataid = incoming[ name ]
            data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
            assert data is not None
            out_data[ name ] = data
        else:
            ext = determine_output_format( output, wrapped_params.params, inp_data, input_ext )
            data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
            if output.hidden:
                data.visible = False
            # Commit the dataset immediately so it gets a database-assigned unique id
            trans.sa_session.add( data )
            trans.sa_session.flush()
            trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )

        object_store_populator.set_object_store_id( data )

        # This may not be necessary with the new parent/child associations
        data.designation = name
        # Copy metadata from one of the inputs if requested.
        # The metadata source can be either a string referencing an input
        # or an actual object to copy.
        metadata_source = output.metadata_source
        if metadata_source:
            if isinstance( metadata_source, basestring ):
                metadata_source = inp_data[ metadata_source ]

        if metadata_source is not None:
            data.init_meta( copy_from=metadata_source )
        else:
            data.init_meta()
        # Take dbkey from LAST input
        data.dbkey = str( input_dbkey )
        # Set state
        # FIXME: shouldn't this be NEW until the job runner changes it?
        data.state = data.states.QUEUED
        data.blurb = "queued"
        # Set output label
        data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
        # Store output
        out_data[ name ] = data
        if output.actions:
            # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
            output_action_params = dict( out_data )
            output_action_params.update( incoming )
            output.actions.apply_action( data, output_action_params )
        # Store all changes to database
        trans.sa_session.flush()
        return data

    for name, output in tool.outputs.items():
        if not filter_output( output, incoming ):
            if output.collection:
                collections_manager = trans.app.dataset_collections_service

                # As far as I can tell - this is always true - but just verify
                assert set_output_history, "Cannot create dataset collection for this kind of tool."

                element_identifiers = []
                input_collections = dict( [ ( k, v[0] ) for k, v in inp_dataset_collections.iteritems() ] )
                known_outputs = output.known_outputs( input_collections, collections_manager.type_registry )
                # Just to echo TODO elsewhere - this should be restructured to allow
                # nested collections.
                for output_part_def in known_outputs:
                    # Add elements to top-level collection, unless nested...
                    current_element_identifiers = element_identifiers
                    current_collection_type = output.structure.collection_type

                    for parent_id in ( output_part_def.parent_ids or [] ):
                        # TODO: replace following line with formal abstractions for doing this.
                        current_collection_type = ":".join( current_collection_type.split( ":" )[ 1: ] )
                        name_to_index = dict( map( lambda ( index, value ): ( value[ "name" ], index ), enumerate( current_element_identifiers ) ) )
                        if parent_id not in name_to_index:
                            if parent_id not in current_element_identifiers:
                                index = len( current_element_identifiers )
                                current_element_identifiers.append( dict(
                                    name=parent_id,
                                    collection_type=current_collection_type,
                                    src="new_collection",
                                    element_identifiers=[],
                                ) )
                            else:
                                index = name_to_index[ parent_id ]
                        current_element_identifiers = current_element_identifiers[ index ][ "element_identifiers" ]

                    effective_output_name = output_part_def.effective_output_name
                    element = handle_output( effective_output_name, output_part_def.output_def )
                    # The following hack causes the dataset to not be added to the history...
                    child_dataset_names.add( effective_output_name )

                    if set_output_history:
                        history.add_dataset( element, set_hid=set_output_hid )
                    trans.sa_session.add( element )
                    trans.sa_session.flush()

                    current_element_identifiers.append( {
                        "__object__": element,
                        "name": output_part_def.element_identifier,
                    } )
                    log.info( element_identifiers )

                if output.dynamic_structure:
                    assert not element_identifiers  # known_outputs must have been empty
                    element_kwds = dict( elements=collections_manager.ELEMENTS_UNINITIALIZED )
                else:
                    element_kwds = dict( element_identifiers=element_identifiers )

                if mapping_over_collection:
                    dc = collections_manager.create_dataset_collection(
                        trans,
                        collection_type=output.structure.collection_type,
                        **element_kwds
                    )
                    out_collections[ name ] = dc
                else:
                    hdca_name = self.get_output_name( output, None, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
                    hdca = collections_manager.create(
                        trans,
                        history,
                        name=hdca_name,
                        collection_type=output.structure.collection_type,
                        trusted_identifiers=True,
                        **element_kwds
                    )
                    # name here is the name of the output element - not the name
                    # of the hdca.
                    out_collection_instances[ name ] = hdca
            else:
                handle_output_timer = ExecutionTimer()
                handle_output( name, output )
                log.info( "Handled output %s" % handle_output_timer )

    # Add all the top-level (non-child) datasets to the history unless otherwise specified
    for name in out_data.keys():
        if name not in child_dataset_names and name not in incoming:  # don't add children; or already existing datasets, i.e. async created
            data = out_data[ name ]
            if set_output_history:
                history.add_dataset( data, set_hid=set_output_hid )
            trans.sa_session.add( data )
            trans.sa_session.flush()
    # Add all the children to their parents
    for parent_name, child_name in parent_to_child_pairs:
        parent_dataset = out_data[ parent_name ]
        child_dataset = out_data[ child_name ]
        parent_dataset.children.append( child_dataset )
    # Store data after custom code runs
    trans.sa_session.flush()
    # Create the job object
    job = trans.app.model.Job()

    if hasattr( trans, "get_galaxy_session" ):
        galaxy_session = trans.get_galaxy_session()
        # If we're submitting from the API, there won't be a session.
        if type( galaxy_session ) == trans.model.GalaxySession:
            job.session_id = galaxy_session.id
    if trans.user is not None:
        job.user_id = trans.user.id
    job.history_id = history.id
    job.tool_id = tool.id
    try:
        # For backward compatibility, some tools may not have versions yet.
        job.tool_version = tool.version
    except:
        job.tool_version = "1.0.0"
    # FIXME: Don't need all of incoming here, just the defined parameters
    #        from the tool. We need to deal with tools that pass all post
    #        parameters to the command as a special case.
    for name, ( dataset_collection, reduced ) in inp_dataset_collections.iteritems():
        # TODO: Does this work if nested in repeat/conditional?
        if reduced:
            incoming[ name ] = "__collection_reduce__|%s" % dataset_collection.id
        # Should we verify security here? We check the security of the
        # individual datasets below.
        job.add_input_dataset_collection( name, dataset_collection )
    for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
        job.add_parameter( name, value )
    current_user_roles = trans.get_current_user_roles()
    access_timer = ExecutionTimer()
    for name, dataset in inp_data.iteritems():
        if dataset:
            if not trans.app.security_agent.can_access_dataset( current_user_roles, dataset.dataset ):
                raise Exception( "User does not have permission to use a dataset (%s) provided for input." % dataset.id )
            job.add_input_dataset( name, dataset )
        else:
            job.add_input_dataset( name, None )
    log.info( "Verified access to datasets %s" % access_timer )
    for name, dataset in out_data.iteritems():
        job.add_output_dataset( name, dataset )
    for name, dataset_collection in out_collections.iteritems():
        job.add_implicit_output_dataset_collection( name, dataset_collection )
    for name, dataset_collection_instance in out_collection_instances.iteritems():
        job.add_output_dataset_collection( name, dataset_collection_instance )
    job.object_store_id = object_store_populator.object_store_id
    if job_params:
        job.params = dumps( job_params )
    job.set_handler( tool.get_job_handler( job_params ) )
    trans.sa_session.add( job )
    # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs.
    # This functionality requires tracking jobs in the database.
    if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
        try:
            old_job = trans.sa_session.query( trans.app.model.Job ).get( rerun_remap_job_id )
            assert old_job is not None, '(%s/%s): Old job id is invalid' % ( rerun_remap_job_id, job.id )
            assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % ( old_job.id, job.id, old_job.tool_id, job.tool_id )
            if trans.user is not None:
                assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % ( old_job.id, job.id, old_job.user_id, trans.user.id )
            elif trans.user is None and type( galaxy_session ) == trans.model.GalaxySession:
                assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % ( old_job.id, job.id, old_job.session_id, galaxy_session.id )
            else:
                raise Exception( '(%s/%s): Remapping via the API is not (yet) supported' % ( old_job.id, job.id ) )
            for jtod in old_job.output_datasets:
                for ( job_to_remap, jtid ) in [ ( jtid.job, jtid ) for jtid in jtod.dataset.dependent_jobs ]:
                    if ( trans.user is not None and job_to_remap.user_id == trans.user.id ) or ( trans.user is None and job_to_remap.session_id == galaxy_session.id ):
                        if job_to_remap.state == job_to_remap.states.PAUSED:
                            job_to_remap.state = job_to_remap.states.NEW
                        for hda in [ dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets ]:
                            if hda.state == hda.states.PAUSED:
                                hda.state = hda.states.NEW
                                hda.info = None
                        for p in job_to_remap.parameters:
                            if p.name == jtid.name and p.value == str( jtod.dataset.id ):
                                p.value = str( out_data[ jtod.name ].id )
                        jtid.dataset = out_data[ jtod.name ]
                        jtid.dataset.hid = jtod.dataset.hid
                        log.info( 'Job %s input HDA %s remapped to new HDA %s' % ( job_to_remap.id, jtod.dataset.id, jtid.dataset.id ) )
                        trans.sa_session.add( job_to_remap )
                        trans.sa_session.add( jtid )
                jtod.dataset.visible = False
                trans.sa_session.add( jtod )
        except Exception:
            log.exception( 'Cannot remap rerun dependencies.' )
    trans.sa_session.flush()
    # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
    # Examples include tools that redirect to other applications ( epigraph ). These special tools must
    # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
    # from being queued.
    if 'REDIRECT_URL' in incoming:
        # Get the dataset - there should only be 1
        for name in inp_data.keys():
            dataset = inp_data[ name ]
        redirect_url = tool.parse_redirect_url( dataset, incoming )
        # GALAXY_URL should be included in the tool params to enable the external application
        # to send the response back to the current Galaxy instance.
        GALAXY_URL = incoming.get( 'GALAXY_URL', None )
        assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
        redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
        # Job should not be queued, so set state to ok
        job.set_state( trans.app.model.Job.states.OK )
        job.info = "Redirected to: %s" % redirect_url
        trans.sa_session.add( job )
        trans.sa_session.flush()
        trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
    else:
        # Put the job in the queue if tracking in memory
        trans.app.job_queue.put( job.id, job.tool_id )
        trans.log_event( "Added job to the job queue, id: %s" % str( job.id ), tool_id=job.tool_id )
        return job, out_data
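# Hedged sketch (not from the original source): a typical call into execute above
# for a tool rerun where the user chose to continue dependent jobs. The attribute
# tool.tool_action and the shape of 'params' are assumptions for illustration.
def _example_rerun_with_remap( trans, tool, params, old_job_id ):
    # rerun_remap_job_id triggers the remapping branch above, moving paused
    # dependent jobs of the old job onto the new job's outputs.
    job, out_data = tool.tool_action.execute( tool, trans,
                                              incoming=params,
                                              history=trans.history,
                                              rerun_remap_job_id=old_job_id )
    return job, out_data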
                                                                               repository, None, as_html=False )
                    results[ 'unsuccessful_count' ] += 1
                else:
                    message = "Successfully reset metadata on repository %s owned by %s" % \
                        ( str( repository.name ), str( repository.owner ) )
                    results[ 'successful_count' ] += 1
            except Exception, e:
                message = "Error resetting metadata on repository %s owned by %s: %s" % \
                    ( str( repository.name ), str( repository.owner ), str( e ) )
                results[ 'unsuccessful_count' ] += 1
            results[ 'repository_status' ].append( message )
        stop_time = strftime( "%Y-%m-%d %H:%M:%S" )
        results[ 'stop_time' ] = stop_time
        return json.dumps( results, sort_keys=True, indent=4 )

    @expose_api
    def show( self, trans, id, **kwd ):
        """
        GET /api/tool_shed_repositories/{encoded_tool_shed_repository_id}
        Display a dictionary containing information about a specified tool_shed_repository.

        :param id: the encoded id of the ToolShedRepository object
        """
        # Example URL: http://localhost:8763/api/tool_shed_repositories/df7a1f0c02a5b08e
        tool_shed_repository = suc.get_tool_shed_repository_by_id( trans.app, id )
        if tool_shed_repository is None:
            log.debug( "Unable to locate tool_shed_repository record for id %s." % ( str( id ) ) )
            return {}
        tool_shed_repository_dict = tool_shed_repository.as_dict( value_mapper=self.__get_value_mapper( trans, tool_shed_repository ) )
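# Hedged client-side sketch (not part of the original controller): exercising the
# GET endpoint documented in show() above with Python 2's urllib2. The host, port
# and encoded id mirror the example URL in the docstring; api_key is the standard
# Galaxy API key query parameter and must be supplied by the caller.
def _example_show_repository( api_key ):
    import urllib2
    from json import loads
    url = 'http://localhost:8763/api/tool_shed_repositories/df7a1f0c02a5b08e?key=%s' % api_key
    return loads( urllib2.urlopen( url ).read() )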
        else:
            # This should not happen, but it's here just in case
            shutil.copy(dataset.path, output_path)
    elif link_data_only == 'copy_files':
        shutil.move(dataset.path, output_path)
    # Write the job info
    stdout = stdout or 'uploaded %s file' % data_type
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    json_file.write(dumps(info) + "\n")

    if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)


def add_composite_file(dataset, json_file, output_path, files_path):
    if dataset.composite_files:
        os.mkdir(files_path)
        for name, value in dataset.composite_files.iteritems():
            value = util.bunch.Bunch(**value)
            if dataset.composite_file_paths[value.name] is None and not value.optional:
                file_err(
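# Hedged sketch (illustrative, not from the original source): the shape of the
# composite-files mapping that add_composite_file above iterates over. Each value
# is a plain dict re-wrapped in a Bunch so its settings read as attributes; the
# file name and the 'optional' flag here are invented sample values.
def _example_composite_file_value():
    from galaxy.util.bunch import Bunch
    value = Bunch(**{'name': 'forward.fastq', 'optional': False})
    # Mirrors the check in the loop above: a non-optional composite file
    # with no path would be reported through file_err().
    return value.name, value.optional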
cmd = "SELECT f.id, f.fields FROM form_definition AS f" result = migrate_engine.execute(cmd) for row in result: form_definition_id = row[0] fields = str(row[1]) if not fields.strip(): continue fields_list = loads(_sniffnfix_pg9_hex(fields)) if len(fields_list): for index, field in enumerate(fields_list): field['name'] = 'field_%i' % index field['helptext'] = field['helptext'].replace("'", "''").replace( '"', "") field['label'] = field['label'].replace("'", "''") fields_json = dumps(fields_list) if migrate_engine.name == 'mysql': cmd = "UPDATE form_definition AS f SET f.fields='%s' WHERE f.id=%i" % ( fields_json, form_definition_id) else: cmd = "UPDATE form_definition SET fields='%s' WHERE id=%i" % ( fields_json, form_definition_id) migrate_engine.execute(cmd) # replace the values list in the content field of the form_values table with a name:value dict cmd = "SELECT form_values.id, form_values.content, form_definition.fields" \ " FROM form_values, form_definition" \ " WHERE form_values.form_definition_id=form_definition.id" \ " ORDER BY form_values.id ASC" result = migrate_engine.execute(cmd) for row in result: form_values_id = int(row[0])