def setup_external_metadata( self, datasets, exec_dir = None, tmp_dir = None, dataset_files_path = None, output_fnames = None, kwds = None ):
    """Create the files an external set_meta() run needs and return the command that launches it.

    For each dataset that has no record yet (according to
    ``self.get_output_filenames_by_dataset``) a ``JobExternalOutputMetadata``
    record is created together with four files under ``tmp_dir``: the pickled
    dataset, an empty results file, a JSON result code pre-filled with a
    "not called" failure marker, and the JSON-encoded ``kwds``.  Datasets that
    already have a record are reused untouched, so calling this more than once
    does not lose track of the files created earlier.

    datasets           -- a single dataset or a list of datasets
    exec_dir           -- directory holding ``set_metadata.sh``; defaults to the
                          absolute current working directory
    tmp_dir            -- directory for the temporary files; defaults to
                          ``MetadataTempFile.tmp_dir``
    dataset_files_path -- defaults to ``galaxy.model.Dataset.file_path``
    output_fnames      -- optional iterable of objects exposing ``false_path``
                          and ``real_path``; used to pass a filename override
    kwds               -- dict serialized for set_meta(); defaults to an empty dict

    Returns the command line as a single space-separated string.
    """
    # A fresh dict per call instead of a shared mutable default.
    if kwds is None:
        kwds = {}

    def _filename_override( metadata_files ):
        # If the dataset was written to a "false path", the script must read that path instead.
        if output_fnames:
            for dataset_path in output_fnames:
                if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
                    return dataset_path.false_path
        return ""

    def _to_cmd_line_arg( metadata_files ):
        return "%s,%s,%s,%s,%s" % ( metadata_files.filename_in,
                                    metadata_files.filename_kwds,
                                    metadata_files.filename_out,
                                    metadata_files.filename_results_code,
                                    _filename_override( metadata_files ) )

    def _unique_name( prefix ):
        # tempfile always returns an absolute path; relpath() is applied because the
        # galaxy root may be located differently elsewhere (e.g. on a cluster node
        # with a different filesystem structure).
        return relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = prefix ).name )

    if not isinstance( datasets, list ):
        datasets = [ datasets ]
    if exec_dir is None:
        exec_dir = os.path.abspath( os.getcwd() )
    if tmp_dir is None:
        tmp_dir = MetadataTempFile.tmp_dir
    if dataset_files_path is None:
        dataset_files_path = galaxy.model.Dataset.file_path

    metadata_files_list = []
    for dataset in datasets:
        key = self.get_dataset_metadata_key( dataset )
        # Job execution may build the command line more than once (and metadata can be
        # set externally via 'auto-detect' in edit attributes, etc.).  Existing records
        # must not be overwritten, or the ability to clean up their files is lost, so
        # the files are only created the first time a dataset is seen.
        metadata_files = self.get_output_filenames_by_dataset( dataset )
        if not metadata_files:
            metadata_files = galaxy.model.JobExternalOutputMetadata( dataset = dataset )
            metadata_files.job_id = self.job_id
            # Each file is written to disk right after its name is chosen, so tempfile
            # cannot hand the same name out again (unlikely, but possible).

            # file to store the existing dataset
            metadata_files.filename_in = _unique_name( "metadata_in_%s_" % key )
            with open( metadata_files.filename_in, 'wb+' ) as handle:
                cPickle.dump( dataset, handle )

            # file to store metadata results of set_meta()
            metadata_files.filename_out = _unique_name( "metadata_out_%s_" % key )
            open( metadata_files.filename_out, 'wb+' ).close()

            # file to store a 'return code' for the set_meta() call: a pair of
            # ( True/False for success/failure, exception or string giving the reason ).
            # It gets its own prefix so it cannot be confused with the results file.
            metadata_files.filename_results_code = _unique_name( "metadata_results_%s_" % key )
            with open( metadata_files.filename_results_code, 'wb+' ) as handle:
                simplejson.dump( ( False, 'External set_meta() not called' ), handle )

            # file to store kwds passed to set_meta()
            metadata_files.filename_kwds = _unique_name( "metadata_kwds_%s_" % key )
            with open( metadata_files.filename_kwds, 'wb+' ) as handle:
                simplejson.dump( kwds, handle, ensure_ascii=True )

            metadata_files.flush()
        metadata_files_list.append( metadata_files )

    # NOTE(review): the pieces are joined without shell quoting; paths containing
    # spaces would break the command line -- confirm how callers execute it.
    return "%s %s %s %s" % ( os.path.join( exec_dir, 'set_metadata.sh' ),
                             dataset_files_path,
                             tmp_dir,
                             " ".join( map( _to_cmd_line_arg, metadata_files_list ) ) )
def get_one_filename(context):
    """Pick the single upload source described by ``context``.

    Sources are tried in priority order: the uploaded file (``file_data``),
    then the paste box (``url_paste``), then files selected from the user's
    FTP directory (``ftp_files``).  When a higher-priority source is used, a
    warning is recorded for each lower-priority source that was also supplied.

    Returns ``(file_bunch, warnings)``.  ``file_bunch`` may have a false
    ``path`` if no source produced a file; the conversion options and ``uuid``
    are set on it either way.

    Raises AssertionError if the user's FTP directory is a symbolic link.
    """
    def _is_set(option):
        # The string "None" is treated the same as a missing value.
        return context.get(option, None) not in ["None", None, False]

    def _matching_ftp_paths(user_ftp_dir):
        # Lazily yield, relative to user_ftp_dir, every non-symlink file whose
        # bare name equals one of the requested ftp_files entries.
        for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
            for filename in filenames:
                for ftp_filename in ftp_files:
                    if ftp_filename != filename:
                        continue
                    candidate = os.path.join(dirpath, filename)
                    if not os.path.islink(candidate):
                        yield relpath(candidate, user_ftp_dir)

    data_file = context['file_data']
    url_paste = context['url_paste']
    ftp_files = context['ftp_files']
    name = context.get('NAME', None)
    info = context.get('INFO', None)
    uuid = context.get('uuid', None) or None  # Turn '' to None
    warnings = []
    to_posix_lines = _is_set('to_posix_lines')
    auto_decompress = _is_set('auto_decompress')
    space_to_tab = _is_set('space_to_tab')

    file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
    if file_bunch.path:
        if url_paste is not None and url_paste.strip():
            warnings.append("All file contents specified in the paste box were ignored.")
        if ftp_files:
            warnings.append("All FTP uploaded file selections were ignored.")
    elif url_paste is not None and url_paste.strip():  # we need to use url_paste
        for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
            if file_bunch.path:
                break
        if file_bunch.path and ftp_files is not None:
            warnings.append("All FTP uploaded file selections were ignored.")
    elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
        user_ftp_dir = trans.user_ftp_dir
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if os.path.islink(user_ftp_dir):
            raise AssertionError("User FTP directory cannot be a symbolic link")
        for path in _matching_ftp_paths(user_ftp_dir):
            ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, path)),
                             'filename': os.path.basename(path)}
            purge = getattr(trans.app.config, 'ftp_upload_purge', True)
            file_bunch = get_data_file_filename(
                ftp_data_file,
                override_name=name,
                override_info=info,
                purge=purge,
            )
            # The first candidate that yields a usable file wins.
            if file_bunch.path:
                break

    file_bunch.to_posix_lines = to_posix_lines
    file_bunch.auto_decompress = auto_decompress
    file_bunch.space_to_tab = space_to_tab
    file_bunch.uuid = uuid
    return file_bunch, warnings
def get_filenames( context ):
    """Gather every upload source named in ``context`` into a list of file bunches.

    The uploaded file, each pasted URL/content entry and each selected FTP file
    is included when its bunch has a true ``path``; ``space_to_tab`` is set on
    every bunch that is returned.
    """
    rval = []
    data_file = context['file_data']
    # The value is not used below; the lookup still raises KeyError when the key is absent.
    url_paste = context['url_paste']
    ftp_files = context['ftp_files']
    name = context.get( 'NAME', None )
    info = context.get( 'INFO', None )
    # The string "None" counts as unset, like None and False.
    space_to_tab = context.get( 'space_to_tab', None ) not in [ "None", None, False ]

    def _keep_if_usable( bunch ):
        if bunch.path:
            bunch.space_to_tab = space_to_tab
            rval.append( bunch )

    _keep_if_usable( get_data_file_filename( data_file, override_name = name, override_info = info ) )
    for pasted_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
        _keep_if_usable( pasted_bunch )

    # look for files uploaded via FTP
    valid_files = []
    if ftp_files is None:
        ftp_files = []
    else:
        # Normalize input paths to ensure utf-8 encoding is normal form c.
        # This allows for comparison when the filesystem uses a different encoding than the browser.
        # Entries that are not unicode objects are dropped by the filter.
        ftp_files = [ unicodedata.normalize( 'NFC', f ) for f in ftp_files if isinstance( f, unicode ) ]
        if trans.user is None:
            log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
            ftp_files = []
            # TODO: warning to the user (could happen if session has become invalid)
        else:
            user_ftp_dir = trans.user_ftp_dir
            for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                for filename in filenames:
                    on_disk = os.path.join( dirpath, filename )
                    path = relpath( on_disk, user_ftp_dir )
                    if os.path.islink( on_disk ):
                        continue
                    # Normalize filesystem paths
                    if isinstance( path, unicode ):
                        path = unicodedata.normalize( 'NFC', path )
                    valid_files.append( path )

    for ftp_file in ftp_files:
        if ftp_file not in valid_files:
            log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
            continue
            # TODO: warning to the user (could happen if file is already imported)
        ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
                          'filename' : os.path.basename( ftp_file ) }
        _keep_if_usable( get_data_file_filename( ftp_data_file, override_name = name, override_info = info ) )
    return rval
def get_one_filename( context ):
    """Choose one upload source from ``context`` and return ``( file_bunch, warnings )``.

    Priority is: uploaded file, then paste box, then the user's FTP selections.
    Warnings list the lower-priority inputs that were supplied but ignored.
    ``to_posix_lines`` and ``space_to_tab`` are set on the returned bunch even
    when its ``path`` is false.
    """
    def _flag( option ):
        # The string "None" counts as unset, like None and False.
        return context.get( option, None ) not in [ "None", None, False ]

    def _ftp_matches( user_ftp_dir ):
        # Lazily yield the path, relative to user_ftp_dir, of each non-symlink
        # file whose bare name equals a requested ftp_files entry.
        for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
            for filename in filenames:
                for ftp_filename in ftp_files:
                    if ftp_filename == filename:
                        on_disk = os.path.join( dirpath, filename )
                        path = relpath( on_disk, user_ftp_dir )
                        if not os.path.islink( on_disk ):
                            yield path

    data_file = context['file_data']
    url_paste = context['url_paste']
    ftp_files = context['ftp_files']
    name = context.get( 'NAME', None )
    info = context.get( 'INFO', None )
    warnings = []
    to_posix_lines = _flag( 'to_posix_lines' )
    space_to_tab = _flag( 'space_to_tab' )

    file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
    if file_bunch.path:
        if url_paste is not None and url_paste.strip():
            warnings.append( "All file contents specified in the paste box were ignored." )
        if ftp_files:
            warnings.append( "All FTP uploaded file selections were ignored." )
    elif url_paste is not None and url_paste.strip():
        # we need to use url_paste
        for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
            if file_bunch.path:
                break
        if file_bunch.path and ftp_files is not None:
            warnings.append( "All FTP uploaded file selections were ignored." )
    elif ftp_files is not None and trans.user is not None:
        # look for files uploaded via FTP
        user_ftp_dir = trans.user_ftp_dir
        for path in _ftp_matches( user_ftp_dir ):
            ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, path ) ),
                              'filename' : os.path.basename( path ) }
            file_bunch = get_data_file_filename( ftp_data_file, override_name = name, override_info = info )
            # Stop at the first match that produces a usable file.
            if file_bunch.path:
                break

    file_bunch.to_posix_lines = to_posix_lines
    file_bunch.space_to_tab = space_to_tab
    return file_bunch, warnings
def get_html( self, prefix="" ):
    """Render the FTP file selection table plus a short help blurb as HTML.

    The table body is a login prompt when ``self.dir`` is None, a "no files"
    row when the directory is missing or empty, and otherwise one
    ``FTPFileField.trow`` per file found by walking ``self.dir``.
    """
    no_files_row = '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
    rval = FTPFileField.thead
    if self.dir is None:
        create_url = url_for( controller='user', action='create', cntrller='user', referer=url_for( controller='root' ) )
        login_url = url_for( controller='user', action='login', cntrller='user', referer=url_for( controller='root' ) )
        rval += '<tr><td colspan="4"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( create_url, login_url )
    elif not os.path.exists( self.dir ):
        rval += no_files_row
    else:
        # ( relative path, human readable size, formatted ctime ) for every file below self.dir
        uploads = []
        for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
            for filename in filenames:
                on_disk = os.path.join( dirpath, filename )
                path = relpath( on_disk, self.dir )
                # lstat: report on the entry itself rather than on a link target
                statinfo = os.lstat( on_disk )
                size = nice_size( statinfo.st_size )
                ctime = time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) )
                uploads.append( ( path, size, ctime ) )
        if not uploads:
            rval += no_files_row
        for ( path, size, ctime ) in uploads:
            rval += FTPFileField.trow % ( prefix, self.name, path, path, size, ctime )
    rval += FTPFileField.tfoot
    rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP. To upload some files, log in to the FTP server at <strong>%s</strong> using your Galaxy credentials (email address and password).</div>' % self.ftp_site
    return rval
def get_html( self, prefix="" ):
    """Render the FTP file selection table and its extended help text as HTML.

    The table body is a login prompt when ``self.dir`` is None, a "no files"
    row when the directory is missing or empty, and otherwise one
    ``FTPFileField.trow`` per file found by walking ``self.dir``.
    """
    no_files_row = '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
    rval = FTPFileField.thead
    if self.dir is None:
        create_url = url_for( controller='user', action='create', cntrller='user', referer=url_for( controller='root' ) )
        login_url = url_for( controller='user', action='login', cntrller='user', referer=url_for( controller='root' ) )
        rval += '<tr><td colspan="4"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( create_url, login_url )
    elif not os.path.exists( self.dir ):
        rval += no_files_row
    else:
        # ( relative path, human readable size, formatted ctime ) for every file below self.dir
        uploads = []
        for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
            for filename in filenames:
                on_disk = os.path.join( dirpath, filename )
                path = relpath( on_disk, self.dir )
                # lstat: report on the entry itself rather than on a link target
                statinfo = os.lstat( on_disk )
                size = nice_size( statinfo.st_size )
                ctime = time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) )
                uploads.append( ( path, size, ctime ) )
        if not uploads:
            rval += no_files_row
        for ( path, size, ctime ) in uploads:
            rval += FTPFileField.trow % ( prefix, self.name, path, path, size, ctime )
    rval += FTPFileField.tfoot
    rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP. To upload some files, log in to the FTP server at <strong>%s</strong> using your Galaxy credentials (email address and password). After transfering files via FTP they will appear here. To use them in further analysis you must return to this page, select these files and press the <strong>Execute</strong> button. After they are processed they will appear in your Uploaded Files project space. Consult <a href="http://wiki.galaxyproject.org/FTPUpload">the Galaxy wiki</a> for more information.</div>' % self.ftp_site
    return rval
def get_html( self, prefix="" ):
    """Render the FTP file selection table and SFTP connection help as HTML.

    The table body is a login prompt when ``self.dir`` is None, a "no files"
    row when the directory is missing or empty, and otherwise one
    ``FTPFileField.trow`` per file found by walking ``self.dir``.

    The address shown in the help text is fetched from the instance metadata
    service with ``curl``.  If that lookup fails or times out, the field's
    ``ftp_site`` attribute (if any) is shown instead, so a metadata outage no
    longer prevents the form from rendering.
    """
    no_files_row = '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
    rval = FTPFileField.thead
    if self.dir is None:
        rval += '<tr><td colspan="4"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( url_for( controller='user', action='create', cntrller='user', referer=url_for( controller='root' ) ), url_for( controller='user', action='login', cntrller='user', referer=url_for( controller='root' ) ) )
    elif not os.path.exists( self.dir ):
        rval += no_files_row
    else:
        uploads = []
        for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
            for filename in filenames:
                path = relpath( os.path.join( dirpath, filename ), self.dir )
                # lstat: report on the entry itself rather than on a link target
                statinfo = os.lstat( os.path.join( dirpath, filename ) )
                uploads.append( dict( path=path,
                                      size=nice_size( statinfo.st_size ),
                                      ctime=time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) ) ) )
        if not uploads:
            rval += no_files_row
        for upload in uploads:
            rval += FTPFileField.trow % ( prefix, self.name, upload['path'], upload['path'], upload['size'], upload['ctime'] )

    # Argument list instead of a shell string: no shell is spawned to run a fixed command.
    # --fail turns HTTP errors into a non-zero exit instead of returning an error page as
    # the "address"; --max-time bounds how long a page render can be blocked.
    try:
        galaxy_ip_address = subprocess.check_output( [ 'curl', '--silent', '--fail', '--max-time', '5',
                                                       'http://instance-data/latest/meta-data/public-ipv4' ] )
    except ( subprocess.CalledProcessError, OSError ):
        # curl missing, metadata service unreachable, or lookup timed out
        galaxy_ip_address = getattr( self, 'ftp_site', '' ) or ''
    # On Python 3 check_output returns bytes, which would render as b'...'.
    if isinstance( galaxy_ip_address, bytes ) and not isinstance( galaxy_ip_address, str ):
        galaxy_ip_address = galaxy_ip_address.decode( 'utf-8' )
    galaxy_ip_address = galaxy_ip_address.strip()

    rval += FTPFileField.tfoot
    rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP. To upload files, use an FTP program to access the FTP server at <strong>%s</strong> (using the SFTP protocol) and port <strong>2200</strong>. You will use your Galaxy credentials (email address and password) to log in to the server.</div>' % galaxy_ip_address
    return rval
def get_filenames(context):
    """Gather every upload source named in ``context`` into a list of file bunches.

    The uploaded file, each pasted URL/content entry and each selected FTP file
    is included when its bunch has a true ``path``.  Every returned bunch gets
    the conversion options (``to_posix_lines``, ``auto_decompress``,
    ``space_to_tab``) and, when they are not None, ``file_type`` and ``dbkey``.
    ``uuid`` is set on the uploaded-file and pasted bunches only.

    Raises AssertionError if the user's FTP directory is a symbolic link.
    """
    def _is_set(option):
        # The string "None" is treated the same as a missing value.
        return context.get(option, None) not in ["None", None, False]

    rval = []
    data_file = context['file_data']
    ftp_files = context['ftp_files']
    uuid = context.get('uuid', None) or None  # Turn '' to None
    name = context.get('NAME', None)
    info = context.get('INFO', None)
    file_type = context.get('file_type', None)
    dbkey = self.get_dbkey(context)
    to_posix_lines = _is_set('to_posix_lines')
    auto_decompress = _is_set('auto_decompress')
    space_to_tab = _is_set('space_to_tab')

    def _accept(bunch):
        # Apply the options shared by every source and add the bunch to the result.
        bunch.to_posix_lines = to_posix_lines
        bunch.auto_decompress = auto_decompress
        bunch.space_to_tab = space_to_tab
        if file_type is not None:
            bunch.file_type = file_type
        if dbkey is not None:
            bunch.dbkey = dbkey
        rval.append(bunch)

    file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
    file_bunch.uuid = uuid
    if file_bunch.path:
        _accept(file_bunch)
    for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
        if file_bunch.path:
            file_bunch.uuid = uuid
            _accept(file_bunch)

    # look for files uploaded via FTP
    valid_files = set()  # set: O(1) membership test per requested file
    if ftp_files is None:
        ftp_files = []
    else:
        # Normalize input paths to ensure utf-8 encoding is normal form c.
        # This allows for comparison when the filesystem uses a different encoding than the browser.
        # Entries that are not text_type are dropped by the filter.
        ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
        if trans.user is None:
            log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
            ftp_files = []
            # TODO: warning to the user (could happen if session has become invalid)
        else:
            user_ftp_dir = trans.user_ftp_dir
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            if os.path.islink(user_ftp_dir):
                raise AssertionError("User FTP directory cannot be a symbolic link")
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    on_disk = os.path.join(dirpath, filename)
                    if os.path.islink(on_disk):
                        continue
                    path = relpath(on_disk, user_ftp_dir)
                    # Normalize filesystem paths
                    if isinstance(path, text_type):
                        path = unicodedata.normalize('NFC', path)
                    valid_files.add(path)

    for ftp_file in ftp_files:
        if ftp_file not in valid_files:
            log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
            continue
            # TODO: warning to the user (could happen if file is already imported)
        ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                         'filename': os.path.basename(ftp_file)}
        purge = getattr(trans.app.config, 'ftp_upload_purge', True)
        file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
        if file_bunch.path:
            _accept(file_bunch)
    return rval
def get_one_filename(context):
    """Pick the single upload source described by ``context``.

    Sources are tried in priority order: the uploaded file (``file_data``),
    then the paste box (``url_paste``), then files selected from the user's
    FTP directory (``ftp_files``).  When a higher-priority source is used, a
    warning is recorded for each lower-priority source that was also supplied.

    Returns ``(file_bunch, warnings)``.  ``file_bunch`` may have a false
    ``path`` if no source produced a file; the conversion options, ``uuid``
    and (when not None) ``file_type`` / ``dbkey`` are set on it either way.

    Raises AssertionError if the user's FTP directory is a symbolic link.
    """
    def _is_set(option):
        # The string "None" is treated the same as a missing value.
        return context.get(option, None) not in ["None", None, False]

    def _matching_ftp_paths(user_ftp_dir):
        # Lazily yield, relative to user_ftp_dir, every non-symlink file whose
        # bare name equals one of the requested ftp_files entries.
        for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
            for filename in filenames:
                for ftp_filename in ftp_files:
                    if ftp_filename != filename:
                        continue
                    candidate = os.path.join(dirpath, filename)
                    if not os.path.islink(candidate):
                        yield relpath(candidate, user_ftp_dir)

    data_file = context['file_data']
    url_paste = context['url_paste']
    ftp_files = context['ftp_files']
    name = context.get('NAME', None)
    info = context.get('INFO', None)
    uuid = context.get('uuid', None) or None  # Turn '' to None
    file_type = context.get('file_type', None)
    dbkey = self.get_dbkey(context)
    warnings = []
    to_posix_lines = _is_set('to_posix_lines')
    auto_decompress = _is_set('auto_decompress')
    space_to_tab = _is_set('space_to_tab')

    file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
    if file_bunch.path:
        if url_paste is not None and url_paste.strip():
            warnings.append("All file contents specified in the paste box were ignored.")
        if ftp_files:
            warnings.append("All FTP uploaded file selections were ignored.")
    elif url_paste is not None and url_paste.strip():  # we need to use url_paste
        for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
            if file_bunch.path:
                break
        if file_bunch.path and ftp_files is not None:
            warnings.append("All FTP uploaded file selections were ignored.")
    elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
        user_ftp_dir = trans.user_ftp_dir
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if os.path.islink(user_ftp_dir):
            raise AssertionError("User FTP directory cannot be a symbolic link")
        for path in _matching_ftp_paths(user_ftp_dir):
            ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, path)),
                             'filename': os.path.basename(path)}
            purge = getattr(trans.app.config, 'ftp_upload_purge', True)
            file_bunch = get_data_file_filename(
                ftp_data_file,
                override_name=name,
                override_info=info,
                purge=purge,
            )
            # The first candidate that yields a usable file wins.
            if file_bunch.path:
                break

    file_bunch.to_posix_lines = to_posix_lines
    file_bunch.auto_decompress = auto_decompress
    file_bunch.space_to_tab = space_to_tab
    file_bunch.uuid = uuid
    if file_type is not None:
        file_bunch.file_type = file_type
    if dbkey is not None:
        file_bunch.dbkey = dbkey
    return file_bunch, warnings
def check_src(item):
    """Validate one upload request item and normalize it in place.

    Depending on ``item["src"]`` the item is rewritten as follows:

    * ``url`` with a ``file://`` URL becomes ``src="path"``.
    * ``server_dir`` becomes ``src="path"`` with the validated full path.
    * ``ftp_import`` becomes ``src="path"`` pointing into the user's FTP
      directory, with ``purge_source`` taken from ``purge_ftp_source``.
    * ``url`` and ``files`` get ``in_place`` set from ``run_as_real_user``.

    ``purge_source`` defaults to False when nothing above set it.

    Raises RequestParameterInvalidException for forbidden keys (``object_id``,
    ``in_place``), a missing ``url``, an unknown ``elements_from``, an FTP path
    that cannot be found or that involves symbolic links, and a URL that does
    not look like one.  Raises AssertionError if the user's FTP directory is a
    symbolic link.  The ``validate_*`` helpers may raise as well.
    """
    if "object_id" in item:
        raise RequestParameterInvalidException("object_id not allowed to appear in the request.")

    validate_datatype_extension(datatypes_registry=trans.app.datatypes_registry, ext=item.get('ext'))

    # Normalize file:// URLs into paths.
    if item["src"] == "url":
        if "url" not in item:
            raise RequestParameterInvalidException("src specified as 'url' but 'url' not specified")

        url = item["url"]
        if url.startswith("file://"):
            item["src"] = "path"
            item["path"] = url[len("file://"):]
            del item["url"]

    if "in_place" in item:
        raise RequestParameterInvalidException("in_place cannot be set in the upload request")

    src = item["src"]

    # Check link_data_only can only be set for certain src types and certain elements_from types.
    _handle_invalid_link_data_only_elements_type(item)
    if src not in ["path", "server_dir"]:
        _handle_invalid_link_data_only_type(item)
    elements_from = item.get("elements_from", None)
    if elements_from and elements_from not in ELEMENTS_FROM_TYPE:
        raise RequestParameterInvalidException("Invalid elements_from/items_from found in request")

    if src == "path" or (src == "url" and item["url"].startswith("file:")):
        # Validate is admin, leave alone.
        validate_path_upload(trans)
    elif src == "server_dir":
        # Validate and replace with path definition.
        server_dir = item["server_dir"]
        full_path, _ = validate_server_directory_upload(trans, server_dir)
        item["src"] = "path"
        item["path"] = full_path
    elif src == "ftp_import":
        ftp_path = item["ftp_path"]
        full_path = None

        # It'd be nice if this can be de-duplicated with what is in parameters/grouping.py.
        user_ftp_dir = trans.user_ftp_dir
        is_directory = False

        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if os.path.islink(user_ftp_dir):
            raise AssertionError("User FTP directory cannot be a symbolic link")
        # ftp_path is matched against bare entry names anywhere below the FTP directory.
        for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
            for filename in filenames:
                if ftp_path == filename:
                    path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                    if not os.path.islink(os.path.join(dirpath, filename)):
                        full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
                        break

            for dirname in dirnames:
                if ftp_path == dirname:
                    path = relpath(os.path.join(dirpath, dirname), user_ftp_dir)
                    if not os.path.islink(os.path.join(dirpath, dirname)):
                        full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
                        is_directory = True
                        break

        if is_directory:
            # If the target is a directory - make sure no files under it are symbolic links.
            # os.walk does not descend into symlinked directories by default but still
            # lists them in dirnames, so both lists are checked.
            contains_symlink = any(
                os.path.islink(os.path.join(dirpath, entry))
                for (dirpath, dirnames, filenames) in os.walk(full_path)
                for entry in dirnames + filenames
            )
            if contains_symlink:
                full_path = False

        if not full_path:
            raise RequestParameterInvalidException("Failed to find referenced ftp_path or symbolic link was enountered")

        item["src"] = "path"
        item["path"] = full_path
        item["purge_source"] = purge_ftp_source
    elif src == "url":
        url = item["url"]
        # Accept well-known schemes first; otherwise defer to the configured file sources.
        looks_like_url = url.startswith(("http://", "https://", "ftp://", "ftps://"))
        if not looks_like_url and trans.app.file_sources.looks_like_uri(url):
            looks_like_url = True

        if not looks_like_url:
            raise RequestParameterInvalidException("Invalid URL [%s] found in src definition." % url)

        validate_url(url, trans.app.config.fetch_url_allowlist_ips)
        item["in_place"] = run_as_real_user
    elif src == "files":
        item["in_place"] = run_as_real_user

    # Small disagreement with traditional uploads - we purge less by default since whether purging
    # happens varies based on upload options in non-obvious ways.
    # https://github.com/galaxyproject/galaxy/issues/5361
    if "purge_source" not in item:
        item["purge_source"] = False
def get_filenames(context):
    """Gather every upload source named in ``context`` into a list of file bunches.

    The uploaded file, each pasted URL/content entry and each selected FTP file
    is included when its bunch has a true ``path``.  Every returned bunch gets
    the conversion options (``to_posix_lines``, ``auto_decompress``,
    ``space_to_tab``).  ``uuid`` is set on the uploaded-file and pasted bunches
    only.

    Raises AssertionError if the user's FTP directory is a symbolic link.
    """
    def _is_set(option):
        # The string "None" is treated the same as a missing value.
        return context.get(option, None) not in ["None", None, False]

    rval = []
    data_file = context['file_data']
    ftp_files = context['ftp_files']
    uuid = context.get('uuid', None) or None  # Turn '' to None
    name = context.get('NAME', None)
    info = context.get('INFO', None)
    to_posix_lines = _is_set('to_posix_lines')
    auto_decompress = _is_set('auto_decompress')
    space_to_tab = _is_set('space_to_tab')

    def _accept(bunch):
        # Apply the options shared by every source and add the bunch to the result.
        bunch.to_posix_lines = to_posix_lines
        bunch.auto_decompress = auto_decompress
        bunch.space_to_tab = space_to_tab
        rval.append(bunch)

    file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
    file_bunch.uuid = uuid
    if file_bunch.path:
        _accept(file_bunch)
    for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
        if file_bunch.path:
            file_bunch.uuid = uuid
            _accept(file_bunch)

    # look for files uploaded via FTP
    valid_files = set()  # set: O(1) membership test per requested file
    if ftp_files is None:
        ftp_files = []
    else:
        # Normalize input paths to ensure utf-8 encoding is normal form c.
        # This allows for comparison when the filesystem uses a different encoding than the browser.
        # Entries that are not text_type are dropped by the filter.
        ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
        if trans.user is None:
            log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
            ftp_files = []
            # TODO: warning to the user (could happen if session has become invalid)
        else:
            user_ftp_dir = trans.user_ftp_dir
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            if os.path.islink(user_ftp_dir):
                raise AssertionError("User FTP directory cannot be a symbolic link")
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    on_disk = os.path.join(dirpath, filename)
                    if os.path.islink(on_disk):
                        continue
                    path = relpath(on_disk, user_ftp_dir)
                    # Normalize filesystem paths
                    if isinstance(path, text_type):
                        path = unicodedata.normalize('NFC', path)
                    valid_files.add(path)

    for ftp_file in ftp_files:
        if ftp_file not in valid_files:
            log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
            continue
            # TODO: warning to the user (could happen if file is already imported)
        ftp_data_file = {'local_filename': os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                         'filename': os.path.basename(ftp_file)}
        purge = getattr(trans.app.config, 'ftp_upload_purge', True)
        file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
        if file_bunch.path:
            _accept(file_bunch)
    return rval
def get_one_filename(context):
    """Choose one upload source from ``context`` and return ``(file_bunch, warnings)``.

    Priority is: uploaded file, then paste box, then the user's FTP selections.
    Warnings list the lower-priority inputs that were supplied but ignored.
    ``to_posix_lines``, ``space_to_tab`` and ``uuid`` are set on the returned
    bunch even when its ``path`` is false.
    """
    def _flag(option):
        # The string "None" counts as unset, like None and False.
        return context.get(option, None) not in ["None", None, False]

    def _ftp_matches(user_ftp_dir):
        # Lazily yield the path, relative to user_ftp_dir, of each non-symlink
        # file whose bare name equals a requested ftp_files entry.
        for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
            for filename in filenames:
                for ftp_filename in ftp_files:
                    if ftp_filename == filename:
                        on_disk = os.path.join(dirpath, filename)
                        path = relpath(on_disk, user_ftp_dir)
                        if not os.path.islink(on_disk):
                            yield path

    data_file = context['file_data']
    url_paste = context['url_paste']
    ftp_files = context['ftp_files']
    name = context.get('NAME', None)
    info = context.get('INFO', None)
    uuid = context.get('uuid', None) or None  # Turn '' to None
    warnings = []
    to_posix_lines = _flag('to_posix_lines')
    space_to_tab = _flag('space_to_tab')

    file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
    if file_bunch.path:
        if url_paste is not None and url_paste.strip():
            warnings.append("All file contents specified in the paste box were ignored.")
        if ftp_files:
            warnings.append("All FTP uploaded file selections were ignored.")
    elif url_paste is not None and url_paste.strip():
        # we need to use url_paste
        for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
            if file_bunch.path:
                break
        if file_bunch.path and ftp_files is not None:
            warnings.append("All FTP uploaded file selections were ignored.")
    elif ftp_files is not None and trans.user is not None:
        # look for files uploaded via FTP
        user_ftp_dir = trans.user_ftp_dir
        for path in _ftp_matches(user_ftp_dir):
            ftp_data_file = {
                'local_filename': os.path.abspath(os.path.join(user_ftp_dir, path)),
                'filename': os.path.basename(path),
            }
            file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info)
            # Stop at the first match that produces a usable file.
            if file_bunch.path:
                break

    file_bunch.to_posix_lines = to_posix_lines
    file_bunch.space_to_tab = space_to_tab
    file_bunch.uuid = uuid
    return file_bunch, warnings
def get_filenames(context):
    """Gather every upload source named in ``context`` into a list of file bunches.

    The uploaded file, each pasted URL/content entry and each selected FTP file
    is included when its bunch has a true ``path``; ``to_posix_lines`` and
    ``space_to_tab`` are set on every bunch that is returned.
    """
    rval = []
    data_file = context['file_data']
    # The value is not used below; the lookup still raises KeyError when the key is absent.
    url_paste = context['url_paste']
    ftp_files = context['ftp_files']
    name = context.get('NAME', None)
    info = context.get('INFO', None)
    # The string "None" counts as unset, like None and False.
    to_posix_lines = context.get('to_posix_lines', None) not in ["None", None, False]
    space_to_tab = context.get('space_to_tab', None) not in ["None", None, False]

    def _keep_if_usable(bunch):
        if bunch.path:
            bunch.to_posix_lines = to_posix_lines
            bunch.space_to_tab = space_to_tab
            rval.append(bunch)

    _keep_if_usable(get_data_file_filename(data_file, override_name=name, override_info=info))
    for pasted_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
        _keep_if_usable(pasted_bunch)

    # look for files uploaded via FTP
    valid_files = []
    if ftp_files is None:
        ftp_files = []
    else:
        # Normalize input paths to ensure utf-8 encoding is normal form c.
        # This allows for comparison when the filesystem uses a different encoding than the browser.
        # Entries that are not unicode objects are dropped by the filter.
        ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, unicode)]
        if trans.user is None:
            log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
            ftp_files = []
            # TODO: warning to the user (could happen if session has become invalid)
        else:
            user_ftp_dir = trans.user_ftp_dir
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    on_disk = os.path.join(dirpath, filename)
                    path = relpath(on_disk, user_ftp_dir)
                    if os.path.islink(on_disk):
                        continue
                    # Normalize filesystem paths
                    if isinstance(path, unicode):
                        path = unicodedata.normalize('NFC', path)
                    valid_files.append(path)

    for ftp_file in ftp_files:
        if ftp_file not in valid_files:
            log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
            continue
            # TODO: warning to the user (could happen if file is already imported)
        ftp_data_file = {
            'local_filename': os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
            'filename': os.path.basename(ftp_file),
        }
        _keep_if_usable(get_data_file_filename(ftp_data_file, override_name=name, override_info=info))
    return rval
def file_name( self ):
    """Return the path of this object's temporary file, creating it on first use.

    The name is chosen once by ``tempfile`` inside ``self.tmp_dir``, passed
    through ``relpath()`` and cached in ``self._filename``; later calls return
    the cached value.
    """
    if self._filename is None:
        # we need to create a tmp file, accessible across all nodes/heads, save the name, and return it
        name_holder = tempfile.NamedTemporaryFile( dir = self.tmp_dir, prefix = "metadata_temp_file_" )
        try:
            self._filename = relpath( name_holder.name )
        finally:
            # Closing removes tempfile's own file; only the unique name is kept.
            name_holder.close()
        # Create an empty file under that name so tempfile cannot reuse it, and
        # close the handle right away instead of leaking it.
        open( self._filename, 'wb+' ).close()
    return self._filename
def check_src(item):
    """Validate one upload request ``item`` and normalise its source in place.

    ``item`` is a dict with at least a ``src`` key. Depending on ``src`` the
    item is rewritten so that later processing sees a ``path`` source
    (``file://`` URLs, ``server_dir`` and ``ftp_import``), or is marked with
    ``in_place`` (``url`` and ``files``). ``purge_source`` defaults to False
    when the request did not set it.

    Uses ``trans``, ``purge_ftp_source`` and ``run_as_real_user``, which are
    not defined in this block (presumably taken from the enclosing scope).

    :raises RequestParameterInvalidException: if the item carries
        ``object_id`` or ``in_place``, names an unknown ``elements_from``,
        has a URL with an unsupported scheme, or refers to an ``ftp_path``
        that cannot be found or involves a symbolic link.
    """
    if "object_id" in item:
        raise RequestParameterInvalidException("object_id not allowed to appear in the request.")

    # Normalize file:// URLs into paths.
    if item["src"] == "url" and item["url"].startswith("file://"):
        item["src"] = "path"
        item["path"] = item["url"][len("file://"):]
        # Drop the superseded URL. (Deleting "path" here would throw away the value just stored.)
        del item["url"]

    if "in_place" in item:
        raise RequestParameterInvalidException("in_place cannot be set in the upload request")

    src = item["src"]

    # Check link_data_only can only be set for certain src types and certain elements_from types.
    _handle_invalid_link_data_only_elements_type(item)
    if src not in ["path", "server_dir"]:
        _handle_invalid_link_data_only_type(item)
    elements_from = item.get("elements_from", None)
    if elements_from and elements_from not in ELEMENTS_FROM_TYPE:
        raise RequestParameterInvalidException("Invalid elements_from/items_from found in request")

    if src == "path" or (src == "url" and item["url"].startswith("file:")):
        # Validate is admin, leave alone.
        validate_path_upload(trans)
    elif src == "server_dir":
        # Validate and replace with path definition.
        server_dir = item["server_dir"]
        full_path, _ = validate_server_directory_upload(trans, server_dir)
        item["src"] = "path"
        item["path"] = full_path
    elif src == "ftp_import":
        ftp_path = item["ftp_path"]
        full_path = None

        # It'd be nice if this can be de-duplicated with what is in parameters/grouping.py.
        user_ftp_dir = trans.user_ftp_dir
        is_directory = False

        assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
        # Locate an entry named ftp_path anywhere under the user's FTP directory;
        # entries that are symbolic links are never accepted as a match.
        for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
            for filename in filenames:
                if ftp_path == filename:
                    path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                    if not os.path.islink(os.path.join(dirpath, filename)):
                        full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
                    break

            for dirname in dirnames:
                if ftp_path == dirname:
                    path = relpath(os.path.join(dirpath, dirname), user_ftp_dir)
                    if not os.path.islink(os.path.join(dirpath, dirname)):
                        full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
                        is_directory = True
                    break

        if is_directory:
            # If the target is a directory - make sure no files under it are symbolic links.
            # Every file and sub-directory entry is inspected; the first link found rejects
            # the whole directory. os.walk does not descend into linked directories by
            # default, but it still lists them in dirnames, so they are caught here.
            for (dirpath, dirnames, filenames) in os.walk(full_path):
                if any(os.path.islink(os.path.join(dirpath, entry)) for entry in dirnames + filenames):
                    full_path = None
                    break

        if not full_path:
            raise RequestParameterInvalidException("Failed to find referenced ftp_path or symbolic link was enountered")

        item["src"] = "path"
        item["path"] = full_path
        item["purge_source"] = purge_ftp_source
    elif src == "url":
        url = item["url"]
        # Only these remote schemes are accepted.
        looks_like_url = False
        for url_prefix in ["http://", "https://", "ftp://", "ftps://"]:
            if url.startswith(url_prefix):
                looks_like_url = True
                break

        if not looks_like_url:
            raise RequestParameterInvalidException("Invalid URL [%s] found in src definition." % url)

        validate_url(url, trans.app.config.fetch_url_whitelist_ips)
        item["in_place"] = run_as_real_user
    elif src == "files":
        item["in_place"] = run_as_real_user

    # Small disagreement with traditional uploads - we purge less by default since whether purging
    # happens varies based on upload options in non-obvious ways.
    # https://github.com/galaxyproject/galaxy/issues/5361
    if "purge_source" not in item:
        item["purge_source"] = False
def setup_external_metadata(self, datasets, sa_session, exec_dir=None, tmp_dir=None, dataset_files_path=None, output_fnames=None, config_root=None, datatypes_config=None, job_metadata=None, kwds={}):
    """Prepare the files for setting metadata externally and return the command line.

    For each dataset that has no stored ``JobExternalOutputMetadata`` record
    yet, a record is created together with its temp files (pickled dataset,
    results, results code, kwds and override metadata), then added to
    ``sa_session`` and flushed. Datasets that already have a record reuse it.

    :param datasets: a dataset or a list of datasets.
    :param sa_session: session used to look up and persist the records.
    :param exec_dir: directory containing ``set_metadata.sh``; defaults to the
        absolute current working directory.
    :param tmp_dir: directory for the temp files; defaults to
        ``MetadataTempFile.tmp_dir``.
    :param dataset_files_path: defaults to ``galaxy.model.Dataset.file_path``.
    :param output_fnames: optional iterable of objects with ``false_path`` and
        ``real_path`` used to find a filename override per dataset.
    :param config_root: defaults to the absolute current working directory.
    :param datatypes_config: defaults to ``'datatypes_conf.xml'``.
    :param job_metadata: interpolated into the command line as given.
    :param kwds: JSON-serialisable mapping written to the kwds file; it is
        only read here, never modified.
    :returns: the ``set_metadata.sh`` command line as a string.
    """
    #fill in metadata_files_dict and return the command with args required to set metadata
    def __metadata_files_list_to_cmd_line(metadata_files):
        def __get_filename_override():
            if output_fnames:
                for dataset_path in output_fnames:
                    if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
                        return dataset_path.false_path
            return ""
        return "%s,%s,%s,%s,%s,%s" % (
            metadata_files.filename_in,
            metadata_files.filename_kwds,
            metadata_files.filename_out,
            metadata_files.filename_results_code,
            __get_filename_override(),
            metadata_files.filename_override_metadata)

    def _unique_relpath(prefix):
        # tempfile is used only to obtain a unique name inside tmp_dir
        return relpath(tempfile.NamedTemporaryFile(dir=tmp_dir, prefix=prefix).name)

    def _touch(path):
        # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
        open(path, 'wb+').close()

    if not isinstance(datasets, list):
        datasets = [datasets]
    if exec_dir is None:
        exec_dir = os.path.abspath(os.getcwd())
    if tmp_dir is None:
        tmp_dir = MetadataTempFile.tmp_dir
    if dataset_files_path is None:
        dataset_files_path = galaxy.model.Dataset.file_path
    if config_root is None:
        config_root = os.path.abspath(os.getcwd())
    if datatypes_config is None:
        datatypes_config = 'datatypes_conf.xml'
    metadata_files_list = []
    for dataset in datasets:
        key = self.get_dataset_metadata_key(dataset)
        #future note:
        #wonkiness in job execution causes build command line to be called more than once
        #when setting metadata externally, via 'auto-detect' button in edit attributes, etc.,
        #we don't want to overwrite (losing the ability to cleanup) our existing dataset keys and files,
        #so we will only populate the dictionary once
        metadata_files = self.get_output_filenames_by_dataset(dataset, sa_session)
        if not metadata_files:
            metadata_files = galaxy.model.JobExternalOutputMetadata(dataset=dataset)
            metadata_files.job_id = self.job_id
            #we are using tempfile to create unique filenames, tempfile always returns an absolute path
            #we will use pathnames relative to the galaxy root, to accommodate instances where the galaxy root
            #is located differently, i.e. on a cluster node with a different filesystem structure

            #every file written below is closed explicitly, so its content is on disk
            #without depending on garbage collection

            #file to store existing dataset
            metadata_files.filename_in = _unique_relpath("metadata_in_%s_" % key)
            with open(metadata_files.filename_in, 'wb+') as in_file:
                cPickle.dump(dataset, in_file)
            #file to store metadata results of set_meta()
            metadata_files.filename_out = _unique_relpath("metadata_out_%s_" % key)
            _touch(metadata_files.filename_out)
            #file to store a 'return code' indicating the results of the set_meta() call
            #results code is like (True/False - if setting metadata was successful/failed , exception or string of reason of success/failure )
            metadata_files.filename_results_code = _unique_relpath("metadata_results_%s_" % key)
            with open(metadata_files.filename_results_code, 'wb+') as results_code_file:
                simplejson.dump((False, 'External set_meta() not called'), results_code_file)
            #file to store kwds passed to set_meta()
            metadata_files.filename_kwds = _unique_relpath("metadata_kwds_%s_" % key)
            with open(metadata_files.filename_kwds, 'wb+') as kwds_file:
                simplejson.dump(kwds, kwds_file, ensure_ascii=True)
            #existing metadata file parameters need to be overridden with cluster-writable file locations
            metadata_files.filename_override_metadata = _unique_relpath("metadata_override_%s_" % key)
            _touch(metadata_files.filename_override_metadata)
            override_metadata = []
            for meta_key, spec_value in dataset.metadata.spec.iteritems():
                if isinstance(spec_value.param, FileParameter) and dataset.metadata.get(meta_key, None) is not None:
                    #copy each existing metadata file to a fresh MetadataTempFile and record its JSON form
                    metadata_temp = MetadataTempFile()
                    shutil.copy(dataset.metadata.get(meta_key, None).file_name, metadata_temp.file_name)
                    override_metadata.append((meta_key, metadata_temp.to_JSON()))
            with open(metadata_files.filename_override_metadata, 'wb+') as override_file:
                simplejson.dump(override_metadata, override_file)
            #add to session and flush
            sa_session.add(metadata_files)
            sa_session.flush()
        metadata_files_list.append(metadata_files)
    #return command required to build
    return "%s %s %s %s %s %s %s" % (
        os.path.join(exec_dir, 'set_metadata.sh'),
        dataset_files_path,
        tmp_dir,
        config_root,
        datatypes_config,
        job_metadata,
        " ".join(map(__metadata_files_list_to_cmd_line, metadata_files_list)))