Example 1
 def setup_external_metadata( self, datasets, exec_dir = None, tmp_dir = None, dataset_files_path = None, output_fnames = None, kwds = {} ):
     #fill in metadata_files_dict and return the command with args required to set metadata
     def __metadata_files_list_to_cmd_line( metadata_files ):
         def __get_filename_override():
             if output_fnames:
                 for dataset_path in output_fnames:
                     if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
                         return dataset_path.false_path
             return ""
         return "%s,%s,%s,%s,%s" % ( metadata_files.filename_in, metadata_files.filename_kwds, metadata_files.filename_out, metadata_files.filename_results_code, __get_filename_override() )
     if not isinstance( datasets, list ):
         datasets = [ datasets ]
     if exec_dir is None:
         exec_dir = os.path.abspath( os.getcwd() )
     if tmp_dir is None:
         tmp_dir = MetadataTempFile.tmp_dir
     if dataset_files_path is None:
         dataset_files_path = galaxy.model.Dataset.file_path
     metadata_files_list = []
     for dataset in datasets:
         key = self.get_dataset_metadata_key( dataset )
         #future note:
         #wonkiness in job execution causes build command line to be called more than once
         #when setting metadata externally, via 'auto-detect' button in edit attributes, etc., 
         #we don't want to overwrite (losing the ability to cleanup) our existing dataset keys and files, 
         #so we will only populate the dictionary once
         metadata_files = self.get_output_filenames_by_dataset( dataset )
         if not metadata_files:
             metadata_files = galaxy.model.JobExternalOutputMetadata( dataset = dataset )
             metadata_files.job_id = self.job_id
             #we are using tempfile to create unique filenames, tempfile always returns an absolute path
             #we will use pathnames relative to the galaxy root, to accommodate instances where the galaxy root
             #is located differently, i.e. on a cluster node with a different filesystem structure
             
             #file to store existing dataset
             metadata_files.filename_in = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_in_%s_" % key ).name )
             cPickle.dump( dataset, open( metadata_files.filename_in, 'wb+' ) )
             #file to store metadata results of set_meta()
             metadata_files.filename_out = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_out_%s_" % key ).name )
             open( metadata_files.filename_out, 'wb+' ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
             #file to store a 'return code' indicating the results of the set_meta() call
             #the results code is a tuple: ( True/False - whether setting metadata succeeded or failed, and an exception or string explaining why )
             metadata_files.filename_results_code = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_results_%s_" % key ).name )
             simplejson.dump( ( False, 'External set_meta() not called' ), open( metadata_files.filename_results_code, 'wb+' ) ) # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
             #file to store kwds passed to set_meta()
             metadata_files.filename_kwds = relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = "metadata_kwds_%s_" % key ).name )
             simplejson.dump( kwds, open( metadata_files.filename_kwds, 'wb+' ), ensure_ascii=True )
             metadata_files.flush()
         metadata_files_list.append( metadata_files )
     #return command required to build
     return "%s %s %s %s" % ( os.path.join( exec_dir, 'set_metadata.sh' ), dataset_files_path, tmp_dir, " ".join( map( __metadata_files_list_to_cmd_line, metadata_files_list ) ) )
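For orientation, the string returned above for a single dataset has roughly this shape (the paths below are purely illustrative, not taken from the source):

    /galaxy/set_metadata.sh database/files database/tmp metadata_in_<key>_XXXX,metadata_kwds_<key>_XXXX,metadata_out_<key>_XXXX,metadata_results_<key>_XXXX,

That is, set_metadata.sh receives the dataset files path, the temp dir, and then one comma-separated record per dataset in the order filename_in, filename_kwds, filename_out, filename_results_code, filename override (the last field is empty unless a false_path override applies).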
Example 2
 def get_one_filename(context):
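     # Resolve exactly one upload source, honoring precedence: a directly
     # uploaded file wins over pasted URLs/content, which win over FTP
     # selections; warnings record any sources that were ignored.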
     data_file = context['file_data']
     url_paste = context['url_paste']
     ftp_files = context['ftp_files']
     name = context.get('NAME', None)
     info = context.get('INFO', None)
     uuid = context.get('uuid', None) or None  # Turn '' to None
     warnings = []
     to_posix_lines = False
     if context.get('to_posix_lines', None) not in ["None", None, False]:
         to_posix_lines = True
     auto_decompress = False
     if context.get('auto_decompress', None) not in ["None", None, False]:
         auto_decompress = True
     space_to_tab = False
     if context.get('space_to_tab', None) not in ["None", None, False]:
         space_to_tab = True
     file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
     if file_bunch.path:
         if url_paste is not None and url_paste.strip():
             warnings.append("All file contents specified in the paste box were ignored.")
         if ftp_files:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif url_paste is not None and url_paste.strip():  # we need to use url_paste
         for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
             if file_bunch.path:
                 break
         if file_bunch.path and ftp_files is not None:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
         user_ftp_dir = trans.user_ftp_dir
         assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
         for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
             for filename in filenames:
                 for ftp_filename in ftp_files:
                     if ftp_filename == filename:
                         path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                         if not os.path.islink(os.path.join(dirpath, filename)):
                             ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                                              'filename' : os.path.basename(path)}
                             purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                             file_bunch = get_data_file_filename(
                                 ftp_data_file,
                                 override_name=name,
                                 override_info=info,
                                 purge=purge,
                             )
                             if file_bunch.path:
                                 break
                 if file_bunch.path:
                     break
             if file_bunch.path:
                 break
     file_bunch.to_posix_lines = to_posix_lines
     file_bunch.auto_decompress = auto_decompress
     file_bunch.space_to_tab = space_to_tab
     file_bunch.uuid = uuid
     return file_bunch, warnings
Example 3
        def get_filenames( context ):
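            # Collect a file bunch for every selected source: the uploaded
            # file, each pasted URL/content block, and any valid (non-symlink)
            # FTP uploads under the user's FTP directory.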
            rval = []
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get( 'NAME', None )
            info = context.get( 'INFO', None )
            space_to_tab = False
            if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
                space_to_tab = True
            warnings = []
            file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
            if file_bunch.path:
                file_bunch.space_to_tab = space_to_tab
                rval.append( file_bunch )
            for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
                if file_bunch.path:
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, unicode)]
                if trans.user is None:
                    log.warning( 'Anonymous user passed values in ftp_files: %s' % ftp_files )
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
                        for filename in filenames:
                            path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                            if not os.path.islink( os.path.join( dirpath, filename ) ):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(unicodedata.normalize('NFC', path ))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning( 'User passed an invalid file path in ftp_files: %s' % ftp_file )
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, ftp_file ) ),
                                  'filename' : os.path.basename( ftp_file ) }
                file_bunch = get_data_file_filename( ftp_data_file, override_name = name, override_info = info )
                if file_bunch.path:
                    file_bunch.space_to_tab = space_to_tab
                    rval.append( file_bunch )
            return rval
Example 4
 def get_one_filename( context ):
     data_file = context['file_data']
     url_paste = context['url_paste']
     ftp_files = context['ftp_files']
     name = context.get( 'NAME', None )
     info = context.get( 'INFO', None )
     warnings = []
     to_posix_lines = False
     if context.get( 'to_posix_lines', None ) not in [ "None", None, False ]:
         to_posix_lines = True
     space_to_tab = False
     if context.get( 'space_to_tab', None ) not in [ "None", None, False ]:
         space_to_tab = True
     file_bunch = get_data_file_filename( data_file, override_name = name, override_info = info )
     if file_bunch.path:
         if url_paste is not None and url_paste.strip():
             warnings.append( "All file contents specified in the paste box were ignored." )
         if ftp_files:
             warnings.append( "All FTP uploaded file selections were ignored." )
     elif url_paste is not None and url_paste.strip(): #we need to use url_paste
         for file_bunch in get_url_paste_urls_or_filename( context, override_name = name, override_info = info ):
             if file_bunch.path:
                 break
         if file_bunch.path and ftp_files is not None:
             warnings.append( "All FTP uploaded file selections were ignored." )
     elif ftp_files is not None and trans.user is not None: # look for files uploaded via FTP
         user_ftp_dir = trans.user_ftp_dir
         for ( dirpath, dirnames, filenames ) in os.walk( user_ftp_dir ):
             for filename in filenames:
                 for ftp_filename in ftp_files:
                     if ftp_filename == filename:
                         path = relpath( os.path.join( dirpath, filename ), user_ftp_dir )
                         if not os.path.islink( os.path.join( dirpath, filename ) ):
                             ftp_data_file = { 'local_filename' : os.path.abspath( os.path.join( user_ftp_dir, path ) ),
                                   'filename' : os.path.basename( path ) }
                             file_bunch = get_data_file_filename( ftp_data_file, override_name = name, override_info = info )
                             if file_bunch.path:
                                 break
                 if file_bunch.path:
                     break
             if file_bunch.path:
                 break
     file_bunch.to_posix_lines = to_posix_lines
     file_bunch.space_to_tab = space_to_tab
     return file_bunch, warnings
Example 5
 def get_html( self, prefix="" ):
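     # Render the FTP-upload table body: a login prompt when no user directory
     # is available, an empty notice when it holds no files, otherwise one row
     # per uploaded file with its relative path, size, and creation time.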
     rval = FTPFileField.thead
     if self.dir is None:
         rval += '<tr><td colspan="4"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( url_for( controller='user', action='create', cntrller='user', referer=url_for( controller='root' ) ), url_for( controller='user', action='login', cntrller='user', referer=url_for( controller='root' ) ) )
     elif not os.path.exists( self.dir ):
         rval += '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
     else:
         uploads = []
         for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
             for filename in filenames:
                 path = relpath( os.path.join( dirpath, filename ), self.dir )
                 statinfo = os.lstat( os.path.join( dirpath, filename ) )
                 uploads.append( dict( path=path,
                                       size=nice_size( statinfo.st_size ),
                                       ctime=time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) ) ) )
         if not uploads:
             rval += '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
         for upload in uploads:
             rval += FTPFileField.trow % ( prefix, self.name, upload['path'], upload['path'], upload['size'], upload['ctime'] )
     rval += FTPFileField.tfoot
     rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP.  To upload some files, log in to the FTP server at <strong>%s</strong> using your Galaxy credentials (email address and password).</div>' % self.ftp_site
     return rval
Example 6
 def get_html( self, prefix="" ):
     rval = FTPFileField.thead
     if self.dir is None:
         rval += '<tr><td colspan="4"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( url_for( controller='user', action='create', cntrller='user', referer=url_for( controller='root' ) ), url_for( controller='user', action='login', cntrller='user', referer=url_for( controller='root' ) ) )
     elif not os.path.exists( self.dir ):
         rval += '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
     else:
         uploads = []
         for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
             for filename in filenames:
                 path = relpath( os.path.join( dirpath, filename ), self.dir )
                 statinfo = os.lstat( os.path.join( dirpath, filename ) )
                 uploads.append( dict( path=path,
                                       size=nice_size( statinfo.st_size ),
                                       ctime=time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) ) ) )
         if not uploads:
             rval += '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
         for upload in uploads:
             rval += FTPFileField.trow % ( prefix, self.name, upload['path'], upload['path'], upload['size'], upload['ctime'] )
     rval += FTPFileField.tfoot
     rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP.  To upload some files, log in to the FTP server at <strong>%s</strong> using your Galaxy credentials (email address and password). After transferring files via FTP, they will appear here. To use them in further analysis, you must return to this page, select the files, and press the <strong>Execute</strong> button. After they are processed, they will appear in your Uploaded Files project space. Consult <a href="http://wiki.galaxyproject.org/FTPUpload">the Galaxy wiki</a> for more information.</div>' % self.ftp_site
     return rval
Example 7
 def get_html( self, prefix="" ):
     rval = FTPFileField.thead
     if self.dir is None:
         rval += '<tr><td colspan="4"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( url_for( controller='user', action='create', cntrller='user', referer=url_for( controller='root' ) ), url_for( controller='user', action='login', cntrller='user', referer=url_for( controller='root' ) ) )
     elif not os.path.exists( self.dir ):
         rval += '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
     else:
         uploads = []
         for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
             for filename in filenames:
                 path = relpath( os.path.join( dirpath, filename ), self.dir )
                 statinfo = os.lstat( os.path.join( dirpath, filename ) )
                 uploads.append( dict( path=path,
                                       size=nice_size( statinfo.st_size ),
                                       ctime=time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) ) ) )
         if not uploads:
             rval += '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
         for upload in uploads:
             rval += FTPFileField.trow % ( prefix, self.name, upload['path'], upload['path'], upload['size'], upload['ctime'] )
     galaxy_ip_address = subprocess.check_output(['curl', 'http://instance-data/latest/meta-data/public-ipv4']).strip()  # avoid shell=True and drop curl's trailing newline
     rval += FTPFileField.tfoot
     rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP.  To upload files, use an FTP program to access the FTP server at <strong>%s</strong> (using the SFTP protocol) and port <strong>2200</strong>. You will use your Galaxy credentials (email address and password) to log in to the server.</div>' % galaxy_ip_address
     return rval
Example 8
        def get_filenames(context):
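            # Like the simpler get_filenames variants above, but also carries
            # uuid, file_type, and dbkey (plus auto_decompress and an FTP
            # purge flag) onto every collected file bunch.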
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    file_bunch.file_type = file_type
                if dbkey is not None:
                    file_bunch.dbkey = dbkey

                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey

                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
                if trans.user is None:
                    log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                    for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, text_type):
                                    valid_files.append(unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                                 'filename' : os.path.basename(ftp_file)}
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    if file_type is not None:
                        file_bunch.file_type = file_type
                    if dbkey is not None:
                        file_bunch.dbkey = dbkey
                    rval.append(file_bunch)
            return rval
Example 9
 def get_one_filename(context):
     data_file = context['file_data']
     url_paste = context['url_paste']
     ftp_files = context['ftp_files']
     name = context.get('NAME', None)
     info = context.get('INFO', None)
     uuid = context.get('uuid', None) or None  # Turn '' to None
     file_type = context.get('file_type', None)
     dbkey = self.get_dbkey(context)
     warnings = []
     to_posix_lines = False
     if context.get('to_posix_lines', None) not in ["None", None, False]:
         to_posix_lines = True
     auto_decompress = False
     if context.get('auto_decompress', None) not in ["None", None, False]:
         auto_decompress = True
     space_to_tab = False
     if context.get('space_to_tab', None) not in ["None", None, False]:
         space_to_tab = True
     file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
     if file_bunch.path:
         if url_paste is not None and url_paste.strip():
             warnings.append("All file contents specified in the paste box were ignored.")
         if ftp_files:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif url_paste is not None and url_paste.strip():  # we need to use url_paste
         for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
             if file_bunch.path:
                 break
         if file_bunch.path and ftp_files is not None:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
         user_ftp_dir = trans.user_ftp_dir
         assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
         for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
             for filename in filenames:
                 for ftp_filename in ftp_files:
                     if ftp_filename == filename:
                         path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                         if not os.path.islink(os.path.join(dirpath, filename)):
                             ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                                              'filename' : os.path.basename(path)}
                             purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                             file_bunch = get_data_file_filename(
                                 ftp_data_file,
                                 override_name=name,
                                 override_info=info,
                                 purge=purge,
                             )
                             if file_bunch.path:
                                 break
                 if file_bunch.path:
                     break
             if file_bunch.path:
                 break
     file_bunch.to_posix_lines = to_posix_lines
     file_bunch.auto_decompress = auto_decompress
     file_bunch.space_to_tab = space_to_tab
     file_bunch.uuid = uuid
     if file_type is not None:
         file_bunch.file_type = file_type
     if dbkey is not None:
         file_bunch.dbkey = dbkey
     return file_bunch, warnings
Example 10
    def check_src(item):
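        # Validate and normalize one upload target: reject forbidden keys,
        # rewrite file:// URLs and server_dir/ftp_import sources into plain
        # paths, and sanity-check anything that claims to be a URL.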
        if "object_id" in item:
            raise RequestParameterInvalidException(
                "object_id not allowed to appear in the request.")

        validate_datatype_extension(
            datatypes_registry=trans.app.datatypes_registry,
            ext=item.get('ext'))

        # Normalize file:// URLs into paths.
        if item["src"] == "url":
            if "url" not in item:
                raise RequestParameterInvalidException(
                    "src specified as 'url' but 'url' not specified")

            url = item["url"]
            if url.startswith("file://"):
                item["src"] = "path"
                item["path"] = url[len("file://"):]
                del item["url"]

        if "in_place" in item:
            raise RequestParameterInvalidException(
                "in_place cannot be set in the upload request")

        src = item["src"]

        # Check link_data_only can only be set for certain src types and certain elements_from types.
        _handle_invalid_link_data_only_elements_type(item)
        if src not in ["path", "server_dir"]:
            _handle_invalid_link_data_only_type(item)
        elements_from = item.get("elements_from", None)
        if elements_from and elements_from not in ELEMENTS_FROM_TYPE:
            raise RequestParameterInvalidException(
                "Invalid elements_from/items_from found in request")

        if src == "path" or (src == "url" and item["url"].startswith("file:")):
            # Validate is admin, leave alone.
            validate_path_upload(trans)
        elif src == "server_dir":
            # Validate and replace with path definition.
            server_dir = item["server_dir"]
            full_path, _ = validate_server_directory_upload(trans, server_dir)
            item["src"] = "path"
            item["path"] = full_path
        elif src == "ftp_import":
            ftp_path = item["ftp_path"]
            full_path = None

            # It'd be nice if this can be de-duplicated with what is in parameters/grouping.py.
            user_ftp_dir = trans.user_ftp_dir
            is_directory = False

            assert not os.path.islink(
                user_ftp_dir), "User FTP directory cannot be a symbolic link"
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    if ftp_path == filename:
                        path = relpath(os.path.join(dirpath, filename),
                                       user_ftp_dir)
                        if not os.path.islink(os.path.join(dirpath, filename)):
                            full_path = os.path.abspath(
                                os.path.join(user_ftp_dir, path))
                            break

                for dirname in dirnames:
                    if ftp_path == dirname:
                        path = relpath(os.path.join(dirpath, dirname),
                                       user_ftp_dir)
                        if not os.path.islink(os.path.join(dirpath, dirname)):
                            full_path = os.path.abspath(
                                os.path.join(user_ftp_dir, path))
                            is_directory = True
                            break

            if is_directory:
                # If the target is a directory - make sure no entries under it
                # are symbolic links
                for (dirpath, dirnames, filenames) in os.walk(full_path):
                    for entry in dirnames + filenames:
                        if os.path.islink(os.path.join(dirpath, entry)):
                            full_path = False
                            break
                    if not full_path:
                        break

            if not full_path:
                raise RequestParameterInvalidException(
                    "Failed to find referenced ftp_path or symbolic link was encountered"
                )

            item["src"] = "path"
            item["path"] = full_path
            item["purge_source"] = purge_ftp_source
        elif src == "url":
            url = item["url"]
            looks_like_url = False
            for url_prefix in ["http://", "https://", "ftp://", "ftps://"]:
                if url.startswith(url_prefix):
                    looks_like_url = True
                    break

            if not looks_like_url and trans.app.file_sources.looks_like_uri(
                    url):
                looks_like_url = True

            if not looks_like_url:
                raise RequestParameterInvalidException(
                    "Invalid URL [%s] found in src definition." % url)

            validate_url(url, trans.app.config.fetch_url_allowlist_ips)
            item["in_place"] = run_as_real_user
        elif src == "files":
            item["in_place"] = run_as_real_user

        # Small disagreement with traditional uploads - we purge less by default since whether purging
        # happens varies based on upload options in non-obvious ways.
        # https://github.com/galaxyproject/galaxy/issues/5361
        if "purge_source" not in item:
            item["purge_source"] = False
Example 11
        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            to_posix_lines = False
            if context.get('to_posix_lines', None) not in ["None", None, False]:
                to_posix_lines = True
            auto_decompress = False
            if context.get('auto_decompress', None) not in ["None", None, False]:
                auto_decompress = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.auto_decompress = auto_decompress
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
                if trans.user is None:
                    log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                    for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, text_type):
                                    valid_files.append(unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                                 'filename' : os.path.basename(ftp_file)}
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.auto_decompress = auto_decompress
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            return rval
Example 12
 def get_one_filename(context):
     data_file = context['file_data']
     url_paste = context['url_paste']
     ftp_files = context['ftp_files']
     name = context.get('NAME', None)
     info = context.get('INFO', None)
     uuid = context.get('uuid', None) or None  # Turn '' to None
     warnings = []
     to_posix_lines = False
     if context.get('to_posix_lines',
                    None) not in ["None", None, False]:
         to_posix_lines = True
     space_to_tab = False
     if context.get('space_to_tab', None) not in ["None", None, False]:
         space_to_tab = True
     file_bunch = get_data_file_filename(data_file,
                                         override_name=name,
                                         override_info=info)
     if file_bunch.path:
         if url_paste is not None and url_paste.strip():
             warnings.append(
                 "All file contents specified in the paste box were ignored."
             )
         if ftp_files:
             warnings.append(
                 "All FTP uploaded file selections were ignored.")
     elif url_paste is not None and url_paste.strip():  # we need to use url_paste
         for file_bunch in get_url_paste_urls_or_filename(
                 context, override_name=name, override_info=info):
             if file_bunch.path:
                 break
         if file_bunch.path and ftp_files is not None:
             warnings.append(
                 "All FTP uploaded file selections were ignored.")
     elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
         user_ftp_dir = trans.user_ftp_dir
         for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
             for filename in filenames:
                 for ftp_filename in ftp_files:
                     if ftp_filename == filename:
                         path = relpath(os.path.join(dirpath, filename),
                                        user_ftp_dir)
                         if not os.path.islink(
                                 os.path.join(dirpath, filename)):
                             ftp_data_file = {
                                 'local_filename':
                                 os.path.abspath(
                                     os.path.join(user_ftp_dir, path)),
                                 'filename':
                                 os.path.basename(path)
                             }
                             file_bunch = get_data_file_filename(
                                 ftp_data_file,
                                 override_name=name,
                                 override_info=info)
                             if file_bunch.path:
                                 break
                 if file_bunch.path:
                     break
             if file_bunch.path:
                 break
     file_bunch.to_posix_lines = to_posix_lines
     file_bunch.space_to_tab = space_to_tab
     file_bunch.uuid = uuid
     return file_bunch, warnings
Example 13
        def get_filenames(context):
            rval = []
            data_file = context['file_data']
            url_paste = context['url_paste']
            ftp_files = context['ftp_files']
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            to_posix_lines = False
            if context.get('to_posix_lines',
                           None) not in ["None", None, False]:
                to_posix_lines = True
            space_to_tab = False
            if context.get('space_to_tab', None) not in ["None", None, False]:
                space_to_tab = True
            warnings = []
            file_bunch = get_data_file_filename(data_file,
                                                override_name=name,
                                                override_info=info)
            if file_bunch.path:
                file_bunch.to_posix_lines = to_posix_lines
                file_bunch.space_to_tab = space_to_tab
                rval.append(file_bunch)
            for file_bunch in get_url_paste_urls_or_filename(
                    context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            # look for files uploaded via FTP
            valid_files = []
            if ftp_files is not None:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [
                    unicodedata.normalize('NFC', f) for f in ftp_files
                    if isinstance(f, unicode)
                ]
                if trans.user is None:
                    log.warning(
                        'Anonymous user passed values in ftp_files: %s' %
                        ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    user_ftp_dir = trans.user_ftp_dir
                    for (dirpath, dirnames,
                         filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename),
                                           user_ftp_dir)
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, unicode):
                                    valid_files.append(
                                        unicodedata.normalize('NFC', path))
                                else:
                                    valid_files.append(path)

            else:
                ftp_files = []
            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning(
                        'User passed an invalid file path in ftp_files: %s' %
                        ftp_file)
                    continue
                    # TODO: warning to the user (could happen if file is already imported)
                ftp_data_file = {
                    'local_filename':
                    os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                    'filename':
                    os.path.basename(ftp_file)
                }
                file_bunch = get_data_file_filename(ftp_data_file,
                                                    override_name=name,
                                                    override_info=info)
                if file_bunch.path:
                    file_bunch.to_posix_lines = to_posix_lines
                    file_bunch.space_to_tab = space_to_tab
                    rval.append(file_bunch)
            return rval
Example 14
 def file_name( self ):
     if self._filename is None:
         #we need to create a tmp file, accessible across all nodes/heads, save the name, and return it
         self._filename = relpath( tempfile.NamedTemporaryFile( dir = self.tmp_dir, prefix = "metadata_temp_file_" ).name )
         open( self._filename, 'wb+' ) #create an empty file, so it can't be reused using tempfile
     return self._filename
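
A minimal usage sketch (hypothetical; it assumes file_name is exposed as a property of MetadataTempFile, as its attribute-style use in Example 16 suggests):

     # The first access creates an empty placeholder file under self.tmp_dir,
     # so tempfile cannot hand out the same name again, and caches the path
     # relative to the Galaxy root; later accesses return the cached path.
     metadata_temp = MetadataTempFile()       # constructor used the same way in Example 16
     path = metadata_temp.file_name           # first access creates metadata_temp_file_*
     assert path == metadata_temp.file_name   # cached: same path on every later access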
Example 15
    def check_src(item):
        if "object_id" in item:
            raise RequestParameterInvalidException("object_id not allowed to appear in the request.")

        # Normalize file:// URLs into paths.
        if item["src"] == "url" and item["url"].startswith("file://"):
            item["src"] = "path"
            item["path"] = item["url"][len("file://"):]
            del item["url"]

        if "in_place" in item:
            raise RequestParameterInvalidException("in_place cannot be set in the upload request")

        src = item["src"]

        # Check link_data_only can only be set for certain src types and certain elements_from types.
        _handle_invalid_link_data_only_elements_type(item)
        if src not in ["path", "server_dir"]:
            _handle_invalid_link_data_only_type(item)
        elements_from = item.get("elements_from", None)
        if elements_from and elements_from not in ELEMENTS_FROM_TYPE:
            raise RequestParameterInvalidException("Invalid elements_from/items_from found in request")

        if src == "path" or (src == "url" and item["url"].startswith("file:")):
            # Validate is admin, leave alone.
            validate_path_upload(trans)
        elif src == "server_dir":
            # Validate and replace with path definition.
            server_dir = item["server_dir"]
            full_path, _ = validate_server_directory_upload(trans, server_dir)
            item["src"] = "path"
            item["path"] = full_path
        elif src == "ftp_import":
            ftp_path = item["ftp_path"]
            full_path = None

            # It'd be nice if this can be de-duplicated with what is in parameters/grouping.py.
            user_ftp_dir = trans.user_ftp_dir
            is_directory = False

            assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    if ftp_path == filename:
                        path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                        if not os.path.islink(os.path.join(dirpath, filename)):
                            full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
                            break

                for dirname in dirnames:
                    if ftp_path == dirname:
                        path = relpath(os.path.join(dirpath, dirname), user_ftp_dir)
                        if not os.path.islink(os.path.join(dirpath, dirname)):
                            full_path = os.path.abspath(os.path.join(user_ftp_dir, path))
                            is_directory = True
                            break

            if is_directory:
                # If the target is a directory - make sure no entries under it are symbolic links
                for (dirpath, dirnames, filenames) in os.walk(full_path):
                    for entry in dirnames + filenames:
                        if os.path.islink(os.path.join(dirpath, entry)):
                            full_path = False
                            break
                    if not full_path:
                        break

            if not full_path:
                raise RequestParameterInvalidException("Failed to find referenced ftp_path or symbolic link was encountered")

            item["src"] = "path"
            item["path"] = full_path
            item["purge_source"] = purge_ftp_source
        elif src == "url":
            url = item["url"]
            looks_like_url = False
            for url_prefix in ["http://", "https://", "ftp://", "ftps://"]:
                if url.startswith(url_prefix):
                    looks_like_url = True
                    break

            if not looks_like_url:
                raise RequestParameterInvalidException("Invalid URL [%s] found in src definition." % url)

            validate_url(url, trans.app.config.fetch_url_whitelist_ips)
            item["in_place"] = run_as_real_user
        elif src == "files":
            item["in_place"] = run_as_real_user

        # Small disagreement with traditional uploads - we purge less by default since whether purging
        # happens varies based on upload options in non-obvious ways.
        # https://github.com/galaxyproject/galaxy/issues/5361
        if "purge_source" not in item:
            item["purge_source"] = False
Example 16
    def setup_external_metadata(self,
                                datasets,
                                sa_session,
                                exec_dir=None,
                                tmp_dir=None,
                                dataset_files_path=None,
                                output_fnames=None,
                                config_root=None,
                                datatypes_config=None,
                                job_metadata=None,
                                kwds={}):
        #fill in metadata_files_dict and return the command with args required to set metadata
        def __metadata_files_list_to_cmd_line(metadata_files):
            def __get_filename_override():
                if output_fnames:
                    for dataset_path in output_fnames:
                        if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
                            return dataset_path.false_path
                return ""

            return "%s,%s,%s,%s,%s,%s" % (
                metadata_files.filename_in, metadata_files.filename_kwds,
                metadata_files.filename_out,
                metadata_files.filename_results_code,
                __get_filename_override(),
                metadata_files.filename_override_metadata)

        if not isinstance(datasets, list):
            datasets = [datasets]
        if exec_dir is None:
            exec_dir = os.path.abspath(os.getcwd())
        if tmp_dir is None:
            tmp_dir = MetadataTempFile.tmp_dir
        if dataset_files_path is None:
            dataset_files_path = galaxy.model.Dataset.file_path
        if config_root is None:
            config_root = os.path.abspath(os.getcwd())
        if datatypes_config is None:
            datatypes_config = 'datatypes_conf.xml'
        metadata_files_list = []
        for dataset in datasets:
            key = self.get_dataset_metadata_key(dataset)
            #future note:
            #wonkiness in job execution causes build command line to be called more than once
            #when setting metadata externally, via 'auto-detect' button in edit attributes, etc.,
            #we don't want to overwrite (losing the ability to cleanup) our existing dataset keys and files,
            #so we will only populate the dictionary once
            metadata_files = self.get_output_filenames_by_dataset(
                dataset, sa_session)
            if not metadata_files:
                metadata_files = galaxy.model.JobExternalOutputMetadata(
                    dataset=dataset)
                metadata_files.job_id = self.job_id
                #we are using tempfile to create unique filenames, tempfile always returns an absolute path
                #we will use pathnames relative to the galaxy root, to accommodate instances where the galaxy root
                #is located differently, i.e. on a cluster node with a different filesystem structure

                #file to store existing dataset
                metadata_files.filename_in = relpath(
                    tempfile.NamedTemporaryFile(dir=tmp_dir,
                                                prefix="metadata_in_%s_" %
                                                key).name)
                cPickle.dump(dataset, open(metadata_files.filename_in, 'wb+'))
                #file to store metadata results of set_meta()
                metadata_files.filename_out = relpath(
                    tempfile.NamedTemporaryFile(dir=tmp_dir,
                                                prefix="metadata_out_%s_" %
                                                key).name)
                open(
                    metadata_files.filename_out, 'wb+'
                )  # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
                #file to store a 'return code' indicating the results of the set_meta() call
                #the results code is a tuple: (True/False - whether setting metadata succeeded or failed, and an exception or string explaining why)
                metadata_files.filename_results_code = relpath(
                    tempfile.NamedTemporaryFile(dir=tmp_dir,
                                                prefix="metadata_results_%s_" %
                                                key).name)
                simplejson.dump(
                    (False, 'External set_meta() not called'),
                    open(metadata_files.filename_results_code, 'wb+')
                )  # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
                #file to store kwds passed to set_meta()
                metadata_files.filename_kwds = relpath(
                    tempfile.NamedTemporaryFile(dir=tmp_dir,
                                                prefix="metadata_kwds_%s_" %
                                                key).name)
                simplejson.dump(kwds,
                                open(metadata_files.filename_kwds, 'wb+'),
                                ensure_ascii=True)
                #existing metadata file parameters need to be overridden with cluster-writable file locations
                metadata_files.filename_override_metadata = relpath(
                    tempfile.NamedTemporaryFile(
                        dir=tmp_dir,
                        prefix="metadata_override_%s_" % key).name)
                open(
                    metadata_files.filename_override_metadata, 'wb+'
                )  # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
                override_metadata = []
                for meta_key, spec_value in dataset.metadata.spec.iteritems():
                    if isinstance(spec_value.param,
                                  FileParameter) and dataset.metadata.get(
                                      meta_key, None) is not None:
                        metadata_temp = MetadataTempFile()
                        shutil.copy(
                            dataset.metadata.get(meta_key, None).file_name,
                            metadata_temp.file_name)
                        override_metadata.append(
                            (meta_key, metadata_temp.to_JSON()))
                simplejson.dump(
                    override_metadata,
                    open(metadata_files.filename_override_metadata, 'wb+'))
                #add to session and flush
                sa_session.add(metadata_files)
                sa_session.flush()
            metadata_files_list.append(metadata_files)
        #return command required to build
        return "%s %s %s %s %s %s %s" % (
            os.path.join(exec_dir, 'set_metadata.sh'), dataset_files_path,
            tmp_dir, config_root, datatypes_config, job_metadata, " ".join(
                map(__metadata_files_list_to_cmd_line, metadata_files_list)))