Beispiel #1
 def setup_external_metadata( self, datasets, exec_dir = None, tmp_dir = None, dataset_files_path = None, output_fnames = None, kwds = None ):
     """Prepare the files used to set metadata externally and return the command arguments.

     For every dataset that has no external-metadata record yet (as reported
     by self.get_output_filenames_by_dataset), a JobExternalOutputMetadata
     record is created for self.job_id and four files are written under
     tmp_dir:

     * filename_in           -- the pickled dataset
     * filename_out          -- an empty placeholder for the set_meta() results
     * filename_results_code -- JSON ( False, 'External set_meta() not called' )
     * filename_kwds         -- JSON dump of kwds

     Datasets that already have a record reuse it unchanged.

     datasets           -- a dataset or a list of datasets
     exec_dir           -- defaults to the absolute current working directory
     tmp_dir            -- defaults to MetadataTempFile.tmp_dir
     dataset_files_path -- defaults to galaxy.model.Dataset.file_path
     output_fnames      -- optional objects with real_path / false_path; a
                           false_path whose real_path equals the dataset's
                           file_name is passed on as the filename override
     kwds               -- dict serialized for set_meta(); None means {}

     Returns "<exec_dir with trailing separator> <dataset_files_path> <tmp_dir> <args>"
     where <args> holds one "in,kwds,out,results_code,override" group per
     dataset, separated by spaces.
     """
     if kwds is None:
         kwds = {}

     def _files_to_cmd_line_arg( metadata_files ):
         # One comma-separated group per dataset; the last field is the
         # false_path override, or empty when there is none.
         filename_override = ""
         for dataset_path in output_fnames or []:
             if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
                 filename_override = dataset_path.false_path
                 break
         return "%s,%s,%s,%s,%s" % ( metadata_files.filename_in, metadata_files.filename_kwds, metadata_files.filename_out, metadata_files.filename_results_code, filename_override )

     def _reserve_relative_path( prefix ):
         #we are using tempfile to create unique filenames, tempfile always returns an absolute path
         #we will use pathnames relative to the galaxy root, to accommodate instances where the galaxy root
         #is located differently, i.e. on a cluster node with a different filesystem structure
         return relpath( tempfile.NamedTemporaryFile( dir = tmp_dir, prefix = prefix ).name )

     if not isinstance( datasets, list ):
         datasets = [ datasets ]
     if exec_dir is None:
         exec_dir = os.path.abspath( os.getcwd() )
     if tmp_dir is None:
         tmp_dir = MetadataTempFile.tmp_dir
     if dataset_files_path is None:
         dataset_files_path = galaxy.model.Dataset.file_path
     metadata_files_list = []
     for dataset in datasets:
         key = self.get_dataset_metadata_key( dataset )
         #future note:
         #wonkiness in job execution causes build command line to be called more than once
         #when setting metadata externally, via 'auto-detect' button in edit attributes, etc.,
         #we don't want to overwrite (losing the ability to cleanup) our existing dataset keys and files,
         #so we will only populate the dictionary once
         metadata_files = self.get_output_filenames_by_dataset( dataset )
         if not metadata_files:
             metadata_files = galaxy.model.JobExternalOutputMetadata( dataset = dataset)
             metadata_files.job_id = self.job_id
             # Every file is written through a context manager (or closed
             # explicitly) so its content is flushed to disk before the
             # returned command is run and no handle is leaked.
             #file to store existing dataset
             metadata_files.filename_in = _reserve_relative_path( "metadata_in_%s_" % key )
             with open( metadata_files.filename_in, 'wb+' ) as dataset_fh:
                 cPickle.dump( dataset, dataset_fh )
             #file to store metadata results of set_meta()
             metadata_files.filename_out = _reserve_relative_path( "metadata_out_%s_" % key )
             open( metadata_files.filename_out, 'wb+' ).close() # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
             #file to store a 'return code' indicating the results of the set_meta() call
             #results code is like (True/False - if setting metadata was successful/failed , exception or string of reason of success/failure )
             # Uses its own "metadata_results_" prefix; it previously shared
             # "metadata_out_" with filename_out, making the two indistinguishable on disk.
             metadata_files.filename_results_code = _reserve_relative_path( "metadata_results_%s_" % key )
             with open( metadata_files.filename_results_code, 'wb+' ) as results_fh:
                 simplejson.dump( ( False, 'External set_meta() not called' ), results_fh )
             #file to store kwds passed to set_meta()
             metadata_files.filename_kwds = _reserve_relative_path( "metadata_kwds_%s_" % key )
             with open( metadata_files.filename_kwds, 'wb+' ) as kwds_fh:
                 simplejson.dump( kwds, kwds_fh, ensure_ascii=True )
         metadata_files_list.append( metadata_files )
     #return command required to build
     return "%s %s %s %s" % ( os.path.join( exec_dir, '' ), dataset_files_path, tmp_dir, " ".join( map( _files_to_cmd_line_arg, metadata_files_list ) ) )
Beispiel #2
 def get_one_filename(context):
     """Select the single upload source described by *context* and collect warnings.

     Sources are tried in priority order: the uploaded data file, then the
     paste box (URLs or pasted content), then the files selected from the
     user's FTP directory. When a higher-priority source yields a path,
     a warning is recorded for lower-priority input that is being ignored.

     ``trans``, ``get_data_file_filename`` and
     ``get_url_paste_urls_or_filename`` are not defined in this function;
     they must be provided by the enclosing scope.

     Returns a ``(file_bunch, warnings)`` tuple. ``to_posix_lines``,
     ``auto_decompress``, ``space_to_tab`` and ``uuid`` are always set on the
     returned bunch, whether or not any source produced a path.
     """
     def _flag(key):
         # Any value other than "None", None or False switches the option on.
         return context.get(key, None) not in ["None", None, False]

     def _selected_ftp_files(user_ftp_dir):
         # Lazily yield an upload descriptor for every non-symlink file under
         # user_ftp_dir whose base name equals one of the selected ftp_files.
         for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
             for filename in filenames:
                 for ftp_filename in ftp_files:
                     if ftp_filename != filename:
                         continue
                     found = os.path.join(dirpath, filename)
                     if os.path.islink(found):
                         continue
                     path = relpath(found, user_ftp_dir)
                     yield {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                            'filename' : os.path.basename(path)}

     data_file = context['file_data']
     url_paste = context['url_paste']
     ftp_files = context['ftp_files']
     name = context.get('NAME', None)
     info = context.get('INFO', None)
     uuid = context.get('uuid', None) or None  # Turn '' to None
     warnings = []
     to_posix_lines = _flag('to_posix_lines')
     auto_decompress = _flag('auto_decompress')
     space_to_tab = _flag('space_to_tab')
     has_paste = url_paste is not None and url_paste.strip()

     file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
     if file_bunch.path:
         if has_paste:
             warnings.append("All file contents specified in the paste box were ignored.")
         if ftp_files:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif has_paste:  # we need to use url_paste
         # Stop at the first pasted entry that yields a path (the loop
         # previously had an empty body and did not parse).
         for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
             if file_bunch.path:
                 break
         if file_bunch.path and ftp_files is not None:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
         user_ftp_dir = trans.user_ftp_dir
         assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
         for ftp_data_file in _selected_ftp_files(user_ftp_dir):
             # NOTE(review): the object passed to getattr() was missing from
             # this copy of the code; trans.app.config is assumed - confirm.
             purge = getattr(trans.app.config, 'ftp_upload_purge', True)
             file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
             if file_bunch.path:
                 # The first selected file that yields a path wins.
                 break
     file_bunch.to_posix_lines = to_posix_lines
     file_bunch.auto_decompress = auto_decompress
     file_bunch.space_to_tab = space_to_tab
     file_bunch.uuid = uuid
     return file_bunch, warnings
Beispiel #4
 def get_html( self, prefix="" ):
     """Return the HTML for the table of files in the user's FTP upload directory.

     The table is FTPFileField.thead, one or more rows, FTPFileField.tfoot
     and a help <div> naming self.ftp_site. The rows are:

     * a create-account / log-in prompt when self.dir is None;
     * a "contains no files" message when self.dir does not exist or holds
       no files;
     * otherwise one FTPFileField.trow per file found under self.dir, with
       its path relative to self.dir, its size and its ctime.

     prefix -- string prepended to the field name in each row.
     """
     empty_row = '<tr><td colspan="4"><em>Your FTP upload directory contains no files.</em></td></tr>'
     rval = FTPFileField.thead
     if self.dir is None:
         create_url = url_for( controller='user', action='create', cntrller='user', referer=url_for( controller='root' ) )
         login_url = url_for( controller='user', action='login', cntrller='user', referer=url_for( controller='root' ) )
         rval += '<tr><td colspan="4"><em>Please <a href="%s">create</a> or <a href="%s">log in to</a> a Galaxy account to view files uploaded via FTP.</em></td></tr>' % ( create_url, login_url )
     elif not os.path.exists( self.dir ):
         rval += empty_row
     else:
         # This branch header was missing, which left the walk below attached
         # to the "directory does not exist" case.
         uploads = []
         for ( dirpath, dirnames, filenames ) in os.walk( self.dir ):
             for filename in filenames:
                 full_path = os.path.join( dirpath, filename )
                 # lstat: report on the entry itself rather than following symlinks
                 statinfo = os.lstat( full_path )
                 uploads.append( dict( path=relpath( full_path, self.dir ),
                                       size=nice_size( statinfo.st_size ),
                                       ctime=time.strftime( "%m/%d/%Y %I:%M:%S %p", time.localtime( statinfo.st_ctime ) ) ) )
         if not uploads:
             rval += empty_row
         for upload in uploads:
             # NOTE(review): the second format argument was missing from this
             # copy of the code; self.name is assumed - confirm.
             # NOTE(review): paths are interpolated into HTML without escaping.
             rval += FTPFileField.trow % ( prefix, self.name, upload['path'], upload['path'], upload['size'], upload['ctime'] )
     rval += FTPFileField.tfoot
     rval += '<div class="toolParamHelp">This Galaxy server allows you to upload files via FTP.  To upload some files, log in to the FTP server at <strong>%s</strong> using your Galaxy credentials (email address and password).</div>' % self.ftp_site
     return rval
Beispiel #6
Beispiel #8
        def get_filenames(context):
            """Collect every usable upload source described by *context*.

            Unlike a single-source lookup, all three sources contribute: the
            uploaded data file, each entry produced from the paste box, and
            each file selected from the user's FTP directory. A bunch is kept
            only when it has a path; kept bunches receive the shared
            ``to_posix_lines`` / ``auto_decompress`` / ``space_to_tab``
            options plus ``file_type`` and ``dbkey`` when those are not None.

            ``self``, ``trans``, ``log``, ``get_data_file_filename`` and
            ``get_url_paste_urls_or_filename`` are not defined in this
            function; they must be provided by the enclosing scope.

            Returns the list of kept bunches (possibly empty).
            """
            rval = []
            data_file = context['file_data']
            ftp_files = context['ftp_files']
            uuid = context.get('uuid', None) or None  # Turn '' to None
            name = context.get('NAME', None)
            info = context.get('INFO', None)
            file_type = context.get('file_type', None)
            dbkey = self.get_dbkey(context)

            def _flag(key):
                # Any value other than "None", None or False switches the option on.
                return context.get(key, None) not in ["None", None, False]

            to_posix_lines = _flag('to_posix_lines')
            auto_decompress = _flag('auto_decompress')
            space_to_tab = _flag('space_to_tab')

            def _keep(bunch):
                # Stamp the shared upload options on an accepted bunch and add
                # it to the result. Without the append the function could only
                # ever return an empty list.
                bunch.to_posix_lines = to_posix_lines
                bunch.auto_decompress = auto_decompress
                bunch.space_to_tab = space_to_tab
                if file_type is not None:
                    bunch.file_type = file_type
                if dbkey is not None:
                    bunch.dbkey = dbkey
                rval.append(bunch)

            file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
            file_bunch.uuid = uuid
            if file_bunch.path:
                _keep(file_bunch)

            for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
                if file_bunch.path:
                    file_bunch.uuid = uuid
                    _keep(file_bunch)

            # look for files uploaded via FTP
            valid_files = set()
            if ftp_files is None:
                ftp_files = []
            else:
                # Normalize input paths to ensure utf-8 encoding is normal form c.
                # This allows for comparison when the filesystem uses a different encoding than the browser.
                ftp_files = [unicodedata.normalize('NFC', f) for f in ftp_files if isinstance(f, text_type)]
                if trans.user is None:
                    log.warning('Anonymous user passed values in ftp_files: %s' % ftp_files)
                    ftp_files = []
                    # TODO: warning to the user (could happen if session has become invalid)
                else:
                    # Only a logged-in user has an FTP directory to scan; the
                    # scan previously ran in the anonymous branch as well.
                    user_ftp_dir = trans.user_ftp_dir
                    assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
                    for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                        for filename in filenames:
                            path = relpath(os.path.join(dirpath, filename), user_ftp_dir)
                            if not os.path.islink(os.path.join(dirpath, filename)):
                                # Normalize filesystem paths
                                if isinstance(path, text_type):
                                    valid_files.add(unicodedata.normalize('NFC', path))

            for ftp_file in ftp_files:
                if ftp_file not in valid_files:
                    log.warning('User passed an invalid file path in ftp_files: %s' % ftp_file)
                    # TODO: warning to the user (could happen if file is already imported)
                    # Never import a path that was not found by the scan above.
                    continue
                ftp_data_file = {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, ftp_file)),
                                 'filename' : os.path.basename(ftp_file)}
                # NOTE(review): the object passed to getattr() was missing from
                # this copy of the code; trans.app.config is assumed - confirm.
                purge = getattr(trans.app.config, 'ftp_upload_purge', True)
                file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
                if file_bunch.path:
                    _keep(file_bunch)
            return rval
Beispiel #9
 def get_one_filename(context):
     """Select the single upload source described by *context* and collect warnings.

     Sources are tried in priority order: the uploaded data file, then the
     paste box (URLs or pasted content), then the files selected from the
     user's FTP directory. When a higher-priority source yields a path,
     a warning is recorded for lower-priority input that is being ignored.

     ``self``, ``trans``, ``get_data_file_filename`` and
     ``get_url_paste_urls_or_filename`` are not defined in this function;
     they must be provided by the enclosing scope.

     Returns a ``(file_bunch, warnings)`` tuple. ``to_posix_lines``,
     ``auto_decompress``, ``space_to_tab`` and ``uuid`` are always set on the
     returned bunch; ``file_type`` and ``dbkey`` are set only when they are
     not None.
     """
     def _flag(key):
         # Any value other than "None", None or False switches the option on.
         return context.get(key, None) not in ["None", None, False]

     def _selected_ftp_files(user_ftp_dir):
         # Lazily yield an upload descriptor for every non-symlink file under
         # user_ftp_dir whose base name equals one of the selected ftp_files.
         for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
             for filename in filenames:
                 for ftp_filename in ftp_files:
                     if ftp_filename != filename:
                         continue
                     found = os.path.join(dirpath, filename)
                     if os.path.islink(found):
                         continue
                     path = relpath(found, user_ftp_dir)
                     yield {'local_filename' : os.path.abspath(os.path.join(user_ftp_dir, path)),
                            'filename' : os.path.basename(path)}

     data_file = context['file_data']
     url_paste = context['url_paste']
     ftp_files = context['ftp_files']
     name = context.get('NAME', None)
     info = context.get('INFO', None)
     uuid = context.get('uuid', None) or None  # Turn '' to None
     file_type = context.get('file_type', None)
     dbkey = self.get_dbkey(context)
     warnings = []
     to_posix_lines = _flag('to_posix_lines')
     auto_decompress = _flag('auto_decompress')
     space_to_tab = _flag('space_to_tab')
     has_paste = url_paste is not None and url_paste.strip()

     file_bunch = get_data_file_filename(data_file, override_name=name, override_info=info)
     if file_bunch.path:
         if has_paste:
             warnings.append("All file contents specified in the paste box were ignored.")
         if ftp_files:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif has_paste:  # we need to use url_paste
         # Stop at the first pasted entry that yields a path (the loop
         # previously had an empty body and did not parse).
         for file_bunch in get_url_paste_urls_or_filename(context, override_name=name, override_info=info):
             if file_bunch.path:
                 break
         if file_bunch.path and ftp_files is not None:
             warnings.append("All FTP uploaded file selections were ignored.")
     elif ftp_files is not None and trans.user is not None:  # look for files uploaded via FTP
         user_ftp_dir = trans.user_ftp_dir
         assert not os.path.islink(user_ftp_dir), "User FTP directory cannot be a symbolic link"
         for ftp_data_file in _selected_ftp_files(user_ftp_dir):
             # NOTE(review): the object passed to getattr() was missing from
             # this copy of the code; trans.app.config is assumed - confirm.
             purge = getattr(trans.app.config, 'ftp_upload_purge', True)
             file_bunch = get_data_file_filename(ftp_data_file, override_name=name, override_info=info, purge=purge)
             if file_bunch.path:
                 # The first selected file that yields a path wins.
                 break
     file_bunch.to_posix_lines = to_posix_lines
     file_bunch.auto_decompress = auto_decompress
     file_bunch.space_to_tab = space_to_tab
     file_bunch.uuid = uuid
     if file_type is not None:
         file_bunch.file_type = file_type
     if dbkey is not None:
         file_bunch.dbkey = dbkey
     return file_bunch, warnings
Beispiel #10
    def check_src(item):
        """Validate one upload request item and normalize its ``src`` in place.

        Raises RequestParameterInvalidException when the item carries
        ``object_id`` or ``in_place``, when ``src`` is ``"url"`` without a
        ``url`` key, when ``elements_from`` is set but not in
        ELEMENTS_FROM_TYPE, when an ``ftp_import`` target cannot be resolved,
        or when a ``url`` source does not look like a URL.

        ``file://`` URLs, ``server_dir`` and ``ftp_import`` sources are
        rewritten to ``src == "path"`` with a ``path`` key. ``url`` and
        ``files`` sources get ``in_place`` set, and ``purge_source`` defaults
        to False when absent.

        ``trans``, ``purge_ftp_source`` and ``run_as_real_user`` are not
        defined in this function; they must come from the enclosing scope.

        NOTE(review): this copy of the function is missing several lines
        (see the NOTE(review) markers below) and does not parse as written.
        """
        # Callers may not address an existing object through the upload request.
        if "object_id" in item:
            raise RequestParameterInvalidException(
                "object_id not allowed to appear in the request.")


        # Normalize file:// URLs into paths.
        if item["src"] == "url":
            if "url" not in item:
                raise RequestParameterInvalidException(
                    "src specified as 'url' but 'url' not specified")

            url = item["url"]
            if url.startswith("file://"):
                item["src"] = "path"
                item["path"] = url[len("file://"):]
                del item["url"]

        # in_place is decided below by this function, never by the request.
        if "in_place" in item:
            raise RequestParameterInvalidException(
                "in_place cannot be set in the upload request")

        src = item["src"]

        # Check link_data_only can only be set for certain src types and certain elements_from types.
        # NOTE(review): the body of the following `if` is missing from this copy.
        if src not in ["path", "server_dir"]:
        elements_from = item.get("elements_from", None)
        if elements_from and elements_from not in ELEMENTS_FROM_TYPE:
            raise RequestParameterInvalidException(
                "Invalid elements_from/items_from found in request")

        # NOTE(review): file:// URLs were already rewritten to src "path"
        # above, so the second half of this condition can only match other
        # "file:" forms - confirm whether it is still needed.
        if src == "path" or (src == "url" and item["url"].startswith("file:")):
            # Validate is admin, leave alone.
            # NOTE(review): the statement performing that validation is
            # missing from this copy; the branch has no body.
        elif src == "server_dir":
            # Validate and replace with path definition.
            server_dir = item["server_dir"]
            full_path, _ = validate_server_directory_upload(trans, server_dir)
            item["src"] = "path"
            item["path"] = full_path
        elif src == "ftp_import":
            ftp_path = item["ftp_path"]
            full_path = None

            # It'd be nice if this can be de-duplicated with what is in parameters/
            user_ftp_dir = trans.user_ftp_dir
            is_directory = False

            assert not os.path.islink(
                user_ftp_dir), "User FTP directory cannot be a symbolic link"
            # Look for a file or directory whose base name equals ftp_path;
            # symbolic links are not accepted as the target.
            for (dirpath, dirnames, filenames) in os.walk(user_ftp_dir):
                for filename in filenames:
                    if ftp_path == filename:
                        # NOTE(review): the relpath() call below is truncated
                        # (second argument and closing parenthesis missing).
                        path = relpath(os.path.join(dirpath, filename),
                        if not os.path.islink(os.path.join(dirpath, filename)):
                            full_path = os.path.abspath(
                                os.path.join(user_ftp_dir, path))

                for dirname in dirnames:
                    if ftp_path == dirname:
                        # NOTE(review): truncated relpath() call, as above.
                        path = relpath(os.path.join(dirpath, dirname),
                        if not os.path.islink(os.path.join(dirpath, dirname)):
                            full_path = os.path.abspath(
                                os.path.join(user_ftp_dir, path))
                            is_directory = True

            if is_directory:
                # If the target is a directory - make sure no files under it are symbolic links
                # NOTE(review): both checks below clear full_path when a
                # matching entry is NOT a symlink, which looks inverted
                # relative to the comment above - confirm.
                for (dirpath, dirnames, filenames) in os.walk(full_path):
                    for filename in filenames:
                        if ftp_path == filename:
                            # NOTE(review): truncated relpath() call.
                            path = relpath(os.path.join(dirpath, filename),
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                full_path = False

                    for dirname in dirnames:
                        if ftp_path == dirname:
                            # NOTE(review): truncated relpath() call; this loop
                            # iterates `dirname` but tests `filename`, which
                            # looks like a copy/paste slip - confirm.
                            path = relpath(os.path.join(dirpath, filename),
                            if not os.path.islink(
                                    os.path.join(dirpath, filename)):
                                full_path = False

            if not full_path:
                # NOTE(review): the closing parenthesis of this raise is missing.
                raise RequestParameterInvalidException(
                    "Failed to find referenced ftp_path or symbolic link was enountered"

            item["src"] = "path"
            item["path"] = full_path
            item["purge_source"] = purge_ftp_source
        elif src == "url":
            url = item["url"]
            looks_like_url = False
            for url_prefix in ["http://", "https://", "ftp://", "ftps://"]:
                if url.startswith(url_prefix):
                    looks_like_url = True

            # NOTE(review): the second operand of this condition is missing
            # from this copy, so the extra acceptance rule is unknown.
            if not looks_like_url and
                looks_like_url = True

            if not looks_like_url:
                raise RequestParameterInvalidException(
                    "Invalid URL [%s] found in src definition." % url)

            item["in_place"] = run_as_real_user
        elif src == "files":
            item["in_place"] = run_as_real_user

        # Small disagreement with traditional uploads - we purge less by default since whether purging
        # happens varies based on upload options in non-obvious ways.
        if "purge_source" not in item:
            item["purge_source"] = False
Beispiel #11
Beispiel #12
Beispiel #13
Beispiel #14
 def file_name( self ):
     """Return the relative path of this object's temporary file, creating it on first use.

     On the first call a unique name with the prefix "metadata_temp_file_"
     is obtained from tempfile inside self.tmp_dir, stored in self._filename
     relative to the current directory, and an empty file is created at that
     path. Later calls return the stored name without touching the disk.
     """
     if self._filename is None:
         #we need to create a tmp file, accessable across all nodes/heads, save the name, and return it
         self._filename = relpath( tempfile.NamedTemporaryFile( dir = self.tmp_dir, prefix = "metadata_temp_file_" ).name )
         # Only the empty file on disk is needed, so close the handle right
         # away instead of leaving it open until garbage collection.
         open( self._filename, 'wb+' ).close() #create an empty file, so it can't be reused using tempfile
     return self._filename
Beispiel #15
Beispiel #16
    # Prepares the files used to set metadata externally and returns the
    # command-line arguments that point at them: exec_dir (with a trailing
    # separator), dataset_files_path, tmp_dir, config_root, datatypes_config,
    # job_metadata and one comma-separated group of file names per dataset.
    #
    # NOTE(review): this copy of the method is incomplete and does not parse.
    # The signature is cut off after `self,`, and most multi-line calls below
    # have lost their first or last lines. The names datasets, sa_session,
    # exec_dir, tmp_dir, dataset_files_path, output_fnames, config_root,
    # datatypes_config and job_metadata are used in the body and are presumably
    # parameters - confirm against the full source.
    def setup_external_metadata(self,
        #fill in metadata_files_dict and return the command with args required to set metadata
        def __metadata_files_list_to_cmd_line(metadata_files):
            # Returns the false_path of the output whose real_path is this
            # dataset's file_name, or "" when there is no such override.
            def __get_filename_override():
                if output_fnames:
                    for dataset_path in output_fnames:
                        if dataset_path.false_path and dataset_path.real_path == metadata_files.dataset.file_name:
                            return dataset_path.false_path
                return ""

            # NOTE(review): the format string expects six values but only the
            # first two arguments survive in this copy; the call is unclosed.
            return "%s,%s,%s,%s,%s,%s" % (
                metadata_files.filename_in, metadata_files.filename_kwds,

        # Normalize arguments: accept a single dataset and fill in defaults.
        if not isinstance(datasets, list):
            datasets = [datasets]
        if exec_dir is None:
            exec_dir = os.path.abspath(os.getcwd())
        if tmp_dir is None:
            tmp_dir = MetadataTempFile.tmp_dir
        if dataset_files_path is None:
            dataset_files_path = galaxy.model.Dataset.file_path
        if config_root is None:
            config_root = os.path.abspath(os.getcwd())
        if datatypes_config is None:
            datatypes_config = 'datatypes_conf.xml'
        metadata_files_list = []
        for dataset in datasets:
            key = self.get_dataset_metadata_key(dataset)
            #future note:
            #wonkiness in job execution causes build command line to be called more than once
            #when setting metadata externally, via 'auto-detect' button in edit attributes, etc.,
            #we don't want to overwrite (losing the ability to cleanup) our existing dataset keys and files,
            #so we will only populate the dictionary once
            metadata_files = self.get_output_filenames_by_dataset(
                dataset, sa_session)
            if not metadata_files:
                # NOTE(review): the constructor call below is unclosed; its
                # arguments are missing from this copy.
                metadata_files = galaxy.model.JobExternalOutputMetadata(
                metadata_files.job_id = self.job_id
                #we are using tempfile to create unique filenames, tempfile always returns an absolute path
                #we will use pathnames relative to the galaxy root, to accommodate instances where the galaxy root
                #is located differently, i.e. on a cluster node with a different filesystem structure

                #file to store existing dataset
                # NOTE(review): every `relpath(` assignment from here on is
                # truncated (the temp-file call inside it is missing), and the
                # write that follows each one has lost its opening line.
                metadata_files.filename_in = relpath(
                                                prefix="metadata_in_%s_" %
                cPickle.dump(dataset, open(metadata_files.filename_in, 'wb+'))
                #file to store metadata results of set_meta()
                metadata_files.filename_out = relpath(
                                                prefix="metadata_out_%s_" %
                    metadata_files.filename_out, 'wb+'
                )  # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
                #file to store a 'return code' indicating the results of the set_meta() call
                #results code is like (True/False - if setting metadata was successful/failed , exception or string of reason of success/failure )
                metadata_files.filename_results_code = relpath(
                                                prefix="metadata_results_%s_" %
                    (False, 'External set_meta() not called'),
                    open(metadata_files.filename_results_code, 'wb+')
                )  # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
                #file to store kwds passed to set_meta()
                metadata_files.filename_kwds = relpath(
                                                prefix="metadata_kwds_%s_" %
                                open(metadata_files.filename_kwds, 'wb+'),
                #existing metadata file parameters need to be overridden with cluster-writable file locations
                metadata_files.filename_override_metadata = relpath(
                        prefix="metadata_override_%s_" % key).name)
                    metadata_files.filename_override_metadata, 'wb+'
                )  # create the file on disk, so it cannot be reused by tempfile (unlikely, but possible)
                # Collects (meta_key, temp-file JSON) pairs for file-backed
                # metadata values that are set on the dataset.
                override_metadata = []
                for meta_key, spec_value in dataset.metadata.spec.iteritems():
                    if isinstance(spec_value.param,
                                  FileParameter) and dataset.metadata.get(
                                      meta_key, None) is not None:
                        metadata_temp = MetadataTempFile()
                        # NOTE(review): the calls that use the next two lines
                        # as arguments are missing from this copy.
                            dataset.metadata.get(meta_key, None).file_name,
                            (meta_key, metadata_temp.to_JSON()))
                    open(metadata_files.filename_override_metadata, 'wb+'))
                #add to session and flush
                # NOTE(review): no session statements follow the comment above,
                # and nothing visible appends to metadata_files_list - both
                # appear to have been lost.
        #return command required to build
        return "%s %s %s %s %s %s %s" % (
            os.path.join(exec_dir, ''), dataset_files_path,
            tmp_dir, config_root, datatypes_config, job_metadata, " ".join(
                map(__metadata_files_list_to_cmd_line, metadata_files_list)))