Example #1
 def get_html_default(self, prefix="", disabled=False):
     if self.multiple:
         multiple = " multiple"
     else:
         multiple = ""
     if self.size:
         size = ' size="%s"' % str(self.size)
     else:
         size = ''
     rval = []
     last_selected_value = ""
     for text, value, selected in self.options:
         if selected:
             selected_text = " selected"
             last_selected_value = value
             if not isinstance(last_selected_value, string_types):
                 last_selected_value = str(last_selected_value)
         else:
             selected_text = ""
         rval.append('<option value="%s"%s>%s</option>' % (escape(unicodify(value), quote=True), selected_text, escape(unicodify(text), quote=True)))
     if last_selected_value:
         last_selected_value = ' last_selected_value="%s"' % escape(unicodify(last_selected_value), quote=True)
     if self.field_id is not None:
         id_string = ' id="%s"' % self.field_id
     else:
         id_string = ''
     rval.insert(0, '<select name="%s%s"%s%s%s%s%s%s%s>'
                 % (prefix, self.name, multiple, size, self.refresh_on_change_text, last_selected_value, self.get_disabled_str(disabled), id_string, self.extra_attributes))
     rval.append('</select>')
     return unicodify("\n".join(rval))
 def get_html( self, prefix="", disabled=False ):
     value = self.value
     if not isinstance( value, basestring ):
         value = str( value )
     value = unicodify( value )
     return unicodify( '<input type="text" name="%s%s" size="%d" value="%s"%s>'
                       % ( prefix, self.name, self.size, escape( value, quote=True ), self.get_disabled_str( disabled ) ) )
 def handle_command( self, tool_dependency, cmd, return_output=False ):
     """Handle a command and log the results."""
     context = self.app.install_model.context
     command = str( cmd )
     output = self.handle_complex_command( command )
     self.log_results( cmd, output, os.path.join( self.install_dir, basic_util.INSTALLATION_LOG ) )
     stdout = output.stdout
     stderr = output.stderr
     if len( stdout ) > DATABASE_MAX_STRING_SIZE:
         print "Length of stdout > %s, so only a portion will be saved in the database." % str( DATABASE_MAX_STRING_SIZE_PRETTY )
         stdout = shrink_string_by_size( stdout, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
     if len( stderr ) > DATABASE_MAX_STRING_SIZE:
         print "Length of stderr > %s, so only a portion will be saved in the database." % str( DATABASE_MAX_STRING_SIZE_PRETTY )
         stderr = shrink_string_by_size( stderr, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True )
     if output.return_code not in [ 0 ]:
         tool_dependency.status = self.app.install_model.ToolDependency.installation_status.ERROR
         if stderr:
             tool_dependency.error_message = unicodify( stderr )
         elif stdout:
             tool_dependency.error_message = unicodify( stdout )
         else:
             # We have a problem if there was no stdout and no stderr.
             tool_dependency.error_message = "Unknown error occurred executing shell command %s, return_code: %s" % \
                 ( str( cmd ), str( output.return_code ) )
         context.add( tool_dependency )
         context.flush()
     if return_output:
         return output
     return output.return_code
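The stdout/stderr truncation above can be illustrated with a small stand-alone helper; this is only a sketch of the idea behind shrink_string_by_size, with a made-up size limit rather than Galaxy's DATABASE_MAX_STRING_SIZE:

def shrink_output(text, max_size, join_by="\n..\n"):
    """Keep the head and tail of an oversized string, joined by a marker."""
    if len(text) <= max_size:
        return text
    keep = max_size - len(join_by)
    left = keep // 2 + keep % 2  # the left (leading) part keeps the extra character
    right = keep - left
    return text[:left] + join_by + text[-right:]

print(shrink_output("x" * 100, 20))  # 20 characters: 8 from the head, the marker, 8 from the tail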
Example #4
def _handle_script_integrity(path, config):
    if not check_script_integrity(config):
        return

    script_integrity_verified = False
    count = getattr(config, "check_job_script_integrity_count", DEFAULT_INTEGRITY_COUNT)
    sleep_amt = getattr(config, "check_job_script_integrity_sleep", DEFAULT_INTEGRITY_SLEEP)
    for i in range(count):
        try:
            returncode = subprocess.call([path], env={"ABC_TEST_JOB_SCRIPT_INTEGRITY_XYZ": "1"})
            if returncode == 42:
                script_integrity_verified = True
                break

            log.debug("Script integrity error for file '%s': returncode was %d", path, returncode)

            # Else we will sync and wait to see if the script becomes
            # executable.
            try:
                # sync file system to avoid "Text file busy" problems.
                # These have occurred both in Docker containers and on EC2 clusters
                # under high load.
                subprocess.check_call(INTEGRITY_SYNC_COMMAND)
            except Exception as e:
                log.debug("Error syncing the filesystem: %s", unicodify(e))

        except Exception as exc:
            log.debug("Script not available yet: %s", unicodify(exc))

        time.sleep(sleep_amt)

    if not script_integrity_verified:
        raise Exception("Failed to write job script '%s', could not verify job script integrity." % path)
Example #5
    def submit_report(self, dataset, job, tool, **kwargs):
        """Submit the error report to sentry
        """
        log.info(self.github)

        if self.github:
            tool_kw = {'tool_id': unicodify(job.tool_id), 'tool_version': unicodify(job.tool_version)}
            label = self.get_label('{tool_id}/{tool_version}'.format(**tool_kw))
            error_title = u"""Galaxy Job Error: {tool_id} v{tool_version}""".format(**tool_kw)

            # We'll re-use the email error reporter's template since GitHub supports HTML
            error_reporter = EmailErrorReporter(dataset.id, self.app)
            error_reporter.create_report(job.get_user(), email=kwargs.get('email', None), message=kwargs.get('message', None))

            # The HTML report
            error_message = error_reporter.html_report

            log.info(error_title in self.issue_cache)
            if error_title not in self.issue_cache:
                # Create a new issue.
                self.issue_cache[error_title] = self.repo.create_issue(
                    title=error_title,
                    body=error_message,
                    # Label it with a tag: tool_id/tool_version
                    labels=[label]
                )
            else:
                self.issue_cache[error_title].create_comment(error_message)
            return ('Submitted bug report to Github. Your issue number is %s' % self.issue_cache[error_title].number, 'success')
Example #6
    def __create_jstree( self, directory, disable='folders' ):
        """
        Loads recursively all files and folders within the given folder
        and its subfolders and returns jstree representation
        of its structure.
        """
        userdir_jstree = None
        jstree_paths = []
        if os.path.exists( directory ):
            for ( dirpath, dirnames, filenames ) in os.walk( directory ):
                for dirname in dirnames:
                    dir_path = os.path.relpath( os.path.join( dirpath, dirname ), directory )
                    dir_path_hash = hashlib.sha1(unicodify(dir_path).encode('utf-8')).hexdigest()
                    disabled = True if disable == 'folders' else False
                    jstree_paths.append( jstree.Path( dir_path, dir_path_hash, { 'type': 'folder', 'state': { 'disabled': disabled }, 'li_attr': { 'full_path': dir_path } } ) )

                for filename in filenames:
                    file_path = os.path.relpath( os.path.join( dirpath, filename ), directory )
                    file_path_hash = hashlib.sha1(unicodify(file_path).encode('utf-8')).hexdigest()
                    disabled = True if disable == 'files' else False
                    jstree_paths.append( jstree.Path( file_path, file_path_hash, { 'type': 'file', 'state': { 'disabled': disabled }, 'li_attr': { 'full_path': file_path } } ) )
        else:
            raise exceptions.ConfigDoesNotAllowException( 'The given directory does not exist.' )
        userdir_jstree = jstree.JSTree( jstree_paths )
        return userdir_jstree
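The directory walk and path hashing above reduce to this stdlib-only sketch (jstree.Path is replaced by a plain dict purely for illustration):

import hashlib
import os

def list_relative_paths(directory):
    """Return {relative_path: sha1_of_path} for every file under directory."""
    paths = {}
    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            rel = os.path.relpath(os.path.join(dirpath, filename), directory)
            paths[rel] = hashlib.sha1(rel.encode('utf-8')).hexdigest()
    return paths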
def main(argv):
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-k', '--secret-key', help='Key to convert pages with', default='')
    parser.add_argument('-d', '--dry-run', help='No changes, just test it.', action='store_true')
    populate_config_args(parser)
    args = parser.parse_args()
    properties = app_properties_from_args(args)
    config = galaxy.config.Configuration(**properties)
    secret = args.secret_key or config.id_secret
    security_helper = SecurityHelper(id_secret=secret)
    object_store = build_object_store_from_config(config)
    if not config.database_connection:
        print("The database connection is empty. If you are using the default value, please uncomment that in your galaxy.yml")

    model = galaxy.config.init_models_from_config(config, object_store=object_store)
    session = model.context.current
    pagerevs = session.query(model.PageRevision).all()
    mock_trans = Bunch(app=Bunch(security=security_helper), model=model, user_is_admin=lambda: True, sa_session=session)
    for p in pagerevs:
        try:
            processor = _PageContentProcessor(mock_trans, _placeholderRenderForSave)
            processor.feed(p.content)
            newcontent = unicodify(processor.output(), 'utf-8')
            if p.content != newcontent:
                if not args.dry_run:
                    p.content = unicodify(processor.output(), 'utf-8')
                    session.add(p)
                    session.flush()
                else:
                    print("Modifying revision %s." % p.id)
                    print(difflib.unified_diff(p.content, newcontent))
        except Exception:
            logging.exception("Error parsing page, rolling changes back and skipping revision %s.  Please report this error." % p.id)
            session.rollback()
Example #8
 def create_report( self, user, email='', message='', **kwd ):
     hda = self.hda
     job = self.job
     host = web.url_for( '/', qualified=True )
     history_view_link = web.url_for( controller="history", action="view", id=self.app.security.encode_id( hda.history_id ), qualified=True )
     # Build the email message
     if user and user.email != email:
         email_str = "'%s' (providing preferred contact email '%s')" % (user.email, email)
     else:
         email_str = "'%s'" % (email or 'anonymously')
     self.report = string.Template( error_report_template ) \
         .safe_substitute( host=host,
                           dataset_id=hda.dataset_id,
                           history_id=hda.history_id,
                           hid=hda.hid,
                           history_item_name=hda.get_display_name(),
                           history_view_link=history_view_link,
                           job_id=job.id,
                           tool_version=job.tool_version,
                           job_tool_id=job.tool_id,
                           job_tool_version=hda.tool_version,
                           job_runner_external_id=job.job_runner_external_id,
                           job_command_line=job.command_line,
                           job_stderr=util.unicodify( job.stderr ),
                           job_stdout=util.unicodify( job.stdout ),
                           job_info=util.unicodify( job.info ),
                           job_traceback=util.unicodify( job.traceback ),
                           email_str=email_str,
                           message=util.unicodify( message ) )
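string.Template.safe_substitute, used above, leaves unknown placeholders in place instead of raising KeyError, so a partially filled report still renders; a tiny stand-alone demonstration with an invented template:

import string

template = string.Template("Job $job_id on $host failed: $job_stderr")
print(template.safe_substitute(job_id=42, host="https://galaxy.example.org/"))
# -> Job 42 on https://galaxy.example.org/ failed: $job_stderr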
def build_readme_files_dict( app, repository, changeset_revision, metadata, tool_path=None ):
    """
    Return a dictionary of valid readme file name <-> readme file content pairs for all readme files defined in the received metadata.  Since the
    received changeset_revision (which is associated with the received metadata) may not be the latest installable changeset revision, the README
    file contents may not be available on disk.  This method is used by both Galaxy and the Tool Shed.
    """
    if app.name == 'galaxy':
        can_use_disk_files = True
    else:
        repo = hg_util.get_repo_for_repository( app, repository=repository, repo_path=None, create=False )
        latest_downloadable_changeset_revision = suc.get_latest_downloadable_changeset_revision( app, repository, repo )
        can_use_disk_files = changeset_revision == latest_downloadable_changeset_revision
    readme_files_dict = {}
    if metadata:
        if 'readme_files' in metadata:
            for relative_path_to_readme_file in metadata[ 'readme_files' ]:
                readme_file_name = os.path.split( relative_path_to_readme_file )[ 1 ]
                if can_use_disk_files:
                    if tool_path:
                        full_path_to_readme_file = os.path.abspath( os.path.join( tool_path, relative_path_to_readme_file ) )
                    else:
                        full_path_to_readme_file = os.path.abspath( relative_path_to_readme_file )
                    text = None
                    try:
                        f = open( full_path_to_readme_file, 'r' )
                        text = unicodify( f.read() )
                        f.close()
                    except Exception as e:
                        log.exception( "Error reading README file '%s' from disk: %s" % ( str( relative_path_to_readme_file ), str( e ) ) )
                        text = None
                    if text:
                        text_of_reasonable_length = basic_util.size_string( text )
                        if text_of_reasonable_length.find( '.. image:: ' ) >= 0:
                            # Handle image display for README files that are contained in repositories in the tool shed or installed into Galaxy.
                            lock = threading.Lock()
                            lock.acquire( True )
                            try:
                                text_of_reasonable_length = suc.set_image_paths( app,
                                                                                 app.security.encode_id( repository.id ),
                                                                                 text_of_reasonable_length )
                            except Exception as e:
                                log.exception( "Exception in build_readme_files_dict, so images may not be properly displayed:\n%s" % str( e ) )
                            finally:
                                lock.release()
                        if readme_file_name.endswith( '.rst' ):
                            text_of_reasonable_length = Template( rst_to_html( text_of_reasonable_length ),
                                                                  input_encoding='utf-8',
                                                                  output_encoding='utf-8',
                                                                  default_filters=[ 'decode.utf8' ],
                                                                  encoding_errors='replace' )
                            text_of_reasonable_length = text_of_reasonable_length.render( static_path=web.url_for( '/static' ),
                                                                                          host_url=web.url_for( '/', qualified=True ) )
                            text_of_reasonable_length = unicodify( text_of_reasonable_length )
                        else:
                            text_of_reasonable_length = basic_util.to_html_string( text_of_reasonable_length )
                        readme_files_dict[ readme_file_name ] = text_of_reasonable_length
Example #10
 def process_result_value(self, value, dialect):
     if value is None:
         return None
     ret = None
     try:
         ret = metadata_pickler.loads(unicodify(value))
         if ret:
             ret = dict(ret.__dict__)
     except Exception:
         try:
             ret = json_decoder.decode(unicodify(_sniffnfix_pg9_hex(value)))
         except Exception:
             ret = None
     return ret
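The decode-with-fallback pattern above (try the pickled form, fall back to JSON, otherwise return None) can be sketched with the standard library alone; metadata_pickler and _sniffnfix_pg9_hex are Galaxy-specific, so json and ast stand in here:

import ast
import json

def decode_column(value):
    """Best-effort decode of a stored string: JSON first, then a Python literal, else None."""
    if value is None:
        return None
    try:
        return json.loads(value)
    except Exception:
        try:
            return ast.literal_eval(value)
        except Exception:
            return None

print(decode_column('{"dbkey": "hg38"}'))   # JSON branch
print(decode_column("{'dbkey': 'hg38'}"))   # literal_eval fallback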
Example #11
    def execute(self, cmd, timeout=60):

        def retry():
            try:
                _, stdout, stderr = self._execute(cmd, timeout)
            except paramiko.SSHException as e:
                log.error(e)
                time.sleep(10)
                self.connect()
                _, stdout, stderr = self._execute(cmd, timeout)
            return stdout, stderr

        stdout, stderr = self.retry_action_executor.execute(retry)
        return_code = stdout.channel.recv_exit_status()
        return Bunch(stdout=unicodify(stdout.read()), stderr=unicodify(stderr.read()), returncode=return_code)
 def _scripts_check_output(self, script, args):
     cwd = galaxy_directory()
     cmd = ["python", os.path.join(cwd, "scripts", script)] + args
     clean_env = {
         "PATH": os.environ.get("PATH", None),
     }  # Don't let testing environment variables interfere with config.
     return unicodify(subprocess.check_output(cmd, cwd=cwd, env=clean_env))
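Running a helper script with a stripped-down environment and decoding its output, as in _scripts_check_output, looks roughly like this (the command and working directory are placeholders):

import os
import subprocess

def run_with_clean_env(cmd, cwd):
    """Run cmd with only PATH inherited, so stray environment variables cannot leak in."""
    clean_env = {"PATH": os.environ.get("PATH", "")}
    return subprocess.check_output(cmd, cwd=cwd, env=clean_env).decode('utf-8')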
Example #13
def get_headers( fname, sep, count=60, is_multi_byte=False ):
    """
    Returns a list with the first 'count' lines split by 'sep'

    >>> fname = get_test_fname('complete.bed')
    >>> get_headers(fname,'\\t')
    [['chr7', '127475281', '127491632', 'NM_000230', '0', '+', '127486022', '127488767', '0', '3', '29,172,3225,', '0,10713,13126,'], ['chr7', '127486011', '127488900', 'D49487', '0', '+', '127486022', '127488767', '0', '2', '155,490,', '0,2399']]
    """
    headers = []
    compressed_gzip = is_gzip(fname)
    compressed_bzip2 = is_bz2(fname)
    try:
        if compressed_gzip:
            in_file = gzip.GzipFile(fname, 'r')
        elif compressed_bzip2:
            in_file = bz2.BZ2File(fname, 'r')
        else:
            in_file = open(fname, 'rt')
        for idx, line in enumerate(in_file):
            line = line.rstrip('\n\r')
            if is_multi_byte:
                # TODO: fix this - sep is never found in line
                line = unicodify( line, 'utf-8' )
                sep = sep.encode( 'utf-8' )
            headers.append( line.split(sep) )
            if idx == count:
                break
    finally:
        in_file.close()
    return headers
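A compact sketch of the same header-peeking idea, reading the first few lines of a plain or gzip-compressed file in text mode (compression is detected by file extension here rather than by magic bytes, and bzip2 handling is omitted):

import gzip

def peek_headers(fname, sep='\t', count=5):
    """Split the first count lines of a (possibly gzipped) file on sep."""
    opener = gzip.open if fname.endswith('.gz') else open
    headers = []
    with opener(fname, 'rt') as handle:
        for idx, line in enumerate(handle):
            headers.append(line.rstrip('\r\n').split(sep))
            if idx + 1 >= count:
                break
    return headers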
Example #14
 def _read_post_payload(self, environ):
     request_body_size = int(environ.get('CONTENT_LENGTH', 0))
     request_body = environ['wsgi.input'].read(request_body_size) or '{}'
     # TODO: json decode error handling
     # log.debug( 'request_body: (%s)\n%s', type( request_body ), request_body )
     payload = json.loads(unicodify(request_body))
     return payload
 def check_page_for_string(self, patt):
     """Looks for 'patt' in the current browser page"""
     page = unicodify(self.last_page())
     if page.find(patt) == -1:
         fname = self.write_temp_file(page)
         errmsg = "no match to '%s'\npage content written to '%s'\npage: [[%s]]" % (patt, fname, page)
         raise AssertionError(errmsg)
Example #16
def check_html(file_path, chunk=None):
    if chunk is None:
        temp = open(file_path)
    elif hasattr(chunk, "splitlines"):
        temp = chunk.splitlines()
    else:
        temp = chunk
    regexp1 = re.compile("<A\s+[^>]*HREF[^>]+>", re.I)
    regexp2 = re.compile("<IFRAME[^>]*>", re.I)
    regexp3 = re.compile("<FRAMESET[^>]*>", re.I)
    regexp4 = re.compile("<META[\W][^>]*>", re.I)
    regexp5 = re.compile("<SCRIPT[^>]*>", re.I)
    lineno = 0
    # TODO: Potentially reading huge lines into string here, this should be
    # reworked.
    for line in temp:
        line = util.unicodify(line)
        lineno += 1
        matches = regexp1.search(line) or regexp2.search(line) or regexp3.search(line) or regexp4.search(line) or regexp5.search(line)
        if matches:
            if chunk is None:
                temp.close()
            return True
        if HTML_CHECK_LINES and (lineno > HTML_CHECK_LINES):
            break
    if chunk is None:
        temp.close()
    return False
Example #17
def _handle_test_output_file(ctx, report_type, test_data, kwds):
    kwd_name = "test_output"
    if report_type != "html":
        kwd_name = "test_output_%s" % report_type

    path = kwds.get(kwd_name, None)
    if path is None:
        message = "No file specified for %s, skipping test output." % kwd_name
        ctx.vlog(message)
        return

    try:
        contents = build_report.build_report(
            test_data, report_type=report_type
        )
    except Exception:
        message = "Problem producing report file %s for %s" % (
            path, kwd_name
        )
        ctx.vlog(message, exception=True)
        raise

    try:
        with io.open(path, mode='w', encoding='utf-8') as handle:
            handle.write(unicodify(contents))
    except Exception:
        message = "Problem writing output file %s for %s" % (
            kwd_name, path
        )
        ctx.vlog(message, exception=True)
        raise
Example #18
def __extract_payload_from_request(trans, func, kwargs):
    content_type = trans.request.headers.get('content-type', '')
    if content_type.startswith('application/x-www-form-urlencoded') or content_type.startswith('multipart/form-data'):
        # If the content type is a standard type such as multipart/form-data, the wsgi framework parses the request body
        # and loads all field values into kwargs. However, kwargs also contains formal method parameters etc. which
        # are not a part of the request body. This is a problem because it's not possible to differentiate between values
        # which are a part of the request body, and therefore should be a part of the payload, and values which should not be
        # in the payload. Therefore, the decorated method's formal arguments are discovered through reflection and removed from
        # the payload dictionary. This helps to prevent duplicate argument conflicts in downstream methods.
        payload = kwargs.copy()
        named_args, _, _, _ = inspect.getargspec(func)
        for arg in named_args:
            payload.pop(arg, None)
        for k, v in payload.items():
            if isinstance(v, string_types):
                try:
                    # note: parse_non_hex_float only needed here for single string values where something like
                    # 40000000000000e5 will be parsed as a scientific notation float. This is as opposed to hex strings
                    # in larger JSON structures where quoting prevents this (further below)
                    payload[k] = loads(v, parse_float=parse_non_hex_float)
                except Exception:
                    # may not actually be json, just continue
                    pass
    else:
        # Assume application/json content type and parse request body manually, since wsgi won't do it. However, the order of this check
        # should ideally be in reverse, with the if clause being a check for application/json and the else clause assuming a standard encoding
        # such as multipart/form-data. Leaving it as is for backward compatibility, just in case.
        payload = loads(unicodify(trans.request.body))
    return payload
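The parse_float hook mentioned in the comment above is a standard json.loads feature; this isolated snippet shows how such a hook can keep exponent-style digit strings from being coerced into floats (keep_exponent_strings is an illustrative stand-in, not Galaxy's parse_non_hex_float):

import json

def keep_exponent_strings(token):
    # Leave tokens written in scientific notation as strings instead of floats.
    return token if 'e' in token.lower() else float(token)

print(json.loads('1.5', parse_float=keep_exponent_strings))               # 1.5
print(json.loads('40000000000000e5', parse_float=keep_exponent_strings))  # '40000000000000e5'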
def best_search_result(conda_target, conda_context, channels_override=None, offline=False):
    """Find best "conda search" result for specified target.

    Return ``(None, None)`` if no results match.
    """
    search_cmd = [conda_context.conda_exec, "search", "--full-name", "--json"]
    if offline:
        search_cmd.append("--offline")
    if channels_override:
        search_cmd.append("--override-channels")
        for channel in channels_override:
            search_cmd.extend(["--channel", channel])
    else:
        search_cmd.extend(conda_context._override_channels_args)
    search_cmd.append(conda_target.package)
    res = commands.execute(search_cmd)
    res = unicodify(res)
    hits = json.loads(res).get(conda_target.package, [])
    hits = sorted(hits, key=lambda hit: packaging.version.parse(hit['version']), reverse=True)

    if len(hits) == 0:
        return (None, None)

    best_result = (hits[0], False)

    for hit in hits:
        if is_search_hit_exact(conda_target, hit):
            best_result = (hit, True)
            break

    return best_result
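The version-aware sort above relies on packaging.version.parse, which orders release strings numerically rather than lexicographically; a small isolated example with made-up hits:

from packaging import version

hits = [{'version': '1.9'}, {'version': '1.10'}, {'version': '1.2'}]
hits = sorted(hits, key=lambda hit: version.parse(hit['version']), reverse=True)
print([hit['version'] for hit in hits])  # ['1.10', '1.9', '1.2'] -- '1.10' sorts above '1.9'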
Example #20
    def _get_extended_config( self, trans ):
        app = trans.app
        configured_for_inactivity_warning = app.config.user_activation_on and app.config.inactivity_box_content is not None
        user_requests = bool( trans.user and ( trans.user.requests or app.security_agent.get_accessible_request_types( trans, trans.user ) ) )
        config = {
            'active_view'                   : 'analysis',
            'params'                        : dict( trans.request.params ),
            'enable_cloud_launch'           : app.config.get_bool( 'enable_cloud_launch', False ),
            'search_url'                    : web.url_for( controller='root', action='tool_search' ),
            # TODO: next two should be redundant - why can't we build one from the other?
            'toolbox'                       : app.toolbox.to_dict( trans, in_panel=False ),
            'toolbox_in_panel'              : app.toolbox.to_dict( trans ),
            'message_box_visible'           : app.config.message_box_visible,
            'show_inactivity_warning'       : configured_for_inactivity_warning and trans.user and not trans.user.active,
            # TODO: move to user
            'user_requests'                 : user_requests
        }

        # TODO: move to user
        stored_workflow_menu_entries = config[ 'stored_workflow_menu_entries' ] = []
        for menu_item in getattr( trans.user, 'stored_workflow_menu_entries', [] ):
            stored_workflow_menu_entries.append({
                'encoded_stored_workflow_id' : trans.security.encode_id( menu_item.stored_workflow_id ),
                'stored_workflow' : {
                    'name' : util.unicodify( menu_item.stored_workflow.name )
                }
            })

        return config
Example #21
def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5, skipchars=None, line_wrap=True ):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH

    ## >>> fname = get_test_fname('4.bed')
    ## >>> get_file_peek(fname)
    ## 'chr22    30128507    31828507    uc003bnx.1_cds_2_0_chr22_29227_f    0    +\n'

    """
    # Set size for file.readline() to a negative number to force it to
    # read until either a newline or EOF.  Needed for datasets with very
    # long lines.
    if WIDTH == 'unlimited':
        WIDTH = -1
    if skipchars is None:
        skipchars = []
    lines = []
    count = 0
    file_type = None
    data_checked = False
    temp = open( file_name, "U" )
    while count < LINE_COUNT:
        line = temp.readline( WIDTH )
        if line and not is_multi_byte and not data_checked:
            # See if we have a compressed or binary file
            if line[0:2] == util.gzip_magic:
                file_type = 'gzipped'
            else:
                for char in line:
                    if ord( char ) > 128:
                        file_type = 'binary'
                        break
            data_checked = True
            if file_type in [ 'gzipped', 'binary' ]:
                break
        if not line_wrap:
            if line.endswith('\n'):
                line = line[:-1]
            else:
                while True:
                    i = temp.read(1)
                    if not i or i == '\n':
                        break
        skip_line = False
        for skipchar in skipchars:
            if line.startswith( skipchar ):
                skip_line = True
                break
        if not skip_line:
            lines.append( line )
            count += 1
    temp.close()
    if file_type in [ 'gzipped', 'binary' ]:
        text = "%s file" % file_type
    else:
        try:
            text = util.unicodify( '\n'.join( lines ) )
        except UnicodeDecodeError:
            text = "binary/unknown file"
    return text
Example #22
 def get_html(self, prefix=""):
     return unicodify('<script src="https://gsui.genomespace.org/jsui/upload/gsuploadwindow.js"></script>'
                      '<input type="text" name="{0}{1}" value="{2}">&nbsp;'
                      '<a href="javascript:gsLocationByGet({{ successCallback: function(config)'
                      ' {{ selector_name = \'{0}{1}\'; selector = \'input[name=\' + selector_name.replace(\'|\', \'\\\\|\') + \']\';'
                      ' $(selector).val(config.destination + \'^\' + config.token); }} }});">'
                      'Browse</a>'.format(prefix, self.name, escape(str(self.value), quote=True)))
Example #23
    def display_by_username_and_slug( self, trans, username, slug ):
        """ Display page based on a username and slug. """

        # Get page.
        session = trans.sa_session
        user = session.query( model.User ).filter_by( username=username ).first()
        page = trans.sa_session.query( model.Page ).filter_by( user=user, slug=slug, deleted=False ).first()
        if page is None:
            raise web.httpexceptions.HTTPNotFound()
        # Security check raises error if user cannot access page.
        self.security_check( trans, page, False, True)

        # Process page content.
        processor = _PageContentProcessor( trans, 'utf-8', 'text/html', self._get_embed_html )
        processor.feed( page.latest_revision.content )

        # Get rating data.
        user_item_rating = 0
        if trans.get_user():
            user_item_rating = self.get_user_item_rating( trans.sa_session, trans.get_user(), page )
            if user_item_rating:
                user_item_rating = user_item_rating.rating
            else:
                user_item_rating = 0
        ave_item_rating, num_ratings = self.get_ave_item_rating_data( trans.sa_session, page )

        # Output is string, so convert to unicode for display.
        page_content = unicodify( processor.output(), 'utf-8' )
        return trans.fill_template_mako( "page/display.mako", item=page,
                                         item_data=page_content,
                                         user_item_rating=user_item_rating,
                                         ave_item_rating=ave_item_rating,
                                         num_ratings=num_ratings,
                                         content_only=True )
Example #24
 def get_html_radio(self, prefix="", disabled=False):
     rval = []
     ctr = 0
     for text, value, selected in self.options:
         style = ""
         escaped_value = escape(str(value), quote=True)
         uniq_id = "%s%s|%s" % (prefix, self.name, escaped_value)
         if len(self.options) > 2 and ctr % 2 == 1:
             style = " class=\"odd_row\""
         selected_text = ""
         if selected:
             selected_text = " checked='checked'"
         rval.append('<div%s><input type="radio" name="%s%s"%s value="%s" id="%s"%s%s%s><label class="inline" for="%s">%s</label></div>'
                     % (style,
                        prefix,
                        self.name,
                        self.refresh_on_change_text,
                        escaped_value,
                        uniq_id,
                        selected_text,
                        self.get_disabled_str(disabled),
                        self.extra_attributes,
                        uniq_id,
                        text))
         ctr += 1
     return unicodify("\n".join(rval))
 def conda_info(self):
     if self.conda_exec is not None:
         info_out = commands.execute([self.conda_exec, "info", "--json"])
         info_out = unicodify(info_out)
         info = json.loads(info_out)
         return info
     else:
         return None
Example #26
 def set_tags_from_list(self, user, item, new_tags_list):
     # precondition: item is already security checked against user
     # precondition: incoming tags is a list of sanitized/formatted strings
     self.delete_item_tags(user, item)
     new_tags_str = ','.join(new_tags_list)
     self.apply_item_tags(user, item, unicodify(new_tags_str, 'utf-8'))
     self.sa_session.flush()
     return item.tags
Example #27
 def get_html(self, prefix="", disabled=False):
     if self.checked:
         checked_text = ' checked="checked"'
     else:
         checked_text = ''
     id_name = prefix + self.name
     return unicodify('<input type="checkbox" id="%s" name="%s" value="__CHECKED__"%s%s%s><input type="hidden" name="%s" value="__NOTHING__"%s>'
                      % (id_name, id_name, checked_text, self.get_disabled_str(disabled), self.refresh_on_change_text, id_name, self.get_disabled_str(disabled)))
Example #28
 def get_html(self, prefix=""):
     value_text = ""
     if self.value:
         value_text = ' value="%s"' % escape(str(self.value), quote=True)
     ajax_text = ""
     if self.ajax:
         ajax_text = ' galaxy-ajax-upload="true"'
     return unicodify('<input type="file" name="%s%s"%s%s>' % (prefix, self.name, ajax_text, value_text))
Example #29
def safe_dict(d):
    """Recursively clone JSON structure with unicode dictionary keys."""
    if isinstance(d, dict):
        return dict([(unicodify(k), safe_dict(v)) for k, v in d.items()])
    elif isinstance(d, list):
        return [safe_dict(x) for x in d]
    else:
        return d
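A self-contained variant of the same recursion, with str standing in for unicodify, shows the key conversion on a nested structure:

def safe_dict_demo(d):
    """Recursively rebuild a JSON-like structure, forcing every dict key to str."""
    if isinstance(d, dict):
        return {str(k): safe_dict_demo(v) for k, v in d.items()}
    elif isinstance(d, list):
        return [safe_dict_demo(x) for x in d]
    return d

print(safe_dict_demo({1: 'one', 'nested': [{2: 'two'}]}))
# {'1': 'one', 'nested': [{'2': 'two'}]}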
Example #30
 def body_renderer(self, trans, body, environ, start_response):
     # this is a dummy renderer that does not call start_response
     # See 'We have to re-create the handle request method...' in _process_batch_request above
     return dict(
         status=trans.response.status,
         headers=trans.response.headers,
         body=json.loads(unicodify(self.galaxy.make_body_iterable(trans, body)[0]))
     )
Example #31
def cli(ctx, runnable_identifier, job_path, **kwds):
    """Planemo command for running tools and jobs.

    \b
        % planemo run cat1-tool.cwl cat-job.json
    """
    runnable = for_runnable_identifier(ctx, runnable_identifier, kwds)
    is_cwl = runnable.type.is_cwl_artifact
    kwds["cwl"] = is_cwl
    kwds["execution_type"] = "Run"
    if kwds.get("engine", None) is None:
        if is_cwl:
            kwds["engine"] = "cwltool"
        elif kwds.get('galaxy_url', None):
            kwds["engine"] = "external_galaxy"
        else:
            kwds["engine"] = "galaxy"
    with engine_context(ctx, **kwds) as engine:
        run_result = engine.run(runnable, job_path)

    if not run_result.was_successful:
        warn("Run failed [%s]" % unicodify(run_result))
    elif kwds.get('no_wait'):
        info('Run successfully executed - exiting without waiting for results.')
    else:
        output_json = kwds.get("output_json", None)
        outputs_dict = run_result.outputs_dict
        if output_json:
            with open(output_json, "w") as f:
                json.dump(outputs_dict, f)
        info('Run completed successfully.')

    report_data = StructuredData(data={'tests': [run_result.structured_data()], 'version': '0.1'})
    report_data.calculate_summary_data()
    return_value = handle_reports_and_summary(ctx, report_data.structured_data, kwds=kwds)
    ctx.exit(return_value)
Example #32
 def get_html(self, prefix="", disabled=False):
     value = unicodify(self.value or "")
     return unicodify('<input type="text" name="%s%s" size="%d" value="%s"%s>'
                      % (prefix, self.name, self.size, escape(value, quote=True), self.get_disabled_str(disabled)))
Example #33
def wrap_in_middleware(app, global_conf, application_stack, **local_conf):
    """Based on the configuration wrap `app` in a set of common and useful middleware."""
    stack = application_stack
    # Merge the global and local configurations
    conf = global_conf.copy()
    conf.update(local_conf)
    debug = asbool(conf.get('debug', False))
    interactive = asbool(conf.get('use_interactive', False))
    # First put into place httpexceptions, which must be most closely
    # wrapped around the application (it can interact poorly with
    # other middleware):
    app = wrap_if_allowed(app,
                          stack,
                          httpexceptions.make_middleware,
                          name='paste.httpexceptions',
                          args=(conf, ))
    # Create a separate mapper for redirects to prevent conflicts.
    redirect_mapper = routes.Mapper()
    redirect_mapper = _map_redirects(redirect_mapper)
    # Load the Routes middleware which we use for redirecting
    app = wrap_if_allowed(app,
                          stack,
                          RoutesMiddleware,
                          args=(redirect_mapper, ))
    # If we're using remote_user authentication, add middleware that
    # protects Galaxy from improperly configured authentication in the
    # upstream server
    if asbool(conf.get('use_remote_user', False)):
        from galaxy.webapps.tool_shed.framework.middleware.remoteuser import RemoteUser
        app = wrap_if_allowed(
            app,
            stack,
            RemoteUser,
            kwargs=dict(
                maildomain=conf.get('remote_user_maildomain', None),
                display_servers=util.listify(conf.get('display_servers', '')),
                admin_users=conf.get('admin_users', '').split(','),
                remote_user_header=conf.get('remote_user_header',
                                            'HTTP_REMOTE_USER'),
                remote_user_secret_header=conf.get('remote_user_secret', None),
                normalize_remote_user_email=conf.get(
                    'normalize_remote_user_email', False)))
    # The recursive middleware allows for including requests in other
    # requests or forwarding of requests, all on the server side.
    if asbool(conf.get('use_recursive', True)):
        from paste import recursive
        app = wrap_if_allowed(app,
                              stack,
                              recursive.RecursiveMiddleware,
                              args=(conf, ))
    # Transaction logging (apache access.log style)
    if asbool(conf.get('use_translogger', True)):
        from paste.translogger import TransLogger
        app = wrap_if_allowed(app, stack, TransLogger)
    # If sentry logging is enabled, log here before propagating up to
    # the error middleware
    # TODO sentry config is duplicated between tool_shed/galaxy, refactor this.
    sentry_dsn = conf.get('sentry_dsn', None)
    if sentry_dsn:
        from galaxy.web.framework.middleware.sentry import Sentry
        app = wrap_if_allowed(app, stack, Sentry, args=(sentry_dsn, ))
    # X-Forwarded-Host handling
    from galaxy.web.framework.middleware.xforwardedhost import XForwardedHostMiddleware
    app = wrap_if_allowed(app, stack, XForwardedHostMiddleware)
    # Various debug middleware that can only be turned on if the debug
    # flag is set, either because they are insecure or greatly hurt
    # performance.
    if debug:
        # Middleware to check for WSGI compliance
        if asbool(conf.get('use_lint', True)):
            from paste import lint
            app = wrap_if_allowed(app,
                                  stack,
                                  lint.make_middleware,
                                  name='paste.lint',
                                  args=(conf, ))
        # Middleware to run the python profiler on each request
        if asbool(conf.get('use_profile', False)):
            from paste.debug import profile
            app = wrap_if_allowed(app,
                                  stack,
                                  profile.ProfileMiddleware,
                                  args=(conf, ))
        if interactive:
            # Interactive exception debugging, scary dangerous if publicly
            # accessible, if not enabled we'll use the regular error printing
            # middleware.
            try:
                from weberror import evalexception
                app = wrap_if_allowed_or_fail(
                    app,
                    stack,
                    evalexception.EvalException,
                    args=(conf, ),
                    kwargs=dict(
                        templating_formatters=build_template_error_formatters(
                        )))
            except MiddlewareWrapUnsupported as exc:
                log.warning(util.unicodify(exc))
                import galaxy.web.framework.middleware.error
                app = wrap_if_allowed(
                    app,
                    stack,
                    galaxy.web.framework.middleware.error.ErrorMiddleware,
                    args=(conf, ))
        else:
            # Not in interactive debug mode, just use the regular error middleware
            import galaxy.web.framework.middleware.error
            app = wrap_if_allowed(
                app,
                stack,
                galaxy.web.framework.middleware.error.ErrorMiddleware,
                args=(conf, ))
    return app
Example #34
CHRONOS_IMPORT_MSG = ('The Python \'chronos\' package is required to use '
                      'this feature, please install it or correct the '
                      'following error:\nImportError {msg!s}')

try:
    import chronos
    chronos_exceptions = (
        chronos.ChronosAPIError,
        chronos.UnauthorizedError,
        chronos.MissingFieldError,
        chronos.OneOfViolationError,
    )
except ImportError as e:
    chronos = None
    CHRONOS_IMPORT_MSG.format(msg=unicodify(e))

__all__ = ('ChronosJobRunner', )
LOGGER = logging.getLogger(__name__)


class ChronosRunnerException(Exception):
    pass


def handle_exception_call(func):
    # Catch chronos exceptions. The latest version of chronos-python does
    # support a hierarchy over the exceptions.

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
Example #35
    def split(cls, input_datasets, subdir_generator_function, split_params):
        """
        Split the input files by molecule records.
        """
        if split_params is None:
            return None

        if len(input_datasets) > 1:
            raise Exception(
                "CML-file splitting does not support multiple files")
        input_files = [ds.file_name for ds in input_datasets]

        chunk_size = None
        if split_params['split_mode'] == 'number_of_parts':
            raise Exception(
                'Split mode "%s" is currently not implemented for CML-files.' %
                split_params['split_mode'])
        elif split_params['split_mode'] == 'to_size':
            chunk_size = int(split_params['split_size'])
        else:
            raise Exception('Unsupported split mode %s' %
                            split_params['split_mode'])

        def _read_cml_records(filename):
            lines = []
            with open(filename) as handle:
                for line in handle:
                    if line.lstrip().startswith('<?xml version="1.0"?>') or \
                       line.lstrip().startswith('<cml xmlns="http://www.xml-cml.org/schema') or \
                       line.lstrip().startswith('</cml>'):
                        continue
                    lines.append(line)
                    if line.lstrip().startswith('</molecule>'):
                        yield lines
                        lines = []

        header_lines = [
            '<?xml version="1.0"?>\n',
            '<cml xmlns="http://www.xml-cml.org/schema">\n'
        ]
        footer_line = ['</cml>\n']

        def _write_part_cml_file(accumulated_lines):
            part_dir = subdir_generator_function()
            part_path = os.path.join(part_dir,
                                     os.path.basename(input_files[0]))
            with open(part_path, 'w') as part_file:
                part_file.writelines(header_lines)
                part_file.writelines(accumulated_lines)
                part_file.writelines(footer_line)

        try:
            cml_records = _read_cml_records(input_files[0])
            cml_lines_accumulated = []
            for counter, cml_record in enumerate(cml_records, start=1):
                cml_lines_accumulated.extend(cml_record)
                if counter % chunk_size == 0:
                    _write_part_cml_file(cml_lines_accumulated)
                    cml_lines_accumulated = []
            if cml_lines_accumulated:
                _write_part_cml_file(cml_lines_accumulated)
        except Exception as e:
            log.error('Unable to split files: %s', unicodify(e))
            raise
Example #36
def verify(
    item_label,
    output_content,
    attributes,
    filename=None,
    get_filecontent=None,
    get_filename=None,
    keep_outputs_dir=None,
    verify_extra_files=None,
    mode='file',
):
    """Verify the content of a test output using test definitions described by attributes.

    Throw an informative assertion error if any of these tests fail.
    """
    if get_filename is None:
        if get_filecontent is None:
            get_filecontent = DEFAULT_TEST_DATA_RESOLVER.get_filecontent

        def get_filename(filename):
            file_content = get_filecontent(filename)
            local_name = make_temp_fname(fname=filename)
            with open(local_name, 'wb') as f:
                f.write(file_content)
            return local_name

    # Check assertions...
    assertions = attributes.get("assert_list", None)
    if attributes is not None and assertions is not None:
        try:
            verify_assertions(output_content, attributes["assert_list"])
        except AssertionError as err:
            errmsg = '%s different than expected\n' % (item_label)
            errmsg += unicodify(err)
            raise AssertionError(errmsg)

    # Verify checksum attributes...
    # works with older Galaxy style md5=<expected_sum> or cwltest
    # style checksum=<hash_type>$<hash>.
    expected_checksum_type = None
    expected_checksum = None
    if attributes is not None and attributes.get("md5", None) is not None:
        expected_checksum_type = "md5"
        expected_checksum = attributes.get("md5")
    elif attributes is not None and attributes.get("checksum", None) is not None:
        checksum_value = attributes.get("checksum", None)
        expected_checksum_type, expected_checksum = checksum_value.split("$", 1)

    if expected_checksum_type:
        try:
            _verify_checksum(output_content, expected_checksum_type, expected_checksum)
        except AssertionError as err:
            errmsg = '%s different than expected\n' % (item_label)
            errmsg += unicodify(err)
            raise AssertionError(errmsg)

    if attributes is None:
        attributes = {}

    if filename is not None:
        temp_name = make_temp_fname(fname=filename)
        with open(temp_name, 'wb') as f:
            f.write(output_content)

        # If the server's env has GALAXY_TEST_SAVE, save the output file to that
        # directory.
        # This needs to be done before the call to `get_filename()` because that
        # may raise an exception if `filename` does not exist (e.g. when
        # generating a tool output file from scratch with
        # `planemo test --update_test_data`).
        if keep_outputs_dir:
            ofn = os.path.join(keep_outputs_dir, filename)
            out_dir = os.path.dirname(ofn)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            log.debug('keep_outputs_dir: %s, ofn: %s', keep_outputs_dir, ofn)
            try:
                shutil.copy(temp_name, ofn)
            except Exception:
                log.exception('Could not save output file %s to %s', temp_name, ofn)
            else:
                log.debug('## GALAXY_TEST_SAVE=%s. saved %s', keep_outputs_dir, ofn)

        if mode == 'directory':
            # if verifying a file inside a extra_files_path directory
            # filename already point to a file that exists on disk
            local_name = filename
        else:
            local_name = get_filename(filename)

        compare = attributes.get('compare', 'diff')
        try:
            if attributes.get('ftype', None) in ['bam', 'qname_sorted.bam', 'qname_input_sorted.bam', 'unsorted.bam', 'cram']:
                try:
                    local_fh, temp_name = _bam_to_sam(local_name, temp_name)
                    local_name = local_fh.name
                except Exception as e:
                    log.warning("%s. Will compare BAM files", unicodify(e))
            if compare == 'diff':
                files_diff(local_name, temp_name, attributes=attributes)
            elif compare == 're_match':
                files_re_match(local_name, temp_name, attributes=attributes)
            elif compare == 're_match_multiline':
                files_re_match_multiline(local_name, temp_name, attributes=attributes)
            elif compare == 'sim_size':
                delta = int(attributes.get('delta', DEFAULT_DELTA))
                s1 = len(output_content)
                s2 = os.path.getsize(local_name)
                if abs(s1 - s2) > int(delta):
                    raise AssertionError('Files %s=%db but %s=%db - compare by size (delta=%s) failed' % (temp_name, s1, local_name, s2, delta))
            elif compare == "contains":
                files_contains(local_name, temp_name, attributes=attributes)
            else:
                raise Exception('Unimplemented Compare type: %s' % compare)
        except AssertionError as err:
            errmsg = '%s different than expected, difference (using %s):\n' % (item_label, compare)
            errmsg += "( %s v. %s )\n" % (local_name, temp_name)
            errmsg += unicodify(err)
            raise AssertionError(errmsg)
        finally:
            if 'GALAXY_TEST_NO_CLEANUP' not in os.environ:
                os.remove(temp_name)

    if verify_extra_files:
        extra_files = attributes.get('extra_files', None)
        if extra_files:
            verify_extra_files(extra_files)
Example #37
def files_diff(file1, file2, attributes=None):
    """Check the contents of 2 files for differences."""
    def get_lines_diff(diff):
        count = 0
        for line in diff:
            if (line.startswith('+') and not line.startswith('+++')) or (line.startswith('-') and not line.startswith('---')):
                count += 1
        return count

    if not filecmp.cmp(file1, file2, shallow=False):
        if attributes is None:
            attributes = {}
        decompress = attributes.get("decompress", None)
        if decompress:
            # None means all compressed formats are allowed
            compressed_formats = None
        else:
            compressed_formats = []
        is_pdf = False
        try:
            with get_fileobj(file2, compressed_formats=compressed_formats) as fh:
                history_data = fh.readlines()
            with get_fileobj(file1, compressed_formats=compressed_formats) as fh:
                local_file = fh.readlines()
        except UnicodeDecodeError:
            if file1.endswith('.pdf') or file2.endswith('.pdf'):
                is_pdf = True
                # Replace non-Unicode characters using unicodify(),
                # difflib.unified_diff doesn't work on list of bytes
                history_data = [unicodify(l) for l in get_fileobj(file2, mode='rb', compressed_formats=compressed_formats)]
                local_file = [unicodify(l) for l in get_fileobj(file1, mode='rb', compressed_formats=compressed_formats)]
            else:
                raise AssertionError("Binary data detected, not displaying diff")
        if attributes.get('sort', False):
            local_file.sort()
            history_data.sort()
        allowed_diff_count = int(attributes.get('lines_diff', 0))
        diff = list(difflib.unified_diff(local_file, history_data, "local_file", "history_data"))
        diff_lines = get_lines_diff(diff)
        if diff_lines > allowed_diff_count:
            if 'GALAXY_TEST_RAW_DIFF' in os.environ:
                diff_slice = diff
            else:
                if len(diff) < 60:
                    diff_slice = diff[0:40]
                else:
                    diff_slice = diff[:25] + ["********\n", "*SNIP *\n", "********\n"] + diff[-25:]
            # FIXME: This pdf stuff is rather special cased and has not been updated to consider lines_diff
            # due to unknown desired behavior when used in conjunction with a non-zero lines_diff
            # PDF forgiveness can probably be handled better by not special casing by __extension__ here
            # and instead using lines_diff or a regular expression matching
            # or by creating and using a specialized pdf comparison function
            if is_pdf:
                # PDF files contain creation dates, modification dates, ids and descriptions that change with each
                # new file, so we need to handle these differences.  As long as the rest of the PDF file does
                # not differ we're ok.
                valid_diff_strs = ['description', 'createdate', 'creationdate', 'moddate', 'id', 'producer', 'creator']
                valid_diff = False
                invalid_diff_lines = 0
                for line in diff_slice:
                    # Make sure to lower case strings before checking.
                    line = line.lower()
                    # Diff lines will always start with a + or - character, but handle special cases: '--- local_file \n', '+++ history_data \n'
                    if (line.startswith('+') or line.startswith('-')) and line.find('local_file') < 0 and line.find('history_data') < 0:
                        for vdf in valid_diff_strs:
                            if line.find(vdf) < 0:
                                valid_diff = False
                            else:
                                valid_diff = True
                                # Stop checking as soon as we know we have a valid difference
                                break
                        if not valid_diff:
                            invalid_diff_lines += 1
                log.info("## files diff on '%s' and '%s': lines_diff = %d, found diff = %d, found pdf invalid diff = %d" % (file1, file2, allowed_diff_count, diff_lines, invalid_diff_lines))
                if invalid_diff_lines > allowed_diff_count:
                    # Print out diff_slice so we can see what failed
                    log.info("###### diff_slice ######")
                    raise AssertionError("".join(diff_slice))
            else:
                log.info("## files diff on '%s' and '%s': lines_diff = %d, found diff = %d" % (file1, file2, allowed_diff_count, diff_lines))
                raise AssertionError("".join(diff_slice))
Example #38
    def create(self, trans, library_id, payload, **kwd):
        """
        create( self, trans, library_id, payload, **kwd )
        * POST /api/libraries/{library_id}/contents:
            create a new library file or folder

        To copy an HDA into a library send ``create_type`` of 'file' and
        the HDA's encoded id in ``from_hda_id`` (and optionally ``ldda_message``).

        To copy an HDCA into a library send ``create_type`` of 'file' and
        the HDCA's encoded id in ``from_hdca_id`` (and optionally ``ldda_message``).

        :type   library_id: str
        :param  library_id: the encoded id of the library where to create the new item
        :type   payload:    dict
        :param  payload:    dictionary structure containing:

            * folder_id:    the encoded id of the parent folder of the new item
            * create_type:  the type of item to create ('file', 'folder' or 'collection')
            * from_hda_id:  (optional, only if create_type is 'file') the
                encoded id of an accessible HDA to copy into the library
            * ldda_message: (optional) the new message attribute of the LDDA created
            * extended_metadata: (optional) sub-dictionary containing any extended
                metadata to associate with the item
            * upload_option: (optional) one of 'upload_file' (default), 'upload_directory' or 'upload_paths'
            * server_dir: (optional, only if upload_option is
                'upload_directory') relative path of the subdirectory of Galaxy
                ``library_import_dir`` (if admin) or ``user_library_import_dir``
                (if non-admin) to upload. All and only the files (i.e.
                no subdirectories) contained in the specified directory will be
                uploaded.
            * filesystem_paths: (optional, only if upload_option is
                'upload_paths' and the user is an admin) file paths on the
                Galaxy server to upload to the library, one file per line
            * link_data_only: (optional, only when upload_option is
                'upload_directory' or 'upload_paths') either 'copy_files'
                (default) or 'link_to_files'. Setting to 'link_to_files'
                symlinks instead of copying the files
            * name: (optional, only if create_type is 'folder') name of the
                folder to create
            * description: (optional, only if create_type is 'folder')
                description of the folder to create
            * tag_using_filename: (optional)
                create tags on datasets using the file's original name

        :returns:   a dictionary describing the new item unless ``from_hdca_id`` is supplied,
                    in which case a list of such dictionaries is returned.
        :rtype:     object
        """
        if 'create_type' not in payload:
            trans.response.status = 400
            return "Missing required 'create_type' parameter."
        else:
            create_type = payload.pop('create_type')
        if create_type not in ('file', 'folder', 'collection'):
            trans.response.status = 400
            return "Invalid value for 'create_type' parameter ( %s ) specified." % create_type

        if 'folder_id' not in payload:
            trans.response.status = 400
            return "Missing required 'folder_id' parameter."
        else:
            folder_id = payload.pop('folder_id')
            class_name, folder_id = self._decode_library_content_id(folder_id)
        try:
            # security is checked in the downstream controller
            parent = self.get_library_folder(trans,
                                             folder_id,
                                             check_ownership=False,
                                             check_accessible=False)
        except Exception as e:
            return util.unicodify(e)
        # The rest of the security happens in the library_common controller.
        real_folder_id = trans.security.encode_id(parent.id)

        payload['tag_using_filenames'] = util.string_as_bool(
            payload.get('tag_using_filenames', None))

        # are we copying an HDA to the library folder?
        #   we'll need the id and any message to attach, then branch to that private function
        from_hda_id, from_hdca_id, ldda_message = (payload.pop(
            'from_hda_id',
            None), payload.pop('from_hdca_id',
                               None), payload.pop('ldda_message', ''))
        if create_type == 'file':
            if from_hda_id:
                return self._copy_hda_to_library_folder(
                    trans, self.hda_manager, self.decode_id(from_hda_id),
                    real_folder_id, ldda_message)
            if from_hdca_id:
                return self._copy_hdca_to_library_folder(
                    trans, self.hda_manager, self.decode_id(from_hdca_id),
                    real_folder_id, ldda_message)

        # check for extended metadata, store it and pop it out of the param
        # otherwise sanitize_param will have a fit
        ex_meta_payload = payload.pop('extended_metadata', None)

        # Now create the desired content object, either file or folder.
        if create_type == 'file':
            status, output = self._upload_library_dataset(
                trans, library_id, real_folder_id, **payload)
        elif create_type == 'folder':
            status, output = self._create_folder(trans, real_folder_id,
                                                 library_id, **payload)
        elif create_type == 'collection':
            # Not delegating to library_common, so need to check access to parent
            # folder here.
            self.check_user_can_add_to_library_item(trans,
                                                    parent,
                                                    check_accessible=True)
            create_params = api_payload_to_create_params(payload)
            create_params['parent'] = parent
            service = trans.app.dataset_collections_service
            dataset_collection_instance = service.create(**create_params)
            return [
                dictify_dataset_collection_instance(
                    dataset_collection_instance,
                    security=trans.security,
                    parent=parent)
            ]
        if status != 200:
            trans.response.status = status
            return output
        else:
            rval = []
            for v in output.values():
                if ex_meta_payload is not None:
                    # If there is extended metadata, store it, attach it to the dataset, and index it
                    ex_meta = ExtendedMetadata(ex_meta_payload)
                    trans.sa_session.add(ex_meta)
                    v.extended_metadata = ex_meta
                    trans.sa_session.add(v)
                    trans.sa_session.flush()
                    for path, value in self._scan_json_block(ex_meta_payload):
                        meta_i = ExtendedMetadataIndex(ex_meta, path, value)
                        trans.sa_session.add(meta_i)
                    trans.sa_session.flush()
                if type(v) == trans.app.model.LibraryDatasetDatasetAssociation:
                    v = v.library_dataset
                encoded_id = trans.security.encode_id(v.id)
                if create_type == 'folder':
                    encoded_id = 'F' + encoded_id
                rval.append(
                    dict(id=encoded_id,
                         name=v.name,
                         url=url_for('library_content',
                                     library_id=library_id,
                                     id=encoded_id)))
            return rval
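
A rough usage sketch for the endpoint documented above, assuming the 'requests' package is available; the Galaxy URL, API key and encoded ids are placeholders, and the exact response shape depends on the Galaxy version:

import requests

GALAXY_URL = "https://galaxy.example.org"   # placeholder
API_KEY = "0123456789abcdef"                # placeholder
LIBRARY_ID = "f2db41e1fa331b3e"             # placeholder encoded library id
FOLDER_ID = "Fe85a3be143d5905b"             # placeholder encoded folder id

# Create a new folder via POST /api/libraries/{library_id}/contents
payload = {
    "create_type": "folder",
    "folder_id": FOLDER_ID,
    "name": "my_new_folder",
    "description": "created through the API",
}
response = requests.post(
    "%s/api/libraries/%s/contents" % (GALAXY_URL, LIBRARY_ID),
    params={"key": API_KEY},
    json=payload,
)
response.raise_for_status()
print(response.json())  # dictionary (or list of dictionaries) describing the new item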
Beispiel #39
0
def _handle_realization_error(exception, **kwds):
    fail_fast = kwds.get("fail_fast", False)
    if fail_fast:
        raise exception
    else:
        error(unicodify(exception))
Beispiel #40
0
def main(argv):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-r',
                        '--report-directory',
                        help='Directory to store reports in',
                        default=os.path.abspath(os.path.join('.', 'reports')))
    parser.add_argument('-g',
                        '--grt-config',
                        help='Path to GRT config file',
                        default=default_config)
    parser.add_argument(
        "-l",
        "--loglevel",
        choices=['debug', 'info', 'warning', 'error', 'critical'],
        help="Set the logging level",
        default='warning')
    parser.add_argument("-b",
                        "--batch-size",
                        type=int,
                        default=1000,
                        help="Batch size for sql queries")
    parser.add_argument(
        "-m",
        "--max-records",
        type=int,
        default=5000000,
        help=
        "Maximum number of records to include in a single report. This option should ONLY be used when reporting historical data. Setting this may require running GRT multiple times to capture all historical logs."
    )
    populate_config_args(parser)

    args = parser.parse_args()
    logging.getLogger().setLevel(getattr(logging, args.loglevel.upper()))

    _times = []
    _start_time = time.time()

    def annotate(label, human_label=None):
        if human_label:
            logging.info(human_label)
        _times.append((label, time.time() - _start_time))

    annotate('init_start', 'Loading GRT configuration...')
    try:
        with open(args.grt_config) as handle:
            config = yaml.safe_load(handle)
    except Exception:
        logging.info('Using default GRT configuration')
        with open(sample_config) as handle:
            config = yaml.safe_load(handle)
    annotate('init_end')

    REPORT_DIR = args.report_directory
    CHECK_POINT_FILE = os.path.join(REPORT_DIR, '.checkpoint')
    REPORT_IDENTIFIER = str(time.time())
    REPORT_BASE = os.path.join(REPORT_DIR, REPORT_IDENTIFIER)

    if os.path.exists(CHECK_POINT_FILE):
        with open(CHECK_POINT_FILE, 'r') as handle:
            last_job_sent = int(handle.read())
    else:
        last_job_sent = -1

    annotate('galaxy_init', 'Loading Galaxy...')
    model, object_store, gxconfig = _init(args)

    # Galaxy overrides our logging level.
    logging.getLogger().setLevel(getattr(logging, args.loglevel.upper()))
    sa_session = model.context.current
    annotate('galaxy_end')

    # Fetch jobs COMPLETED with status OK that have not yet been sent.

    # Set up our arrays
    active_users = defaultdict(int)
    job_state_data = defaultdict(int)

    if not os.path.exists(REPORT_DIR):
        os.makedirs(REPORT_DIR)

    # Pick an end point so our queries can return uniform data.
    annotate('endpoint_start', 'Identifying a safe endpoint for SQL queries')
    end_job_id = sa_session.query(model.Job.id) \
        .order_by(model.Job.id.desc()) \
        .first()[0]

    # Allow users to only report N records at once.
    if args.max_records > 0:
        if end_job_id - last_job_sent > args.max_records:
            end_job_id = last_job_sent + args.max_records

    annotate('endpoint_end',
             'Processing jobs (%s, %s]' % (last_job_sent, end_job_id))

    # Remember the last job sent.
    if end_job_id == last_job_sent:
        logging.info("No new jobs to report")
        # So we can just quit now.
        sys.exit(0)

    # Unfortunately we have to keep this mapping for the sanitizer to work properly.
    job_tool_map = {}
    blacklisted_tools = config['sanitization']['tools']

    annotate('export_jobs_start', 'Exporting Jobs')
    with io.open(REPORT_BASE + '.jobs.tsv', 'w',
                 encoding='utf-8') as handle_job:
        handle_job.write(u'\t'.join(('id', 'tool_id', 'tool_version', 'state',
                                     'create_time')) + '\n')
        for offset_start in range(last_job_sent, end_job_id, args.batch_size):
            logging.debug("Processing %s:%s", offset_start,
                          min(end_job_id, offset_start + args.batch_size))
            for job in sa_session.query(model.Job.id, model.Job.user_id, model.Job.tool_id, model.Job.tool_version, model.Job.state, model.Job.create_time) \
                    .filter(model.Job.id > offset_start) \
                    .filter(model.Job.id <= min(end_job_id, offset_start + args.batch_size)) \
                    .all():
                # If the tool is blacklisted, exclude everywhere
                if job[2] in blacklisted_tools:
                    continue

                try:
                    line = [
                        str(job[0]),  # id
                        job[2],  # tool_id
                        job[3],  # tool_version
                        job[4],  # state
                        str(job[5])  # create_time
                    ]
                    cline = unicodify('\t'.join(line) + '\n')
                    handle_job.write(cline)
                except Exception:
                    logging.warning(
                        "Unable to write out a 'handle_job' row. Ignoring the row.",
                        exc_info=True)
                    continue
                # meta counts
                job_state_data[job[4]] += 1
                active_users[job[1]] += 1
                job_tool_map[job[0]] = job[2]
    annotate('export_jobs_end')

    annotate('export_datasets_start', 'Exporting Datasets')
    with io.open(REPORT_BASE + '.datasets.tsv', 'w',
                 encoding='utf-8') as handle_datasets:
        handle_datasets.write(u'\t'.join(('job_id', 'dataset_id', 'extension',
                                          'file_size', 'param_name', 'type')) +
                              '\n')
        for offset_start in range(last_job_sent, end_job_id, args.batch_size):
            logging.debug("Processing %s:%s", offset_start,
                          min(end_job_id, offset_start + args.batch_size))

            # four queries: JobToInputDatasetAssociation, JobToOutputDatasetAssociation, HistoryDatasetAssociation, Dataset

            job_to_input_hda_ids = sa_session.query(model.JobToInputDatasetAssociation.job_id, model.JobToInputDatasetAssociation.dataset_id,
                model.JobToInputDatasetAssociation.name) \
                .filter(model.JobToInputDatasetAssociation.job_id > offset_start) \
                .filter(model.JobToInputDatasetAssociation.job_id <= min(end_job_id, offset_start + args.batch_size)) \
                .all()

            job_to_output_hda_ids = sa_session.query(model.JobToOutputDatasetAssociation.job_id, model.JobToOutputDatasetAssociation.dataset_id,
                model.JobToOutputDatasetAssociation.name) \
                .filter(model.JobToOutputDatasetAssociation.job_id > offset_start) \
                .filter(model.JobToOutputDatasetAssociation.job_id <= min(end_job_id, offset_start + args.batch_size)) \
                .all()

            # add type and concat
            job_to_hda_ids = [[list(i), "input"] for i in job_to_input_hda_ids
                              ] + [[list(i), "output"]
                                   for i in job_to_output_hda_ids]

            # put all of the hda_ids into a list
            hda_ids = [i[0][1] for i in job_to_hda_ids]

            hdas = sa_session.query(model.HistoryDatasetAssociation.id, model.HistoryDatasetAssociation.dataset_id,
                model.HistoryDatasetAssociation.extension) \
                .filter(model.HistoryDatasetAssociation.id.in_(hda_ids)) \
                .all()

            # put all the dataset ids into a list
            dataset_ids = [i[1] for i in hdas]

            # get the sizes of the datasets
            datasets = sa_session.query(model.Dataset.id, model.Dataset.total_size) \
                .filter(model.Dataset.id.in_(dataset_ids)) \
                .all()

            # convert hdas and datasets to dictionaries for easy lookup
            hdas = {i[0]: i[1:] for i in hdas}
            datasets = {i[0]: i[1:] for i in datasets}

            for job_to_hda in job_to_hda_ids:

                job = job_to_hda[0]  # job_id, hda_id, name
                filetype = job_to_hda[1]  # input|output

                # No associated job
                if job[0] not in job_tool_map:
                    continue

                # If the tool is blacklisted, exclude everywhere
                if job_tool_map[job[0]] in blacklisted_tools:
                    continue

                hda_id = job[1]
                if hda_id is None:
                    continue

                dataset_id = hdas[hda_id][0]
                if dataset_id is None:
                    continue

                try:
                    line = [
                        str(job[0]),  # Job ID
                        str(hda_id),  # HDA ID
                        str(hdas[hda_id][1]),  # Extension
                        round_to_2sd(datasets[dataset_id][0]),  # File size
                        job[2],  # Parameter name
                        str(filetype)  # input/output
                    ]
                    cline = unicodify('\t'.join(line) + '\n')
                    handle_datasets.write(cline)
                except Exception:
                    logging.warning(
                        "Unable to write out a 'handle_datasets' row. Ignoring the row.",
                        exc_info=True)
                    continue
    annotate('export_datasets_end')

    annotate('export_metric_num_start', 'Exporting Metrics (Numeric)')
    with io.open(REPORT_BASE + '.metric_num.tsv', 'w',
                 encoding='utf-8') as handle_metric_num:
        handle_metric_num.write(u'\t'.join(('job_id', 'plugin', 'name',
                                            'value')) + '\n')
        for offset_start in range(last_job_sent, end_job_id, args.batch_size):
            logging.debug("Processing %s:%s", offset_start,
                          min(end_job_id, offset_start + args.batch_size))
            for metric in sa_session.query(model.JobMetricNumeric.job_id, model.JobMetricNumeric.plugin, model.JobMetricNumeric.metric_name, model.JobMetricNumeric.metric_value) \
                    .filter(model.JobMetricNumeric.job_id > offset_start) \
                    .filter(model.JobMetricNumeric.job_id <= min(end_job_id, offset_start + args.batch_size)) \
                    .all():
                # No associated job
                if metric[0] not in job_tool_map:
                    continue
                # If the tool is blacklisted, exclude everywhere
                if job_tool_map[metric[0]] in blacklisted_tools:
                    continue

                try:
                    line = [
                        str(metric[0]),  # job id
                        metric[1],  # plugin
                        metric[2],  # name
                        str(metric[3])  # value
                    ]

                    cline = unicodify('\t'.join(line) + '\n')
                    handle_metric_num.write(cline)
                except Exception:
                    logging.warning(
                        "Unable to write out a 'handle_metric_num' row. Ignoring the row.",
                        exc_info=True)
                    continue
    annotate('export_metric_num_end')

    # Now on to outputs.
    with tarfile.open(REPORT_BASE + '.tar.gz', 'w:gz') as handle:
        for name in ('jobs', 'metric_num', 'datasets'):
            path = REPORT_BASE + '.' + name + '.tsv'
            if os.path.exists(path):
                handle.add(path)

    for name in ('jobs', 'metric_num', 'datasets'):
        path = REPORT_BASE + '.' + name + '.tsv'
        if os.path.exists(path):
            os.unlink(REPORT_BASE + '.' + name + '.tsv')

    _times.append(('job_finish', time.time() - _start_time))
    sha = hash_util.memory_bound_hexdigest(hash_func=hash_util.sha256,
                                           path=REPORT_BASE + ".tar.gz")
    _times.append(('hash_finish', time.time() - _start_time))

    # Now serialize the individual report data.
    with open(REPORT_BASE + '.json', 'w') as handle:
        json.dump(
            {
                "version": 3,
                "galaxy_version": gxconfig.version_major,
                "generated": REPORT_IDENTIFIER,
                "report_hash": "sha256:" + sha,
                "metrics": {
                    "_times": _times,
                },
                "users": {
                    "active": len(active_users.keys()),
                    "total": sa_session.query(model.User.id).count(),
                },
                "jobs": job_state_data,
            }, handle)

    # Write our checkpoint file so we know where to start next time.
    with open(CHECK_POINT_FILE, 'w') as handle:
        handle.write(str(end_job_id))
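
The checkpoint-plus-batch-window pattern used above can be shown in isolation; the file name, batch size, cap and job ids below are invented:

import os

CHECK_POINT_FILE = '.checkpoint'   # illustrative path
BATCH_SIZE = 1000
MAX_RECORDS = 5000

def read_checkpoint(path):
    """Return the id of the last job already reported, or -1 on a first run."""
    if os.path.exists(path):
        with open(path) as handle:
            return int(handle.read())
    return -1

def batch_windows(last_sent, newest_job_id, batch_size, max_records):
    """Yield (start, end] windows, capped so one run reports at most max_records jobs."""
    end_job_id = newest_job_id
    if max_records > 0 and end_job_id - last_sent > max_records:
        end_job_id = last_sent + max_records
    for offset_start in range(last_sent, end_job_id, batch_size):
        yield offset_start, min(end_job_id, offset_start + batch_size)

last_sent = read_checkpoint(CHECK_POINT_FILE)
for start, end in batch_windows(last_sent, newest_job_id=12000,
                                batch_size=BATCH_SIZE, max_records=MAX_RECORDS):
    print("would query jobs with id in (%d, %d]" % (start, end))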
Beispiel #41
0
def set_metadata_legacy():
    import galaxy.model
    galaxy.model.metadata.MetadataTempFile.tmp_dir = tool_job_working_directory = os.path.abspath(
        os.getcwd())

    # This is ugly, but to transition from existing jobs without this parameter
    # to ones with, smoothly, it has to be the last optional parameter and we
    # have to sniff it.
    try:
        max_metadata_value_size = int(sys.argv[-1])
        sys.argv = sys.argv[:-1]
    except ValueError:
        max_metadata_value_size = 0
        # max_metadata_value_size is unspecified and should be 0

    # Set up datatypes registry
    datatypes_config = sys.argv.pop(1)
    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)

    job_metadata = sys.argv.pop(1)
    tool_provided_metadata = load_job_metadata(job_metadata)

    def set_meta(new_dataset_instance, file_dict):
        set_meta_with_tool_provided(new_dataset_instance, file_dict,
                                    set_meta_kwds, datatypes_registry,
                                    max_metadata_value_size)

    for filenames in sys.argv[1:]:
        fields = filenames.split(',')
        filename_in = fields.pop(0)
        filename_kwds = fields.pop(0)
        filename_out = fields.pop(0)
        filename_results_code = fields.pop(0)
        dataset_filename_override = fields.pop(0)
        override_metadata = fields.pop(0)
        set_meta_kwds = stringify_dictionary_keys(
            json.load(open(filename_kwds))
        )  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset = cPickle.load(open(filename_in,
                                        'rb'))  # load DatasetInstance
            dataset.dataset.external_filename = dataset_filename_override
            store_by = set_meta_kwds.get("object_store_store_by", "id")
            extra_files_dir_name = "dataset_%s_files" % getattr(
                dataset.dataset, store_by)
            files_path = os.path.abspath(
                os.path.join(tool_job_working_directory, "working",
                             extra_files_dir_name))
            dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(
                None, dataset.dataset.id)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster node, and are therefore temporarily substituted with MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(
                        metadata_file_override):
                    metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(
                        metadata_file_override)
                setattr(dataset.metadata, metadata_name,
                        metadata_file_override)
            set_meta(dataset, file_dict)
            dataset.metadata.to_JSON_dict(
                filename_out)  # write out results of set_meta
            json.dump((True, 'Metadata has been set successfully'),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has succeeded
        except Exception as e:
            json.dump((False, unicodify(e)),
                      open(filename_results_code,
                           'wt+'))  # setting metadata has failed somehow

    write_job_metadata(tool_job_working_directory, job_metadata, set_meta,
                       tool_provided_metadata)
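
Each remaining positional argument consumed by the loop above is a comma-separated bundle of per-dataset file names; a small sketch of that parsing with an invented sample string (the real values are generated by Galaxy):

# One argv entry per dataset, six comma-separated fields in a fixed order.
sample_arg = ("dataset_1.pickle,metadata_kwds.json,metadata_out.json,"
              "metadata_results.json,/data/dataset_1.dat,override_metadata.json")

fields = sample_arg.split(',')
filename_in = fields.pop(0)                # pickled DatasetInstance
filename_kwds = fields.pop(0)              # JSON dict of set_meta keyword args
filename_out = fields.pop(0)               # where the metadata JSON is written
filename_results_code = fields.pop(0)      # (success, message) JSON written here
dataset_filename_override = fields.pop(0)  # real path of the dataset file
override_metadata = fields.pop(0)          # JSON list of (name, MetadataTempFile) pairs

print(filename_in, filename_out, dataset_filename_override)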
Beispiel #42
0
def handle_compressed_file(
    filename,
    datatypes_registry,
    ext='auto',
    tmp_prefix='sniff_uncompress_',
    tmp_dir=None,
    in_place=False,
    check_content=True,
    auto_decompress=True,
):
    """
    Check uploaded files for compression, check compressed file contents, and uncompress if necessary.

    Supports GZip, BZip2, and the first file in a Zip file.

    For performance reasons, the temporary file used for uncompression is located in the same directory as the
    input/output file. This behavior can be changed with the `tmp_dir` param.

    ``ext`` as returned will only be changed from the ``ext`` input param if the param was an autodetect type (``auto``)
    and the file was sniffed as a keep-compressed datatype.

    ``is_valid`` as returned will only be set if the file is compressed and contains invalid contents (or the first file
    in the case of a zip file); this is so lengthy decompression can be bypassed if there is invalid content in the
    first 32KB. Otherwise the caller should be checking content.
    """
    CHUNK_SIZE = 2**20  # 1 MiB
    is_compressed = False
    compressed_type = None
    keep_compressed = False
    is_valid = False
    uncompressed = filename
    tmp_dir = tmp_dir or os.path.dirname(filename)
    for key, check_compressed_function in COMPRESSION_CHECK_FUNCTIONS:
        is_compressed, is_valid = check_compressed_function(
            filename, check_content=check_content)
        if is_compressed:
            compressed_type = key
            break  # found compression type
    if is_compressed and is_valid:
        if ext in AUTO_DETECT_EXTENSIONS:
            # attempt to sniff for a keep-compressed datatype (observing the sniff order)
            sniff_datatypes = filter(lambda d: getattr(d, 'compressed', False),
                                     datatypes_registry.sniff_order)
            sniffed_ext = run_sniffers_raw(filename, sniff_datatypes)
            if sniffed_ext:
                ext = sniffed_ext
                keep_compressed = True
        else:
            datatype = datatypes_registry.get_datatype_by_extension(ext)
            keep_compressed = getattr(datatype, 'compressed', False)
    # don't waste time decompressing if we sniff invalid contents
    if is_compressed and is_valid and auto_decompress and not keep_compressed:
        with tempfile.NamedTemporaryFile(prefix=tmp_prefix,
                                         dir=tmp_dir,
                                         delete=False) as uncompressed:
            compressed_file = DECOMPRESSION_FUNCTIONS[compressed_type](
                filename)
            # TODO: it'd be ideal to convert to posix newlines and space-to-tab here as well
            while True:
                try:
                    chunk = compressed_file.read(CHUNK_SIZE)
                except OSError as e:
                    os.remove(uncompressed.name)
                    compressed_file.close()
                    raise OSError(
                        'Problem uncompressing {} data, please try retrieving the data uncompressed: {}'
                        .format(compressed_type, util.unicodify(e)))
                if not chunk:
                    break
                uncompressed.write(chunk)
        uncompressed = uncompressed.name
        compressed_file.close()
        if in_place:
            # Replace the compressed file with the uncompressed file
            shutil.move(uncompressed, filename)
            uncompressed = filename
    elif not is_compressed or not check_content:
        is_valid = True
    return is_valid, ext, uncompressed, compressed_type
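
A stripped-down sketch of the chunked decompression step for the gzip case only, using the standard library; the helper name gunzip_in_place and the commented-out path are placeholders, and the real function above additionally handles bzip2/zip, content checks and datatype sniffing:

import gzip
import os
import shutil
import tempfile

CHUNK_SIZE = 2 ** 20  # 1 MiB

def gunzip_in_place(filename):
    """Decompress a gzip file into a temp file next to it, then replace the original."""
    tmp_dir = os.path.dirname(filename) or '.'
    with tempfile.NamedTemporaryFile(prefix='sniff_uncompress_',
                                     dir=tmp_dir, delete=False) as uncompressed:
        with gzip.open(filename, 'rb') as compressed_file:
            while True:
                chunk = compressed_file.read(CHUNK_SIZE)
                if not chunk:
                    break
                uncompressed.write(chunk)
    shutil.move(uncompressed.name, filename)

# gunzip_in_place('/tmp/upload.gz')  # placeholder path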
Beispiel #43
0
def build_readme_files_dict(app, repository, changeset_revision, metadata, tool_path=None):
    """
    Return a dictionary of valid readme file name <-> readme file content pairs for all readme files defined in the received metadata.  Since the
    received changeset_revision (which is associated with the received metadata) may not be the latest installable changeset revision, the README
    file contents may not be available on disk.  This method is used by both Galaxy and the Tool Shed.
    """
    if app.name == 'galaxy':
        can_use_disk_files = True
    else:
        latest_downloadable_changeset_revision = metadata_util.get_latest_downloadable_changeset_revision(app, repository)
        can_use_disk_files = changeset_revision == latest_downloadable_changeset_revision
    readme_files_dict = {}
    if metadata:
        if 'readme_files' in metadata:
            for relative_path_to_readme_file in metadata['readme_files']:
                readme_file_name = os.path.split(relative_path_to_readme_file)[1]
                if can_use_disk_files:
                    if tool_path:
                        full_path_to_readme_file = os.path.abspath(os.path.join(tool_path, relative_path_to_readme_file))
                    else:
                        full_path_to_readme_file = os.path.abspath(relative_path_to_readme_file)
                    text = None
                    try:
                        with open(full_path_to_readme_file, encoding='utf-8') as f:
                            text = f.read()
                    except Exception:
                        log.exception("Error reading README file '%s' from disk", relative_path_to_readme_file)
                        text = None
                    if text:
                        text_of_reasonable_length = basic_util.size_string(text)
                        if text_of_reasonable_length.find('.. image:: ') >= 0:
                            # Handle image display for README files that are contained in repositories in the tool shed or installed into Galaxy.
                            try:
                                text_of_reasonable_length = suc.set_image_paths(app,
                                                                                text_of_reasonable_length,
                                                                                encoded_repository_id=app.security.encode_id(repository.id))
                            except Exception:
                                log.exception("Exception in build_readme_files_dict, so images may not be properly displayed")
                        if readme_file_name.endswith('.rst'):
                            text_of_reasonable_length = Template(rst_to_html(text_of_reasonable_length),
                                                                 input_encoding='utf-8',
                                                                 default_filters=['decode.utf8'],
                                                                 encoding_errors='replace')
                            text_of_reasonable_length = text_of_reasonable_length.render(static_path=web.url_for('/static'),
                                                                                         host_url=web.url_for('/', qualified=True))
                            text_of_reasonable_length = unicodify(text_of_reasonable_length)
                        else:
                            text_of_reasonable_length = basic_util.to_html_string(text_of_reasonable_length)
                        readme_files_dict[readme_file_name] = text_of_reasonable_length
                else:
                    # We must be in the tool shed and have an old changeset_revision, so we need to retrieve the file contents from the repository manifest.
                    repo = repository.hg_repo
                    ctx = hg_util.get_changectx_for_changeset(repo, changeset_revision)
                    if ctx:
                        fctx = hg_util.get_file_context_from_ctx(ctx, readme_file_name)
                        if fctx and fctx not in ['DELETED']:
                            try:
                                text = unicodify(fctx.data())
                                readme_files_dict[readme_file_name] = basic_util.size_string(text)
                            except Exception:
                                log.exception("Error reading README file '%s' from repository manifest", relative_path_to_readme_file)
    return readme_files_dict
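
The on-disk branch above essentially maps readme file names to (size-limited) file contents; a minimal sketch of just that mapping, leaving out the rst/Mako rendering and the Mercurial fallback (the helper name, paths and size cap are invented):

import logging
import os

log = logging.getLogger(__name__)
MAX_README_SIZE = 32768  # illustrative cap, stands in for basic_util.size_string()

def readme_files_from_disk(relative_paths, tool_path=None):
    """Return {readme file name: truncated file content} for the files that can be read."""
    readme_files_dict = {}
    for relative_path in relative_paths:
        readme_file_name = os.path.split(relative_path)[1]
        full_path = os.path.abspath(
            os.path.join(tool_path, relative_path) if tool_path else relative_path)
        try:
            with open(full_path, encoding='utf-8') as f:
                text = f.read()
        except Exception:
            log.exception("Error reading README file '%s' from disk", relative_path)
            continue
        readme_files_dict[readme_file_name] = text[:MAX_README_SIZE]
    return readme_files_dict

# readme_files_from_disk(['readme.rst'], tool_path='/shed_tools/repo')  # placeholder call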
Beispiel #44
0
    def submit_report(self, dataset, job, tool, **kwargs):
        """Submit the error report to sentry
        """
        log.info(self.github)

        if self.github:

            # Determine the ToolShed URL. We initially connect with HTTP; if a redirect to HTTPS is set up,
            # requests will detect it and use HTTPS further down the line. Also cache this so everything is
            # as fast as possible.
            log.info(tool.tool_shed)
            ts_url = self._determine_ts_url(tool)
            log.info("GitLab error reporting - Determined ToolShed is %s",
                     ts_url)

            # Find the repo inside the ToolShed
            ts_repourl = self._get_gitrepo_from_ts(job, ts_url)

            # Determine the GitHub project URL and the issue cache key
            github_projecturl = urlparse.urlparse(ts_repourl).path[1:] if (ts_repourl and not self.git_default_repo_only) \
                else "/".join([self.git_default_repo_owner, self.git_default_repo_name])
            issue_cache_key = self._get_issue_cache_key(job, ts_repourl)

            # Connect to the repo
            if github_projecturl not in self.git_project_cache:
                self.git_project_cache[
                    github_projecturl] = self.github.get_repo(
                        '%s' % github_projecturl)
            gh_project = self.git_project_cache[github_projecturl]

            # Make sure we keep a cache of the issues, per tool in this case
            if issue_cache_key not in self.issue_cache:
                self._fill_issue_cache(gh_project, issue_cache_key)

            # Retrieve label
            label = self.get_label(
                '%s/%s' %
                (unicodify(job.tool_id), unicodify(job.tool_version)),
                gh_project, issue_cache_key)

            # Generate information for the tool
            error_title = self._generate_error_title(job)

            # Generate the error message
            error_message = self._generate_error_message(dataset, job, kwargs)

            log.info(error_title in self.issue_cache[issue_cache_key])
            if error_title not in self.issue_cache[issue_cache_key]:
                # Create a new issue.
                self._create_issue(issue_cache_key,
                                   error_title,
                                   error_message,
                                   gh_project,
                                   label=label)
            else:
                self._append_issue(issue_cache_key, error_title, error_message)
            return (
                'Submitted error report to Github. Your issue number is <a href="%s/%s/issues/%s" '
                'target="_blank">#%s</a>.' %
                (self.github_base_url, github_projecturl,
                 self.issue_cache[issue_cache_key][error_title].number,
                 self.issue_cache[issue_cache_key][error_title].number),
                'success')
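
The create-or-append decision above hinges on an issue cache keyed per tool and per error title; the bookkeeping can be sketched without any GitHub client (the function and callback names below are stand-ins, not the PyGithub API):

issue_cache = {}  # {issue_cache_key: {error_title: issue-like object}}

def report_error(issue_cache_key, error_title, error_message,
                 create_issue, append_comment):
    """Create a new issue for an unseen error title, otherwise append a comment."""
    per_tool = issue_cache.setdefault(issue_cache_key, {})
    if error_title not in per_tool:
        per_tool[error_title] = create_issue(error_title, error_message)
    else:
        append_comment(per_tool[error_title], error_message)
    return per_tool[error_title]

# Stand-in callbacks so the flow can be exercised without a GitHub connection.
def fake_create_issue(title, body):
    print("creating issue:", title)
    return {"title": title, "comments": [body]}

def fake_append_comment(issue, body):
    print("appending comment to:", issue["title"])
    issue["comments"].append(body)

report_error('toolshed/owner/repo/mytool', 'mytool failed', 'traceback ...',
             fake_create_issue, fake_append_comment)
report_error('toolshed/owner/repo/mytool', 'mytool failed', 'another traceback',
             fake_create_issue, fake_append_comment)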
Beispiel #45
0
 def _upload_dataset(self,
                     trans,
                     library_id,
                     folder_id,
                     replace_dataset=None,
                     **kwd):
     # Set up the traditional tool state/params
     cntrller = 'api'
     tool_id = 'upload1'
     message = None
     file_type = kwd.get('file_type')
     try:
         upload_common.validate_datatype_extension(
             datatypes_registry=trans.app.datatypes_registry, ext=file_type)
     except RequestParameterInvalidException as e:
         return (400, util.unicodify(e))
     tool = trans.app.toolbox.get_tool(tool_id)
     state = tool.new_state(trans)
     populate_state(trans, tool.inputs, kwd, state.inputs)
     tool_params = state.inputs
     dataset_upload_inputs = []
     for input in tool.inputs.values():
         if input.type == "upload_dataset":
             dataset_upload_inputs.append(input)
     # Library-specific params
     server_dir = kwd.get('server_dir', '')
     upload_option = kwd.get('upload_option', 'upload_file')
     response_code = 200
     if upload_option == 'upload_directory':
         full_dir, import_dir_desc = validate_server_directory_upload(
             trans, server_dir)
         message = 'Select a directory'
     elif upload_option == 'upload_paths':
         # Library API already checked this - following check isn't actually needed.
         validate_path_upload(trans)
     # Some error handling should be added to this method.
     try:
         # FIXME: instead of passing params here (which have been processed by util.Params()), the original kwd
         # should be passed so that complex objects that may have been included in the initial request remain.
         library_bunch = upload_common.handle_library_params(
             trans, kwd, folder_id, replace_dataset)
     except Exception:
         response_code = 500
         message = "Unable to parse upload parameters, please report this error."
     # Proceed with (mostly) regular upload processing if we're still errorless
     if response_code == 200:
         if upload_option == 'upload_file':
             tool_params = upload_common.persist_uploads(tool_params, trans)
             uploaded_datasets = upload_common.get_uploaded_datasets(
                 trans,
                 cntrller,
                 tool_params,
                 dataset_upload_inputs,
                 library_bunch=library_bunch)
         elif upload_option == 'upload_directory':
             uploaded_datasets, response_code, message = self._get_server_dir_uploaded_datasets(
                 trans, kwd, full_dir, import_dir_desc, library_bunch,
                 response_code, message)
         elif upload_option == 'upload_paths':
             uploaded_datasets, response_code, message = self._get_path_paste_uploaded_datasets(
                 trans, kwd, library_bunch, response_code, message)
         if upload_option == 'upload_file' and not uploaded_datasets:
             response_code = 400
             message = 'Select a file, enter a URL or enter text'
     if response_code != 200:
         return (response_code, message)
     json_file_path = upload_common.create_paramfile(
         trans, uploaded_datasets)
     data_list = [ud.data for ud in uploaded_datasets]
     job_params = {}
     job_params['link_data_only'] = json.dumps(
         kwd.get('link_data_only', 'copy_files'))
     job_params['uuid'] = json.dumps(kwd.get('uuid', None))
     job, output = upload_common.create_job(trans,
                                            tool_params,
                                            tool,
                                            json_file_path,
                                            data_list,
                                            folder=library_bunch.folder,
                                            job_params=job_params)
     trans.sa_session.add(job)
     trans.sa_session.flush()
     return output
Beispiel #46
0
    def index(self, trans, library_id, **kwd):
        """
        index( self, trans, library_id, **kwd )
        * GET /api/libraries/{library_id}/contents:
            Returns a list of library files and folders.

        .. note:: May be slow! Returns all content traversing recursively through all folders.
        .. seealso:: :class:`galaxy.webapps.galaxy.api.FolderContentsController.index` for a non-recursive solution

        :param  library_id: the encoded id of the library
        :type   library_id: str

        :returns:   list of dictionaries of the form:
            * id:   the encoded id of the library item
            * name: the 'library path'
                or relationship of the library item to the root
            * type: 'file' or 'folder'
            * url:  the url to get detailed information on the library item
        :rtype:     list

        :raises:  MalformedId, InconsistentDatabase, RequestParameterInvalidException, InternalServerError
        """
        rval = []
        current_user_roles = trans.get_current_user_roles()

        def traverse(folder):
            admin = trans.user_is_admin
            rval = []
            for subfolder in folder.active_folders:
                if not admin:
                    can_access, folder_ids = trans.app.security_agent.check_folder_contents(
                        trans.user, current_user_roles, subfolder)
                if (admin or can_access) and not subfolder.deleted:
                    subfolder.api_path = folder.api_path + '/' + subfolder.name
                    subfolder.api_type = 'folder'
                    rval.append(subfolder)
                    rval.extend(traverse(subfolder))
            for ld in folder.datasets:
                if not admin:
                    can_access = trans.app.security_agent.can_access_dataset(
                        current_user_roles,
                        ld.library_dataset_dataset_association.dataset)
                if (admin or can_access) and not ld.deleted:
                    ld.api_path = folder.api_path + '/' + ld.name
                    ld.api_type = 'file'
                    rval.append(ld)
            return rval

        try:
            decoded_library_id = self.decode_id(library_id)
        except Exception:
            raise exceptions.MalformedId(
                'Malformed library id ( %s ) specified, unable to decode.' %
                library_id)
        try:
            library = trans.sa_session.query(trans.app.model.Library).filter(
                trans.app.model.Library.table.c.id ==
                decoded_library_id).one()
        except MultipleResultsFound:
            raise exceptions.InconsistentDatabase(
                'Multiple libraries found with the same id.')
        except NoResultFound:
            raise exceptions.RequestParameterInvalidException(
                'No library found with the id provided.')
        except Exception as e:
            raise exceptions.InternalServerError(
                'Error loading from the database.' + util.unicodify(e))
        if not (trans.user_is_admin
                or trans.app.security_agent.can_access_library(
                    current_user_roles, library)):
            raise exceptions.RequestParameterInvalidException(
                'No library found with the id provided.')
        encoded_id = 'F' + trans.security.encode_id(library.root_folder.id)
        # appending root folder
        rval.append(
            dict(id=encoded_id,
                 type='folder',
                 name='/',
                 url=url_for('library_content',
                             library_id=library_id,
                             id=encoded_id)))
        library.root_folder.api_path = ''
        # appending all other items in the library recursively
        for content in traverse(library.root_folder):
            encoded_id = trans.security.encode_id(content.id)
            if content.api_type == 'folder':
                encoded_id = 'F' + encoded_id
            rval.append(
                dict(id=encoded_id,
                     type=content.api_type,
                     name=content.api_path,
                     url=url_for(
                         'library_content',
                         library_id=library_id,
                         id=encoded_id,
                     )))
        return rval
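
The recursive traversal above can be exercised on plain stand-in objects to see how the 'library paths' accumulate; the Folder/Dataset classes here are invented, not Galaxy models, and the access checks are omitted:

class Folder:
    def __init__(self, name, folders=None, datasets=None):
        self.name = name
        self.active_folders = folders or []
        self.datasets = datasets or []
        self.api_path = ''

class Dataset:
    def __init__(self, name):
        self.name = name

def traverse(folder):
    """Recursively flatten a folder tree into (api_path, type) entries."""
    rval = []
    for subfolder in folder.active_folders:
        subfolder.api_path = folder.api_path + '/' + subfolder.name
        rval.append((subfolder.api_path, 'folder'))
        rval.extend(traverse(subfolder))
    for ld in folder.datasets:
        rval.append((folder.api_path + '/' + ld.name, 'file'))
    return rval

root = Folder('/', folders=[Folder('raw', datasets=[Dataset('reads.fastq')])],
              datasets=[Dataset('notes.txt')])
print(traverse(root))
# [('/raw', 'folder'), ('/raw/reads.fastq', 'file'), ('/notes.txt', 'file')]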
Beispiel #47
0
    def _get_drmaa_state_qacct(self, job_id, extinfo):
        '''
        get the job (drmaa) state with qacct.

        extinfo: dict in which signal, exit_status, deleted, time_wasted, and memory_wasted can be stored:
        - signal: the signal reported in the exit state from qacct (see below)
        - exit_status: set to the exit status if one is reported, i.e. if qacct returns an exit state
            larger than 0 and less than 129 (for exit states > 128, signal is set instead);
            in either case (exit state > 0) the state FAILED is returned
        - deleted: set to True if the job was deleted (otherwise not set at all)
        - time_wasted: time used in seconds (taken from wallclock)
        - memory_wasted: memory used by the program in bytes (taken from maxvmem)

        return state
        - initialised with UNDETERMINED and changed in the following cases:
        - DONE if exit state == 0
        - FAILED if exit state != 0
        - RUNNING if the qacct 'failed' code is 24 or 25
        - FAILED if the qacct 'failed' code is not in [0, 24, 25, 100]
        '''
        # log.debug("UnivaJobRunner._get_drmaa_state_qacct ({jobid})".format(jobid=job_id))
        signals = {k: v for v, k in reversed(sorted(signal.__dict__.items()))
                   if v.startswith('SIG') and not v.startswith('SIG_')}
        cmd = ['qacct', '-j', job_id]
        slp = 1
        # run qacct -j JOBID; since the accounting data for the job might not be
        # available immediately, a simple retry mechanism is implemented
        # (max wait is approx. 1 min)
        while True:
            try:
                stdout = commands.execute(cmd).strip()
            except commands.CommandLineException as e:
                if slp <= 32 and f"job id {job_id} not found" in e.stderr:
                    time.sleep(slp)
                    slp *= 2
                    continue
                else:
                    log.error(unicodify(e))
                    return self.drmaa.JobState.UNDETERMINED
            else:
                break
        qacct = dict()
        for line in stdout.split("\n"):
            # remove header
            if line.startswith("=") or line == "":
                continue
            line = line.split()
            qacct[line[0]] = " ".join(line[1:])
        # qacct has three fields of interest: failed, exit_status, deleted_by
        # experiments
        #            failed  exit_status deleted_by
        # BASH ------------------------------------
        # time-limit 100     137
        # mem-limit  0       2
        # python --------------------------------------------------------------
        # time-limit
        # mem-limit  0       1
        # C -------------------------------------------------------------------
        # time-limit
        # mem-limit  0       C programs either segfault (139) or check allocated memory for NULL (in which case a programmer-defined message/exit code is given)
        #                    note that max_vmem might not be reliable, since the program never gets the memory.
        # C++ -----------------------------------------------------------------
        # time-limit
        # mem-limit  0       same as for C programs
        # JAVA ----------------------------------------------------------------
        # time-limit
        # mem-limit
        # perl ----------------------------------------------------------------
        # time-limit
        # mem-limit
        # bash other tests ----------------------------------------------------
        # qdel       100     137          user@mail

        extinfo["time_wasted"] = _parse_time(qacct["wallclock"])
        extinfo["memory_wasted"] = size_to_bytes(qacct["maxvmem"])
        extinfo["slots"] = int(qacct["slots"])

        # deleted_by
        # If the job (the array task) has been deleted via qdel, "<username>@<hostname>", else
        # "NONE". If qdel was called multiple times, every invocation is recorded in a comma
        # separated list.
        if "deleted_by" in qacct and qacct["deleted_by"] != "NONE":
            log.info(f"DRMAAUniva: job {job_id} was aborted by {qacct['deleted_by']}")
            extinfo["deleted"] = True
            return self.drmaa.JobState.FAILED

        state = self.drmaa.JobState.UNDETERMINED
        # exit_status
        # Exit status of the job script (or Univa Grid Engine specific status in case of certain
        # error conditions). The exit status is determined by following the normal shell conventions
        # If the command terminates normally the value of the command is its exit status.
        # However, in the case that the command exits abnormally, a value of 0200 (octal), 128
        # (decimal) is added to the value of the command to make up the exit status.
        # For example: If a job dies through signal 9 (SIGKILL) then the exit status
        # becomes 128 + 9 = 137.
        if "exit_status" in qacct:
            qacct["exit_status"] = int(qacct["exit_status"])
            if qacct["exit_status"] < 1:
                log.error(f"DRMAAUniva: job {job_id} has exit status {qacct['exit_status']}")
                state = self.drmaa.JobState.DONE
            elif 0 < qacct["exit_status"] < 129:
                log.error(f"DRMAAUniva: job {job_id} has exit status {qacct['exit_status']}")
                extinfo['exit_status'] = qacct["exit_status"]
                state = self.drmaa.JobState.FAILED
            else:
                log.error(f"DRMAAUniva: job {job_id} was killed by signal {qacct['exit_status'] - 128}")
                state = self.drmaa.JobState.FAILED
                extinfo["signal"] = signals[qacct["exit_status"] - 128]

        # failed
        # Indicates the problem which occurred in case a job could not be started on the execution
        # host (e.g. because the owner of the job did not have a valid account on that
        # machine). If Univa Grid Engine tries to start a job multiple times, this may lead to
        # multiple entries in the accounting file corresponding to the same job ID.
        # for the codes see https://docs.oracle.com/cd/E19957-01/820-0699/chp11-2/index.html
        if "failed" in qacct:
            code = int(qacct["failed"].split()[0])
            # this happens in case of no error or exit_code!=0 (0) or a signal (100).
            # both cases are covered already
            if code in [0, 100]:
                pass
            # these seem to be OK as well
            elif code in [24, 25]:
                state = self.drmaa.JobState.RUNNING
            else:
                log.error(f"DRMAAUniva: job {job_id} failed with failure {qacct['failed']}")
                state = self.drmaa.JobState.FAILED
        # log.debug("UnivaJobRunner._get_drmaa_state_qacct ({jobid}) -> {state}".format(jobid=job_id, state=self.drmaa_job_state_strings[state]))
        return state
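
Parsing qacct -j output and mapping exit statuses above 128 back to signal names can be tried in isolation; the sample qacct output below is fabricated:

import signal

sample_qacct = """==============================================================
qname        all.q
failed       100
exit_status  137
wallclock    12.3
maxvmem      1.5G
deleted_by   NONE
"""

# Build {signal number: name}; with the reversed sort, the alphabetically first
# alias wins for duplicate numbers (e.g. SIGABRT over SIGIOT).
signals = {k: v for v, k in reversed(sorted(signal.__dict__.items()))
           if v.startswith('SIG') and not v.startswith('SIG_')}

qacct = {}
for line in sample_qacct.split("\n"):
    if line.startswith("=") or line == "":
        continue
    parts = line.split()
    qacct[parts[0]] = " ".join(parts[1:])

exit_status = int(qacct["exit_status"])
if exit_status > 128:
    print("killed by", signals[exit_status - 128])  # -> killed by SIGKILL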
Beispiel #48
0
    def delete(self, trans, library_id, id, **kwd):
        """
        delete( self, trans, library_id, id, **kwd )
        * DELETE /api/libraries/{library_id}/contents/{id}
            delete the LibraryDataset with the given ``id``

        :type   id:     str
        :param  id:     the encoded id of the library dataset to delete
        :type   kwd:    dict
        :param  kwd:    (optional) dictionary structure containing:

            * payload:     a dictionary itself containing:
                * purge:   if True, purge the LD

        :rtype:     dict
        :returns:   an error object if an error occurred or a dictionary containing:
            * id:         the encoded id of the library dataset,
            * deleted:    if the library dataset was marked as deleted,
            * purged:     if the library dataset was purged
        """
        # a request body is optional here
        purge = False
        if kwd.get('payload', None):
            purge = util.string_as_bool(kwd['payload'].get('purge', False))

        rval = {'id': id}
        try:
            ld = self.get_library_dataset(trans,
                                          id,
                                          check_ownership=False,
                                          check_accessible=True)
            user_is_admin = trans.user_is_admin
            can_modify = trans.app.security_agent.can_modify_library_item(
                trans.user.all_roles(), ld)
            log.debug('is_admin: %s, can_modify: %s', user_is_admin,
                      can_modify)
            if not (user_is_admin or can_modify):
                trans.response.status = 403
                rval.update({
                    'error':
                    'Unauthorized to delete or purge this library dataset'
                })
                return rval

            ld.deleted = True
            if purge:
                ld.purged = True
                trans.sa_session.add(ld)
                trans.sa_session.flush()

                # TODO: had to change this up a bit from Dataset.user_can_purge
                dataset = ld.library_dataset_dataset_association.dataset
                no_history_assoc = len(dataset.history_associations) == len(
                    dataset.purged_history_associations)
                no_library_assoc = dataset.library_associations == [
                    ld.library_dataset_dataset_association
                ]
                can_purge_dataset = not dataset.purged and no_history_assoc and no_library_assoc

                if can_purge_dataset:
                    try:
                        ld.library_dataset_dataset_association.dataset.full_delete(
                        )
                        trans.sa_session.add(ld.dataset)
                    except Exception:
                        pass
                    # flush now to preserve deleted state in case of later interruption
                    trans.sa_session.flush()
                rval['purged'] = True
            trans.sa_session.flush()
            rval['deleted'] = True

        except exceptions.httpexceptions.HTTPInternalServerError:
            log.exception(
                'Library_contents API, delete: uncaught HTTPInternalServerError: %s, %s',
                id, str(kwd))
            raise
        except exceptions.httpexceptions.HTTPException:
            raise
        except Exception as exc:
            log.exception(
                'library_contents API, delete: uncaught exception: %s, %s', id,
                str(kwd))
            trans.response.status = 500
            rval.update({'error': util.unicodify(exc)})
        return rval
Beispiel #49
0
 def get_value(self, trans, grid, tool_shed_repository):
     return util.unicodify(tool_shed_repository.description)
Beispiel #50
0
def verify_tool(tool_id,
                galaxy_interactor,
                resource_parameters=None,
                register_job_data=None,
                test_index=0,
                tool_version=None,
                quiet=False,
                test_history=None,
                force_path_paste=False,
                maxseconds=DEFAULT_TOOL_TEST_WAIT,
                tool_test_dicts=None):
    if resource_parameters is None:
        resource_parameters = {}
    tool_test_dicts = tool_test_dicts or galaxy_interactor.get_tool_tests(tool_id, tool_version=tool_version)
    tool_test_dict = tool_test_dicts[test_index]
    tool_test_dict.setdefault('maxseconds', maxseconds)
    testdef = ToolTestDescription(tool_test_dict)
    _handle_def_errors(testdef)

    if test_history is None:
        test_history = galaxy_interactor.new_history()

    stage_data_in_history(galaxy_interactor,
                          tool_id,
                          testdef.test_data(),
                          history=test_history,
                          force_path_paste=force_path_paste,
                          maxseconds=maxseconds)

    # Once data is ready, run the tool and check the outputs - record API
    # input, job info, tool run exception, as well as exceptions related to
    # job output checking, and register them with the test plugin so it can
    # record structured information.
    tool_inputs = None
    job_stdio = None
    job_output_exceptions = None
    tool_execution_exception = None
    expected_failure_occurred = False
    begin_time = time.time()
    try:
        try:
            tool_response = galaxy_interactor.run_tool(testdef, test_history, resource_parameters=resource_parameters)
            data_list, jobs, tool_inputs = tool_response.outputs, tool_response.jobs, tool_response.inputs
            data_collection_list = tool_response.output_collections
        except RunToolException as e:
            tool_inputs = e.inputs
            tool_execution_exception = e
            if not testdef.expect_failure:
                raise e
            else:
                expected_failure_occurred = True
        except Exception as e:
            tool_execution_exception = e
            raise e

        if not expected_failure_occurred:
            assert data_list or data_collection_list

            try:
                job_stdio = _verify_outputs(testdef, test_history, jobs, tool_id, data_list, data_collection_list, galaxy_interactor, quiet=quiet)
            except JobOutputsError as e:
                job_stdio = e.job_stdio
                job_output_exceptions = e.output_exceptions
                raise e
            except Exception as e:
                job_output_exceptions = [e]
                raise e
    finally:
        if register_job_data is not None:
            end_time = time.time()
            job_data = {
                "tool_id": tool_id,
                "tool_version": tool_version,
                "test_index": test_index,
                "time_seconds": end_time - begin_time,
            }
            if tool_inputs is not None:
                job_data["inputs"] = tool_inputs
            if job_stdio is not None:
                job_data["job"] = job_stdio
            status = "success"
            if job_output_exceptions:
                job_data["output_problems"] = [util.unicodify(_) for _ in job_output_exceptions]
                status = "failure"
            if tool_execution_exception:
                job_data["execution_problem"] = util.unicodify(tool_execution_exception)
                status = "error"
            job_data["status"] = status
            register_job_data(job_data)

    galaxy_interactor.delete_history(test_history)
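
The register_job_data hook above simply receives one dictionary per executed test; a caller might collect the records and dump them as JSON, roughly like this (the output file name is arbitrary):

import json

collected_job_data = []

def register_job_data(job_data):
    """Callback handed to verify_tool(); accumulates one record per executed test."""
    collected_job_data.append(job_data)

# ... after calling verify_tool(..., register_job_data=register_job_data) for each test:
with open('tool_test_results.json', 'w') as handle:
    json.dump({'tests': collected_job_data}, handle, indent=2)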
Beispiel #51
0
 def get_html(self, prefix="", disabled=False):
     return unicodify('<textarea name="%s%s" rows="%d" cols="%d"%s>%s</textarea>'
                      % (prefix, self.name, self.rows, self.cols, self.get_disabled_str(disabled), escape(str(self.value), quote=True)))
Example #52
def _verify_outputs(testdef, history, jobs, tool_id, data_list, data_collection_list, galaxy_interactor, quiet=False):
    assert len(jobs) == 1, "Test framework logic error, somehow tool test resulted in more than one job."
    job = jobs[0]

    maxseconds = testdef.maxseconds
    if testdef.num_outputs is not None:
        expected = testdef.num_outputs
        actual = len(data_list) + len(data_collection_list)
        if expected != actual:
            message_template = "Incorrect number of outputs - expected %d, found %s."
            message = message_template % (expected, actual)
            raise Exception(message)
    found_exceptions = []

    def register_exception(e):
        if not found_exceptions and not quiet:
            # Only print this stuff out once.
            for stream in ['stdout', 'stderr']:
                if stream in job_stdio:
                    print(_format_stream(job_stdio[stream], stream=stream, format=True), file=sys.stderr)
        found_exceptions.append(e)

    if testdef.expect_failure:
        if testdef.outputs:
            raise Exception("Cannot specify outputs in a test expecting failure.")

    # Wait for the job to complete and register exceptions if the final
    # status was not what the test was expecting.
    job_failed = False
    try:
        galaxy_interactor.wait_for_job(job['id'], history, maxseconds)
    except Exception as e:
        job_failed = True
        if not testdef.expect_failure:
            found_exceptions.append(e)

    job_stdio = galaxy_interactor.get_job_stdio(job['id'])

    if not job_failed and testdef.expect_failure:
        error = AssertionError("Expected job to fail but Galaxy indicated the job successfully completed.")
        register_exception(error)

    expect_exit_code = testdef.expect_exit_code
    if expect_exit_code is not None:
        exit_code = job_stdio["exit_code"]
        if str(expect_exit_code) != str(exit_code):
            error = AssertionError("Expected job to complete with exit code %s, found %s" % (expect_exit_code, exit_code))
            register_exception(error)

    for output_index, output_dict in enumerate(testdef.outputs):
        # Get the correct hid
        name = output_dict["name"]
        outfile = output_dict["value"]
        attributes = output_dict["attributes"]
        output_testdef = Bunch(name=name, outfile=outfile, attributes=attributes)
        try:
            output_data = data_list[name]
        except (TypeError, KeyError):
            # Legacy - fall back on ordered data list access if data_list is
            # just a list (case with twill variant or if output changes its
            # name).
            if hasattr(data_list, "values"):
                output_data = list(data_list.values())[output_index]
            else:
                output_data = data_list[len(data_list) - len(testdef.outputs) + output_index]
        assert output_data is not None
        try:
            galaxy_interactor.verify_output(history, jobs, output_data, output_testdef=output_testdef, tool_id=tool_id, maxseconds=maxseconds)
        except Exception as e:
            register_exception(e)

    other_checks = {
        "command_line": "Command produced by the job",
        "stdout": "Standard output of the job",
        "stderr": "Standard error of the job",
    }
    # TODO: Only hack the stdio like this for older profiles; for newer tool profiles
    # add some syntax for asserting job messages maybe - or just drop this because exit
    # code and regex on stdio can be tested directly - so this is really testing Galaxy
    # core handling more than the tool.
    job_messages = job_stdio.get("job_messages") or []
    stdout_prefix = ""
    stderr_prefix = ""
    for job_message in job_messages:
        message_type = job_message.get("type")
        if message_type == "regex" and job_message.get("stream") == "stderr":
            stderr_prefix += (job_message.get("desc") or '') + "\n"
        elif message_type == "regex" and job_message.get("stream") == "stdout":
            stdout_prefix += (job_message.get("desc") or '') + "\n"
        elif message_type == "exit_code":
            stderr_prefix += (job_message.get("desc") or '') + "\n"
        else:
            raise Exception("Unknown job message type [%s] in [%s]" % (message_type, job_message))

    for what, description in other_checks.items():
        if getattr(testdef, what, None) is not None:
            try:
                raw_data = job_stdio[what]
                assertions = getattr(testdef, what)
                if what == "stdout":
                    data = stdout_prefix + raw_data
                elif what == "stderr":
                    data = stderr_prefix + raw_data
                else:
                    data = raw_data
                verify_assertions(data, assertions)
            except AssertionError as err:
                errmsg = '%s different than expected\n' % description
                errmsg += util.unicodify(err)
                register_exception(AssertionError(errmsg))

    for output_collection_def in testdef.output_collections:
        try:
            name = output_collection_def.name
            # TODO: data_collection_list is clearly a bad name for a dictionary.
            if name not in data_collection_list:
                template = "Failed to find output [%s], tool outputs include [%s]"
                message = template % (name, ",".join(data_collection_list.keys()))
                raise AssertionError(message)

            # Data collection returned from submission, elements may have been populated after
            # the job completed so re-hit the API for more information.
            data_collection_returned = data_collection_list[name]
            data_collection = galaxy_interactor._get("dataset_collections/%s" % data_collection_returned["id"], data={"instance_type": "history"}).json()

            def get_element(elements, id):
                for element in elements:
                    if element["element_identifier"] == id:
                        return element
                return False

            expected_collection_type = output_collection_def.collection_type
            if expected_collection_type:
                collection_type = data_collection["collection_type"]
                if expected_collection_type != collection_type:
                    template = "Expected output collection [%s] to be of type [%s], was of type [%s]."
                    message = template % (name, expected_collection_type, collection_type)
                    raise AssertionError(message)

            expected_element_count = output_collection_def.count
            if expected_element_count:
                actual_element_count = len(data_collection["elements"])
                if expected_element_count != actual_element_count:
                    template = "Expected output collection [%s] to have %s elements, but it had %s."
                    message = template % (name, expected_element_count, actual_element_count)
                    raise AssertionError(message)

            def verify_elements(element_objects, element_tests):
                for element_identifier, (element_outfile, element_attrib) in element_tests.items():
                    element = get_element(element_objects, element_identifier)
                    if not element:
                        template = "Failed to find identifier [%s] for testing, tool generated collection elements [%s]"
                        message = template % (element_identifier, element_objects)
                        raise AssertionError(message)

                    element_type = element["element_type"]
                    if element_type != "dataset_collection":
                        hda = element["object"]
                        galaxy_interactor.verify_output_dataset(
                            history,
                            hda_id=hda["id"],
                            outfile=element_outfile,
                            attributes=element_attrib,
                            tool_id=tool_id
                        )
                    if element_type == "dataset_collection":
                        elements = element["object"]["elements"]
                        verify_elements(elements, element_attrib.get("elements", {}))

            verify_elements(data_collection["elements"], output_collection_def.element_tests)
        except Exception as e:
            register_exception(e)

    if found_exceptions:
        raise JobOutputsError(found_exceptions, job_stdio)
    else:
        return job_stdio
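For orientation, a hedged sketch of the `element_tests` structure that `verify_elements` above traverses: identifiers map to `(outfile, attributes)` pairs, and nested collections sit under an `"elements"` key inside the attributes. The file names and attributes below are made up for illustration:

# Hypothetical element_tests structure for a list:paired output collection.
element_tests = {
    "sample1": (
        None,  # no outfile for the outer element; it is itself a collection
        {
            "elements": {
                "forward": ("sample1_R1.fastq", {"compare": "diff"}),
                "reverse": ("sample1_R2.fastq", {"compare": "diff"}),
            }
        },
    ),
}

def walk_element_tests(element_tests, depth=0):
    # Mirrors the traversal in verify_elements above, but only prints what would be checked.
    for identifier, (outfile, attributes) in element_tests.items():
        print("  " * depth + "%s -> %s" % (identifier, outfile))
        walk_element_tests(attributes.get("elements", {}), depth + 1)

walk_element_tests(element_tests)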
Example #53
 def wrapper(*args, **kwargs):
     try:
         return func(*args, **kwargs)
     except chronos_exceptions as e:
         LOGGER.error(unicodify(e))
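This fragment is only the inner wrapper; a hedged sketch of the decorator it presumably belongs to is shown below, with placeholder exception types standing in for the real `chronos_exceptions` tuple and `str()` standing in for `unicodify`:

import functools
import logging

LOGGER = logging.getLogger(__name__)
# Placeholder for the real tuple of Chronos client exception types.
chronos_exceptions = (RuntimeError, ValueError)

def handle_chronos_errors(func):
    # Hypothetical decorator name; errors are logged and swallowed rather than re-raised.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except chronos_exceptions as e:
            LOGGER.error(str(e))
    return wrapper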
Example #54
def add_file(dataset, registry, output_path):
    ext = None
    compression_type = None
    line_count = None
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ['link_to_files', 'copy_files']:
        raise UploadProblemException(
            "Invalid setting '%s' for option link_data_only - upload request misconfigured"
            % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is inferred from the Galaxy config (an external chmod of the inputs indicates
    # the job is executed as the real user). If this is True we always purge supplied upload inputs so
    # they are cleaned up, and we reuse their paths during data conversions since this user already
    # owns those paths.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get(
        "in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get(
        'purge_source',
        True) and not run_as_real_user and dataset.type not in ('server_dir',
                                                                'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir',
                                                             'path_paste',
                                                             'ftp_import')

    # Based on the check_upload_content Galaxy config option (on by default), this enables some
    # security-related checks on the uploaded content, but it can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be switched off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        dataset.file_type
    except AttributeError:
        raise UploadProblemException(
            'Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            dataset.path = sniff.stream_url_to_file(
                dataset.path, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' %
                                         (dataset.path, unicodify(e)))

    # See if we have an empty file
    if not os.path.exists(dataset.path):
        raise UploadProblemException(
            'Uploaded temporary file (%s) does not exist.' % dataset.path)

    stdout, ext, datatype, is_binary, converted_path = handle_upload(
        registry=registry,
        path=dataset.path,
        requested_ext=dataset.file_type,
        name=dataset.name,
        tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
        tmp_dir=output_adjacent_tmpdir(output_path),
        check_content=check_content,
        link_data_only=link_data_only,
        in_place=in_place,
        auto_decompress=auto_decompress,
        convert_to_posix_lines=dataset.to_posix_lines,
        convert_spaces_to_tabs=dataset.space_to_tab,
    )

    # Strip compression extension from name
    if compression_type and not getattr(
            datatype, 'compressed',
            False) and dataset.name.endswith('.' + compression_type):
        dataset.name = dataset.name[:-len('.' + compression_type)]

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    if not link_data_only:
        # Move the dataset to its "real" path. converted_path is a tempfile so we move it even if purge_source is False.
        if purge_source or converted_path:
            try:
                # If the user has indicated the original file should be purged and we have a converted_path tempfile
                if purge_source and converted_path:
                    shutil.move(converted_path, output_path)
                    os.remove(dataset.path)
                else:
                    shutil.move(converted_path or dataset.path, output_path)
            except OSError as e:
                # We may not have permission to remove the input
                if e.errno != errno.EACCES:
                    raise
        else:
            shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(
            output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
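To make the flag derivation above easier to follow, here is a small standalone re-statement of how `link_data_only`, `purge_source` and `in_place` fall out of the request, using plain dicts in place of the real `dataset` object (the example payloads are hypothetical):

def derive_upload_flags(dataset):
    # Mirrors the logic above with a plain dict; illustrative only.
    link_data_only = dataset.get('link_data_only', 'copy_files') == 'link_to_files'
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get('in_place', False)
    purge_source = (dataset.get('purge_source', True)
                    and not run_as_real_user
                    and dataset['type'] not in ('server_dir', 'path_paste'))
    in_place = not run_as_real_user and dataset['type'] not in ('server_dir', 'path_paste', 'ftp_import')
    return link_data_only, purge_source, in_place

# A regular browser upload vs. a linked path paste.
print(derive_upload_flags({'type': 'upload_dataset'}))                                  # (False, True, True)
print(derive_upload_flags({'type': 'path_paste', 'link_data_only': 'link_to_files'}))   # (True, False, False)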
Example #55
 def get_html(self, prefix=""):
     return unicodify('<input type="hidden" name="%s%s" value="%s">' % (prefix, self.name, escape(str(self.value), quote=True)))
Example #56
def get_file_peek(file_name,
                  is_multi_byte=False,
                  WIDTH=256,
                  LINE_COUNT=5,
                  skipchars=None,
                  line_wrap=True):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH

    >>> fname = get_test_fname('4.bed')
    >>> get_file_peek(fname, LINE_COUNT=1)
    u'chr22\\t30128507\\t31828507\\tuc003bnx.1_cds_2_0_chr22_29227_f\\t0\\t+\\n'
    """
    # Set size for file.readline() to a negative number to force it to
    # read until either a newline or EOF.  Needed for datasets with very
    # long lines.
    if WIDTH == 'unlimited':
        WIDTH = -1
    if skipchars is None:
        skipchars = []
    lines = []
    count = 0
    file_type = None
    data_checked = False
    temp = compression_utils.get_fileobj(file_name, "U")
    try:
        while count < LINE_COUNT:
            line = temp.readline(WIDTH)
            if line and not is_multi_byte and not data_checked:
                # See if we have a compressed or binary file
                for char in line:
                    if ord(char) > 128:
                        file_type = 'binary'
                        break
                data_checked = True
                if file_type == 'binary':
                    break
            if not line_wrap:
                if line.endswith('\n'):
                    line = line[:-1]
                else:
                    while True:
                        i = temp.read(1)
                        if not i or i == '\n':
                            break
            skip_line = False
            for skipchar in skipchars:
                if line.startswith(skipchar):
                    skip_line = True
                    break
            if not skip_line:
                lines.append(line)
                count += 1
    finally:
        temp.close()
    if file_type == 'binary':
        text = "%s file" % file_type
    else:
        try:
            text = util.unicodify('\n'.join(lines))
        except UnicodeDecodeError:
            text = "binary/unknown file"
    return text
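A rough standalone sketch of the peek semantics above (line count, width limit and skipchars), leaving out the compression handling and binary detection; `io.StringIO` stands in for the opened file object:

import io

def simple_peek(fileobj, width=256, line_count=5, skipchars=()):
    # Simplified re-statement of the loop above: read at most `line_count` lines,
    # each truncated to `width` characters, skipping lines that start with a skipchar.
    lines = []
    while len(lines) < line_count:
        line = fileobj.readline(width)
        if not line:
            break
        if any(line.startswith(c) for c in skipchars):
            continue
        lines.append(line)
    return ''.join(lines)

sample = io.StringIO("# header\nchr22\t30128507\t31828507\n" * 10)
print(simple_peek(sample, line_count=2, skipchars=('#',)))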
Example #57
 def upload(self, trans, **kwd):
     message = escape(kwd.get('message', ''))
     status = kwd.get('status', 'done')
     commit_message = escape(kwd.get('commit_message', 'Uploaded'))
     repository_id = kwd.get('repository_id', '')
     repository = repository_util.get_repository_in_tool_shed(
         trans.app, repository_id)
     repo_dir = repository.repo_path(trans.app)
     uncompress_file = util.string_as_bool(
         kwd.get('uncompress_file', 'true'))
     remove_repo_files_not_in_tar = util.string_as_bool(
         kwd.get('remove_repo_files_not_in_tar', 'true'))
     uploaded_file = None
     upload_point = commit_util.get_upload_point(repository, **kwd)
     tip = repository.tip()
     file_data = kwd.get('file_data', '')
     url = kwd.get('url', '')
     # Part of the upload process is sending email notifications to those who have registered to
     # receive them.  One scenario occurs when the first change set is produced for the repository.
     # See the suc.handle_email_alerts() method for the definition of the scenarios.
     new_repo_alert = repository.is_new()
     uploaded_directory = None
     if kwd.get('upload_button', False):
         if file_data == '' and url == '':
             message = 'No files were entered on the upload form.'
             status = 'error'
             uploaded_file = None
         elif url and url.startswith('hg'):
             # Use mercurial clone to fetch repository, contents will then be copied over.
             uploaded_directory = tempfile.mkdtemp()
             repo_url = 'http%s' % url[len('hg'):]
             cloned_ok, error_message = hg_util.clone_repository(
                 repo_url, uploaded_directory)
             if not cloned_ok:
                 message = 'Error uploading via mercurial clone: %s' % error_message
                 status = 'error'
                 basic_util.remove_dir(uploaded_directory)
                 uploaded_directory = None
         elif url:
             valid_url = True
             try:
                 stream = requests.get(url, stream=True)
             except Exception as e:
                 valid_url = False
                 message = 'Error uploading file via http: %s' % util.unicodify(e)
                 status = 'error'
                 uploaded_file = None
             if valid_url:
                 fd, uploaded_file_name = tempfile.mkstemp()
                 uploaded_file = open(uploaded_file_name, 'wb')
                 for chunk in stream.iter_content(
                         chunk_size=util.CHUNK_SIZE):
                     if chunk:
                         uploaded_file.write(chunk)
                 uploaded_file.flush()
                 uploaded_file_filename = url.split('/')[-1]
                 isempty = os.path.getsize(
                     os.path.abspath(uploaded_file_name)) == 0
         elif file_data not in ('', None):
             uploaded_file = file_data.file
             uploaded_file_name = uploaded_file.name
             uploaded_file_filename = os.path.split(file_data.filename)[-1]
             isempty = os.path.getsize(
                 os.path.abspath(uploaded_file_name)) == 0
         if uploaded_file or uploaded_directory:
             rdah = attribute_handlers.RepositoryDependencyAttributeHandler(
                 trans.app, unpopulate=False)
             tdah = attribute_handlers.ToolDependencyAttributeHandler(
                 trans.app, unpopulate=False)
             stdtm = ShedToolDataTableManager(trans.app)
             ok = True
             isgzip = False
             isbz2 = False
             if uploaded_file:
                 if uncompress_file:
                     isgzip = checkers.is_gzip(uploaded_file_name)
                     if not isgzip:
                         isbz2 = checkers.is_bz2(uploaded_file_name)
                 if isempty:
                     tar = None
                     istar = False
                 else:
                     # Determine what we have - a single file or an archive
                     try:
                         if (isgzip or isbz2) and uncompress_file:
                             # Open for reading with transparent compression.
                             tar = tarfile.open(uploaded_file_name, 'r:*')
                         else:
                             tar = tarfile.open(uploaded_file_name)
                         istar = True
                     except tarfile.ReadError:
                         tar = None
                         istar = False
             else:
                 # Uploaded directory
                 istar = False
             if istar:
                 ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
                     repository_content_util.upload_tar(
                         trans,
                         rdah,
                         tdah,
                         repository,
                         tar,
                         uploaded_file,
                         upload_point,
                         remove_repo_files_not_in_tar,
                         commit_message,
                         new_repo_alert
                     )
             elif uploaded_directory:
                 ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
                     self.upload_directory(trans,
                                           rdah,
                                           tdah,
                                           repository,
                                           uploaded_directory,
                                           upload_point,
                                           remove_repo_files_not_in_tar,
                                           commit_message,
                                           new_repo_alert)
             else:
                 if (isgzip or isbz2) and uncompress_file:
                     uploaded_file_filename = commit_util.uncompress(
                         repository,
                         uploaded_file_name,
                         uploaded_file_filename,
                         isgzip=isgzip,
                         isbz2=isbz2)
                 if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and \
                         uploaded_file_filename != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
                     ok = False
                     message = 'Repositories of type <b>Repository suite definition</b> can only contain a single file named '
                     message += '<b>repository_dependencies.xml</b>.'
                 elif repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and \
                         uploaded_file_filename != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
                     ok = False
                     message = 'Repositories of type <b>Tool dependency definition</b> can only contain a single file named '
                     message += '<b>tool_dependencies.xml</b>.'
                 if ok:
                     if upload_point is not None:
                         full_path = os.path.abspath(
                             os.path.join(repo_dir, upload_point,
                                          uploaded_file_filename))
                     else:
                         full_path = os.path.abspath(
                             os.path.join(repo_dir, uploaded_file_filename))
                     # Move some version of the uploaded file to the upload_point within the repository hierarchy.
                     if uploaded_file_filename in [rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME]:
                         # Inspect the contents of the file to see if toolshed or changeset_revision attributes
                         # are missing and if so, set them appropriately.
                         altered, root_elem, error_message = rdah.handle_tag_attributes(
                             uploaded_file_name)
                         if error_message:
                             ok = False
                             message = error_message
                             status = 'error'
                         elif altered:
                             tmp_filename = xml_util.create_and_write_tmp_file(
                                 root_elem)
                             shutil.move(tmp_filename, full_path)
                         else:
                             shutil.move(uploaded_file_name, full_path)
                     elif uploaded_file_filename in [rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME]:
                         # Inspect the contents of the file to see if changeset_revision values are
                         # missing and if so, set them appropriately.
                         altered, root_elem, error_message = tdah.handle_tag_attributes(
                             uploaded_file_name)
                         if error_message:
                             ok = False
                             message = error_message
                             status = 'error'
                         if ok:
                             if altered:
                                 tmp_filename = xml_util.create_and_write_tmp_file(
                                     root_elem)
                                 shutil.move(tmp_filename, full_path)
                             else:
                                 shutil.move(uploaded_file_name, full_path)
                     else:
                         shutil.move(uploaded_file_name, full_path)
                     if ok:
                         # See if any admin users have chosen to receive email alerts when a repository is updated.
                         # If so, check every uploaded file to ensure content is appropriate.
                         check_contents = commit_util.check_file_contents_for_email_alerts(
                             trans.app)
                         if check_contents and os.path.isfile(full_path):
                             content_alert_str = commit_util.check_file_content_for_html_and_images(
                                 full_path)
                         else:
                             content_alert_str = ''
                         hg_util.add_changeset(repo_dir, full_path)
                         hg_util.commit_changeset(
                             repo_dir,
                             full_path_to_changeset=full_path,
                             username=trans.user.username,
                             message=commit_message)
                         if full_path.endswith(
                                 'tool_data_table_conf.xml.sample'):
                             # Handle the special case where a tool_data_table_conf.xml.sample file is being uploaded
                             # by parsing the file and adding new entries to the in-memory trans.app.tool_data_tables
                             # dictionary.
                             error, error_message = stdtm.handle_sample_tool_data_table_conf_file(
                                 full_path, persist=False)
                             if error:
                                 message = '%s<br/>%s' % (message,
                                                          error_message)
                         # See if the content of the change set was valid.
                         admin_only = len(
                             repository.downloadable_revisions) != 1
                         suc.handle_email_alerts(
                             trans.app,
                             trans.request.host,
                             repository,
                             content_alert_str=content_alert_str,
                             new_repo_alert=new_repo_alert,
                             admin_only=admin_only)
             if ok:
                 # Update the repository files for browsing.
                 hg_util.update_repository(repo_dir)
                 # Get the new repository tip.
                 if tip == repository.tip():
                     message = 'No changes to repository.  '
                     status = 'warning'
                 else:
                     if (isgzip or isbz2) and uncompress_file:
                         uncompress_str = ' uncompressed and '
                     else:
                         uncompress_str = ' '
                     if uploaded_directory:
                         source_type = "repository"
                         source = url
                     else:
                         source_type = "file"
                         source = uploaded_file_filename
                     message = "The %s <b>%s</b> has been successfully%suploaded to the repository.  " % \
                         (source_type, escape(source), uncompress_str)
                     if istar and (undesirable_dirs_removed
                                   or undesirable_files_removed):
                         items_removed = undesirable_dirs_removed + undesirable_files_removed
                         message += "  %d undesirable items (.hg .svn .git directories, .DS_Store, hgrc files, etc) " % items_removed
                         message += "were removed from the archive.  "
                     if istar and remove_repo_files_not_in_tar and files_to_remove:
                         if upload_point is not None:
                             message += "  %d files were removed from the repository relative to the selected upload point '%s'.  " % \
                                 (len(files_to_remove), upload_point)
                         else:
                             message += "  %d files were removed from the repository root.  " % len(
                                 files_to_remove)
                     rmm = repository_metadata_manager.RepositoryMetadataManager(
                         app=trans.app,
                         user=trans.user,
                         repository=repository)
                     status, error_message = \
                         rmm.set_repository_metadata_due_to_new_tip(trans.request.host,
                                                                    content_alert_str=content_alert_str,
                                                                    **kwd)
                     if error_message:
                         message = error_message
                     kwd['message'] = message
                 if repository.metadata_revisions:
                     # A repository's metadata revisions are ordered descending by update_time, so the zeroth revision
                     # will be the tip just after an upload.
                     metadata_dict = repository.metadata_revisions[0].metadata
                 else:
                     metadata_dict = {}
                 dd = dependency_display.DependencyDisplayer(trans.app)
                 if str(repository.type) not in [
                         rt_util.REPOSITORY_SUITE_DEFINITION,
                         rt_util.TOOL_DEPENDENCY_DEFINITION
                 ]:
                     change_repository_type_message = rt_util.generate_message_for_repository_type_change(
                         trans.app, repository)
                     if change_repository_type_message:
                         message += change_repository_type_message
                         status = 'warning'
                     else:
                         # Provide a warning message if a tool_dependencies.xml file is provided, but tool dependencies
                         # weren't loaded due to a requirement tag mismatch or some other problem.  Tool dependency
                         # definitions can define orphan tool dependencies (no relationship to any tools contained in the
                         # repository), so warning messages are important because orphans are always valid.  The repository
                         # owner must be warned in case they did not intend to define an orphan dependency, but simply
                         # provided incorrect information (tool shed, name, owner, changeset_revision) for the definition.
                         orphan_message = dd.generate_message_for_orphan_tool_dependencies(
                             repository, metadata_dict)
                         if orphan_message:
                             message += orphan_message
                             status = 'warning'
                 # Handle messaging for invalid tool dependencies.
                 invalid_tool_dependencies_message = dd.generate_message_for_invalid_tool_dependencies(
                     metadata_dict)
                 if invalid_tool_dependencies_message:
                     message += invalid_tool_dependencies_message
                     status = 'error'
                 # Handle messaging for invalid repository dependencies.
                 invalid_repository_dependencies_message = \
                     dd.generate_message_for_invalid_repository_dependencies(metadata_dict,
                                                                             error_from_tuple=True)
                 if invalid_repository_dependencies_message:
                     message += invalid_repository_dependencies_message
                     status = 'error'
                 # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file.
                 stdtm.reset_tool_data_tables()
                 if uploaded_directory:
                     basic_util.remove_dir(uploaded_directory)
                 trans.response.send_redirect(
                     web.url_for(controller='repository',
                                 action='browse_repository',
                                 id=repository_id,
                                 commit_message='Deleted selected files',
                                 message=message,
                                 status=status))
             else:
                 if uploaded_directory:
                     basic_util.remove_dir(uploaded_directory)
                 status = 'error'
             # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file.
             stdtm.reset_tool_data_tables()
     return trans.fill_template(
         '/webapps/tool_shed/repository/upload.mako',
         repository=repository,
         changeset_revision=tip,
         url=url,
         commit_message=commit_message,
         uncompress_file=uncompress_file,
         remove_repo_files_not_in_tar=remove_repo_files_not_in_tar,
         message=message,
         status=status)
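As an aside on the mercurial-clone branch near the top of this upload method: URLs beginning with 'hg' are rewritten by replacing that prefix with 'http'. A tiny illustration with made-up URLs:

# Illustrative only: mirrors `repo_url = 'http%s' % url[len('hg'):]` above.
for url in ('hg://toolshed.example.org/repos/owner/repo',
            'hgs://toolshed.example.org/repos/owner/repo'):
    repo_url = 'http%s' % url[len('hg'):]
    print(url, '->', repo_url)
# hg://...  -> http://...
# hgs://... -> https://...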
Example #58
 def display_name(self, dataset):
     """Returns formatted html of dataset name"""
     try:
         return escape(unicodify(dataset.name, 'utf-8'))
     except Exception:
         return "name unavailable"
Example #59
 def get_html(self, prefix="", disabled=False):
     return unicodify('<input type="password" name="%s%s" size="%d" value="%s"%s>'
                      % (prefix, self.name, self.size, escape(str(self.value), quote=True), self.get_disabled_str(disabled)))
Example #60
def validate_url(url, ip_whitelist):
    # If it doesn't look like a URL, ignore it.
    if not (url.lstrip().startswith('http://')
            or url.lstrip().startswith('https://')):
        return url

    # Extract hostname component
    parsed_url = urlparse(url).netloc
    # If credentials are in this URL, we need to strip those.
    if parsed_url.count('@') > 0:
        # credentials.
        parsed_url = parsed_url[parsed_url.rindex('@') + 1:]
    # Percent encoded colons and other characters will not be resolved as such
    # so we don't have to either.

    # Sometimes the netloc will contain the port which is not desired, so we
    # need to extract that.
    port = None
    # However, it could ALSO be an IPv6 address they've supplied.
    if ':' in parsed_url:
        # IPv6 addresses have colons in them already (always at least two)
        if parsed_url.count(':') >= 2:
            # Since IPv6 addresses already use colons extensively, they are wrapped in
            # brackets when there is a port, e.g. http://[2001:db8:1f70::999:de8:7648:6e8]:100/
            # However if it ends with a ']' then there is no port after it and
            # they've wrapped it in brackets just for fun.
            if ']' in parsed_url and not parsed_url.endswith(']'):
                # If this +1 throws a range error, we don't care, their url
                # shouldn't end with a colon.
                idx = parsed_url.rindex(':')
                # We parse as an int and let this fail ungracefully if parsing
                # fails because we desire to fail closed rather than open.
                port = int(parsed_url[idx + 1:])
                parsed_url = parsed_url[:idx]
            else:
                # Plain ipv6 without port
                pass
        else:
            # This should finally be ipv4 with port. It cannot be IPv6 as that
            # was caught by earlier cases, and it cannot be due to credentials.
            idx = parsed_url.rindex(':')
            port = int(parsed_url[idx + 1:])
            parsed_url = parsed_url[:idx]

    # safe to log out, no credentials/request path, just an IP + port
    log.debug("parsed url, port: %s : %s", parsed_url, port)
    # Call getaddrinfo to resolve hostname into tuples containing IPs.
    addrinfo = socket.getaddrinfo(parsed_url, port)
    # Get the IP addresses that this entry resolves to (uniquely)
    # We drop:
    #   AF_* family: It will resolve to AF_INET or AF_INET6, getaddrinfo(3) doesn't even mention AF_UNIX,
    #   socktype: We don't care if a stream/dgram/raw protocol
    #   protocol: we don't care if it is tcp or udp.
    addrinfo_results = set([info[4][0] for info in addrinfo])
    # There may be multiple (e.g. IPv4 + IPv6 or DNS round robin). Any one of these
    # could resolve to a local address (and could be returned by chance),
    # therefore we must check them all.
    for raw_ip in addrinfo_results:
        # Convert to an IP object so we can tell if it is in private space.
        ip = ipaddress.ip_address(unicodify(raw_ip))
        # If this is a private address
        if ip.is_private:
            results = []
            # Check whether this IP is covered by any entry in the whitelist
            for whitelisted in ip_whitelist:
                # If it's an IP address range (rather than a single one...)
                if hasattr(whitelisted, 'subnets'):
                    results.append(ip in whitelisted)
                else:
                    results.append(ip == whitelisted)

            if any(results):
                # If we had any True, then THIS (and ONLY THIS) IP address that
                # this specific DNS entry resolved to is whitelisted and
                # safe to access. But we cannot exit here, we must ensure that
                # all IPs that this DNS entry resolves to are likewise safe.
                pass
            else:
                # Otherwise, we deny access.
                raise Exception(
                    "Access to this address in not permitted by server configuration"
                )
    return url
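Finally, a hedged sketch of how an `ip_whitelist` for `validate_url` might be built with the standard `ipaddress` module; entries that expose a `subnets` attribute are networks checked by containment, everything else by equality (the addresses below are examples only):

import ipaddress

ip_whitelist = [
    ipaddress.ip_network(u'10.0.0.0/8'),      # a whole private range
    ipaddress.ip_address(u'192.168.1.50'),    # a single whitelisted host
]

ip = ipaddress.ip_address(u'10.1.2.3')
results = []
for whitelisted in ip_whitelist:
    if hasattr(whitelisted, 'subnets'):
        results.append(ip in whitelisted)
    else:
        results.append(ip == whitelisted)
print(any(results))  # True: 10.1.2.3 falls inside 10.0.0.0/8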