def get_html_default(self, prefix="", disabled=False):
    """Render this select field as an HTML ``<select>`` element.

    The most recently selected option's value is echoed back in a
    ``last_selected_value`` attribute on the ``<select>`` tag.
    """
    multiple = " multiple" if self.multiple else ""
    size = ' size="%s"' % str(self.size) if self.size else ''
    rval = []
    last_selected_value = ""
    for text, value, selected in self.options:
        if selected:
            selected_text = " selected"
            last_selected_value = value
            if not isinstance(last_selected_value, string_types):
                last_selected_value = str(last_selected_value)
        else:
            selected_text = ""
        rval.append('<option value="%s"%s>%s</option>' % (escape(unicodify(value), quote=True), selected_text, escape(unicodify(text), quote=True)))
    if last_selected_value:
        last_selected_value = ' last_selected_value="%s"' % escape(unicodify(last_selected_value), quote=True)
    id_string = ' id="%s"' % self.field_id if self.field_id is not None else ''
    rval.insert(0, '<select name="%s%s"%s%s%s%s%s%s%s>' % (prefix, self.name, multiple, size, self.refresh_on_change_text, last_selected_value, self.get_disabled_str(disabled), id_string, self.extra_attributes))
    rval.append('</select>')
    return unicodify("\n".join(rval))
def get_html(self, prefix="", disabled=False):
    """Render this text field as an HTML ``<input type="text">`` element.

    ``self.value`` may be any type; it is coerced to a (unicode) string
    and HTML-escaped into the ``value`` attribute.
    """
    value = self.value
    # Use six's string_types (as the sibling methods in this module do)
    # instead of the Python-2-only ``basestring`` so the check also works
    # on Python 3.
    if not isinstance(value, string_types):
        value = str(value)
    value = unicodify(value)
    return unicodify('<input type="text" name="%s%s" size="%d" value="%s"%s>' % (prefix, self.name, self.size, escape(value, quote=True), self.get_disabled_str(disabled)))
def handle_command(self, tool_dependency, cmd, return_output=False):
    """Handle a command and log the results.

    Executes ``cmd``, logs the installation output, truncates oversized
    stdout/stderr before database storage, and records an error status on
    ``tool_dependency`` when the command fails.  Returns the full output
    object when ``return_output`` is set, otherwise the return code.
    """
    context = self.app.install_model.context
    command = str(cmd)
    output = self.handle_complex_command(command)
    self.log_results(cmd, output, os.path.join(self.install_dir, basic_util.INSTALLATION_LOG))
    stdout = output.stdout
    stderr = output.stderr
    # Use the print() function (works on Python 2.6+ and is required on
    # Python 3) rather than the Python-2-only print statement.
    if len(stdout) > DATABASE_MAX_STRING_SIZE:
        print("Length of stdout > %s, so only a portion will be saved in the database." % str(DATABASE_MAX_STRING_SIZE_PRETTY))
        stdout = shrink_string_by_size(stdout, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
    if len(stderr) > DATABASE_MAX_STRING_SIZE:
        print("Length of stderr > %s, so only a portion will be saved in the database." % str(DATABASE_MAX_STRING_SIZE_PRETTY))
        stderr = shrink_string_by_size(stderr, DATABASE_MAX_STRING_SIZE, join_by="\n..\n", left_larger=True, beginning_on_size_error=True)
    if output.return_code != 0:
        tool_dependency.status = self.app.install_model.ToolDependency.installation_status.ERROR
        if stderr:
            tool_dependency.error_message = unicodify(stderr)
        elif stdout:
            tool_dependency.error_message = unicodify(stdout)
        else:
            # We have a problem if there was no stdout and no stderr.
            tool_dependency.error_message = "Unknown error occurred executing shell command %s, return_code: %s" % \
                (str(cmd), str(output.return_code))
        context.add(tool_dependency)
        context.flush()
    if return_output:
        return output
    return output.return_code
def _handle_script_integrity(path, config):
    """Verify that a freshly written job script executes and is intact.

    Runs the script with a sentinel environment variable set; a return code
    of 42 signals success.  Between attempts the filesystem is synced and we
    sleep, to ride out "Text file busy" style races.  Raises if the script
    never verifies within the configured number of attempts.
    """
    if not check_script_integrity(config):
        return
    attempts = getattr(config, "check_job_script_integrity_count", DEFAULT_INTEGRITY_COUNT)
    sleep_amt = getattr(config, "check_job_script_integrity_sleep", DEFAULT_INTEGRITY_SLEEP)
    script_integrity_verified = False
    for _ in range(attempts):
        try:
            returncode = subprocess.call([path], env={"ABC_TEST_JOB_SCRIPT_INTEGRITY_XYZ": "1"})
            if returncode == 42:
                script_integrity_verified = True
                break
            log.debug("Script integrity error for file '%s': returncode was %d", path, returncode)
            # Else we will sync and wait to see if the script becomes
            # executable.
            try:
                # sync file system to avoid "Text file busy" problems.
                # These have occurred both in Docker containers and on EC2 clusters
                # under high load.
                subprocess.check_call(INTEGRITY_SYNC_COMMAND)
            except Exception as e:
                log.debug("Error syncing the filesystem: %s", unicodify(e))
        except Exception as exc:
            log.debug("Script not available yet: %s", unicodify(exc))
        time.sleep(sleep_amt)
    if not script_integrity_verified:
        raise Exception("Failed to write job script '%s', could not verify job script integrity." % path)
def submit_report(self, dataset, job, tool, **kwargs):
    """Submit the error report to sentry
    """
    log.info(self.github)
    if not self.github:
        return None
    tool_kw = {'tool_id': unicodify(job.tool_id), 'tool_version': unicodify(job.tool_version)}
    label = self.get_label('{tool_id}/{tool_version}'.format(**tool_kw))
    error_title = u"""Galaxy Job Error: {tool_id} v{tool_version}""".format(**tool_kw)
    # We'll re-use the email error reporter's template since github supports HTML
    error_reporter = EmailErrorReporter(dataset.id, self.app)
    error_reporter.create_report(job.get_user(), email=kwargs.get('email', None), message=kwargs.get('message', None))
    # The HTML report
    report_body = error_reporter.html_report
    log.info(error_title in self.issue_cache)
    if error_title in self.issue_cache:
        # An issue for this tool/version already exists; just comment on it.
        self.issue_cache[error_title].create_comment(report_body)
    else:
        # Create a new issue, labelled with tool_id/tool_version.
        self.issue_cache[error_title] = self.repo.create_issue(title=error_title, body=report_body, labels=[label])
    return ('Submitted bug report to Github. Your issue number is %s' % self.issue_cache[error_title].number, 'success')
def __create_jstree(self, directory, disable='folders'):
    """
    Loads recursively all files and folders within the given folder
    and its subfolders and returns jstree representation
    of its structure.

    :param directory: root directory to walk; must exist.
    :param disable: which node type ('folders' or 'files') is rendered
        disabled in the tree.
    :raises exceptions.ConfigDoesNotAllowException: if ``directory`` is missing.
    """
    if not os.path.exists(directory):
        raise exceptions.ConfigDoesNotAllowException('The given directory does not exist.')
    jstree_paths = []
    for (dirpath, dirnames, filenames) in os.walk(directory):
        for dirname in dirnames:
            dir_path = os.path.relpath(os.path.join(dirpath, dirname), directory)
            # Hash the relative path to produce a stable node id.
            dir_path_hash = hashlib.sha1(unicodify(dir_path).encode('utf-8')).hexdigest()
            # Boolean expression directly instead of ``True if ... else False``.
            disabled = disable == 'folders'
            jstree_paths.append(jstree.Path(dir_path, dir_path_hash, {'type': 'folder', 'state': {'disabled': disabled}, 'li_attr': {'full_path': dir_path}}))
        for filename in filenames:
            file_path = os.path.relpath(os.path.join(dirpath, filename), directory)
            file_path_hash = hashlib.sha1(unicodify(file_path).encode('utf-8')).hexdigest()
            disabled = disable == 'files'
            jstree_paths.append(jstree.Path(file_path, file_path_hash, {'type': 'file', 'state': {'disabled': disabled}, 'li_attr': {'full_path': file_path}}))
    return jstree.JSTree(jstree_paths)
def main(argv):
    """Re-encode stored page revision content, optionally as a dry run.

    Walks every PageRevision, re-renders its content through
    ``_PageContentProcessor`` and persists the result (or, with ``--dry-run``,
    prints a diff instead).
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-k', '--secret-key', help='Key to convert pages with', default='')
    parser.add_argument('-d', '--dry-run', help='No changes, just test it.', action='store_true')
    populate_config_args(parser)
    args = parser.parse_args()
    properties = app_properties_from_args(args)
    config = galaxy.config.Configuration(**properties)
    secret = args.secret_key or config.id_secret
    security_helper = SecurityHelper(id_secret=secret)
    object_store = build_object_store_from_config(config)
    if not config.database_connection:
        print("The database connection is empty. If you are using the default value, please uncomment that in your galaxy.yml")
    model = galaxy.config.init_models_from_config(config, object_store=object_store)
    session = model.context.current
    pagerevs = session.query(model.PageRevision).all()
    mock_trans = Bunch(app=Bunch(security=security_helper), model=model, user_is_admin=lambda: True, sa_session=session)
    for p in pagerevs:
        try:
            processor = _PageContentProcessor(mock_trans, _placeholderRenderForSave)
            processor.feed(p.content)
            newcontent = unicodify(processor.output(), 'utf-8')
            if p.content != newcontent:
                if not args.dry_run:
                    p.content = newcontent
                    session.add(p)
                    session.flush()
                else:
                    print("Modifying revision %s." % p.id)
                    # unified_diff returns a generator; join its lines so the
                    # actual diff is printed rather than the generator's repr.
                    print("\n".join(difflib.unified_diff(p.content, newcontent)))
        except Exception:
            logging.exception("Error parsing page, rolling changes back and skipping revision %s. Please report this error." % p.id)
            session.rollback()
def create_report(self, user, email='', message='', **kwd):
    """Fill the error report template for this dataset/job pair.

    Builds the reporter identification string and substitutes job and
    dataset details into ``error_report_template``, storing the result on
    ``self.report``.
    """
    hda = self.hda
    job = self.job
    host = web.url_for('/', qualified=True)
    history_view_link = web.url_for(controller="history", action="view",
                                    id=self.app.security.encode_id(hda.history_id),
                                    qualified=True)
    # Identify who is reporting; anonymous submissions carry no user record.
    if user and user.email != email:
        email_str = "'%s' (providing preferred contact email '%s')" % (user.email, email)
    else:
        email_str = "'%s'" % (email or 'anonymously')
    template = string.Template(error_report_template)
    self.report = template.safe_substitute(
        host=host,
        dataset_id=hda.dataset_id,
        history_id=hda.history_id,
        hid=hda.hid,
        history_item_name=hda.get_display_name(),
        history_view_link=history_view_link,
        job_id=job.id,
        tool_version=job.tool_version,
        job_tool_id=job.tool_id,
        # NOTE(review): the template's job_tool_version is filled from the
        # HDA's tool_version, not the job's — confirm this is intentional.
        job_tool_version=hda.tool_version,
        job_runner_external_id=job.job_runner_external_id,
        job_command_line=job.command_line,
        job_stderr=util.unicodify(job.stderr),
        job_stdout=util.unicodify(job.stdout),
        job_info=util.unicodify(job.info),
        job_traceback=util.unicodify(job.traceback),
        email_str=email_str,
        message=util.unicodify(message))
def build_readme_files_dict(app, repository, changeset_revision, metadata, tool_path=None):
    """
    Return a dictionary of valid readme file name <-> readme file content pairs for all readme files defined in the received metadata.  Since the
    received changeset_revision (which is associated with the received metadata) may not be the latest installable changeset revision, the README
    file contents may not be available on disk.  This method is used by both Galaxy and the Tool Shed.
    """
    if app.name == 'galaxy':
        can_use_disk_files = True
    else:
        repo = hg_util.get_repo_for_repository(app, repository=repository, repo_path=None, create=False)
        latest_downloadable_changeset_revision = suc.get_latest_downloadable_changeset_revision(app, repository, repo)
        # README contents are only on disk for the latest installable revision.
        can_use_disk_files = changeset_revision == latest_downloadable_changeset_revision
    readme_files_dict = {}
    if metadata and 'readme_files' in metadata:
        for relative_path_to_readme_file in metadata['readme_files']:
            readme_file_name = os.path.split(relative_path_to_readme_file)[1]
            if can_use_disk_files:
                if tool_path:
                    full_path_to_readme_file = os.path.abspath(os.path.join(tool_path, relative_path_to_readme_file))
                else:
                    full_path_to_readme_file = os.path.abspath(relative_path_to_readme_file)
                text = None
                try:
                    # ``with`` guarantees the file is closed even if read/decode fails.
                    with open(full_path_to_readme_file, 'r') as f:
                        text = unicodify(f.read())
                except Exception as e:
                    log.exception("Error reading README file '%s' from disk: %s" % (str(relative_path_to_readme_file), str(e)))
                    text = None
                if text:
                    text_of_reasonable_length = basic_util.size_string(text)
                    if text_of_reasonable_length.find('.. image:: ') >= 0:
                        # Handle image display for README files that are contained in repositories in the tool shed or installed into Galaxy.
                        lock = threading.Lock()
                        lock.acquire(True)
                        try:
                            text_of_reasonable_length = suc.set_image_paths(app,
                                                                            app.security.encode_id(repository.id),
                                                                            text_of_reasonable_length)
                        except Exception as e:
                            log.exception("Exception in build_readme_files_dict, so images may not be properly displayed:\n%s" % str(e))
                        finally:
                            lock.release()
                    if readme_file_name.endswith('.rst'):
                        text_of_reasonable_length = Template(rst_to_html(text_of_reasonable_length),
                                                             input_encoding='utf-8',
                                                             output_encoding='utf-8',
                                                             default_filters=['decode.utf8'],
                                                             encoding_errors='replace')
                        text_of_reasonable_length = text_of_reasonable_length.render(static_path=web.url_for('/static'),
                                                                                     host_url=web.url_for('/', qualified=True))
                        text_of_reasonable_length = unicodify(text_of_reasonable_length)
                    else:
                        text_of_reasonable_length = basic_util.to_html_string(text_of_reasonable_length)
                    readme_files_dict[readme_file_name] = text_of_reasonable_length
    # Return the dict as the docstring promises (the visible original fell
    # off the end of the function without an explicit return).
    return readme_files_dict
def process_result_value(self, value, dialect):
    """Deserialize a stored metadata column value.

    Tries the legacy pickle format first; on failure falls back to JSON
    (after un-mangling a possible PostgreSQL 9 hex encoding).  Returns
    ``None`` when the value is absent or undecodable.
    """
    if value is None:
        return None
    try:
        loaded = metadata_pickler.loads(unicodify(value))
        return dict(loaded.__dict__) if loaded else loaded
    except Exception:
        try:
            return json_decoder.decode(unicodify(_sniffnfix_pg9_hex(value)))
        except Exception:
            return None
def execute(self, cmd, timeout=60):
    """Run ``cmd`` over SSH and return a Bunch of stdout/stderr/returncode.

    On an SSH channel failure the connection is re-established once after a
    short pause; the whole attempt is additionally wrapped by the configured
    retry executor.
    """
    def _run_with_reconnect():
        # Reconnect-and-retry once if the SSH layer raises.
        try:
            _, stdout, stderr = self._execute(cmd, timeout)
        except paramiko.SSHException as e:
            log.error(e)
            time.sleep(10)
            self.connect()
            _, stdout, stderr = self._execute(cmd, timeout)
        return stdout, stderr

    stdout, stderr = self.retry_action_executor.execute(_run_with_reconnect)
    return_code = stdout.channel.recv_exit_status()
    return Bunch(stdout=unicodify(stdout.read()), stderr=unicodify(stderr.read()), returncode=return_code)
def _scripts_check_output(self, script, args):
    """Run a script from Galaxy's ``scripts/`` directory and return its output."""
    root = galaxy_directory()
    command = ["python", os.path.join(root, "scripts", script)] + args
    # Strip the environment down to PATH only.
    # Don't let testing environment variables interfere with config.
    minimal_env = {"PATH": os.environ.get("PATH", None)}
    return unicodify(subprocess.check_output(command, cwd=root, env=minimal_env))
def get_headers(fname, sep, count=60, is_multi_byte=False):
    """
    Returns a list with the first 'count' lines split by 'sep'

    >>> fname = get_test_fname('complete.bed')
    >>> get_headers(fname,'\\t')
    [['chr7', '127475281', '127491632', 'NM_000230', '0', '+', '127486022', '127488767', '0', '3', '29,172,3225,', '0,10713,13126,'], ['chr7', '127486011', '127488900', 'D49487', '0', '+', '127486022', '127488767', '0', '2', '155,490,', '0,2399']]
    """
    headers = []
    # Open the file BEFORE entering the try block: in the original code a
    # failed open() made the finally clause close an unbound name, raising
    # UnboundLocalError and masking the real I/O error.
    if is_gzip(fname):
        in_file = gzip.GzipFile(fname, 'r')
    elif is_bz2(fname):
        in_file = bz2.BZ2File(fname, 'r')
    else:
        in_file = open(fname, 'rt')
    try:
        for idx, line in enumerate(in_file):
            line = line.rstrip('\n\r')
            if is_multi_byte:
                # TODO: fix this - sep is never found in line
                line = unicodify(line, 'utf-8')
                sep = sep.encode('utf-8')
            headers.append(line.split(sep))
            if idx == count:
                break
    finally:
        in_file.close()
    return headers
def _read_post_payload(self, environ):
    """Read and JSON-decode the POST body from a WSGI environ dict."""
    body_size = int(environ.get('CONTENT_LENGTH', 0))
    # An absent/empty body decodes to an empty payload dict.
    body = environ['wsgi.input'].read(body_size) or '{}'
    # TODO: json decode error handling
    # log.debug( 'request_body: (%s)\n%s', type( request_body ), request_body )
    return json.loads(unicodify(body))
def check_page_for_string(self, patt):
    """Looks for 'patt' in the current browser page.

    Raises AssertionError (after dumping the page to a temp file for
    inspection) when the pattern is absent.
    """
    page = unicodify(self.last_page())
    # Containment test instead of comparing str.find() to -1.
    if patt not in page:
        fname = self.write_temp_file(page)
        errmsg = "no match to '%s'\npage content written to '%s'\npage: [[%s]]" % (patt, fname, page)
        raise AssertionError(errmsg)
def check_html(file_path, chunk=None):
    """Return True if the data (file at ``file_path``, or ``chunk``) looks like HTML.

    Scans up to HTML_CHECK_LINES lines for anchor/iframe/frameset/meta/script
    markup.
    """
    if chunk is None:
        temp = open(file_path)
    elif hasattr(chunk, "splitlines"):
        temp = chunk.splitlines()
    else:
        temp = chunk
    # Raw strings: "\s" and "\W" in ordinary strings are invalid escape
    # sequences (a DeprecationWarning on modern Pythons, eventually an error).
    regexp1 = re.compile(r"<A\s+[^>]*HREF[^>]+>", re.I)
    regexp2 = re.compile(r"<IFRAME[^>]*>", re.I)
    regexp3 = re.compile(r"<FRAMESET[^>]*>", re.I)
    regexp4 = re.compile(r"<META[\W][^>]*>", re.I)
    regexp5 = re.compile(r"<SCRIPT[^>]*>", re.I)
    lineno = 0
    # TODO: Potentially reading huge lines into string here, this should be
    # reworked.
    for line in temp:
        line = util.unicodify(line)
        lineno += 1
        matches = regexp1.search(line) or regexp2.search(line) or regexp3.search(line) or regexp4.search(line) or regexp5.search(line)
        if matches:
            if chunk is None:
                temp.close()
            return True
        if HTML_CHECK_LINES and (lineno > HTML_CHECK_LINES):
            break
    if chunk is None:
        temp.close()
    return False
def _handle_test_output_file(ctx, report_type, test_data, kwds):
    """Build a test report of ``report_type`` and write it to the configured path.

    Skips silently (with a verbose log) when no output path was supplied for
    this report type; re-raises after logging on build or write failures.
    """
    kwd_name = "test_output" if report_type == "html" else "test_output_%s" % report_type
    path = kwds.get(kwd_name, None)
    if path is None:
        message = "No file specified for %s, skipping test output." % kwd_name
        ctx.vlog(message)
        return
    try:
        contents = build_report.build_report(test_data, report_type=report_type)
    except Exception:
        message = "Problem producing report file %s for %s" % (path, kwd_name)
        ctx.vlog(message, exception=True)
        raise
    try:
        with io.open(path, mode='w', encoding='utf-8') as handle:
            handle.write(unicodify(contents))
    except Exception:
        # Fixed swapped format arguments: the message reads
        # "output file <path> for <kwd_name>", so pass them in that order.
        message = "Problem writing output file %s for %s" % (path, kwd_name)
        ctx.vlog(message, exception=True)
        raise
def __extract_payload_from_request(trans, func, kwargs):
    """Build the payload dict for a decorated API method from the request.

    Form-encoded bodies arrive pre-parsed in ``kwargs``; JSON bodies are
    decoded manually from the raw request body.
    """
    content_type = trans.request.headers.get('content-type', '')
    if content_type.startswith('application/x-www-form-urlencoded') or content_type.startswith('multipart/form-data'):
        # If the content type is a standard type such as multipart/form-data, the wsgi framework parses the request body
        # and loads all field values into kwargs. However, kwargs also contains formal method parameters etc. which
        # are not a part of the request body. This is a problem because it's not possible to differentiate between values
        # which are a part of the request body, and therefore should be a part of the payload, and values which should not be
        # in the payload. Therefore, the decorated method's formal arguments are discovered through reflection and removed from
        # the payload dictionary. This helps to prevent duplicate argument conflicts in downstream methods.
        payload = kwargs.copy()
        # inspect.getargspec was removed in Python 3.11; prefer getfullargspec
        # (which exposes the same .args) while staying compatible with old Pythons.
        argspec_func = getattr(inspect, 'getfullargspec', inspect.getargspec)
        named_args = argspec_func(func).args
        for arg in named_args:
            payload.pop(arg, None)
        for k, v in payload.items():
            if isinstance(v, string_types):
                try:
                    # note: parse_non_hex_float only needed here for single string values where something like
                    # 40000000000000e5 will be parsed as a scientific notation float. This is as opposed to hex strings
                    # in larger JSON structures where quoting prevents this (further below)
                    payload[k] = loads(v, parse_float=parse_non_hex_float)
                except Exception:
                    # may not actually be json, just continue
                    pass
    else:
        # Assume application/json content type and parse request body manually, since wsgi won't do it. However, the order of this check
        # should ideally be in reverse, with the if clause being a check for application/json and the else clause assuming a standard encoding
        # such as multipart/form-data. Leaving it as is for backward compatibility, just in case.
        payload = loads(unicodify(trans.request.body))
    return payload
def best_search_result(conda_target, conda_context, channels_override=None, offline=False):
    """Find best "conda search" result for specified target.

    Returns a ``(hit, exact)`` tuple; ``(None, None)`` if no results match.
    """
    search_cmd = [conda_context.conda_exec, "search", "--full-name", "--json"]
    if offline:
        search_cmd.append("--offline")
    if channels_override:
        search_cmd.append("--override-channels")
        for channel in channels_override:
            search_cmd.extend(["--channel", channel])
    else:
        search_cmd.extend(conda_context._override_channels_args)
    search_cmd.append(conda_target.package)
    res = unicodify(commands.execute(search_cmd))
    hits = json.loads(res).get(conda_target.package, [])
    # Newest version first.
    hits = sorted(hits, key=lambda hit: packaging.version.parse(hit['version']), reverse=True)
    if not hits:
        return (None, None)
    # Prefer the first exact match; otherwise the newest inexact hit.
    for hit in hits:
        if is_search_hit_exact(conda_target, hit):
            return (hit, True)
    return (hits[0], False)
def _get_extended_config(self, trans):
    """Assemble the extended client configuration dict for this request."""
    app = trans.app
    configured_for_inactivity_warning = app.config.user_activation_on and app.config.inactivity_box_content is not None
    user_requests = bool(trans.user and (trans.user.requests or app.security_agent.get_accessible_request_types(trans, trans.user)))
    config = {
        'active_view': 'analysis',
        'params': dict(trans.request.params),
        'enable_cloud_launch': app.config.get_bool('enable_cloud_launch', False),
        'search_url': web.url_for(controller='root', action='tool_search'),
        # TODO: next two should be redundant - why can't we build one from the other?
        'toolbox': app.toolbox.to_dict(trans, in_panel=False),
        'toolbox_in_panel': app.toolbox.to_dict(trans),
        'message_box_visible': app.config.message_box_visible,
        # TODO: move to user
        'show_inactivity_warning': configured_for_inactivity_warning and trans.user and not trans.user.active,
        # TODO: move to user
        'user_requests': user_requests,
    }
    menu_entries = config['stored_workflow_menu_entries'] = []
    for menu_item in getattr(trans.user, 'stored_workflow_menu_entries', []):
        menu_entries.append({
            'encoded_stored_workflow_id': trans.security.encode_id(menu_item.stored_workflow_id),
            'stored_workflow': {'name': util.unicodify(menu_item.stored_workflow.name)},
        })
    return config
def get_file_peek(file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5, skipchars=None, line_wrap=True):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH

    ## >>> fname = get_test_fname('4.bed')
    ## >>> get_file_peek(fname)
    ## 'chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 +\n'
    """
    # Set size for file.readline() to a negative number to force it to
    # read until either a newline or EOF.  Needed for datasets with very
    # long lines.
    if WIDTH == 'unlimited':
        WIDTH = -1
    if skipchars is None:
        skipchars = []
    lines = []
    count = 0
    file_type = None
    data_checked = False
    temp = open(file_name, "U")
    while count < LINE_COUNT:
        line = temp.readline(WIDTH)
        if line and not is_multi_byte and not data_checked:
            # Inspect the first data-bearing line for compressed or binary content.
            if line[0:2] == util.gzip_magic:
                file_type = 'gzipped'
            else:
                for char in line:
                    if ord(char) > 128:
                        file_type = 'binary'
                        break
            data_checked = True
        if file_type in ('gzipped', 'binary'):
            break
        if not line_wrap:
            if line.endswith('\n'):
                line = line[:-1]
            else:
                # Consume the remainder of an over-long line so the next
                # readline starts at a fresh line.
                while True:
                    i = temp.read(1)
                    if not i or i == '\n':
                        break
        if not any(line.startswith(skipchar) for skipchar in skipchars):
            lines.append(line)
            count += 1
    temp.close()
    if file_type in ('gzipped', 'binary'):
        return "%s file" % file_type
    try:
        return util.unicodify('\n'.join(lines))
    except UnicodeDecodeError:
        return "binary/unknown file"
def get_html(self, prefix=""):
    """Render the GenomeSpace file-browse widget for this field."""
    escaped_value = escape(str(self.value), quote=True)
    markup = ('<script src="https://gsui.genomespace.org/jsui/upload/gsuploadwindow.js"></script>'
              '<input type="text" name="{0}{1}" value="{2}"> '
              '<a href="javascript:gsLocationByGet({{ successCallback: function(config)'
              ' {{ selector_name = \'{0}{1}\'; selector = \'input[name=\' + selector_name.replace(\'|\', \'\\\\|\') + \']\';'
              ' $(selector).val(config.destination + \'^\' + config.token); }} }});">'
              'Browse</a>').format(prefix, self.name, escaped_value)
    return unicodify(markup)
def display_by_username_and_slug(self, trans, username, slug):
    """ Display page based on a username and slug. """
    # Look the page up by owner and slug; 404 when absent.
    session = trans.sa_session
    user = session.query(model.User).filter_by(username=username).first()
    page = trans.sa_session.query(model.Page).filter_by(user=user, slug=slug, deleted=False).first()
    if page is None:
        raise web.httpexceptions.HTTPNotFound()
    # Security check raises error if user cannot access page.
    self.security_check(trans, page, False, True)
    # Process page content.
    processor = _PageContentProcessor(trans, 'utf-8', 'text/html', self._get_embed_html)
    processor.feed(page.latest_revision.content)
    # Get rating data.
    user_item_rating = 0
    if trans.get_user():
        rating = self.get_user_item_rating(trans.sa_session, trans.get_user(), page)
        user_item_rating = rating.rating if rating else 0
    ave_item_rating, num_ratings = self.get_ave_item_rating_data(trans.sa_session, page)
    # Output is string, so convert to unicode for display.
    page_content = unicodify(processor.output(), 'utf-8')
    return trans.fill_template_mako("page/display.mako",
                                    item=page,
                                    item_data=page_content,
                                    user_item_rating=user_item_rating,
                                    ave_item_rating=ave_item_rating,
                                    num_ratings=num_ratings,
                                    content_only=True)
def get_html_radio(self, prefix="", disabled=False):
    """Render the field's options as a column of radio buttons."""
    rval = []
    for idx, (text, value, selected) in enumerate(self.options):
        escaped_value = escape(str(value), quote=True)
        uniq_id = "%s%s|%s" % (prefix, self.name, escaped_value)
        # Zebra-stripe every other row once there are more than two options.
        style = " class=\"odd_row\"" if len(self.options) > 2 and idx % 2 == 1 else ""
        selected_text = " checked='checked'" if selected else ""
        rval.append('<div%s><input type="radio" name="%s%s"%s value="%s" id="%s"%s%s%s><label class="inline" for="%s">%s</label></div>'
                    % (style, prefix, self.name, self.refresh_on_change_text, escaped_value, uniq_id,
                       selected_text, self.get_disabled_str(disabled), self.extra_attributes, uniq_id, text))
    return unicodify("\n".join(rval))
def conda_info(self):
    """Return the parsed JSON of ``conda info --json``, or None when conda is unavailable."""
    if self.conda_exec is None:
        return None
    raw_info = commands.execute([self.conda_exec, "info", "--json"])
    return json.loads(unicodify(raw_info))
def set_tags_from_list(self, user, item, new_tags_list):
    # precondition: item is already security checked against user
    # precondition: incoming tags is a list of sanitized/formatted strings
    # Replace the item's tag set wholesale: clear, then re-apply as one
    # comma-joined string.
    self.delete_item_tags(user, item)
    self.apply_item_tags(user, item, unicodify(','.join(new_tags_list), 'utf-8'))
    self.sa_session.flush()
    return item.tags
def get_html(self, prefix="", disabled=False):
    """Render a checkbox plus its hidden sentinel input.

    The hidden field guarantees a value is posted even when the box is
    unchecked.
    """
    checked_text = ' checked="checked"' if self.checked else ''
    id_name = prefix + self.name
    return unicodify('<input type="checkbox" id="%s" name="%s" value="__CHECKED__"%s%s%s><input type="hidden" name="%s" value="__NOTHING__"%s>'
                     % (id_name, id_name, checked_text, self.get_disabled_str(disabled),
                        self.refresh_on_change_text, id_name, self.get_disabled_str(disabled)))
def get_html(self, prefix=""):
    """Render a file-upload input, optionally flagged for ajax upload."""
    value_text = ' value="%s"' % escape(str(self.value), quote=True) if self.value else ""
    ajax_text = ' galaxy-ajax-upload="true"' if self.ajax else ""
    return unicodify('<input type="file" name="%s%s"%s%s>' % (prefix, self.name, ajax_text, value_text))
def safe_dict(d):
    """Recursively clone JSON structure with unicode dictionary keys.

    Lists are cloned element-wise; non-container values are returned as-is.
    """
    if isinstance(d, dict):
        # Dict comprehension instead of dict([...]) — avoids building an
        # intermediate list of tuples.
        return {unicodify(k): safe_dict(v) for k, v in d.items()}
    elif isinstance(d, list):
        return [safe_dict(x) for x in d]
    else:
        return d
def body_renderer(self, trans, body, environ, start_response):
    # this is a dummy renderer that does not call start_response
    # See 'We have to re-create the handle request method...' in _process_batch_request above
    response_body = self.galaxy.make_body_iterable(trans, body)[0]
    return dict(status=trans.response.status,
                headers=trans.response.headers,
                body=json.loads(unicodify(response_body)))
def cli(ctx, runnable_identifier, job_path, **kwds):
    """Planemo command for running tools and jobs.

    \b
    % planemo run cat1-tool.cwl cat-job.json
    """
    runnable = for_runnable_identifier(ctx, runnable_identifier, kwds)
    is_cwl = runnable.type.is_cwl_artifact
    kwds["cwl"] = is_cwl
    kwds["execution_type"] = "Run"
    if kwds.get("engine", None) is None:
        # Choose a default engine from the artifact type / target Galaxy.
        if is_cwl:
            kwds["engine"] = "cwltool"
        elif kwds.get('galaxy_url', None):
            kwds["engine"] = "external_galaxy"
        else:
            kwds["engine"] = "galaxy"
    with engine_context(ctx, **kwds) as engine:
        run_result = engine.run(runnable, job_path)
        if not run_result.was_successful:
            warn("Run failed [%s]" % unicodify(run_result))
        elif kwds.get('no_wait'):
            info('Run successfully executed - exiting without waiting for results.')
        else:
            outputs_dict = run_result.outputs_dict
            output_json = kwds.get("output_json", None)
            if output_json:
                with open(output_json, "w") as f:
                    json.dump(outputs_dict, f)
            info('Run completed successfully.')
        report_data = StructuredData(data={'tests': [run_result.structured_data()], 'version': '0.1'})
        report_data.calculate_summary_data()
        return_value = handle_reports_and_summary(ctx, report_data.structured_data, kwds=kwds)
    ctx.exit(return_value)
def get_html(self, prefix="", disabled=False):
    """Render this field as an HTML text input."""
    text_value = unicodify(self.value or "")
    return unicodify('<input type="text" name="%s%s" size="%d" value="%s"%s>'
                     % (prefix, self.name, self.size, escape(text_value, quote=True), self.get_disabled_str(disabled)))
def wrap_in_middleware(app, global_conf, application_stack, **local_conf):
    """Based on the configuration wrap `app` in a set of common and useful middleware."""
    stack = application_stack
    # Merge the global and local configurations
    conf = global_conf.copy()
    conf.update(local_conf)
    debug = asbool(conf.get('debug', False))
    interactive = asbool(conf.get('use_interactive', False))
    # First put into place httpexceptions, which must be most closely
    # wrapped around the application (it can interact poorly with
    # other middleware):
    app = wrap_if_allowed(app, stack, httpexceptions.make_middleware, name='paste.httpexceptions', args=(conf,))
    # Create a separate mapper for redirects to prevent conflicts.
    redirect_mapper = routes.Mapper()
    redirect_mapper = _map_redirects(redirect_mapper)
    # Load the Routes middleware which we use for redirecting
    app = wrap_if_allowed(app, stack, RoutesMiddleware, args=(redirect_mapper,))
    # If we're using remote_user authentication, add middleware that
    # protects Galaxy from improperly configured authentication in the
    # upstream server
    if asbool(conf.get('use_remote_user', False)):
        from galaxy.webapps.tool_shed.framework.middleware.remoteuser import RemoteUser
        app = wrap_if_allowed(app, stack, RemoteUser,
                              kwargs=dict(maildomain=conf.get('remote_user_maildomain', None),
                                          display_servers=util.listify(conf.get('display_servers', '')),
                                          admin_users=conf.get('admin_users', '').split(','),
                                          remote_user_header=conf.get('remote_user_header', 'HTTP_REMOTE_USER'),
                                          remote_user_secret_header=conf.get('remote_user_secret', None),
                                          normalize_remote_user_email=conf.get('normalize_remote_user_email', False)))
    # The recursive middleware allows for including requests in other
    # requests or forwarding of requests, all on the server side.
    if asbool(conf.get('use_recursive', True)):
        from paste import recursive
        app = wrap_if_allowed(app, stack, recursive.RecursiveMiddleware, args=(conf,))
    # Transaction logging (apache access.log style)
    if asbool(conf.get('use_translogger', True)):
        from paste.translogger import TransLogger
        app = wrap_if_allowed(app, stack, TransLogger)
    # If sentry logging is enabled, log here before propogating up to
    # the error middleware
    # TODO sentry config is duplicated between tool_shed/galaxy, refactor this.
    sentry_dsn = conf.get('sentry_dsn', None)
    if sentry_dsn:
        from galaxy.web.framework.middleware.sentry import Sentry
        app = wrap_if_allowed(app, stack, Sentry, args=(sentry_dsn,))
    # X-Forwarded-Host handling
    from galaxy.web.framework.middleware.xforwardedhost import XForwardedHostMiddleware
    app = wrap_if_allowed(app, stack, XForwardedHostMiddleware)
    # Various debug middleware that can only be turned on if the debug
    # flag is set, either because they are insecure or greatly hurt
    # performance.
    if debug:
        # Middleware to check for WSGI compliance
        if asbool(conf.get('use_lint', True)):
            from paste import lint
            app = wrap_if_allowed(app, stack, lint.make_middleware, name='paste.lint', args=(conf,))
        # Middleware to run the python profiler on each request
        if asbool(conf.get('use_profile', False)):
            from paste.debug import profile
            app = wrap_if_allowed(app, stack, profile.ProfileMiddleware, args=(conf,))
    if interactive:
        # Interactive exception debugging, scary dangerous if publicly
        # accessible, if not enabled we'll use the regular error printing
        # middleware.
        # NOTE(review): evalexception arguably should also require debug mode;
        # confirm against the deployment expectations.
        try:
            from weberror import evalexception
            app = wrap_if_allowed_or_fail(app, stack, evalexception.EvalException,
                                          args=(conf,),
                                          kwargs=dict(templating_formatters=build_template_error_formatters()))
        except MiddlewareWrapUnsupported as exc:
            log.warning(util.unicodify(exc))
            import galaxy.web.framework.middleware.error
            app = wrap_if_allowed(app, stack, galaxy.web.framework.middleware.error.ErrorMiddleware, args=(conf,))
    else:
        # Not in interactive debug mode, just use the regular error middleware
        import galaxy.web.framework.middleware.error
        app = wrap_if_allowed(app, stack, galaxy.web.framework.middleware.error.ErrorMiddleware, args=(conf,))
    return app
CHRONOS_IMPORT_MSG = ('The Python \'chronos\' package is required to use ' 'this feature, please install it or correct the ' 'following error:\nImportError {msg!s}') try: import chronos chronos_exceptions = ( chronos.ChronosAPIError, chronos.UnauthorizedError, chronos.MissingFieldError, chronos.OneOfViolationError, ) except ImportError as e: chronos = None CHRONOS_IMPORT_MSG.format(msg=unicodify(e)) __all__ = ('ChronosJobRunner', ) LOGGER = logging.getLogger(__name__) class ChronosRunnerException(Exception): pass def handle_exception_call(func): # Catch chronos exceptions. The latest version of chronos-python does # support a hierarchy over the exceptions. @functools.wraps(func) def wrapper(*args, **kwargs):
def split(cls, input_datasets, subdir_generator_function, split_params):
    """Split a CML file into parts containing whole <molecule> records.

    Only the 'to_size' split mode is supported: each part holds at most
    ``split_size`` molecule records, wrapped in the standard CML header
    and footer so every part is a valid CML document on its own.
    """
    if split_params is None:
        return None
    if len(input_datasets) > 1:
        raise Exception("CML-file splitting does not support multiple files")
    input_files = [ds.file_name for ds in input_datasets]

    mode = split_params['split_mode']
    if mode == 'number_of_parts':
        raise Exception('Split mode "%s" is currently not implemented for CML-files.' % mode)
    if mode == 'to_size':
        chunk_size = int(split_params['split_size'])
    else:
        raise Exception('Unsupported split mode %s' % mode)

    def _iter_molecule_records(path):
        # Yield one list of lines per <molecule>...</molecule> record,
        # dropping the XML declaration and the <cml>/</cml> wrapper lines.
        record = []
        with open(path) as handle:
            for line in handle:
                stripped = line.lstrip()
                if (stripped.startswith('<?xml version="1.0"?>')
                        or stripped.startswith('<cml xmlns="http://www.xml-cml.org/schema')
                        or stripped.startswith('</cml>')):
                    continue
                record.append(line)
                if stripped.startswith('</molecule>'):
                    yield record
                    record = []

    header_lines = [
        '<?xml version="1.0"?>\n',
        '<cml xmlns="http://www.xml-cml.org/schema">\n'
    ]
    footer_line = ['</cml>\n']

    def _flush_part(accumulated):
        # Each part goes into a fresh subdirectory under the original basename.
        part_dir = subdir_generator_function()
        part_path = os.path.join(part_dir, os.path.basename(input_files[0]))
        with open(part_path, 'w') as part_file:
            part_file.writelines(header_lines)
            part_file.writelines(accumulated)
            part_file.writelines(footer_line)

    try:
        pending = []
        for record_count, record in enumerate(_iter_molecule_records(input_files[0]), start=1):
            pending.extend(record)
            if record_count % chunk_size == 0:
                _flush_part(pending)
                pending = []
        # Flush any trailing records that did not fill a whole chunk.
        if pending:
            _flush_part(pending)
    except Exception as e:
        log.error('Unable to split files: %s', unicodify(e))
        raise
def verify(
        item_label,
        output_content,
        attributes,
        filename=None,
        get_filecontent=None,
        get_filename=None,
        keep_outputs_dir=None,
        verify_extra_files=None,
        mode='file',
):
    """Verify the content of a test output using test definitions described by attributes.

    Throw an informative assertion error if any of these tests fail.

    :param item_label: label used to prefix error messages (e.g. an output name)
    :param output_content: raw bytes of the actual output being verified
    :param attributes: dict of test definitions (assert_list, md5/checksum,
        compare mode, ftype, lines_diff, ...); may be None
    :param filename: expected-output file name to compare against, or None to
        skip the file comparison step entirely
    :param get_filecontent: callable returning the expected file's bytes;
        defaults to the module's DEFAULT_TEST_DATA_RESOLVER
    :param get_filename: callable resolving an expected-output name to a local
        path; if None one is synthesized from get_filecontent
    :param keep_outputs_dir: if set, a copy of the actual output is saved there
    :param verify_extra_files: callback used to verify 'extra_files' attributes
    :param mode: 'file' (default) or 'directory' (filename is already on disk)
    """
    if get_filename is None:
        if get_filecontent is None:
            get_filecontent = DEFAULT_TEST_DATA_RESOLVER.get_filecontent

        # Synthesize get_filename: fetch expected content and write it to a
        # temp file so the file-based comparators below can be used.
        def get_filename(filename):
            file_content = get_filecontent(filename)
            local_name = make_temp_fname(fname=filename)
            with open(local_name, 'wb') as f:
                f.write(file_content)
            return local_name

    # Check assertions...
    assertions = attributes.get("assert_list", None)
    if attributes is not None and assertions is not None:
        try:
            verify_assertions(output_content, attributes["assert_list"])
        except AssertionError as err:
            errmsg = '%s different than expected\n' % (item_label)
            errmsg += unicodify(err)
            raise AssertionError(errmsg)

    # Verify checksum attributes...
    # works with older Galaxy style md5=<expected_sum> or cwltest
    # style checksum=<hash_type>$<hash>.
    expected_checksum_type = None
    expected_checksum = None
    if attributes is not None and attributes.get("md5", None) is not None:
        expected_checksum_type = "md5"
        expected_checksum = attributes.get("md5")
    elif attributes is not None and attributes.get("checksum", None) is not None:
        checksum_value = attributes.get("checksum", None)
        expected_checksum_type, expected_checksum = checksum_value.split("$", 1)

    if expected_checksum_type:
        try:
            _verify_checksum(output_content, expected_checksum_type, expected_checksum)
        except AssertionError as err:
            errmsg = '%s different than expected\n' % (item_label)
            errmsg += unicodify(err)
            raise AssertionError(errmsg)

    if attributes is None:
        attributes = {}

    if filename is not None:
        # Write the actual output to a temp file so it can be compared on disk.
        temp_name = make_temp_fname(fname=filename)
        with open(temp_name, 'wb') as f:
            f.write(output_content)

        # If the server's env has GALAXY_TEST_SAVE, save the output file to that
        # directory.
        # This needs to be done before the call to `get_filename()` because that
        # may raise an exception if `filename` does not exist (e.g. when
        # generating a tool output file from scratch with
        # `planemo test --update_test_data`).
        if keep_outputs_dir:
            ofn = os.path.join(keep_outputs_dir, filename)
            out_dir = os.path.dirname(ofn)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            log.debug('keep_outputs_dir: %s, ofn: %s', keep_outputs_dir, ofn)
            try:
                shutil.copy(temp_name, ofn)
            except Exception:
                log.exception('Could not save output file %s to %s', temp_name, ofn)
            else:
                log.debug('## GALAXY_TEST_SAVE=%s. saved %s', keep_outputs_dir, ofn)
        if mode == 'directory':
            # if verifying a file inside a extra_files_path directory
            # filename already point to a file that exists on disk
            local_name = filename
        else:
            local_name = get_filename(filename)
        compare = attributes.get('compare', 'diff')
        try:
            # BAM/CRAM outputs are converted to SAM first so a text diff is
            # possible; on conversion failure fall back to comparing as-is.
            if attributes.get('ftype', None) in ['bam', 'qname_sorted.bam', 'qname_input_sorted.bam', 'unsorted.bam', 'cram']:
                try:
                    local_fh, temp_name = _bam_to_sam(local_name, temp_name)
                    local_name = local_fh.name
                except Exception as e:
                    log.warning("%s. Will compare BAM files", unicodify(e))
            if compare == 'diff':
                files_diff(local_name, temp_name, attributes=attributes)
            elif compare == 're_match':
                files_re_match(local_name, temp_name, attributes=attributes)
            elif compare == 're_match_multiline':
                files_re_match_multiline(local_name, temp_name, attributes=attributes)
            elif compare == 'sim_size':
                # Size-only comparison within an allowed delta.
                delta = int(attributes.get('delta', DEFAULT_DELTA))
                s1 = len(output_content)
                s2 = os.path.getsize(local_name)
                if abs(s1 - s2) > int(delta):
                    raise AssertionError('Files %s=%db but %s=%db - compare by size (delta=%s) failed' % (temp_name, s1, local_name, s2, delta))
            elif compare == "contains":
                files_contains(local_name, temp_name, attributes=attributes)
            else:
                raise Exception('Unimplemented Compare type: %s' % compare)
        except AssertionError as err:
            errmsg = '%s different than expected, difference (using %s):\n' % (item_label, compare)
            errmsg += "( %s v. %s )\n" % (local_name, temp_name)
            errmsg += unicodify(err)
            raise AssertionError(errmsg)
        finally:
            # Always remove the temp copy of the actual output unless the
            # user asked for artifacts to be kept.
            if 'GALAXY_TEST_NO_CLEANUP' not in os.environ:
                os.remove(temp_name)
    if verify_extra_files:
        extra_files = attributes.get('extra_files', None)
        if extra_files:
            verify_extra_files(extra_files)
def files_diff(file1, file2, attributes=None):
    """Check the contents of 2 files for differences.

    Raises AssertionError (carrying a unified-diff excerpt) when the files
    differ by more than ``attributes['lines_diff']`` changed lines.  PDF
    files get special forgiveness for metadata-only differences.
    """
    def get_lines_diff(diff):
        # Count only real +/- change lines, excluding the '+++'/'---'
        # unified-diff file headers.
        count = 0
        for line in diff:
            if (line.startswith('+') and not line.startswith('+++')) or (line.startswith('-') and not line.startswith('---')):
                count += 1
        return count

    if not filecmp.cmp(file1, file2, shallow=False):
        if attributes is None:
            attributes = {}
        decompress = attributes.get("decompress", None)
        if decompress:
            # None means all compressed formats are allowed
            compressed_formats = None
        else:
            compressed_formats = []
        is_pdf = False
        try:
            with get_fileobj(file2, compressed_formats=compressed_formats) as fh:
                history_data = fh.readlines()
            with get_fileobj(file1, compressed_formats=compressed_formats) as fh:
                local_file = fh.readlines()
        except UnicodeDecodeError:
            # Binary content: only PDFs get a lossy text comparison; any other
            # binary data is rejected outright.
            if file1.endswith('.pdf') or file2.endswith('.pdf'):
                is_pdf = True
                # Replace non-Unicode characters using unicodify(),
                # difflib.unified_diff doesn't work on list of bytes
                history_data = [unicodify(l) for l in get_fileobj(file2, mode='rb', compressed_formats=compressed_formats)]
                local_file = [unicodify(l) for l in get_fileobj(file1, mode='rb', compressed_formats=compressed_formats)]
            else:
                raise AssertionError("Binary data detected, not displaying diff")
        if attributes.get('sort', False):
            local_file.sort()
            history_data.sort()
        allowed_diff_count = int(attributes.get('lines_diff', 0))
        diff = list(difflib.unified_diff(local_file, history_data, "local_file", "history_data"))
        diff_lines = get_lines_diff(diff)
        if diff_lines > allowed_diff_count:
            if 'GALAXY_TEST_RAW_DIFF' in os.environ:
                diff_slice = diff
            else:
                # Keep the reported diff readable: show it whole when short,
                # otherwise show the first and last 25 lines around a SNIP.
                if len(diff) < 60:
                    diff_slice = diff[0:40]
                else:
                    diff_slice = diff[:25] + ["********\n", "*SNIP *\n", "********\n"] + diff[-25:]
            # FIXME: This pdf stuff is rather special cased and has not been updated to consider lines_diff
            # due to unknown desired behavior when used in conjunction with a non-zero lines_diff
            # PDF forgiveness can probably be handled better by not special casing by __extension__ here
            # and instead using lines_diff or a regular expression matching
            # or by creating and using a specialized pdf comparison function
            if is_pdf:
                # PDF files contain creation dates, modification dates, ids and descriptions that change with each
                # new file, so we need to handle these differences. As long as the rest of the PDF file does
                # not differ we're ok.
                valid_diff_strs = ['description', 'createdate', 'creationdate', 'moddate', 'id', 'producer', 'creator']
                valid_diff = False
                invalid_diff_lines = 0
                for line in diff_slice:
                    # Make sure to lower case strings before checking.
                    line = line.lower()
                    # Diff lines will always start with a + or - character, but handle special cases: '--- local_file \n', '+++ history_data \n'
                    if (line.startswith('+') or line.startswith('-')) and line.find('local_file') < 0 and line.find('history_data') < 0:
                        # A diff line is "valid" (forgiven) when it mentions any
                        # of the volatile PDF metadata keys above.
                        for vdf in valid_diff_strs:
                            if line.find(vdf) < 0:
                                valid_diff = False
                            else:
                                valid_diff = True
                                # Stop checking as soon as we know we have a valid difference
                                break
                        if not valid_diff:
                            invalid_diff_lines += 1
                log.info("## files diff on '%s' and '%s': lines_diff = %d, found diff = %d, found pdf invalid diff = %d" % (file1, file2, allowed_diff_count, diff_lines, invalid_diff_lines))
                if invalid_diff_lines > allowed_diff_count:
                    # Print out diff_slice so we can see what failed
                    log.info("###### diff_slice ######")
                    raise AssertionError("".join(diff_slice))
            else:
                log.info("## files diff on '%s' and '%s': lines_diff = %d, found diff = %d" % (file1, file2, allowed_diff_count, diff_lines))
                raise AssertionError("".join(diff_slice))
def create(self, trans, library_id, payload, **kwd):
    """
    create( self, trans, library_id, payload, **kwd )
    * POST /api/libraries/{library_id}/contents:
        create a new library file or folder

    To copy an HDA into a library send ``create_type`` of 'file' and
    the HDA's encoded id in ``from_hda_id`` (and optionally ``ldda_message``).

    To copy an HDCA into a library send ``create_type`` of 'file' and
    the HDCA's encoded id in ``from_hdca_id`` (and optionally ``ldda_message``).

    :type   library_id: str
    :param  library_id: the encoded id of the library where to create the new item
    :type   payload:    dict
    :param  payload:    dictionary structure containing:

        * folder_id:    the encoded id of the parent folder of the new item
        * create_type:  the type of item to create ('file', 'folder' or 'collection')
        * from_hda_id:  (optional, only if create_type is 'file') the
            encoded id of an accessible HDA to copy into the library
        * ldda_message: (optional) the new message attribute of the LDDA created
        * extended_metadata: (optional) sub-dictionary containing any extended
            metadata to associate with the item
        * upload_option: (optional) one of 'upload_file' (default),
            'upload_directory' or 'upload_paths'
        * server_dir: (optional, only if upload_option is
            'upload_directory') relative path of the subdirectory of Galaxy
            ``library_import_dir`` (if admin) or ``user_library_import_dir``
            (if non-admin) to upload. All and only the files (i.e.
            no subdirectories) contained in the specified directory will be
            uploaded.
        * filesystem_paths: (optional, only if upload_option is
            'upload_paths' and the user is an admin) file paths on the
            Galaxy server to upload to the library, one file per line
        * link_data_only: (optional, only when upload_option is
            'upload_directory' or 'upload_paths') either 'copy_files'
            (default) or 'link_to_files'. Setting to 'link_to_files'
            symlinks instead of copying the files
        * name: (optional, only if create_type is 'folder') name of the
            folder to create
        * description: (optional, only if create_type is 'folder')
            description of the folder to create
        * tag_using_filenames: (optional) create tags on datasets using the
            file's original name

    :returns:   a dictionary describing the new item unless ``from_hdca_id`` is supplied,
                in that case a list of such dictionaries is returned.
    :rtype:     object
    """
    # Validate the two required payload keys up front, mirroring the API's
    # convention of returning a 400 status plus a plain-text message.
    if 'create_type' not in payload:
        trans.response.status = 400
        return "Missing required 'create_type' parameter."
    else:
        create_type = payload.pop('create_type')
    if create_type not in ('file', 'folder', 'collection'):
        trans.response.status = 400
        return "Invalid value for 'create_type' parameter ( %s ) specified." % create_type
    if 'folder_id' not in payload:
        trans.response.status = 400
        return "Missing required 'folder_id' parameter."
    else:
        folder_id = payload.pop('folder_id')
    class_name, folder_id = self._decode_library_content_id(folder_id)
    try:
        # security is checked in the downstream controller
        parent = self.get_library_folder(trans, folder_id, check_ownership=False, check_accessible=False)
    except Exception as e:
        return util.unicodify(e)
    # The rest of the security happens in the library_common controller.
    real_folder_id = trans.security.encode_id(parent.id)
    payload['tag_using_filenames'] = util.string_as_bool(payload.get('tag_using_filenames', None))
    # are we copying an HDA to the library folder?
    # we'll need the id and any message to attach, then branch to that private function
    from_hda_id, from_hdca_id, ldda_message = (payload.pop('from_hda_id', None),
                                               payload.pop('from_hdca_id', None),
                                               payload.pop('ldda_message', ''))
    if create_type == 'file':
        if from_hda_id:
            return self._copy_hda_to_library_folder(trans, self.hda_manager, self.decode_id(from_hda_id), real_folder_id, ldda_message)
        if from_hdca_id:
            return self._copy_hdca_to_library_folder(trans, self.hda_manager, self.decode_id(from_hdca_id), real_folder_id, ldda_message)

    # check for extended metadata, store it and pop it out of the param
    # otherwise sanitize_param will have a fit
    ex_meta_payload = payload.pop('extended_metadata', None)

    # Now create the desired content object, either file or folder.
    if create_type == 'file':
        status, output = self._upload_library_dataset(trans, library_id, real_folder_id, **payload)
    elif create_type == 'folder':
        status, output = self._create_folder(trans, real_folder_id, library_id, **payload)
    elif create_type == 'collection':
        # Not delegating to library_common, so need to check access to parent
        # folder here.
        self.check_user_can_add_to_library_item(trans, parent, check_accessible=True)
        create_params = api_payload_to_create_params(payload)
        create_params['parent'] = parent
        service = trans.app.dataset_collections_service
        dataset_collection_instance = service.create(**create_params)
        return [dictify_dataset_collection_instance(dataset_collection_instance, security=trans.security, parent=parent)]
    if status != 200:
        trans.response.status = status
        return output
    else:
        rval = []
        for v in output.values():
            if ex_meta_payload is not None:
                # If there is extended metadata, store it, attach it to the dataset, and index it
                ex_meta = ExtendedMetadata(ex_meta_payload)
                trans.sa_session.add(ex_meta)
                v.extended_metadata = ex_meta
                trans.sa_session.add(v)
                trans.sa_session.flush()
                for path, value in self._scan_json_block(ex_meta_payload):
                    meta_i = ExtendedMetadataIndex(ex_meta, path, value)
                    trans.sa_session.add(meta_i)
                trans.sa_session.flush()
            if type(v) == trans.app.model.LibraryDatasetDatasetAssociation:
                v = v.library_dataset
            encoded_id = trans.security.encode_id(v.id)
            if create_type == 'folder':
                # Folder ids are distinguished from dataset ids by an 'F' prefix.
                encoded_id = 'F' + encoded_id
            rval.append(dict(id=encoded_id,
                             name=v.name,
                             url=url_for('library_content', library_id=library_id, id=encoded_id)))
        return rval
def _handle_realization_error(exception, **kwds): fail_fast = kwds.get("fail_fast", False) if fail_fast: raise exception else: error(unicodify(exception))
def main(argv):
    """Export Galaxy job, dataset and numeric-metric data as a GRT report.

    Writes batched TSV files, bundles them into a tar.gz with a JSON
    manifest, and records a checkpoint of the last exported job id so
    subsequent runs resume where this one stopped.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-r', '--report-directory', help='Directory to store reports in',
                        default=os.path.abspath(os.path.join('.', 'reports')))
    parser.add_argument('-g', '--grt-config', help='Path to GRT config file',
                        default=default_config)
    parser.add_argument("-l", "--loglevel", choices=['debug', 'info', 'warning', 'error', 'critical'],
                        help="Set the logging level", default='warning')
    parser.add_argument("-b", "--batch-size", type=int, default=1000,
                        help="Batch size for sql queries")
    parser.add_argument("-m", "--max-records", type=int, default=5000000,
                        help="Maximum number of records to include in a single report. This option should ONLY be used when reporting historical data. Setting this may require running GRT multiple times to capture all historical logs.")
    populate_config_args(parser)

    args = parser.parse_args()
    logging.getLogger().setLevel(getattr(logging, args.loglevel.upper()))

    _times = []
    _start_time = time.time()

    def annotate(label, human_label=None):
        # Record a named timing checkpoint (relative to script start).
        if human_label:
            logging.info(human_label)
        _times.append((label, time.time() - _start_time))

    annotate('init_start', 'Loading GRT configuration...')
    try:
        with open(args.grt_config) as handle:
            config = yaml.safe_load(handle)
    except Exception:
        # Fall back to the shipped sample config when the user's is missing/broken.
        logging.info('Using default GRT configuration')
        with open(sample_config) as handle:
            config = yaml.safe_load(handle)
    annotate('init_end')

    REPORT_DIR = args.report_directory
    CHECK_POINT_FILE = os.path.join(REPORT_DIR, '.checkpoint')
    # Reports are named by their creation timestamp.
    REPORT_IDENTIFIER = str(time.time())
    REPORT_BASE = os.path.join(REPORT_DIR, REPORT_IDENTIFIER)

    if os.path.exists(CHECK_POINT_FILE):
        with open(CHECK_POINT_FILE, 'r') as handle:
            last_job_sent = int(handle.read())
    else:
        last_job_sent = -1

    annotate('galaxy_init', 'Loading Galaxy...')
    model, object_store, gxconfig = _init(args)
    # Galaxy overrides our logging level.
    logging.getLogger().setLevel(getattr(logging, args.loglevel.upper()))
    sa_session = model.context.current
    annotate('galaxy_end')

    # Fetch jobs COMPLETED with status OK that have not yet been sent.

    # Set up our arrays
    active_users = defaultdict(int)
    job_state_data = defaultdict(int)

    if not os.path.exists(REPORT_DIR):
        os.makedirs(REPORT_DIR)

    # Pick an end point so our queries can return uniform data.
    annotate('endpoint_start', 'Identifying a safe endpoint for SQL queries')
    end_job_id = sa_session.query(model.Job.id) \
        .order_by(model.Job.id.desc()) \
        .first()[0]

    # Allow users to only report N records at once.
    if args.max_records > 0:
        if end_job_id - last_job_sent > args.max_records:
            end_job_id = last_job_sent + args.max_records

    annotate('endpoint_end', 'Processing jobs (%s, %s]' % (last_job_sent, end_job_id))

    # Remember the last job sent.
    if end_job_id == last_job_sent:
        logging.info("No new jobs to report")
        # So we can just quit now.
        sys.exit(0)

    # Unfortunately we have to keep this mapping for the sanitizer to work
    # properly.
    job_tool_map = {}
    blacklisted_tools = config['sanitization']['tools']

    annotate('export_jobs_start', 'Exporting Jobs')
    with io.open(REPORT_BASE + '.jobs.tsv', 'w', encoding='utf-8') as handle_job:
        handle_job.write(u'\t'.join(('id', 'tool_id', 'tool_version', 'state', 'create_time')) + '\n')
        for offset_start in range(last_job_sent, end_job_id, args.batch_size):
            logging.debug("Processing %s:%s", offset_start, min(end_job_id, offset_start + args.batch_size))
            for job in sa_session.query(model.Job.id, model.Job.user_id, model.Job.tool_id, model.Job.tool_version, model.Job.state, model.Job.create_time) \
                    .filter(model.Job.id > offset_start) \
                    .filter(model.Job.id <= min(end_job_id, offset_start + args.batch_size)) \
                    .all():
                # If the tool is blacklisted, exclude everywhere
                if job[2] in blacklisted_tools:
                    continue

                try:
                    line = [
                        str(job[0]),  # id
                        job[2],  # tool_id
                        job[3],  # tool_version
                        job[4],  # state
                        str(job[5])  # create_time
                    ]
                    cline = unicodify('\t'.join(line) + '\n')
                    handle_job.write(cline)
                except Exception:
                    logging.warning("Unable to write out a 'handle_job' row. Ignoring the row.", exc_info=True)
                    continue
                # meta counts
                job_state_data[job[4]] += 1
                active_users[job[1]] += 1
                job_tool_map[job[0]] = job[2]
    annotate('export_jobs_end')

    annotate('export_datasets_start', 'Exporting Datasets')
    with io.open(REPORT_BASE + '.datasets.tsv', 'w', encoding='utf-8') as handle_datasets:
        handle_datasets.write(u'\t'.join(('job_id', 'dataset_id', 'extension', 'file_size', 'param_name', 'type')) + '\n')
        for offset_start in range(last_job_sent, end_job_id, args.batch_size):
            logging.debug("Processing %s:%s", offset_start, min(end_job_id, offset_start + args.batch_size))

            # four queries: JobToInputDatasetAssociation, JobToOutputDatasetAssociation, HistoryDatasetAssociation, Dataset
            job_to_input_hda_ids = sa_session.query(model.JobToInputDatasetAssociation.job_id, model.JobToInputDatasetAssociation.dataset_id, model.JobToInputDatasetAssociation.name) \
                .filter(model.JobToInputDatasetAssociation.job_id > offset_start) \
                .filter(model.JobToInputDatasetAssociation.job_id <= min(end_job_id, offset_start + args.batch_size)) \
                .all()

            job_to_output_hda_ids = sa_session.query(model.JobToOutputDatasetAssociation.job_id, model.JobToOutputDatasetAssociation.dataset_id, model.JobToOutputDatasetAssociation.name) \
                .filter(model.JobToOutputDatasetAssociation.job_id > offset_start) \
                .filter(model.JobToOutputDatasetAssociation.job_id <= min(end_job_id, offset_start + args.batch_size)) \
                .all()

            # add type and concat
            job_to_hda_ids = [[list(i), "input"] for i in job_to_input_hda_ids] + \
                             [[list(i), "output"] for i in job_to_output_hda_ids]

            # put all of the hda_ids into a list
            hda_ids = [i[0][1] for i in job_to_hda_ids]

            hdas = sa_session.query(model.HistoryDatasetAssociation.id, model.HistoryDatasetAssociation.dataset_id, model.HistoryDatasetAssociation.extension) \
                .filter(model.HistoryDatasetAssociation.id.in_(hda_ids)) \
                .all()

            # put all the dataset ids into a list
            dataset_ids = [i[1] for i in hdas]

            # get the sizes of the datasets
            datasets = sa_session.query(model.Dataset.id, model.Dataset.total_size) \
                .filter(model.Dataset.id.in_(dataset_ids)) \
                .all()

            # datasets to dictionay for easy search
            hdas = {i[0]: i[1:] for i in hdas}
            datasets = {i[0]: i[1:] for i in datasets}

            for job_to_hda in job_to_hda_ids:

                job = job_to_hda[0]  # job_id, hda_id, name
                filetype = job_to_hda[1]  # input|output

                # No associated job
                if job[0] not in job_tool_map:
                    continue

                # If the tool is blacklisted, exclude everywhere
                if job_tool_map[job[0]] in blacklisted_tools:
                    continue

                hda_id = job[1]
                if hda_id is None:
                    continue

                dataset_id = hdas[hda_id][0]
                if dataset_id is None:
                    continue

                try:
                    line = [
                        str(job[0]),  # Job ID
                        str(hda_id),  # HDA ID
                        str(hdas[hda_id][1]),  # Extension
                        round_to_2sd(datasets[dataset_id][0]),  # File size
                        job[2],  # Parameter name
                        str(filetype)  # input/output
                    ]
                    cline = unicodify('\t'.join(line) + '\n')
                    handle_datasets.write(cline)
                except Exception:
                    logging.warning("Unable to write out a 'handle_datasets' row. Ignoring the row.", exc_info=True)
                    continue
    annotate('export_datasets_end')

    annotate('export_metric_num_start', 'Exporting Metrics (Numeric)')
    with io.open(REPORT_BASE + '.metric_num.tsv', 'w', encoding='utf-8') as handle_metric_num:
        handle_metric_num.write(u'\t'.join(('job_id', 'plugin', 'name', 'value')) + '\n')
        for offset_start in range(last_job_sent, end_job_id, args.batch_size):
            logging.debug("Processing %s:%s", offset_start, min(end_job_id, offset_start + args.batch_size))
            for metric in sa_session.query(model.JobMetricNumeric.job_id, model.JobMetricNumeric.plugin, model.JobMetricNumeric.metric_name, model.JobMetricNumeric.metric_value) \
                    .filter(model.JobMetricNumeric.job_id > offset_start) \
                    .filter(model.JobMetricNumeric.job_id <= min(end_job_id, offset_start + args.batch_size)) \
                    .all():
                # No associated job
                if metric[0] not in job_tool_map:
                    continue

                # If the tool is blacklisted, exclude everywhere
                if job_tool_map[metric[0]] in blacklisted_tools:
                    continue

                try:
                    line = [
                        str(metric[0]),  # job id
                        metric[1],  # plugin
                        metric[2],  # name
                        str(metric[3])  # value
                    ]
                    cline = unicodify('\t'.join(line) + '\n')
                    handle_metric_num.write(cline)
                except Exception:
                    logging.warning("Unable to write out a 'handle_metric_num' row. Ignoring the row.", exc_info=True)
                    continue
    annotate('export_metric_num_end')

    # Now on to outputs.
    with tarfile.open(REPORT_BASE + '.tar.gz', 'w:gz') as handle:
        for name in ('jobs', 'metric_num', 'datasets'):
            path = REPORT_BASE + '.' + name + '.tsv'
            if os.path.exists(path):
                handle.add(path)

    # The TSVs are bundled in the tarball, so remove the loose copies.
    for name in ('jobs', 'metric_num', 'datasets'):
        path = REPORT_BASE + '.' + name + '.tsv'
        if os.path.exists(path):
            os.unlink(REPORT_BASE + '.' + name + '.tsv')

    _times.append(('job_finish', time.time() - _start_time))
    sha = hash_util.memory_bound_hexdigest(hash_func=hash_util.sha256, path=REPORT_BASE + ".tar.gz")
    _times.append(('hash_finish', time.time() - _start_time))

    # Now serialize the individual report data.
    with open(REPORT_BASE + '.json', 'w') as handle:
        json.dump({
            "version": 3,
            "galaxy_version": gxconfig.version_major,
            "generated": REPORT_IDENTIFIER,
            "report_hash": "sha256:" + sha,
            "metrics": {
                "_times": _times,
            },
            "users": {
                "active": len(active_users.keys()),
                "total": sa_session.query(model.User.id).count(),
            },
            "jobs": job_state_data,
        }, handle)

    # Write our checkpoint file so we know where to start next time.
    with open(CHECK_POINT_FILE, 'w') as handle:
        handle.write(str(end_job_id))
def set_metadata_legacy():
    """Set metadata on datasets described by legacy-style sys.argv fields.

    Each remaining sys.argv entry is a comma-separated field list:
    filename_in, filename_kwds, filename_out, filename_results_code,
    dataset_filename_override, override_metadata.  Success/failure for each
    dataset is written as JSON to its filename_results_code file.
    """
    import galaxy.model
    # Anchor MetadataTempFile output in the job working directory (cwd).
    galaxy.model.metadata.MetadataTempFile.tmp_dir = tool_job_working_directory = os.path.abspath(os.getcwd())

    # This is ugly, but to transition from existing jobs without this parameter
    # to ones with, smoothly, it has to be the last optional parameter and we
    # have to sniff it.
    try:
        max_metadata_value_size = int(sys.argv[-1])
        sys.argv = sys.argv[:-1]
    except ValueError:
        max_metadata_value_size = 0
        # max_metadata_value_size is unspecified and should be 0

    # Set up datatypes registry
    datatypes_config = sys.argv.pop(1)
    datatypes_registry = validate_and_load_datatypes_config(datatypes_config)

    job_metadata = sys.argv.pop(1)
    tool_provided_metadata = load_job_metadata(job_metadata)

    def set_meta(new_dataset_instance, file_dict):
        # NOTE: relies on set_meta_kwds being (re)bound by the loop below
        # before each call.
        set_meta_with_tool_provided(new_dataset_instance, file_dict, set_meta_kwds, datatypes_registry, max_metadata_value_size)

    for filenames in sys.argv[1:]:
        fields = filenames.split(',')
        filename_in = fields.pop(0)
        filename_kwds = fields.pop(0)
        filename_out = fields.pop(0)
        filename_results_code = fields.pop(0)
        dataset_filename_override = fields.pop(0)
        override_metadata = fields.pop(0)
        set_meta_kwds = stringify_dictionary_keys(json.load(open(filename_kwds)))  # load kwds; need to ensure our keywords are not unicode
        try:
            dataset = cPickle.load(open(filename_in, 'rb'))  # load DatasetInstance
            dataset.dataset.external_filename = dataset_filename_override
            store_by = set_meta_kwds.get("object_store_store_by", "id")
            extra_files_dir_name = "dataset_%s_files" % getattr(dataset.dataset, store_by)
            files_path = os.path.abspath(os.path.join(tool_job_working_directory, "working", extra_files_dir_name))
            dataset.dataset.external_extra_files_path = files_path
            file_dict = tool_provided_metadata.get_dataset_meta(None, dataset.dataset.id)
            if 'ext' in file_dict:
                dataset.extension = file_dict['ext']
            # Metadata FileParameter types may not be writable on a cluster
            # node, and are therefore temporarily substituted with
            # MetadataTempFiles
            override_metadata = json.load(open(override_metadata))
            for metadata_name, metadata_file_override in override_metadata:
                if galaxy.datatypes.metadata.MetadataTempFile.is_JSONified_value(metadata_file_override):
                    metadata_file_override = galaxy.datatypes.metadata.MetadataTempFile.from_JSON(metadata_file_override)
                setattr(dataset.metadata, metadata_name, metadata_file_override)
            set_meta(dataset, file_dict)
            dataset.metadata.to_JSON_dict(filename_out)  # write out results of set_meta
            json.dump((True, 'Metadata has been set successfully'), open(filename_results_code, 'wt+'))  # setting metadata has succeeded
        except Exception as e:
            json.dump((False, unicodify(e)), open(filename_results_code, 'wt+'))  # setting metadata has failed somehow

    write_job_metadata(tool_job_working_directory, job_metadata, set_meta, tool_provided_metadata)
def handle_compressed_file(
    filename,
    datatypes_registry,
    ext='auto',
    tmp_prefix='sniff_uncompress_',
    tmp_dir=None,
    in_place=False,
    check_content=True,
    auto_decompress=True,
):
    """
    Check uploaded files for compression, check compressed file contents, and
    uncompress if necessary.

    Supports GZip, BZip2, and the first file in a Zip file.

    For performance reasons, the temporary file used for uncompression is
    located in the same directory as the input/output file. This behavior can
    be changed with the `tmp_dir` param.

    ``ext`` as returned will only be changed from the ``ext`` input param if
    the param was an autodetect type (``auto``) and the file was sniffed as a
    keep-compressed datatype.

    ``is_valid`` as returned will only be set if the file is compressed and
    contains invalid contents (or the first file in the case of a zip file),
    this is so lengthy decompression can be bypassed if there is invalid
    content in the first 32KB. Otherwise the caller should be checking
    content.
    """
    CHUNK_SIZE = 2**20  # 1Mb
    is_compressed = False
    compressed_type = None
    keep_compressed = False
    is_valid = False
    # NOTE: 'uncompressed' is first the input path, then (below) temporarily a
    # NamedTemporaryFile object, and finally the output path returned.
    uncompressed = filename
    tmp_dir = tmp_dir or os.path.dirname(filename)
    # Probe with each registered compression checker until one matches.
    for key, check_compressed_function in COMPRESSION_CHECK_FUNCTIONS:
        is_compressed, is_valid = check_compressed_function(filename, check_content=check_content)
        if is_compressed:
            compressed_type = key
            break  # found compression type
    if is_compressed and is_valid:
        if ext in AUTO_DETECT_EXTENSIONS:
            # attempt to sniff for a keep-compressed datatype (observing the sniff order)
            sniff_datatypes = filter(lambda d: getattr(d, 'compressed', False), datatypes_registry.sniff_order)
            sniffed_ext = run_sniffers_raw(filename, sniff_datatypes)
            if sniffed_ext:
                ext = sniffed_ext
                keep_compressed = True
        else:
            datatype = datatypes_registry.get_datatype_by_extension(ext)
            keep_compressed = getattr(datatype, 'compressed', False)
    # don't waste time decompressing if we sniff invalid contents
    if is_compressed and is_valid and auto_decompress and not keep_compressed:
        with tempfile.NamedTemporaryFile(prefix=tmp_prefix, dir=tmp_dir, delete=False) as uncompressed:
            compressed_file = DECOMPRESSION_FUNCTIONS[compressed_type](filename)
            # TODO: it'd be ideal to convert to posix newlines and space-to-tab here as well
            while True:
                try:
                    chunk = compressed_file.read(CHUNK_SIZE)
                except OSError as e:
                    # Clean up the partial temp file before propagating.
                    os.remove(uncompressed.name)
                    compressed_file.close()
                    raise OSError('Problem uncompressing {} data, please try retrieving the data uncompressed: {}'.format(compressed_type, util.unicodify(e)))
                if not chunk:
                    break
                uncompressed.write(chunk)
        uncompressed = uncompressed.name
        compressed_file.close()
        if in_place:
            # Replace the compressed file with the uncompressed file
            shutil.move(uncompressed, filename)
            uncompressed = filename
    elif not is_compressed or not check_content:
        is_valid = True
    return is_valid, ext, uncompressed, compressed_type
def build_readme_files_dict(app, repository, changeset_revision, metadata, tool_path=None):
    """
    Return a dictionary of valid readme file name <-> readme file content pairs for all readme files defined in the received metadata.

    Since the received changeset_revision (which is associated with the received metadata) may not be the latest installable changeset
    revision, the README file contents may not be available on disk.  This method is used by both Galaxy and the Tool Shed.
    """
    if app.name == 'galaxy':
        # Galaxy installs always have the files for the installed revision on disk.
        can_use_disk_files = True
    else:
        # In the Tool Shed, disk files only reflect the latest downloadable revision.
        latest_downloadable_changeset_revision = metadata_util.get_latest_downloadable_changeset_revision(app, repository)
        can_use_disk_files = changeset_revision == latest_downloadable_changeset_revision
    readme_files_dict = {}
    if metadata:
        if 'readme_files' in metadata:
            for relative_path_to_readme_file in metadata['readme_files']:
                readme_file_name = os.path.split(relative_path_to_readme_file)[1]
                if can_use_disk_files:
                    if tool_path:
                        full_path_to_readme_file = os.path.abspath(os.path.join(tool_path, relative_path_to_readme_file))
                    else:
                        full_path_to_readme_file = os.path.abspath(relative_path_to_readme_file)
                    text = None
                    try:
                        with open(full_path_to_readme_file, encoding='utf-8') as f:
                            text = f.read()
                    except Exception:
                        # A missing/unreadable README is non-fatal; just skip it.
                        log.exception("Error reading README file '%s' from disk", relative_path_to_readme_file)
                        text = None
                    if text:
                        text_of_reasonable_length = basic_util.size_string(text)
                        if text_of_reasonable_length.find('.. image:: ') >= 0:
                            # Handle image display for README files that are contained in repositories in the tool shed or installed into Galaxy.
                            try:
                                text_of_reasonable_length = suc.set_image_paths(app,
                                                                                text_of_reasonable_length,
                                                                                encoded_repository_id=app.security.encode_id(repository.id))
                            except Exception:
                                log.exception("Exception in build_readme_files_dict, so images may not be properly displayed")
                        if readme_file_name.endswith('.rst'):
                            # Render reStructuredText through a Mako template so web paths resolve.
                            text_of_reasonable_length = Template(rst_to_html(text_of_reasonable_length),
                                                                 input_encoding='utf-8',
                                                                 default_filters=['decode.utf8'],
                                                                 encoding_errors='replace')
                            text_of_reasonable_length = text_of_reasonable_length.render(static_path=web.url_for('/static'),
                                                                                         host_url=web.url_for('/', qualified=True))
                            text_of_reasonable_length = unicodify(text_of_reasonable_length)
                        else:
                            text_of_reasonable_length = basic_util.to_html_string(text_of_reasonable_length)
                        readme_files_dict[readme_file_name] = text_of_reasonable_length
                else:
                    # We must be in the tool shed and have an old changeset_revision, so we need to retrieve the file contents from the repository manifest.
                    repo = repository.hg_repo
                    ctx = hg_util.get_changectx_for_changeset(repo, changeset_revision)
                    if ctx:
                        fctx = hg_util.get_file_context_from_ctx(ctx, readme_file_name)
                        if fctx and fctx not in ['DELETED']:
                            try:
                                text = unicodify(fctx.data())
                                readme_files_dict[readme_file_name] = basic_util.size_string(text)
                            except Exception:
                                log.exception("Error reading README file '%s' from repository manifest", relative_path_to_readme_file)
    return readme_files_dict
def submit_report(self, dataset, job, tool, **kwargs):
    """Submit the error report to GitHub.

    NOTE(review): the original docstring said "sentry", but this reporter clearly
    talks to ``self.github`` — creates or appends to a GitHub issue for the failed
    job and returns a (message, status) tuple for display.
    """
    log.info(self.github)
    if self.github:
        # Determine the ToolShed url, initially we connect with HTTP and if redirect to HTTPS is set up,
        # this will be detected by requests and used further down the line. Also cache this so everything is
        # as fast as possible
        log.info(tool.tool_shed)
        ts_url = self._determine_ts_url(tool)
        log.info("GitLab error reporting - Determined ToolShed is %s", ts_url)

        # Find the repo inside the ToolShed
        ts_repourl = self._get_gitrepo_from_ts(job, ts_url)

        # Determine the GitLab project URL and the issue cache key
        github_projecturl = urlparse.urlparse(ts_repourl).path[1:] if (ts_repourl and not self.git_default_repo_only) \
            else "/".join([self.git_default_repo_owner, self.git_default_repo_name])
        issue_cache_key = self._get_issue_cache_key(job, ts_repourl)

        # Connect to the repo
        if github_projecturl not in self.git_project_cache:
            self.git_project_cache[github_projecturl] = self.github.get_repo('%s' % github_projecturl)
        gh_project = self.git_project_cache[github_projecturl]

        # Make sure we keep a cache of the issues, per tool in this case
        if issue_cache_key not in self.issue_cache:
            self._fill_issue_cache(gh_project, issue_cache_key)

        # Retrieve label
        label = self.get_label('%s/%s' % (unicodify(job.tool_id), unicodify(job.tool_version)), gh_project, issue_cache_key)

        # Generate information for the tool
        error_title = self._generate_error_title(job)

        # Generate the error message
        error_message = self._generate_error_message(dataset, job, kwargs)

        log.info(error_title in self.issue_cache[issue_cache_key])
        if error_title not in self.issue_cache[issue_cache_key]:
            # Create a new issue.
            self._create_issue(issue_cache_key, error_title, error_message, gh_project, label=label)
        else:
            # An issue for this error already exists - just add a comment.
            self._append_issue(issue_cache_key, error_title, error_message)
        return ('Submitted error report to Github. Your issue number is <a href="%s/%s/issues/%s" '
                'target="_blank">#%s</a>.' % (self.github_base_url, github_projecturl,
                                              self.issue_cache[issue_cache_key][error_title].number,
                                              self.issue_cache[issue_cache_key][error_title].number), 'success')
def _upload_dataset(self, trans, library_id, folder_id, replace_dataset=None, **kwd):
    """Run the traditional ``upload1`` tool to add dataset(s) to a library folder.

    Returns either the tool's output (on success) or an ``(http_status, message)``
    tuple on error.
    """
    # Set up the traditional tool state/params
    cntrller = 'api'
    tool_id = 'upload1'
    message = None
    file_type = kwd.get('file_type')
    try:
        upload_common.validate_datatype_extension(datatypes_registry=trans.app.datatypes_registry, ext=file_type)
    except RequestParameterInvalidException as e:
        return (400, util.unicodify(e))
    tool = trans.app.toolbox.get_tool(tool_id)
    state = tool.new_state(trans)
    populate_state(trans, tool.inputs, kwd, state.inputs)
    tool_params = state.inputs
    dataset_upload_inputs = []
    for input in tool.inputs.values():
        if input.type == "upload_dataset":
            dataset_upload_inputs.append(input)
    # Library-specific params
    server_dir = kwd.get('server_dir', '')
    upload_option = kwd.get('upload_option', 'upload_file')
    response_code = 200
    if upload_option == 'upload_directory':
        # NOTE(review): `full_dir`/`import_dir_desc` are only bound in this branch;
        # the `upload_directory` case below relies on taking this branch first.
        full_dir, import_dir_desc = validate_server_directory_upload(trans, server_dir)
        message = 'Select a directory'
    elif upload_option == 'upload_paths':
        # Library API already checked this - following check isn't actually needed.
        validate_path_upload(trans)
    # Some error handling should be added to this method.
    try:
        # FIXME: instead of passing params here ( which have been processed by util.Params(), the original kwd
        # should be passed so that complex objects that may have been included in the initial request remain.
        library_bunch = upload_common.handle_library_params(trans, kwd, folder_id, replace_dataset)
    except Exception:
        response_code = 500
        message = "Unable to parse upload parameters, please report this error."
    # Proceed with (mostly) regular upload processing if we're still errorless
    if response_code == 200:
        if upload_option == 'upload_file':
            tool_params = upload_common.persist_uploads(tool_params, trans)
            uploaded_datasets = upload_common.get_uploaded_datasets(trans, cntrller, tool_params, dataset_upload_inputs, library_bunch=library_bunch)
        elif upload_option == 'upload_directory':
            uploaded_datasets, response_code, message = self._get_server_dir_uploaded_datasets(trans, kwd, full_dir, import_dir_desc, library_bunch, response_code, message)
        elif upload_option == 'upload_paths':
            uploaded_datasets, response_code, message = self._get_path_paste_uploaded_datasets(trans, kwd, library_bunch, response_code, message)
        if upload_option == 'upload_file' and not uploaded_datasets:
            response_code = 400
            message = 'Select a file, enter a URL or enter text'
    if response_code != 200:
        return (response_code, message)
    json_file_path = upload_common.create_paramfile(trans, uploaded_datasets)
    data_list = [ud.data for ud in uploaded_datasets]
    job_params = {}
    job_params['link_data_only'] = json.dumps(kwd.get('link_data_only', 'copy_files'))
    job_params['uuid'] = json.dumps(kwd.get('uuid', None))
    job, output = upload_common.create_job(trans, tool_params, tool, json_file_path, data_list, folder=library_bunch.folder, job_params=job_params)
    trans.sa_session.add(job)
    trans.sa_session.flush()
    return output
def index(self, trans, library_id, **kwd):
    """
    index( self, trans, library_id, **kwd )
    * GET /api/libraries/{library_id}/contents:
        Returns a list of library files and folders.

    .. note:: May be slow! Returns all content traversing recursively through all folders.
    .. seealso:: :class:`galaxy.webapps.galaxy.api.FolderContentsController.index` for a non-recursive solution

    :param  library_id: the encoded id of the library
    :type   library_id: str

    :returns:   list of dictionaries of the form:
        * id:   the encoded id of the library item
        * name: the 'library path'
            or relationship of the library item to the root
        * type: 'file' or 'folder'
        * url:  the url to get detailed information on the library item
    :rtype:     list

    :raises:  MalformedId, InconsistentDatabase, RequestParameterInvalidException, InternalServerError
    """
    rval = []
    current_user_roles = trans.get_current_user_roles()

    def traverse(folder):
        # Recursively collect accessible, non-deleted folders and datasets,
        # annotating each with api_path/api_type for the response.
        admin = trans.user_is_admin
        rval = []
        for subfolder in folder.active_folders:
            if not admin:
                # folder_ids is unused here; only the can_access flag matters.
                can_access, folder_ids = trans.app.security_agent.check_folder_contents(trans.user, current_user_roles, subfolder)
            # `admin or ...` short-circuits, so can_access is only read when it was set.
            if (admin or can_access) and not subfolder.deleted:
                subfolder.api_path = folder.api_path + '/' + subfolder.name
                subfolder.api_type = 'folder'
                rval.append(subfolder)
                rval.extend(traverse(subfolder))
        for ld in folder.datasets:
            if not admin:
                can_access = trans.app.security_agent.can_access_dataset(current_user_roles, ld.library_dataset_dataset_association.dataset)
            if (admin or can_access) and not ld.deleted:
                ld.api_path = folder.api_path + '/' + ld.name
                ld.api_type = 'file'
                rval.append(ld)
        return rval

    try:
        decoded_library_id = self.decode_id(library_id)
    except Exception:
        raise exceptions.MalformedId('Malformed library id ( %s ) specified, unable to decode.' % library_id)
    try:
        library = trans.sa_session.query(trans.app.model.Library).filter(trans.app.model.Library.table.c.id == decoded_library_id).one()
    except MultipleResultsFound:
        raise exceptions.InconsistentDatabase('Multiple libraries found with the same id.')
    except NoResultFound:
        raise exceptions.RequestParameterInvalidException('No library found with the id provided.')
    except Exception as e:
        raise exceptions.InternalServerError('Error loading from the database.' + util.unicodify(e))
    if not (trans.user_is_admin or trans.app.security_agent.can_access_library(current_user_roles, library)):
        # Deliberately indistinguishable from "not found" to avoid leaking existence.
        raise exceptions.RequestParameterInvalidException('No library found with the id provided.')
    encoded_id = 'F' + trans.security.encode_id(library.root_folder.id)
    # appending root folder
    rval.append(dict(id=encoded_id,
                     type='folder',
                     name='/',
                     url=url_for('library_content', library_id=library_id, id=encoded_id)))
    library.root_folder.api_path = ''
    # appending all other items in the library recursively
    for content in traverse(library.root_folder):
        encoded_id = trans.security.encode_id(content.id)
        if content.api_type == 'folder':
            # Folder ids carry an 'F' prefix to distinguish them from dataset ids.
            encoded_id = 'F' + encoded_id
        rval.append(dict(id=encoded_id,
                         type=content.api_type,
                         name=content.api_path,
                         url=url_for('library_content', library_id=library_id, id=encoded_id, )))
    return rval
def _get_drmaa_state_qacct(self, job_id, extinfo):
    '''
    get the job (drmaa) state with qacct.

    extinfo: dict where signal, exit_status, deleted = True, time_wasted, and memory_wasted can be stored:
    - signal signal as reported in exit state from qstat (see below)
    - exit_status set to exit status if returned (ie if qstat returns an exits state larger 0 and less 129
      (for exit states > 128 signal is set) in any case (exit state > 0) state FAILED is returned
    - deleted set to true if the job was deleted (otherwise not set at all),
    - time_wasted time used in seconds (taken from wallclock)
    - memory_wasted memory used by the program in byte (taken from maxvmem)

    return state
    - first initalised with UNDETERMINED and changed in the following case
    - DONE if exit state == 0
    - FAILED if exit state != 0
    - RUNNING if failed in 24,25
    - FAILED if failed not in [0,24,25,100]
    '''
    # log.debug("UnivaJobRunner._get_drmaa_state_qacct ({jobid})".format(jobid=job_id))
    # Map signal number -> signal name (e.g. 9 -> 'SIGKILL'); reversed sort keeps
    # the canonical SIG* name when several names share a number.
    signals = {k: v for v, k in reversed(sorted(signal.__dict__.items()))
               if v.startswith('SIG') and not v.startswith('SIG_')}
    cmd = ['qacct', '-j', job_id]
    slp = 1
    # run qacct -j JOBID (since the accounting data for the job might not be
    # available immediately a simple retry mechanism is implemented ..
    # max wait is approx 1min)
    while True:
        try:
            stdout = commands.execute(cmd).strip()
        except commands.CommandLineException as e:
            if slp <= 32 and f"job id {job_id} not found" in e.stderr:
                # Exponential backoff: 1+2+4+8+16+32 seconds before giving up.
                time.sleep(slp)
                slp *= 2
                continue
            else:
                log.error(unicodify(e))
                return self.drmaa.JobState.UNDETERMINED
        else:
            break
    qacct = dict()
    for line in stdout.split("\n"):
        # remove header
        if line.startswith("=") or line == "":
            continue
        line = line.split()
        qacct[line[0]] = " ".join(line[1:])
    # qacct has three fields of interest: failed, exit_status, deleted_by
    # experiments
    #            failed  exit_status deleted_by
    # BASH ------------------------------------
    # time-limit 100     137
    # mem-limit  0       2
    # python --------------------------------------------------------------
    # time-limit
    # mem-limit  0       1
    # C -------------------------------------------------------------------
    # time-limit
    # mem-limit  0       C programm either have segfault (139) or allocated memory is checked for NULL (then a programmer defined message/exit code is given)
    # note that max_vmem might not be reliable, since the program never gets the memory.
    # C++ -----------------------------------------------------------------
    # time-limit
    # mem-limit  0       same as for C programs
    # JAVA ----------------------------------------------------------------
    # time-limit
    # mem-limit
    # perl ----------------------------------------------------------------
    # time-limit
    # mem-limit
    # bash other tests ----------------------------------------------------
    # qdel       100     137          user@mail
    extinfo["time_wasted"] = _parse_time(qacct["wallclock"])
    extinfo["memory_wasted"] = size_to_bytes(qacct["maxvmem"])
    extinfo["slots"] = int(qacct["slots"])
    # deleted_by
    # If the job (the array task) has been deleted via qdel, "<username>@<hostname>", else
    # "NONE". If qdel was called multiple times, every invocation is recorded in a comma
    # separated list.
    if "deleted_by" in qacct and qacct["deleted_by"] != "NONE":
        log.info(f"DRMAAUniva: job {job_id} was aborted by {qacct['deleted_by']}")
        extinfo["deleted"] = True
        return self.drmaa.JobState.FAILED
    state = self.drmaa.JobState.UNDETERMINED
    # exit_status
    # Exit status of the job script (or Univa Grid Engine specific status in case of certain
    # error conditions). The exit status is determined by following the normal shell conventions
    # If the command terminates normally the value of the command is its exit status.
    # However, in the case that the command exits abnormally, a value of 0200 (octal), 128
    # (decimal) is added to the value of the command to make up the exit status.
    # For example: If a job dies through signal 9 (SIGKILL) then the exit status
    # becomes 128 + 9 = 137.
    if "exit_status" in qacct:
        qacct["exit_status"] = int(qacct["exit_status"])
        if qacct["exit_status"] < 1:
            # NOTE(review): logged at error level even though this is the success path.
            log.error(f"DRMAAUniva: job {job_id} has exit status {qacct['exit_status']}")
            state = self.drmaa.JobState.DONE
        elif 0 < qacct["exit_status"] < 129:
            log.error(f"DRMAAUniva: job {job_id} has exit status {qacct['exit_status']}")
            extinfo['exit_status'] = qacct["exit_status"]
            state = self.drmaa.JobState.FAILED
        else:
            # exit_status > 128 encodes termination by signal (exit_status - 128).
            log.error(f"DRMAAUniva: job {job_id} was killed by signal {qacct['exit_status'] - 128}")
            state = self.drmaa.JobState.FAILED
            extinfo["signal"] = signals[qacct["exit_status"] - 128]
    # failed
    # Indicates the problem which occurred in case a job could not be started on the execution
    # host (e.g. because the owner of the job did not have a valid account on that
    # machine). If Univa Grid Engine tries to start a job multiple times, this may lead to
    # multiple entries in the accounting file corresponding to the same job ID.
    # for the codes see https://docs.oracle.com/cd/E19957-01/820-0699/chp11-2/index.html
    if "failed" in qacct:
        code = int(qacct["failed"].split()[0])
        # this happens in case of no error or exit_code!=0 (0) or a signal (100).
        # both cases are covered already
        if code in [0, 100]:
            pass
        # these seem to be OK as well
        elif code in [24, 25]:
            state = self.drmaa.JobState.RUNNING
        else:
            log.error(f"DRMAAUniva: job {job_id} failed with failure {qacct['failed']}")
            state = self.drmaa.JobState.FAILED
    # log.debug("UnivaJobRunner._get_drmaa_state_qacct ({jobid}) -> {state}".format(jobid=job_id, state=self.drmaa_job_state_strings[state]))
    return state
def delete(self, trans, library_id, id, **kwd):
    """
    delete( self, trans, library_id, id, **kwd )
    * DELETE /api/libraries/{library_id}/contents/{id}
        delete the LibraryDataset with the given ``id``

    :type   id:     str
    :param  id:     the encoded id of the library dataset to delete
    :type   kwd:    dict
    :param  kwd:    (optional) dictionary structure containing:

        * payload:     a dictionary itself containing:
            * purge:   if True, purge the LD

    :rtype:     dict
    :returns:   an error object if an error occurred or a dictionary containing:
        * id:         the encoded id of the library dataset,
        * deleted:    if the library dataset was marked as deleted,
        * purged:     if the library dataset was purged
    """
    # a request body is optional here
    purge = False
    if kwd.get('payload', None):
        purge = util.string_as_bool(kwd['payload'].get('purge', False))

    rval = {'id': id}
    try:
        ld = self.get_library_dataset(trans, id, check_ownership=False, check_accessible=True)
        user_is_admin = trans.user_is_admin
        can_modify = trans.app.security_agent.can_modify_library_item(trans.user.all_roles(), ld)
        log.debug('is_admin: %s, can_modify: %s', user_is_admin, can_modify)
        if not (user_is_admin or can_modify):
            trans.response.status = 403
            rval.update({'error': 'Unauthorized to delete or purge this library dataset'})
            return rval

        ld.deleted = True
        if purge:
            ld.purged = True
            trans.sa_session.add(ld)
            trans.sa_session.flush()

            # TODO: had to change this up a bit from Dataset.user_can_purge
            dataset = ld.library_dataset_dataset_association.dataset
            no_history_assoc = len(dataset.history_associations) == len(dataset.purged_history_associations)
            no_library_assoc = dataset.library_associations == [ld.library_dataset_dataset_association]
            can_purge_dataset = not dataset.purged and no_history_assoc and no_library_assoc

            if can_purge_dataset:
                try:
                    ld.library_dataset_dataset_association.dataset.full_delete()
                    trans.sa_session.add(ld.dataset)
                except Exception:
                    # best effort: purging the underlying dataset may fail without
                    # invalidating the library-dataset purge itself
                    pass
                # flush now to preserve deleted state in case of later interruption
                trans.sa_session.flush()
            rval['purged'] = True
        trans.sa_session.flush()
        rval['deleted'] = True

    except exceptions.httpexceptions.HTTPInternalServerError:
        log.exception('Library_contents API, delete: uncaught HTTPInternalServerError: %s, %s', id, str(kwd))
        raise
    except exceptions.httpexceptions.HTTPException:
        # propagate HTTP errors (e.g. from get_library_dataset) untouched
        raise
    except Exception as exc:
        log.exception('library_contents API, delete: uncaught exception: %s, %s', id, str(kwd))
        trans.response.status = 500
        rval.update({'error': util.unicodify(exc)})
    return rval
def get_value(self, trans, grid, tool_shed_repository): return util.unicodify(tool_shed_repository.description)
def verify_tool(tool_id, galaxy_interactor, resource_parameters=None, register_job_data=None, test_index=0, tool_version=None,
                quiet=False, test_history=None, force_path_paste=False, maxseconds=DEFAULT_TOOL_TEST_WAIT, tool_test_dicts=None):
    """Run a single tool test case against a Galaxy instance and verify its outputs.

    Raises on test failure; always reports structured job data through
    ``register_job_data`` (if supplied) via the ``finally`` block, even when the
    run or the output verification raised.
    """
    if resource_parameters is None:
        resource_parameters = {}
    tool_test_dicts = tool_test_dicts or galaxy_interactor.get_tool_tests(tool_id, tool_version=tool_version)
    tool_test_dict = tool_test_dicts[test_index]
    tool_test_dict.setdefault('maxseconds', maxseconds)
    testdef = ToolTestDescription(tool_test_dict)
    _handle_def_errors(testdef)

    if test_history is None:
        test_history = galaxy_interactor.new_history()

    stage_data_in_history(galaxy_interactor, tool_id, testdef.test_data(), history=test_history, force_path_paste=force_path_paste, maxseconds=maxseconds)

    # Once data is ready, run the tool and check the outputs - record API
    # input, job info, tool run exception, as well as exceptions related to
    # job output checking and register they with the test plugin so it can
    # record structured information.
    tool_inputs = None
    job_stdio = None
    job_output_exceptions = None
    tool_execution_exception = None
    expected_failure_occurred = False
    begin_time = time.time()
    try:
        try:
            tool_response = galaxy_interactor.run_tool(testdef, test_history, resource_parameters=resource_parameters)
            data_list, jobs, tool_inputs = tool_response.outputs, tool_response.jobs, tool_response.inputs
            data_collection_list = tool_response.output_collections
        except RunToolException as e:
            # A failed run is only an error if the test did not expect failure.
            tool_inputs = e.inputs
            tool_execution_exception = e
            if not testdef.expect_failure:
                raise e
            else:
                expected_failure_occurred = True
        except Exception as e:
            tool_execution_exception = e
            raise e

        if not expected_failure_occurred:
            assert data_list or data_collection_list

            try:
                job_stdio = _verify_outputs(testdef, test_history, jobs, tool_id, data_list, data_collection_list, galaxy_interactor, quiet=quiet)
            except JobOutputsError as e:
                # Keep the job's stdio for reporting even though verification failed.
                job_stdio = e.job_stdio
                job_output_exceptions = e.output_exceptions
                raise e
            except Exception as e:
                job_output_exceptions = [e]
                raise e
    finally:
        if register_job_data is not None:
            end_time = time.time()
            job_data = {
                "tool_id": tool_id,
                "tool_version": tool_version,
                "test_index": test_index,
                "time_seconds": end_time - begin_time,
            }
            if tool_inputs is not None:
                job_data["inputs"] = tool_inputs
            if job_stdio is not None:
                job_data["job"] = job_stdio
            status = "success"
            if job_output_exceptions:
                job_data["output_problems"] = [util.unicodify(_) for _ in job_output_exceptions]
                status = "failure"
            if tool_execution_exception:
                # execution problems outrank output problems in the reported status
                job_data["execution_problem"] = util.unicodify(tool_execution_exception)
                status = "error"
            job_data["status"] = status
            register_job_data(job_data)

    # Only reached on success; on failure the history is intentionally left for debugging.
    galaxy_interactor.delete_history(test_history)
def get_html(self, prefix="", disabled=False): return unicodify('<textarea name="%s%s" rows="%d" cols="%d"%s>%s</textarea>' % (prefix, self.name, self.rows, self.cols, self.get_disabled_str(disabled), escape(str(self.value), quote=True)))
def _verify_outputs(testdef, history, jobs, tool_id, data_list, data_collection_list, galaxy_interactor, quiet=False):
    """Verify a finished tool test job: status, exit code, outputs, and output collections.

    Accumulates all verification failures in ``found_exceptions`` and raises a single
    ``JobOutputsError`` at the end so every problem is reported, not just the first.
    Returns the job's stdio dict on success.
    """
    assert len(jobs) == 1, "Test framework logic error, somehow tool test resulted in more than one job."
    job = jobs[0]

    maxseconds = testdef.maxseconds
    if testdef.num_outputs is not None:
        expected = testdef.num_outputs
        actual = len(data_list) + len(data_collection_list)
        if expected != actual:
            message_template = "Incorrect number of outputs - expected %d, found %s."
            message = message_template % (expected, actual)
            raise Exception(message)
    found_exceptions = []

    def register_exception(e):
        # Collect the failure; dump the job's stdout/stderr on the first one.
        if not found_exceptions and not quiet:
            # Only print this stuff out once.
            for stream in ['stdout', 'stderr']:
                if stream in job_stdio:
                    print(_format_stream(job_stdio[stream], stream=stream, format=True), file=sys.stderr)
        found_exceptions.append(e)

    if testdef.expect_failure:
        if testdef.outputs:
            raise Exception("Cannot specify outputs in a test expecting failure.")

    # Wait for the job to complete and register expections if the final
    # status was not what test was expecting.
    job_failed = False
    try:
        galaxy_interactor.wait_for_job(job['id'], history, maxseconds)
    except Exception as e:
        job_failed = True
        if not testdef.expect_failure:
            found_exceptions.append(e)

    job_stdio = galaxy_interactor.get_job_stdio(job['id'])

    if not job_failed and testdef.expect_failure:
        error = AssertionError("Expected job to fail but Galaxy indicated the job successfully completed.")
        register_exception(error)

    expect_exit_code = testdef.expect_exit_code
    if expect_exit_code is not None:
        exit_code = job_stdio["exit_code"]
        if str(expect_exit_code) != str(exit_code):
            error = AssertionError("Expected job to complete with exit code %s, found %s" % (expect_exit_code, exit_code))
            register_exception(error)

    for output_index, output_dict in enumerate(testdef.outputs):
        # Get the correct hid
        name = output_dict["name"]
        outfile = output_dict["value"]
        attributes = output_dict["attributes"]
        output_testdef = Bunch(name=name, outfile=outfile, attributes=attributes)
        try:
            output_data = data_list[name]
        except (TypeError, KeyError):
            # Legacy - fall back on ordered data list access if data_list is
            # just a list (case with twill variant or if output changes its
            # name).
            if hasattr(data_list, "values"):
                output_data = list(data_list.values())[output_index]
            else:
                output_data = data_list[len(data_list) - len(testdef.outputs) + output_index]
        assert output_data is not None
        try:
            galaxy_interactor.verify_output(history, jobs, output_data, output_testdef=output_testdef, tool_id=tool_id, maxseconds=maxseconds)
        except Exception as e:
            register_exception(e)

    other_checks = {
        "command_line": "Command produced by the job",
        "stdout": "Standard output of the job",
        "stderr": "Standard error of the job",
    }
    # TODO: Only hack the stdio like this for older profile, for newer tool profiles
    # add some syntax for asserting job messages maybe - or just drop this because exit
    # code and regex on stdio can be tested directly - so this is really testing Galaxy
    # core handling more than the tool.
    job_messages = job_stdio.get("job_messages") or []
    stdout_prefix = ""
    stderr_prefix = ""
    for job_message in job_messages:
        message_type = job_message.get("type")
        if message_type == "regex" and job_message.get("stream") == "stderr":
            stderr_prefix += (job_message.get("desc") or '') + "\n"
        elif message_type == "regex" and job_message.get("stream") == "stdout":
            stdout_prefix += (job_message.get("desc") or '') + "\n"
        elif message_type == "exit_code":
            stderr_prefix += (job_message.get("desc") or '') + "\n"
        else:
            raise Exception("Unknown job message type [%s] in [%s]" % (message_type, job_message))

    for what, description in other_checks.items():
        if getattr(testdef, what, None) is not None:
            try:
                raw_data = job_stdio[what]
                assertions = getattr(testdef, what)
                # Prepend extracted job messages so assertions can match them too.
                if what == "stdout":
                    data = stdout_prefix + raw_data
                elif what == "stderr":
                    data = stderr_prefix + raw_data
                else:
                    data = raw_data
                verify_assertions(data, assertions)
            except AssertionError as err:
                errmsg = '%s different than expected\n' % description
                errmsg += util.unicodify(err)
                register_exception(AssertionError(errmsg))

    for output_collection_def in testdef.output_collections:
        try:
            name = output_collection_def.name
            # TODO: data_collection_list is clearly a bad name for dictionary.
            if name not in data_collection_list:
                template = "Failed to find output [%s], tool outputs include [%s]"
                message = template % (name, ",".join(data_collection_list.keys()))
                raise AssertionError(message)

            # Data collection returned from submission, elements may have been populated after
            # the job completed so re-hit the API for more information.
            data_collection_returned = data_collection_list[name]
            data_collection = galaxy_interactor._get("dataset_collections/%s" % data_collection_returned["id"], data={"instance_type": "history"}).json()

            def get_element(elements, id):
                # Linear search by element_identifier; False signals "not found".
                for element in elements:
                    if element["element_identifier"] == id:
                        return element
                return False

            expected_collection_type = output_collection_def.collection_type
            if expected_collection_type:
                collection_type = data_collection["collection_type"]
                if expected_collection_type != collection_type:
                    template = "Expected output collection [%s] to be of type [%s], was of type [%s]."
                    message = template % (name, expected_collection_type, collection_type)
                    raise AssertionError(message)

            expected_element_count = output_collection_def.count
            if expected_element_count:
                actual_element_count = len(data_collection["elements"])
                if expected_element_count != actual_element_count:
                    template = "Expected output collection [%s] to have %s elements, but it had %s."
                    message = template % (name, expected_element_count, actual_element_count)
                    raise AssertionError(message)

            def verify_elements(element_objects, element_tests):
                # Recursively verify datasets and nested collections inside the collection.
                for element_identifier, (element_outfile, element_attrib) in element_tests.items():
                    element = get_element(element_objects, element_identifier)
                    if not element:
                        template = "Failed to find identifier [%s] for testing, tool generated collection elements [%s]"
                        message = template % (element_identifier, element_objects)
                        raise AssertionError(message)

                    element_type = element["element_type"]
                    if element_type != "dataset_collection":
                        hda = element["object"]
                        galaxy_interactor.verify_output_dataset(
                            history,
                            hda_id=hda["id"],
                            outfile=element_outfile,
                            attributes=element_attrib,
                            tool_id=tool_id
                        )
                    if element_type == "dataset_collection":
                        elements = element["object"]["elements"]
                        verify_elements(elements, element_attrib.get("elements", {}))

            verify_elements(data_collection["elements"], output_collection_def.element_tests)
        except Exception as e:
            register_exception(e)

    if found_exceptions:
        raise JobOutputsError(found_exceptions, job_stdio)
    else:
        return job_stdio
def wrapper(*args, **kwargs): try: return func(*args, **kwargs) except chronos_exceptions as e: LOGGER.error(unicodify(e))
def add_file(dataset, registry, output_path):
    """Process one uploaded dataset: fetch/validate/convert it and place it at ``output_path``.

    Returns a job-info dict describing the resulting dataset; raises
    ``UploadProblemException`` on any user-visible upload error.
    """
    ext = None
    # NOTE(review): compression_type is never reassigned after this, so the
    # "Strip compression extension" branch below is currently unreachable.
    compression_type = None
    line_count = None
    link_data_only_str = dataset.get('link_data_only', 'copy_files')
    if link_data_only_str not in ['link_to_files', 'copy_files']:
        raise UploadProblemException("Invalid setting '%s' for option link_data_only - upload request misconfigured" % link_data_only_str)
    link_data_only = link_data_only_str == 'link_to_files'

    # run_as_real_user is estimated from galaxy config (external chmod indicated of inputs executed)
    # If this is True we always purge supplied upload inputs so they are cleaned up and we reuse their
    # paths during data conversions since this user already owns that path.
    # Older in_place check for upload jobs created before 18.01, TODO remove in 19.XX. xref #5206
    run_as_real_user = dataset.get('run_as_real_user', False) or dataset.get("in_place", False)

    # purge_source defaults to True unless this is an FTP import and
    # ftp_upload_purge has been overridden to False in Galaxy's config.
    # We set purge_source to False if:
    # - the job does not have write access to the file, e.g. when running as the
    #   real user
    # - the files are uploaded from external paths.
    purge_source = dataset.get('purge_source', True) and not run_as_real_user and dataset.type not in ('server_dir', 'path_paste')

    # in_place is True unless we are running as a real user or importing external paths (i.e.
    # this is a real upload and not a path paste or ftp import).
    # in_place should always be False if running as real user because the uploaded file will
    # be owned by Galaxy and not the user and it should be False for external paths so Galaxy doesn't
    # modify files not controlled by Galaxy.
    in_place = not run_as_real_user and dataset.type not in ('server_dir', 'path_paste', 'ftp_import')

    # Base on the check_upload_content Galaxy config option and on by default, this enables some
    # security related checks on the uploaded content, but can prevent uploads from working in some cases.
    check_content = dataset.get('check_content', True)

    # auto_decompress is a request flag that can be swapped off to prevent Galaxy from automatically
    # decompressing archive files before sniffing.
    auto_decompress = dataset.get('auto_decompress', True)
    try:
        dataset.file_type
    except AttributeError:
        raise UploadProblemException('Unable to process uploaded file, missing file_type parameter.')

    if dataset.type == 'url':
        try:
            dataset.path = sniff.stream_url_to_file(dataset.path, file_sources=get_file_sources())
        except Exception as e:
            raise UploadProblemException('Unable to fetch %s\n%s' % (dataset.path, unicodify(e)))

    # See if we have an empty file
    if not os.path.exists(dataset.path):
        raise UploadProblemException('Uploaded temporary file (%s) does not exist.' % dataset.path)

    stdout, ext, datatype, is_binary, converted_path = handle_upload(
        registry=registry,
        path=dataset.path,
        requested_ext=dataset.file_type,
        name=dataset.name,
        tmp_prefix='data_id_%s_upload_' % dataset.dataset_id,
        tmp_dir=output_adjacent_tmpdir(output_path),
        check_content=check_content,
        link_data_only=link_data_only,
        in_place=in_place,
        auto_decompress=auto_decompress,
        convert_to_posix_lines=dataset.to_posix_lines,
        convert_spaces_to_tabs=dataset.space_to_tab,
    )

    # Strip compression extension from name
    if compression_type and not getattr(datatype, 'compressed', False) and dataset.name.endswith('.' + compression_type):
        dataset.name = dataset.name[:-len('.' + compression_type)]

    # Move dataset
    if link_data_only:
        # Never alter a file that will not be copied to Galaxy's local file store.
        if datatype.dataset_content_needs_grooming(dataset.path):
            err_msg = 'The uploaded files need grooming, so change your <b>Copy data into Galaxy?</b> selection to be ' + \
                '<b>Copy files into Galaxy</b> instead of <b>Link to files without copying into Galaxy</b> so grooming can be performed.'
            raise UploadProblemException(err_msg)
    if not link_data_only:
        # Move the dataset to its "real" path. converted_path is a tempfile so we
        # move it even if purge_source is False.
        if purge_source or converted_path:
            try:
                # If user has indicated that the original file to be purged and have converted_path tempfile
                if purge_source and converted_path:
                    shutil.move(converted_path, output_path)
                    os.remove(dataset.path)
                else:
                    shutil.move(converted_path or dataset.path, output_path)
            except OSError as e:
                # We may not have permission to remove the input
                if e.errno != errno.EACCES:
                    raise
        else:
            shutil.copy(dataset.path, output_path)

    # Write the job info
    stdout = stdout or 'uploaded %s file' % ext
    info = dict(type='dataset',
                dataset_id=dataset.dataset_id,
                ext=ext,
                stdout=stdout,
                name=dataset.name,
                line_count=line_count)
    if dataset.get('uuid', None) is not None:
        info['uuid'] = dataset.get('uuid')
    # FIXME: does this belong here? also not output-adjacent-tmpdir aware =/
    if not link_data_only and datatype and datatype.dataset_content_needs_grooming(output_path):
        # Groom the dataset content if necessary
        datatype.groom_dataset_content(output_path)
    return info
def get_html(self, prefix=""):
    """Render this hidden form field as an HTML ``<input type="hidden">`` tag.

    The field value is converted to a string and HTML-escaped (including
    quotes) before being embedded in the markup.
    """
    escaped_value = escape(str(self.value), quote=True)
    return unicodify('<input type="hidden" name="%s%s" value="%s">' % (prefix, self.name, escaped_value))
def get_file_peek(file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5, skipchars=None, line_wrap=True):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH

    >>> fname = get_test_fname('4.bed')
    >>> get_file_peek(fname, LINE_COUNT=1)
    u'chr22\\t30128507\\t31828507\\tuc003bnx.1_cds_2_0_chr22_29227_f\\t0\\t+\\n'

    :param file_name: path of the (possibly compressed) file to peek into.
    :param is_multi_byte: if True, skip the high-byte binary sniffing below.
    :param WIDTH: max characters read per line, or 'unlimited' for whole lines.
    :param LINE_COUNT: number of (non-skipped) lines to collect.
    :param skipchars: list of line prefixes to exclude from the peek.
    :param line_wrap: if False, strip newlines and discard the remainder of
        over-long lines instead of wrapping them.
    """
    # Set size for file.readline() to a negative number to force it to
    # read until either a newline or EOF. Needed for datasets with very
    # long lines.
    if WIDTH == 'unlimited':
        WIDTH = -1
    if skipchars is None:
        skipchars = []
    lines = []
    count = 0
    file_type = None
    data_checked = False
    temp = compression_utils.get_fileobj(file_name, "U")
    try:
        while count < LINE_COUNT:
            line = temp.readline(WIDTH)
            if not line:
                # EOF reached: stop here.  (Previously the loop kept running and
                # appended empty strings until LINE_COUNT, padding the peek of
                # short files with spurious blank lines.)
                break
            if not is_multi_byte and not data_checked:
                # See if we have a compressed or binary file
                for char in line:
                    if ord(char) > 128:
                        file_type = 'binary'
                        break
                data_checked = True
            if file_type == 'binary':
                break
            if not line_wrap:
                if line.endswith('\n'):
                    line = line[:-1]
                else:
                    # Line was truncated at WIDTH; discard the rest of it so the
                    # next readline() starts on a fresh line.
                    while True:
                        i = temp.read(1)
                        if not i or i == '\n':
                            break
            # Drop lines starting with any of the configured skip prefixes
            # (e.g. comment markers); they don't count toward LINE_COUNT.
            skip_line = False
            for skipchar in skipchars:
                if line.startswith(skipchar):
                    skip_line = True
                    break
            if not skip_line:
                lines.append(line)
                count += 1
    finally:
        temp.close()
    if file_type == 'binary':
        text = "%s file" % file_type
    else:
        try:
            # NOTE(review): with line_wrap the collected lines keep their
            # trailing '\n', so this join doubles newlines between lines —
            # preserved as existing behavior.
            text = util.unicodify('\n'.join(lines))
        except UnicodeDecodeError:
            text = "binary/unknown file"
    return text
def upload(self, trans, **kwd):
    """
    Upload content into a tool shed repository and commit the change.

    Three upload sources are supported (read from **kwd): a posted file
    (``file_data``), an http(s) ``url`` to download, or an ``hg…`` url that
    is fetched with a mercurial clone.  On a successful commit the browser
    is redirected to the repository browse page; otherwise the upload form
    is re-rendered with an error/warning message.
    """
    message = escape(kwd.get('message', ''))
    status = kwd.get('status', 'done')
    commit_message = escape(kwd.get('commit_message', 'Uploaded'))
    repository_id = kwd.get('repository_id', '')
    repository = repository_util.get_repository_in_tool_shed(trans.app, repository_id)
    repo_dir = repository.repo_path(trans.app)
    uncompress_file = util.string_as_bool(kwd.get('uncompress_file', 'true'))
    remove_repo_files_not_in_tar = util.string_as_bool(kwd.get('remove_repo_files_not_in_tar', 'true'))
    uploaded_file = None
    upload_point = commit_util.get_upload_point(repository, **kwd)
    tip = repository.tip()
    file_data = kwd.get('file_data', '')
    url = kwd.get('url', '')
    # Part of the upload process is sending email notification to those that have registered to
    # receive them. One scenario occurs when the first change set is produced for the repository.
    # See the suc.handle_email_alerts() method for the definition of the scenarios.
    new_repo_alert = repository.is_new()
    uploaded_directory = None
    if kwd.get('upload_button', False):
        if file_data == '' and url == '':
            message = 'No files were entered on the upload form.'
            status = 'error'
            uploaded_file = None
        elif url and url.startswith('hg'):
            # Use mercurial clone to fetch repository, contents will then be copied over.
            uploaded_directory = tempfile.mkdtemp()
            # Map the 'hg…' pseudo-scheme onto 'http…' (hg:// -> http://, hgs:// -> https://).
            repo_url = 'http%s' % url[len('hg'):]
            cloned_ok, error_message = hg_util.clone_repository(repo_url, uploaded_directory)
            if not cloned_ok:
                message = 'Error uploading via mercurial clone: %s' % error_message
                status = 'error'
                basic_util.remove_dir(uploaded_directory)
                uploaded_directory = None
        elif url:
            # Stream the file at the given url into a temporary file.
            valid_url = True
            try:
                stream = requests.get(url, stream=True)
            except Exception as e:
                valid_url = False
                message = 'Error uploading file via http: %s' % util.unicodify(e)
                status = 'error'
                uploaded_file = None
            if valid_url:
                fd, uploaded_file_name = tempfile.mkstemp()
                uploaded_file = open(uploaded_file_name, 'wb')
                for chunk in stream.iter_content(chunk_size=util.CHUNK_SIZE):
                    if chunk:
                        uploaded_file.write(chunk)
                uploaded_file.flush()
                uploaded_file_filename = url.split('/')[-1]
                isempty = os.path.getsize(os.path.abspath(uploaded_file_name)) == 0
        elif file_data not in ('', None):
            # A file was posted through the upload form.
            uploaded_file = file_data.file
            uploaded_file_name = uploaded_file.name
            uploaded_file_filename = os.path.split(file_data.filename)[-1]
            isempty = os.path.getsize(os.path.abspath(uploaded_file_name)) == 0
        if uploaded_file or uploaded_directory:
            rdah = attribute_handlers.RepositoryDependencyAttributeHandler(trans.app, unpopulate=False)
            tdah = attribute_handlers.ToolDependencyAttributeHandler(trans.app, unpopulate=False)
            stdtm = ShedToolDataTableManager(trans.app)
            ok = True
            isgzip = False
            isbz2 = False
            if uploaded_file:
                if uncompress_file:
                    isgzip = checkers.is_gzip(uploaded_file_name)
                    if not isgzip:
                        isbz2 = checkers.is_bz2(uploaded_file_name)
                if isempty:
                    tar = None
                    istar = False
                else:
                    # Determine what we have - a single file or an archive
                    try:
                        if (isgzip or isbz2) and uncompress_file:
                            # Open for reading with transparent compression.
                            tar = tarfile.open(uploaded_file_name, 'r:*')
                        else:
                            tar = tarfile.open(uploaded_file_name)
                        istar = True
                    except tarfile.ReadError:
                        tar = None
                        istar = False
            else:
                # Uploaded directory
                istar = False
            if istar:
                # Archive upload: expand the tar into the repository.
                ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
                    repository_content_util.upload_tar(
                        trans,
                        rdah,
                        tdah,
                        repository,
                        tar,
                        uploaded_file,
                        upload_point,
                        remove_repo_files_not_in_tar,
                        commit_message,
                        new_repo_alert
                    )
            elif uploaded_directory:
                # Mercurial-clone upload: copy the cloned directory's contents over.
                ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
                    self.upload_directory(trans,
                                          rdah,
                                          tdah,
                                          repository,
                                          uploaded_directory,
                                          upload_point,
                                          remove_repo_files_not_in_tar,
                                          commit_message,
                                          new_repo_alert)
            else:
                # Single-file upload.
                if (isgzip or isbz2) and uncompress_file:
                    uploaded_file_filename = commit_util.uncompress(repository,
                                                                    uploaded_file_name,
                                                                    uploaded_file_filename,
                                                                    isgzip=isgzip,
                                                                    isbz2=isbz2)
                # Restricted repository types may only contain their single definition file.
                if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and \
                        uploaded_file_filename != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
                    ok = False
                    message = 'Repositories of type <b>Repository suite definition</b> can only contain a single file named '
                    message += '<b>repository_dependencies.xml</b>.'
                elif repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and \
                        uploaded_file_filename != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
                    ok = False
                    message = 'Repositories of type <b>Tool dependency definition</b> can only contain a single file named '
                    message += '<b>tool_dependencies.xml</b>.'
                if ok:
                    if upload_point is not None:
                        full_path = os.path.abspath(os.path.join(repo_dir, upload_point, uploaded_file_filename))
                    else:
                        full_path = os.path.abspath(os.path.join(repo_dir, uploaded_file_filename))
                    # Move some version of the uploaded file to the load_point within the repository hierarchy.
                    if uploaded_file_filename in [rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME]:
                        # Inspect the contents of the file to see if toolshed or changeset_revision attributes
                        # are missing and if so, set them appropriately.
                        altered, root_elem, error_message = rdah.handle_tag_attributes(uploaded_file_name)
                        if error_message:
                            ok = False
                            message = error_message
                            status = 'error'
                        elif altered:
                            tmp_filename = xml_util.create_and_write_tmp_file(root_elem)
                            shutil.move(tmp_filename, full_path)
                        else:
                            shutil.move(uploaded_file_name, full_path)
                    elif uploaded_file_filename in [rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME]:
                        # Inspect the contents of the file to see if changeset_revision values are
                        # missing and if so, set them appropriately.
                        altered, root_elem, error_message = tdah.handle_tag_attributes(uploaded_file_name)
                        if error_message:
                            ok = False
                            message = error_message
                            status = 'error'
                        if ok:
                            if altered:
                                tmp_filename = xml_util.create_and_write_tmp_file(root_elem)
                                shutil.move(tmp_filename, full_path)
                            else:
                                shutil.move(uploaded_file_name, full_path)
                    else:
                        shutil.move(uploaded_file_name, full_path)
                    if ok:
                        # See if any admin users have chosen to receive email alerts when a repository is updated.
                        # If so, check every uploaded file to ensure content is appropriate.
                        check_contents = commit_util.check_file_contents_for_email_alerts(trans.app)
                        if check_contents and os.path.isfile(full_path):
                            content_alert_str = commit_util.check_file_content_for_html_and_images(full_path)
                        else:
                            content_alert_str = ''
                        hg_util.add_changeset(repo_dir, full_path)
                        hg_util.commit_changeset(repo_dir,
                                                 full_path_to_changeset=full_path,
                                                 username=trans.user.username,
                                                 message=commit_message)
                        if full_path.endswith('tool_data_table_conf.xml.sample'):
                            # Handle the special case where a tool_data_table_conf.xml.sample file is being uploaded
                            # by parsing the file and adding new entries to the in-memory trans.app.tool_data_tables
                            # dictionary.
                            error, error_message = stdtm.handle_sample_tool_data_table_conf_file(full_path, persist=False)
                            if error:
                                message = '%s<br/>%s' % (message, error_message)
                        # See if the content of the change set was valid.
                        admin_only = len(repository.downloadable_revisions) != 1
                        suc.handle_email_alerts(trans.app,
                                                trans.request.host,
                                                repository,
                                                content_alert_str=content_alert_str,
                                                new_repo_alert=new_repo_alert,
                                                admin_only=admin_only)
            if ok:
                # Update the repository files for browsing.
                hg_util.update_repository(repo_dir)
                # Get the new repository tip.
                if tip == repository.tip():
                    message = 'No changes to repository. '
                    status = 'warning'
                else:
                    # Build the success message describing what was uploaded and cleaned up.
                    if (isgzip or isbz2) and uncompress_file:
                        uncompress_str = ' uncompressed and '
                    else:
                        uncompress_str = ' '
                    if uploaded_directory:
                        source_type = "repository"
                        source = url
                    else:
                        source_type = "file"
                        source = uploaded_file_filename
                    message = "The %s <b>%s</b> has been successfully%suploaded to the repository. " % \
                        (source_type, escape(source), uncompress_str)
                    if istar and (undesirable_dirs_removed or undesirable_files_removed):
                        items_removed = undesirable_dirs_removed + undesirable_files_removed
                        message += " %d undesirable items (.hg .svn .git directories, .DS_Store, hgrc files, etc) " % items_removed
                        message += "were removed from the archive. "
                    if istar and remove_repo_files_not_in_tar and files_to_remove:
                        if upload_point is not None:
                            message += " %d files were removed from the repository relative to the selected upload point '%s'. " % \
                                (len(files_to_remove), upload_point)
                        else:
                            message += " %d files were removed from the repository root. " % len(files_to_remove)
                    # Regenerate repository metadata now that the tip has changed.
                    rmm = repository_metadata_manager.RepositoryMetadataManager(app=trans.app,
                                                                                user=trans.user,
                                                                                repository=repository)
                    status, error_message = \
                        rmm.set_repository_metadata_due_to_new_tip(trans.request.host,
                                                                   content_alert_str=content_alert_str,
                                                                   **kwd)
                    if error_message:
                        message = error_message
                    kwd['message'] = message
                if repository.metadata_revisions:
                    # A repository's metadata revisions are order descending by update_time, so the zeroth revision
                    # will be the tip just after an upload.
                    metadata_dict = repository.metadata_revisions[0].metadata
                else:
                    metadata_dict = {}
                dd = dependency_display.DependencyDisplayer(trans.app)
                if str(repository.type) not in [rt_util.REPOSITORY_SUITE_DEFINITION,
                                                rt_util.TOOL_DEPENDENCY_DEFINITION]:
                    change_repository_type_message = rt_util.generate_message_for_repository_type_change(trans.app,
                                                                                                         repository)
                    if change_repository_type_message:
                        message += change_repository_type_message
                        status = 'warning'
                    else:
                        # Provide a warning message if a tool_dependencies.xml file is provided, but tool dependencies
                        # weren't loaded due to a requirement tag mismatch or some other problem. Tool dependency
                        # definitions can define orphan tool dependencies (no relationship to any tools contained in the
                        # repository), so warning messages are important because orphans are always valid. The repository
                        # owner must be warned in case they did not intend to define an orphan dependency, but simply
                        # provided incorrect information (tool shed, name owner, changeset_revision) for the definition.
                        orphan_message = dd.generate_message_for_orphan_tool_dependencies(repository, metadata_dict)
                        if orphan_message:
                            message += orphan_message
                            status = 'warning'
                # Handle messaging for invalid tool dependencies.
                invalid_tool_dependencies_message = dd.generate_message_for_invalid_tool_dependencies(metadata_dict)
                if invalid_tool_dependencies_message:
                    message += invalid_tool_dependencies_message
                    status = 'error'
                # Handle messaging for invalid repository dependencies.
                invalid_repository_dependencies_message = \
                    dd.generate_message_for_invalid_repository_dependencies(metadata_dict,
                                                                            error_from_tuple=True)
                if invalid_repository_dependencies_message:
                    message += invalid_repository_dependencies_message
                    status = 'error'
                # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file.
                stdtm.reset_tool_data_tables()
                if uploaded_directory:
                    basic_util.remove_dir(uploaded_directory)
                trans.response.send_redirect(web.url_for(controller='repository',
                                                         action='browse_repository',
                                                         id=repository_id,
                                                         commit_message='Deleted selected files',
                                                         message=message,
                                                         status=status))
            else:
                if uploaded_directory:
                    basic_util.remove_dir(uploaded_directory)
                status = 'error'
                # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file.
                stdtm.reset_tool_data_tables()
    return trans.fill_template('/webapps/tool_shed/repository/upload.mako',
                               repository=repository,
                               changeset_revision=tip,
                               url=url,
                               commit_message=commit_message,
                               uncompress_file=uncompress_file,
                               remove_repo_files_not_in_tar=remove_repo_files_not_in_tar,
                               message=message,
                               status=status)
def display_name(self, dataset):
    """Returns formatted html of dataset name"""
    try:
        display = escape(unicodify(dataset.name, 'utf-8'))
    except Exception:
        # Best effort: fall back to a placeholder rather than erroring out.
        display = "name unavailable"
    return display
def get_html(self, prefix="", disabled=False):
    """Render this password form field as an HTML ``<input type="password">`` tag.

    The current value is HTML-escaped (including quotes); the disabled
    attribute is appended when requested.
    """
    safe_value = escape(str(self.value), quote=True)
    markup = '<input type="password" name="%s%s" size="%d" value="%s"%s>' % (
        prefix, self.name, self.size, safe_value, self.get_disabled_str(disabled))
    return unicodify(markup)
def validate_url(url, ip_whitelist):
    """
    Guard against server-side request forgery: resolve ``url``'s host and
    raise if any resolved address is a private IP not covered by the
    whitelist.

    :param url: the URL to check; returned unchanged when it does not look
        like an http(s) URL, or when every resolved address is permitted.
    :param ip_whitelist: iterable of ``ipaddress`` address and/or network
        objects (anything exposing ``subnets`` is treated as a network).
    :raises Exception: if the URL resolves to a non-whitelisted private IP.
    :raises ValueError: if a port component fails to parse as an int
        (deliberate fail-closed behavior).
    """
    # If it doesn't look like a URL, ignore it.
    if not (url.lstrip().startswith('http://') or url.lstrip().startswith('https://')):
        return url
    # Extract hostname component
    parsed_url = urlparse(url).netloc
    # If credentials are in this URL, we need to strip those.
    if parsed_url.count('@') > 0:
        # credentials.
        parsed_url = parsed_url[parsed_url.rindex('@') + 1:]
    # Percent encoded colons and other characters will not be resolved as such
    # so we don't have to either.
    # Sometimes the netloc will contain the port which is not desired, so we
    # need to extract that.
    port = None
    # However, it could ALSO be an IPv6 address they've supplied.
    if ':' in parsed_url:
        # IPv6 addresses have colons in them already (it seems like always more than two)
        if parsed_url.count(':') >= 2:
            # Since IPv6 already use colons extensively, they wrap it in
            # brackets when there is a port, e.g. http://[2001:db8:1f70::999:de8:7648:6e8]:100/
            # However if it ends with a ']' then there is no port after it and
            # they've wrapped it in brackets just for fun.
            if ']' in parsed_url and not parsed_url.endswith(']'):
                # If this +1 throws a range error, we don't care, their url
                # shouldn't end with a colon.
                idx = parsed_url.rindex(':')
                # We parse as an int and let this fail ungracefully if parsing
                # fails because we desire to fail closed rather than open.
                port = int(parsed_url[idx + 1:])
                parsed_url = parsed_url[:idx]
            else:
                # Plain ipv6 without port
                pass
        else:
            # This should finally be ipv4 with port. It cannot be IPv6 as that
            # was caught by earlier cases, and it cannot be due to credentials.
            idx = parsed_url.rindex(':')
            port = int(parsed_url[idx + 1:])
            parsed_url = parsed_url[:idx]
    # safe to log out, no credentials/request path, just an IP + port
    log.debug("parsed url, port: %s : %s", parsed_url, port)
    # Call getaddrinfo to resolve hostname into tuples containing IPs.
    addrinfo = socket.getaddrinfo(parsed_url, port)
    # Get the IP addresses that this entry resolves to (uniquely)
    # We drop:
    #   AF_* family: It will resolve to AF_INET or AF_INET6, getaddrinfo(3) doesn't even mention AF_UNIX,
    #   socktype: We don't care if a stream/dgram/raw protocol
    #   protocol: we don't care if it is tcp or udp.
    addrinfo_results = {info[4][0] for info in addrinfo}
    # There may be multiple (e.g. IPv4 + IPv6 or DNS round robin). Any one of these
    # could resolve to a local addresses (and could be returned by chance),
    # therefore we must check them all.
    for raw_ip in addrinfo_results:
        # Convert to an IP object so we can tell if it is in private space.
        ip = ipaddress.ip_address(unicodify(raw_ip))
        # If this is a private address
        if ip.is_private:
            # It is permitted only if at least one whitelist entry covers it:
            # a network entry (has `subnets`) matches by containment, a plain
            # address by equality.  Note that a single whitelisted IP does not
            # short-circuit the outer loop — every address the DNS entry
            # resolves to must be safe.
            whitelisted_ok = any(
                (ip in whitelisted) if hasattr(whitelisted, 'subnets') else (ip == whitelisted)
                for whitelisted in ip_whitelist
            )
            if not whitelisted_ok:
                # Fixed grammar of the user-facing message ("in not" -> "is not").
                raise Exception(
                    "Access to this address is not permitted by server configuration"
                )
    return url