def pbkdf2_bin(data, salt, iterations=COST_FACTOR, keylen=KEY_LENGTH, hashfunc=HASH_FUNCTION):
    """Returns a binary digest for the PBKDF2 hash algorithm of `data`
    with the given `salt`. It iterates `iterations` time and produces a
    key of `keylen` bytes. By default SHA-256 is used as hash function,
    a different hashlib `hashfunc` can be provided.
    """
    if hasattr(hashlib, 'pbkdf2_hmac'):
        # Use hashlib.pbkdf2_hmac, new in python 2.7.8
        data = smart_str(data)
        salt = smart_str(salt)
        # Here `hashfunc` is the digest *name* string expected by pbkdf2_hmac.
        return hashlib.pbkdf2_hmac(hashfunc, data, salt, iterations, keylen)
    else:
        # Use the old implementation of hashing as a fallback
        # (pure-Python PBKDF2; resolve the digest name to a hashlib callable).
        hashfunc = getattr(hashlib, hashfunc, None)
        mac = hmac.new(smart_str(data), None, hashfunc)

        def _pseudorandom(x, mac=mac):
            # One HMAC round: return the raw digest (fed into the next round)
            # plus a per-byte int list used for the XOR folding below.
            h = mac.copy()
            h.update(x)
            digest = h.digest()
            # NOTE(review): ord() over the digest only works on Python 2 str
            # digests; this fallback branch appears py2-only — confirm.
            return digest, [ord(_) for _ in digest]
        buf = []
        salt = smart_str(salt)
        # -(-keylen // digest_size) is ceiling division: number of blocks needed.
        for block in range(1, -(-keylen // mac.digest_size) + 1):
            digest, rv = _pseudorandom(salt + _pack_int(block))
            for _ in range(iterations - 1):
                digest, u = _pseudorandom(digest)
                rv = starmap(xor, zip(rv, u))
            buf.extend(rv)
        return bytes(bytearray(buf))[:keylen]
def get_fingerprint(self):
    """Return a SHA-1 hex digest over the sorted (filename, size) map."""
    digest = hashlib.sha1()
    size_map = self.get_filesize_map(True)
    # Sort keys so the fingerprint is stable regardless of dict ordering.
    for name in sorted(size_map.keys()):
        digest.update(util.smart_str(name))
        digest.update(util.smart_str(size_map[name]))
    return digest.hexdigest()
def pbkdf2_bin(data, salt, iterations=1000, keylen=24, hashfunc=None):
    """Returns a binary digest for the PBKDF2 hash algorithm of `data`
    with the given `salt`. It iterates `iterations` time and produces a
    key of `keylen` bytes. By default SHA-1 is used as hash function, a
    different hashlib `hashfunc` can be provided.
    """
    hashfunc = hashfunc or hashlib.sha1
    mac = hmac.new(smart_str(data), None, hashfunc)

    def _pseudorandom(x, mac=mac):
        # One HMAC application; returns the raw digest plus a per-byte
        # integer view used by the XOR folding below.
        h = mac.copy()
        h.update(x)
        digest = h.digest()
        if six.PY2:
            # py2 digests are str so each byte needs ord(); py3 bytes
            # already iterate as ints.
            return digest, [ord(_) for _ in digest]
        return digest, digest
    buf = []
    salt = smart_str(salt)
    # -(-keylen // digest_size) is ceiling division: number of blocks needed.
    for block in range(1, -(-keylen // mac.digest_size) + 1):
        digest, rv = _pseudorandom(salt + _pack_int(block))
        for _ in range(iterations - 1):
            digest, u = _pseudorandom(digest)
            rv = starmap(xor, zip(rv, u))
        buf.extend(rv)
    return bytes(bytearray(buf))[:keylen]
def tool_shed_encode(val):
    """Encode ``val`` as '<hmac>:<hex payload>' using the shared tool shed key.

    Dicts and lists are serialized to JSON first; anything else is used as-is.
    """
    if isinstance(val, (dict, list)):
        value = json.dumps(val)
    else:
        value = val
    signature = hmac_new(b'ToolShedAndGalaxyMustHaveThisSameKey', smart_str(value))
    payload = unicodify(binascii.hexlify(smart_str(value)))
    return "%s:%s" % (signature, payload)
def _check_master_api_key(self, api_key):
    """Timing-safe check of ``api_key`` against the configured master key."""
    master_api_key = getattr(self.app.config, 'master_api_key', None)
    if not master_api_key:
        # No master key configured: nothing can match.
        return False
    # Hashing both sides first normalizes their length, so the safe
    # comparison below cannot leak length information.
    expected = hashlib.sha256(smart_str(master_api_key)).hexdigest()
    provided = hashlib.sha256(smart_str(api_key)).hexdigest()
    return safe_str_cmp(expected, provided)
def pbkdf2_bin(data, salt, iterations=COST_FACTOR, keylen=KEY_LENGTH, hashfunc=HASH_FUNCTION):
    """Returns a binary digest for the PBKDF2 hash algorithm of `data`
    with the given `salt`. It iterates `iterations` time and produces a
    key of `keylen` bytes. By default SHA-256 is used as hash function,
    a different hashlib `hashfunc` can be provided.
    """
    # Normalize both inputs to byte strings before handing off to hashlib.
    return hashlib.pbkdf2_hmac(hashfunc, smart_str(data), smart_str(salt), iterations, keylen)
def flatten( seq ):
    """ Recursively flatten nested generators/lists/tuples, yielding byte strings """
    for item in seq:
        if isinstance( item, ( types.GeneratorType, list, tuple ) ):
            # Recurse; elements come back already converted and smart_str
            # is applied once more (idempotent for already-converted values).
            for sub in flatten( item ):
                yield smart_str( sub )
        else:
            yield smart_str( item )
def flatten(seq):
    """Yield every leaf of a possibly nested iterable as a byte string."""
    for element in seq:
        is_nested = isinstance(element, (types.GeneratorType, list, tuple))
        if not is_nested:
            yield smart_str(element)
            continue
        # Recurse into nested containers.
        for leaf in flatten(element):
            yield smart_str(leaf)
def __create_jstree(self, directory, disable='folders', whitelist=None):
    """
    Loads recursively all files and folders within the given folder
    and its subfolders and returns jstree representation
    of its structure.

    :param directory: root folder to walk
    :param disable: which node type ('folders' or 'files') is rendered disabled
    :param whitelist: paths that __safe_directory/safe_walk may enter
    :raises exceptions.ConfigDoesNotAllowException: when the directory is unsafe/missing
    """
    jstree_paths = []
    if self.__safe_directory(directory, whitelist=whitelist):
        for (dirpath, dirnames, filenames) in safe_walk(directory, whitelist=whitelist):
            for dirname in dirnames:
                dir_path = os.path.relpath(os.path.join(dirpath, dirname), directory)
                # sha1 of the relative path serves as a stable jstree node id.
                dir_path_hash = hashlib.sha1(smart_str(dir_path)).hexdigest()
                disabled = True if disable == 'folders' else False
                jstree_paths.append(
                    jstree.Path(
                        dir_path, dir_path_hash, {
                            'type': 'folder',
                            'state': {'disabled': disabled},
                            'li_attr': {'full_path': dir_path}
                        }))
            for filename in filenames:
                file_path = os.path.relpath(os.path.join(dirpath, filename), directory)
                file_path_hash = hashlib.sha1(smart_str(file_path)).hexdigest()
                disabled = True if disable == 'files' else False
                jstree_paths.append(
                    jstree.Path(
                        file_path, file_path_hash, {
                            'type': 'file',
                            'state': {'disabled': disabled},
                            'li_attr': {'full_path': file_path}
                        }))
    else:
        raise exceptions.ConfigDoesNotAllowException('The given directory does not exist.')
    userdir_jstree = jstree.JSTree(jstree_paths)
    return userdir_jstree
def _decode_baseauth(self, encoded_str):
    """
    Decode an encrypted HTTP basic authentication string. Returns a tuple of
    the form (email, password), and raises a HTTPBadRequest exception if
    nothing could be decoded.

    :param encoded_str: BaseAuth string encoded base64
    :type encoded_str: string

    :returns: email of the user
    :rtype: string
    :returns: password of the user
    :rtype: string

    :raises: HTTPBadRequest
    """
    try:
        split = encoded_str.strip().split(' ')
    except AttributeError:
        # encoded_str was not a string at all (e.g. None).
        raise exceptions.RequestParameterInvalidException('Authentication is missing')
    # If split is only one element, try to decode the email and password
    # directly.
    if len(split) == 1:
        try:
            email, password = unicodify(b64decode(smart_str(split[0]))).split(':')
        except Exception as e:
            raise exceptions.ActionInputError(e)
    # If there are only two elements, check the first and ensure it says
    # 'basic' so that we know we're about to decode the right thing. If not,
    # bail out.
    elif len(split) == 2:
        if split[0].strip().lower() == 'basic':
            try:
                email, password = unicodify(b64decode(smart_str(split[1]))).split(':')
            except Exception:
                raise exceptions.ActionInputError()
        else:
            raise exceptions.ActionInputError()
    # If there are more than 2 elements, something crazy must be happening.
    # Bail.
    else:
        raise exceptions.ActionInputError()
    return unquote(email), unquote(password)
def check_password_PBKDF2(guess, hashed):
    """Check ``guess`` against a stored PBKDF2 password hash.

    ``hashed`` has the database form
    ``PBKDF2$<hash_function>$<cost_factor>$<salt>$<b64 hash>``.
    Returns True when the guess matches, using a timing-safe comparison.
    """
    # Split the database representation to extract cost_factor and salt
    name, hash_function, cost_factor, salt, encoded_original = hashed.split('$', 5)
    if six.PY3:
        guess = bytes(guess, 'utf-8')
        salt = bytes(salt, 'utf-8')
    else:
        guess = smart_str(guess)
    # Hash the guess using the same parameters.  (Previously the py2 path
    # computed this once inside the branch and then unconditionally again
    # afterwards; a single call is sufficient and behaviorally identical.)
    hashed_guess = pbkdf2_bin(guess, salt, int(cost_factor), KEY_LENGTH, getattr(hashlib, hash_function))
    encoded_guess = b64encode(hashed_guess)
    if six.PY3:
        encoded_guess = encoded_guess.decode('utf-8')
    # Timing-safe comparison of the two encoded digests.
    return safe_str_cmp(encoded_original, encoded_guess)
def md5(s):
    """Return the hex-encoded MD5 digest of string ``s``."""
    digest = hash_util.md5()
    digest.update(smart_str(s))
    hex_result = digest.hexdigest()
    return hex_result
def check_all_jobs(self):
    """
    Returns a list of servers that failed to be contacted and a dict
    of "job_id : status" pairs (where status is a bunchified version
    of the API's structure.
    """
    servers = []
    failures = []
    statuses = {}
    # Collect the distinct PBS servers across all watched jobs so each
    # server is queried only once.
    for pbs_job_state in self.watched:
        pbs_server_name = self.__get_pbs_server(pbs_job_state.job_destination.params)
        if pbs_server_name not in servers:
            servers.append(pbs_server_name)
        pbs_job_state.check_count += 1
    for pbs_server_name in servers:
        c = pbs.pbs_connect(util.smart_str(pbs_server_name))
        if c <= 0:
            log.debug("connection to PBS server %s for state check failed" % pbs_server_name)
            failures.append(pbs_server_name)
            continue
        # Only request the three attributes we actually consume.
        stat_attrl = pbs.new_attrl(3)
        stat_attrl[0].name = pbs.ATTR_state
        stat_attrl[1].name = pbs.ATTR_used
        stat_attrl[2].name = pbs.ATTR_exitstat
        # job id None -> status of all jobs visible on this server.
        jobs = pbs.pbs_statjob(c, None, stat_attrl, None)
        pbs.pbs_disconnect(c)
        statuses.update(self.convert_statjob_to_bunches(jobs))
    return ((failures, statuses))
def process_batch_requests(self, batch_environ, start_response):
    """
    Loops through any provided JSON formatted 'requests', aggregates their
    JSON responses, and wraps them in the batch call response.
    """
    payload = self._read_post_payload(batch_environ)
    requests = payload.get('batch', [])
    responses = []
    for request in requests:
        if not self._is_allowed_route(request['url']):
            # Record the error response but keep processing the other requests.
            responses.append(
                self._disallowed_route_response(request['url']))
            continue
        request_environ = self._build_request_environ(
            batch_environ, request)
        response = self._process_batch_request(request, request_environ,
                                               start_response)
        responses.append(response)
    batch_response_body = smart_str(json.dumps(responses))
    # WSGI header values must be strings, hence str() on the length.
    start_response('200 OK', [
        ('Content-Length', str(len(batch_response_body))),
        ('Content-Type', 'application/json'),
    ])
    return [batch_response_body]
def _urlopen(url, data=None):
    """Open ``url``, restricted to the http/https/ftp schemes."""
    scheme = urlparse(url).scheme
    # Reject file://, custom schemes, etc. up front.
    assert scheme in ('http', 'https', 'ftp'), 'Invalid URL scheme: %s' % scheme
    payload = None if data is None else smart_str(data)
    return urlopen(Request(url, payload))
def stop_job(self, job):
    """Attempts to delete a job from the PBS queue"""
    job_id = job.get_job_runner_external_id().encode('utf-8')
    job_tag = "(%s/%s)" % (job.get_id_tag(), job_id)
    log.debug("%s Stopping PBS job" % job_tag)
    # Declare the connection handle c so that it can be cleaned up:
    c = None
    try:
        pbs_server_name = self.__get_pbs_server(job.destination_params)
        if pbs_server_name is None:
            log.debug("(%s) Job queued but no destination stored in job params, cannot delete" % job_tag)
            return
        c = pbs.pbs_connect(util.smart_str(pbs_server_name))
        if c <= 0:
            log.debug("(%s) Connection to PBS server for job delete failed" % job_tag)
            return
        pbs.pbs_deljob(c, job_id, '')
        log.debug("%s Removed from PBS queue before job completion" % job_tag)
    except Exception:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt; only runtime errors should be logged here.
        e = traceback.format_exc()
        log.debug("%s Unable to stop job: %s" % (job_tag, e))
    finally:
        # Cleanup: disconnect from the server.
        if c is not None:
            pbs.pbs_disconnect(c)
def check_all_jobs( self ):
    """
    Returns a list of servers that failed to be contacted and a dict
    of "job_id : status" pairs (where status is a bunchified version
    of the API's structure.
    """
    servers = []
    failures = []
    statuses = {}
    # Gather the distinct set of PBS servers for all watched jobs so each
    # server is only contacted once per pass.
    for pbs_job_state in self.watched:
        pbs_server_name = self.__get_pbs_server(pbs_job_state.job_destination.params)
        if pbs_server_name not in servers:
            servers.append( pbs_server_name )
        pbs_job_state.check_count += 1
    for pbs_server_name in servers:
        c = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
        if c <= 0:
            log.debug("connection to PBS server %s for state check failed" % pbs_server_name )
            failures.append( pbs_server_name )
            continue
        # Request only the three attributes we consume downstream.
        stat_attrl = pbs.new_attrl(3)
        stat_attrl[0].name = pbs.ATTR_state
        stat_attrl[1].name = pbs.ATTR_used
        stat_attrl[2].name = pbs.ATTR_exitstat
        # job id None -> stat all jobs visible on this server.
        jobs = pbs.pbs_statjob( c, None, stat_attrl, None )
        pbs.pbs_disconnect( c )
        statuses.update( self.convert_statjob_to_bunches( jobs ) )
    return( ( failures, statuses ) )
def stop_job( self, job ):
    """Attempts to delete a job from the PBS queue"""
    job_id = job.get_job_runner_external_id().encode('utf-8')
    job_tag = "(%s/%s)" % ( job.get_id_tag(), job_id )
    log.debug( "%s Stopping PBS job" % job_tag )
    # Declare the connection handle c so that it can be cleaned up:
    c = None
    try:
        pbs_server_name = self.__get_pbs_server( job.destination_params )
        if pbs_server_name is None:
            log.debug("(%s) Job queued but no destination stored in job params, cannot delete" % job_tag )
            return
        c = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
        if c <= 0:
            log.debug("(%s) Connection to PBS server for job delete failed" % job_tag )
            return
        pbs.pbs_deljob( c, job_id, '' )
        log.debug( "%s Removed from PBS queue before job completion" % job_tag )
    # NOTE(review): bare except also traps SystemExit/KeyboardInterrupt;
    # consider narrowing to `except Exception`.
    except:
        e = traceback.format_exc()
        log.debug( "%s Unable to stop job: %s" % ( job_tag, e ) )
    finally:
        # Cleanup: disconnect from the server.
        if ( None is not c ):
            pbs.pbs_disconnect( c )
def encode_guid(self, session_key):
    """Encrypt a session key and return it hex-encoded.

    Session keys are strings; the value is left-padded with "!" up to a
    multiple of the cipher's 8-byte block size before encryption.
    """
    raw = smart_str(session_key)
    padded = (b"!" * (8 - len(raw) % 8)) + raw
    encrypted = self.id_cipher.encrypt(padded)
    return codecs.encode(encrypted, 'hex')
def _last_bits(secret):
    """We append the kind at the end, so just use the bits at the end.
    """
    bits = smart_str(secret)
    if len(bits) > MAXIMUM_ID_SECRET_LENGTH:
        # Keep only the trailing MAXIMUM_ID_SECRET_LENGTH bytes.
        bits = bits[-MAXIMUM_ID_SECRET_LENGTH:]
    return bits
def __init__(self, **config):
    # Base secret from which all id ciphers are derived; required key.
    id_secret = config['id_secret']
    self.id_secret = id_secret
    # Default cipher used when no "kind" is given.
    self.id_cipher = Blowfish.new(smart_str(self.id_secret), mode=Blowfish.MODE_ECB)
    per_kind_id_secret_base = config.get('per_kind_id_secret_base', self.id_secret)
    # Lazily-populated cache of per-"kind" ciphers derived from this base.
    self.id_ciphers_for_kind = _cipher_cache(per_kind_id_secret_base)
def hash_conda_packages(conda_packages, conda_target=None):
    """
    Produce a unique hash on supplied packages.
    TODO: Ideally we would do this in such a way that preserved environments.
    """
    # NOTE(review): `conda_target` is accepted but never used, and only each
    # package's install_environment contributes to the hash — confirm this
    # is intentional.
    h = hashlib.new('sha256')
    for conda_package in conda_packages:
        h.update(smart_str(conda_package.install_environment))
    return h.hexdigest()
def _decode_baseauth(self, encoded_str):
    """
    Decode an encrypted HTTP basic authentication string. Returns a tuple of
    the form (email, password), and raises a HTTPBadRequest exception if
    nothing could be decoded.

    :param encoded_str: BaseAuth string encoded base64
    :type encoded_str: string

    :returns: email of the user
    :rtype: string
    :returns: password of the user
    :rtype: string

    :raises: HTTPBadRequest
    """
    split = encoded_str.strip().split(' ')
    # If split is only one element, try to decode the email and password
    # directly.
    if len(split) == 1:
        try:
            email, password = unicodify(b64decode(smart_str(split[0]))).split(':')
        except Exception as e:
            raise exceptions.ActionInputError(str(e))
    # If there are only two elements, check the first and ensure it says
    # 'basic' so that we know we're about to decode the right thing. If not,
    # bail out.
    elif len(split) == 2:
        if split[0].strip().lower() == 'basic':
            try:
                email, password = unicodify(b64decode(smart_str(split[1]))).split(':')
            except Exception:
                raise exceptions.ActionInputError()
        else:
            raise exceptions.ActionInputError()
    # If there are more than 2 elements, something crazy must be happening.
    # Bail.
    else:
        raise exceptions.ActionInputError()
    return unquote(email), unquote(password)
def make_body_iterable(self, trans, body):
    """Normalize a response ``body`` into an iterable of byte strings."""
    if body is None:
        # Absent body -> empty response.
        return []
    if isinstance(body, (types.GeneratorType, list, tuple)):
        # Recursively stream nested iterables.
        return flatten(body)
    # Worst case scenario: wrap a single opaque value.
    return [smart_str(body)]
def encode_id(self, obj_id, kind=None):
    """Encrypt ``obj_id`` with the cipher for ``kind`` and return it hex-encoded.

    :raises galaxy.exceptions.MalformedId: if ``obj_id`` is None
    """
    if obj_id is None:
        raise galaxy.exceptions.MalformedId("Attempted to encode None id")
    id_cipher = self.__id_cipher(kind)
    # Convert to bytes
    s = smart_str(obj_id)
    # Pad to a multiple of 8 with leading "!"
    s = (b"!" * (8 - len(s) % 8)) + s
    # Encrypt
    return unicodify(codecs.encode(id_cipher.encrypt(s), 'hex'))
def make_body_iterable( self, trans, body ):
    """Coerce ``body`` into an iterable suitable for the WSGI response."""
    if isinstance( body, ( types.GeneratorType, list, tuple ) ):
        # Stream nested iterables recursively.
        return flatten( body )
    # An absent body yields nothing; any other value becomes a single chunk.
    return [] if body is None else [ smart_str( body ) ]
def __init__(self, repository, environ):
    self.repository = repository
    self.updated = False
    repo_cache = environ.get("GALAXY_TEST_DATA_REPO_CACHE", "test-data-cache")
    # Each repository is cached under a directory named by the md5 of its
    # repository string, keeping cache paths short and unique.
    m = hashlib.md5()
    m.update(smart_str(repository))
    repo_path = os.path.join(repo_cache, m.hexdigest())
    super(GitDataResolver, self).__init__(repo_path)
    # My preference would be for this to be false, but for backward compat
    # will leave it as true for now.
    self.fetch_data = asbool(environ.get("GALAXY_TEST_FETCH_DATA", "true"))
def __init__(self, repository, environ):
    self.repository = repository
    self.updated = False
    repo_cache = environ.get("GALAXY_TEST_DATA_REPO_CACHE", "test-data-cache")
    # Cache directory name is the md5 of the repository string.
    m = hashlib.md5()
    m.update(smart_str(repository))
    repo_path = os.path.join(repo_cache, m.hexdigest())
    super().__init__(repo_path)
    # My preference would be for this to be false, but for backward compat
    # will leave it as true for now.
    self.fetch_data = asbool(environ.get("GALAXY_TEST_FETCH_DATA", "true"))
def hash_password_PBKDF2(password):
    """Hash ``password`` with PBKDF2 and return the database representation.

    Output format: ``PBKDF2$<hash_function>$<cost_factor>$<salt>$<b64 hash>``.
    """
    # Generate a random salt
    salt = b64encode(urandom(SALT_LENGTH))
    # Apply the pbkdf2 encoding
    hashed = pbkdf2_bin(smart_str(password), salt, COST_FACTOR, KEY_LENGTH, getattr(hashlib, HASH_FUNCTION))
    hashed_b64 = b64encode(hashed)
    if six.PY3:
        # b64encode returns bytes on py3; the DB stores native strings.
        salt = salt.decode('utf-8')
        hashed_b64 = hashed_b64.decode('utf-8')
    # Format
    return 'PBKDF2${0}${1}${2}${3}'.format(HASH_FUNCTION, COST_FACTOR, salt, hashed_b64)
def run():
    """Demo: manually Blowfish-encrypt a message and compare the result with
    SecurityHelper.encode_guid.

    Previously used Python-2-only ``print`` statements (a syntax error on
    Python 3) and str+bytes concatenation; rewritten with print() calls and
    %-formatting so it runs on Python 3 as well.
    """
    # id_secret='cff03377815197df'  # 567cc1640266789c'
    id_secret = '0123456789abcdef'
    cipher = Blowfish.new(smart_str(id_secret), mode=Blowfish.MODE_ECB)
    message = '*****@*****.**'
    s = smart_str(message)
    # Pad to a multiple of 8 with leading "!"
    s = (b"!" * (8 - len(s) % 8)) + s
    print("S: %s" % s)
    # Encrypt
    encrypted = cipher.encrypt(s)
    print("Encrypted: %s" % encrypted)
    encoded = codecs.encode(encrypted, "hex")
    print("Encoded: %s" % encoded)
    # print(unicodify(codecs.encode(cipher.encrypt(s), 'hex')))
    print(unicodify(encoded))
    security = SecurityHelper(id_secret=id_secret)
    print(security.encode_guid(message))
def _sniffnfix_pg9_hex(value):
    """
    Sniff for and fix postgres 9 hex decoding issue
    """
    try:
        if value[0] == 'x':
            # Text form: 'x<hex digits>'
            return binascii.unhexlify(value[1:])
        elif smart_str(value).startswith(b'\\x'):
            # Bytea escape form: '\\x<hex digits>'
            return binascii.unhexlify(value[2:])
        else:
            return value
    except Exception:
        # Not hex-encoded (or not indexable) — return unchanged.
        return value
def encode_dataset_user(trans, dataset, user):
    # encode dataset id as usual
    # encode user id using the dataset create time as the key
    dataset_hash = trans.security.encode_id(dataset.id)
    if user is None:
        user_hash = 'None'
    else:
        user_hash = str(user.id)
    # Pad to a multiple of 8 with leading "!"
    user_hash = ("!" * (8 - len(user_hash) % 8)) + user_hash
    cipher = Blowfish.new(smart_str(dataset.create_time), mode=Blowfish.MODE_ECB)
    # NOTE(review): str.encode('hex') exists only on Python 2; Python 3 would
    # need codecs.encode(..., 'hex') / binascii.hexlify — confirm target version.
    user_hash = cipher.encrypt(user_hash).encode('hex')
    return dataset_hash, user_hash
def check_password(guess, hashed):
    """
    Check a hashed password. Supports either PBKDF2 if the hash is
    prefixed with that string, or sha1 otherwise.
    """
    if hashed.startswith("PBKDF2"):
        return bool(check_password_PBKDF2(guess, hashed))
    # Passwords were originally encoded with sha1 and hexed
    return bool(safe_str_cmp(hashlib.sha1(smart_str(guess)).hexdigest(), hashed))
def _get_custos_credentials(self):
    """Fetch IAM credentials for this client from the Custos credential endpoint
    and store the resulting iam_client_secret on the config."""
    # HTTP basic auth header built from "<client_id>:<client_secret>".
    clientIdAndSec = self.config['client_id'] + ":" + self.config[
        'client_secret']
    creds = requests.get(
        self.config['credential_url'],
        headers={
            "Authorization": "Basic %s" % util.unicodify(
                base64.b64encode(util.smart_str(clientIdAndSec)))
        },
        # NOTE(review): verify=False disables TLS certificate verification
        # for this request — confirm this is intentional.
        verify=False,
        params={'client_id': self.config['client_id']})
    credentials = creds.json()
    self.config['iam_client_secret'] = credentials['iam_client_secret']
def check_single_job( self, pbs_server_name, job_id ):
    """
    Returns the state of a single job, used to make sure a job is
    really dead.
    """
    c = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
    if c <= 0:
        log.debug("connection to PBS server %s for state check failed" % pbs_server_name )
        return None
    # Request only the job-state attribute.
    stat_attrl = pbs.new_attrl(1)
    stat_attrl[0].name = pbs.ATTR_state
    jobs = pbs.pbs_statjob( c, job_id, stat_attrl, None )
    pbs.pbs_disconnect( c )
    # NOTE(review): assumes the server returns the queried job; an unknown
    # job_id would raise IndexError here — confirm callers guarantee this.
    return jobs[0].attribs[0].value
def _fetch_token(self, oauth2_session, trans):
    """Exchange the current authorization response for an OAuth2 token."""
    if self.config.get('iam_client_secret'):
        # Custos uses the Keycloak client secret to get the token
        client_secret = self.config['iam_client_secret']
    else:
        client_secret = self.config['client_secret']
    token_endpoint = self.config['token_endpoint']
    clientIdAndSec = self.config['client_id'] + ":" + self.config['client_secret']  # for custos
    return oauth2_session.fetch_token(
        token_endpoint,
        client_secret=client_secret,
        authorization_response=trans.request.url,
        headers={"Authorization": "Basic %s" % util.unicodify(base64.b64encode(util.smart_str(clientIdAndSec)))},  # for custos
        verify=self._get_verify_param())
def decode_dataset_user(trans, dataset_hash, user_hash):
    # decode dataset id as usual
    # decode user id using the dataset create time as the key
    dataset_id = trans.security.decode_id(dataset_hash)
    dataset = trans.sa_session.query(trans.app.model.HistoryDatasetAssociation).get(dataset_id)
    assert dataset, "Bad Dataset id provided to decode_dataset_user"
    if user_hash in [None, 'None']:
        user = None
    else:
        cipher = Blowfish.new(smart_str(dataset.create_time), mode=Blowfish.MODE_ECB)
        # NOTE(review): str.decode('hex') is Python-2-only; py3 would need
        # codecs.decode/binascii.unhexlify — confirm target version.
        # The lstrip("!") removes the block-size padding added at encode time.
        user_id = cipher.decrypt(user_hash.decode('hex')).lstrip("!")
        user = trans.sa_session.query(trans.app.model.User).get(int(user_id))
        assert user, "A Bad user id was passed to decode_dataset_user"
    return dataset, user
def escape( string ):
    """
    A tool shed variant of markupsafe.escape that allows a select few
    HTML elements that are repeatedly used in messages created deep
    in the toolshed components. Ideally abstract things would be
    produced in these components and messages in the views or client
    side - this is what should be worked toward - but for now - we
    have this hack.

    >>> escape("A <b>repo</b>")
    u'A <b>repo</b>'
    """
    # Escape everything first, then selectively restore whitelisted tags.
    escaped = smart_str( raw_escape( string ), encoding="ascii", errors="replace" )
    # Unescape few selected tags.
    for key, value in ALLOWED_MAP.items():
        escaped = escaped.replace(value, key)
    return escaped
def choose_one(self, lst, hash_value=None):
    """
    Choose a random value from supplied list. If hash_value is passed in
    then every request with that same hash_value would produce the same
    choice from the supplied list.
    """
    if hash_value is None:
        return random.choice(lst)
    if not isinstance(hash_value, int):
        # Convert hash_value string into a deterministic integer via md5.
        hash_value = int(hashlib.md5(util.smart_str(hash_value)).hexdigest(), 16)
    # else assumed to be 'random' int from 0-~Inf
    return lst[hash_value % len(lst)]
def __create_jstree(self, directory, disable='folders', whitelist=None):
    """
    Loads recursively all files and folders within the given folder
    and its subfolders and returns jstree representation
    of its structure.

    :param directory: root folder to walk
    :param disable: which node type ('folders' or 'files') is rendered disabled
    :param whitelist: paths that __safe_directory/safe_walk may enter
    :raises exceptions.ConfigDoesNotAllowException: when the directory is unsafe/missing
    """
    jstree_paths = []
    if self.__safe_directory(directory, whitelist=whitelist):
        for (dirpath, dirnames, filenames) in safe_walk(directory, whitelist=whitelist):
            for dirname in dirnames:
                dir_path = os.path.relpath(os.path.join(dirpath, dirname), directory)
                # sha1 of the relative path serves as a stable node id.
                dir_path_hash = hashlib.sha1(smart_str(dir_path)).hexdigest()
                disabled = True if disable == 'folders' else False
                jstree_paths.append(jstree.Path(dir_path, dir_path_hash, {'type': 'folder', 'state': {'disabled': disabled}, 'li_attr': {'full_path': dir_path}}))
            for filename in filenames:
                file_path = os.path.relpath(os.path.join(dirpath, filename), directory)
                file_path_hash = hashlib.sha1(smart_str(file_path)).hexdigest()
                disabled = True if disable == 'files' else False
                jstree_paths.append(jstree.Path(file_path, file_path_hash, {'type': 'file', 'state': {'disabled': disabled}, 'li_attr': {'full_path': file_path}}))
    else:
        raise exceptions.ConfigDoesNotAllowException('The given directory does not exist.')
    userdir_jstree = jstree.JSTree(jstree_paths)
    return userdir_jstree
def stream_to_open_named_file(stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict'): """Writes a stream to the provided file descriptor, returns the file name. Closes file descriptor""" # signature and behavor is somewhat odd, due to backwards compatibility, but this can/should be done better CHUNK_SIZE = 1048576 data_checked = False is_compressed = False is_binary = False try: codecs.lookup(target_encoding) except Exception: target_encoding = util.DEFAULT_ENCODING # utf-8 if not source_encoding: source_encoding = util.DEFAULT_ENCODING # sys.getdefaultencoding() would mimic old behavior (defaults to ascii) while True: chunk = stream.read(CHUNK_SIZE) if not chunk: break if not data_checked: # See if we're uploading a compressed file try: # Convert chunk to a bytestring if it is not already. # Check if the first 2 bytes of the chunk are equal to the # gzip magic number. if smart_str(chunk)[:2] == util.gzip_magic: is_compressed = True except Exception: pass if not is_compressed: is_binary = util.is_binary(chunk) data_checked = True if not is_compressed and not is_binary: if not isinstance(chunk, text_type): chunk = chunk.decode(source_encoding, source_error) os.write(fd, chunk.encode(target_encoding, target_error)) else: # Compressed files must be encoded after they are uncompressed in the upload utility, # while binary files should not be encoded at all. os.write(fd, chunk) os.close(fd) return filename
def process_batch_requests(self, batch_environ, start_response):
    """
    Loops through any provided JSON formatted 'requests', aggregates their
    JSON responses, and wraps them in the batch call response.
    """
    payload = self._read_post_payload(batch_environ)
    requests = payload.get('batch', [])
    responses = []
    for request in requests:
        if not self._is_allowed_route(request['url']):
            # Record the error response but keep processing the other requests.
            responses.append(self._disallowed_route_response(request['url']))
            continue
        request_environ = self._build_request_environ(batch_environ, request)
        response = self._process_batch_request(request, request_environ, start_response)
        responses.append(response)
    batch_response_body = smart_str(json.dumps(responses))
    start_response('200 OK', [
        # WSGI (PEP 3333) requires header values to be strings; passing the
        # bare int len() here violated the spec and can break servers.
        ('Content-Length', str(len(batch_response_body))),
        ('Content-Type', 'application/json'),
    ])
    return [batch_response_body]
def display_data(self, trans, data, preview=False, filename=None, to_ext=None, **kwd):
    """
    Old display method, for transition - though still used by API and
    test framework. Datatypes should be very careful if overridding this
    method and this interface between datatypes and Galaxy will likely
    change.

    TOOD: Document alternatives to overridding this method (data providers?).
    """
    # Relocate all composite datatype display to a common location.
    composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
    composite_extensions.append('html')  # for archiving composite datatypes
    # Prevent IE8 from sniffing content type since we're explicit about it.  This prevents intentionally text/plain
    # content from being rendered in the browser
    trans.response.headers['X-Content-Type-Options'] = 'nosniff'
    if isinstance(data, six.string_types):
        return smart_str(data)
    if filename and filename != "index":
        # For files in extra_files_path
        file_path = trans.app.object_store.get_filename(data.dataset, extra_dir='dataset_%s_files' % data.dataset.id, alt_name=filename)
        if os.path.exists(file_path):
            if os.path.isdir(file_path):
                # Directory: synthesize a small HTML listing and stream it.
                with tempfile.NamedTemporaryFile(mode='w', delete=False, dir=trans.app.config.new_file_path, prefix='gx_html_autocreate_') as tmp_fh:
                    tmp_file_name = tmp_fh.name
                    dir_items = sorted(os.listdir(file_path))
                    base_path, item_name = os.path.split(file_path)
                    tmp_fh.write('<html><head><h3>Directory %s contents: %d items</h3></head>\n' % (escape(item_name), len(dir_items)))
                    tmp_fh.write('<body><p/><table cellpadding="2">\n')
                    for index, fname in enumerate(dir_items):
                        # Alternate row colors for readability.
                        if index % 2 == 0:
                            bgcolor = '#D8D8D8'
                        else:
                            bgcolor = '#FFFFFF'
                        # Can't have an href link here because there is no route
                        # defined for files contained within multiple subdirectory
                        # levels of the primary dataset.  Something like this is
                        # close, but not quite correct:
                        # href = url_for(controller='dataset', action='display',
                        # dataset_id=trans.security.encode_id(data.dataset.id),
                        # preview=preview, filename=fname, to_ext=to_ext)
                        tmp_fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, escape(fname)))
                    tmp_fh.write('</table></body></html>\n')
                return self._yield_user_file_content(trans, data, tmp_file_name)
            # Regular file: guess and set its mime type, then stream it.
            mime = mimetypes.guess_type(file_path)[0]
            if not mime:
                try:
                    mime = trans.app.datatypes_registry.get_mimetype_by_extension(".".split(file_path)[-1])
                except Exception:
                    mime = "text/plain"
            self._clean_and_set_mime_type(trans, mime)
            return self._yield_user_file_content(trans, data, file_path)
        else:
            return webob.exc.HTTPNotFound("Could not find '%s' on the extra files path %s." % (filename, file_path))
    self._clean_and_set_mime_type(trans, data.get_mime())
    trans.log_event("Display dataset id: %s" % str(data.id))
    from galaxy import datatypes  # DBTODO REMOVE THIS AT REFACTOR
    if to_ext or isinstance(data.datatype, datatypes.binary.Binary):  # Saving the file, or binary file
        if data.extension in composite_extensions:
            return self._archive_composite_dataset(trans, data, **kwd)
        else:
            trans.response.headers['Content-Length'] = int(os.stat(data.file_name).st_size)
            filename = self._download_filename(data, to_ext, hdca=kwd.get("hdca", None), element_identifier=kwd.get("element_identifier", None))
            trans.response.set_content_type("application/octet-stream")  # force octet-stream so Safari doesn't append mime extensions to filename
            trans.response.headers["Content-Disposition"] = 'attachment; filename="%s"' % filename
            return open(data.file_name, 'rb')
    if not os.path.exists(data.file_name):
        raise webob.exc.HTTPNotFound("File Not Found (%s)." % data.file_name)
    max_peek_size = 1000000  # 1 MB
    if isinstance(data.datatype, datatypes.text.Html):
        max_peek_size = 10000000  # 10 MB for html
    preview = util.string_as_bool(preview)
    if not preview or isinstance(data.datatype, datatypes.images.Image) or os.stat(data.file_name).st_size < max_peek_size:
        return self._yield_user_file_content(trans, data, data.file_name)
    else:
        # Too large to preview inline: show a truncated view instead.
        trans.response.set_content_type("text/html")
        return trans.stream_template_mako("/dataset/large_file.mako", truncated_data=open(data.file_name).read(max_peek_size), data=data)
def _execute(self, cmd, timeout):
    """Run ``cmd`` on the remote host over SSH, bounded by ``timeout``."""
    command = smart_str(cmd)
    return self.ssh.exec_command(command, timeout=timeout)
def queue_job( self, job_wrapper ):
    """Create PBS script for a job and submit it to the PBS queue"""
    # prepare the job
    if not self.prepare_job( job_wrapper, include_metadata=not( self.app.config.pbs_stage_path ) ):
        return
    job_destination = job_wrapper.job_destination
    # Determine the job's PBS destination (server/queue) and options from the job destination definition
    pbs_queue_name = None
    pbs_server_name = self.default_pbs_server
    pbs_options = []
    if '-q' in job_destination.params and 'destination' not in job_destination.params:
        # Legacy '-q' param is treated as the destination.
        job_destination.params['destination'] = job_destination.params.pop('-q')
    if 'destination' in job_destination.params:
        if '@' in job_destination.params['destination']:
            # Destination includes a server
            pbs_queue_name, pbs_server_name = job_destination.params['destination'].split('@')
            if pbs_queue_name == '':
                # e.g. `qsub -q @server`
                pbs_queue_name = None
        else:
            # Destination is just a queue
            pbs_queue_name = job_destination.params['destination']
        job_destination.params.pop('destination')
    # Parse PBS params
    pbs_options = self.parse_destination_params(job_destination.params)
    # Explicitly set the determined PBS destination in the persisted job destination for recovery
    job_destination.params['destination'] = '%s@%s' % (pbs_queue_name or '', pbs_server_name)
    c = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
    if c <= 0:
        errno, text = pbs.error()
        job_wrapper.fail( "Unable to queue job for execution. Resubmitting the job may succeed." )
        log.error( "Connection to PBS server for submit failed: %s: %s" % ( errno, text ) )
        return
    # define job attributes
    ofile = "%s/%s.o" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
    efile = "%s/%s.e" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
    ecfile = "%s/%s.ec" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
    output_fnames = job_wrapper.get_output_fnames()
    # If an application server is set, we're staging
    if self.app.config.pbs_application_server:
        pbs_ofile = self.app.config.pbs_application_server + ':' + ofile
        pbs_efile = self.app.config.pbs_application_server + ':' + efile
        output_files = [ str( o ) for o in output_fnames ]
        output_files.append(ecfile)
        stagein = self.get_stage_in_out( job_wrapper.get_input_fnames() + output_files, symlink=True )
        stageout = self.get_stage_in_out( output_files )
        attrs = [
            dict( name=pbs.ATTR_o, value=pbs_ofile ),
            dict( name=pbs.ATTR_e, value=pbs_efile ),
            dict( name=pbs.ATTR_stagein, value=stagein ),
            dict( name=pbs.ATTR_stageout, value=stageout ),
        ]
    # If not, we're using NFS
    else:
        attrs = [
            dict( name=pbs.ATTR_o, value=ofile ),
            dict( name=pbs.ATTR_e, value=efile ),
        ]
    # define PBS job options
    attrs.append( dict( name=pbs.ATTR_N, value=str( "%s_%s_%s" % ( job_wrapper.job_id, job_wrapper.tool.id, job_wrapper.user ) ) ) )
    job_attrs = pbs.new_attropl( len( attrs ) + len( pbs_options ) )
    for i, attr in enumerate( attrs + pbs_options ):
        job_attrs[i].name = attr['name']
        job_attrs[i].value = attr['value']
        if 'resource' in attr:
            job_attrs[i].resource = attr['resource']
    exec_dir = os.path.abspath( job_wrapper.working_directory )
    # write the job script
    if self.app.config.pbs_stage_path != '':
        # touch the ecfile so that it gets staged
        with open(ecfile, 'a'):
            os.utime(ecfile, None)
        stage_commands = pbs_symlink_template % (
            " ".join( job_wrapper.get_input_fnames() + output_files ),
            self.app.config.pbs_stage_path,
            exec_dir,
        )
    else:
        stage_commands = ''
    env_setup_commands = [ stage_commands ]
    script = self.get_job_file(job_wrapper, exit_code_path=ecfile, env_setup_commands=env_setup_commands)
    job_file = "%s/%s.sh" % (self.app.config.cluster_files_directory, job_wrapper.job_id)
    self.write_executable_script( job_file, script )
    # job was deleted while we were preparing it
    if job_wrapper.get_state() == model.Job.states.DELETED:
        log.debug( "Job %s deleted by user before it entered the PBS queue" % job_wrapper.job_id )
        pbs.pbs_disconnect(c)
        if job_wrapper.cleanup_job in ( "always", "onsuccess" ):
            self.cleanup( ( ofile, efile, ecfile, job_file ) )
            job_wrapper.cleanup()
        return
    # submit
    # The job tag includes the job and the task identifier
    # (if a TaskWrapper was passed in):
    galaxy_job_id = job_wrapper.get_id_tag()
    log.debug("(%s) submitting file %s" % ( galaxy_job_id, job_file ) )
    tries = 0
    while tries < 5:
        job_id = pbs.pbs_submit(c, job_attrs, job_file, pbs_queue_name, None)
        tries += 1
        if job_id:
            pbs.pbs_disconnect(c)
            break
        errno, text = pbs.error()
        log.warning( "(%s) pbs_submit failed (try %d/5), PBS error %d: %s" % (galaxy_job_id, tries, errno, text) )
        time.sleep(2)
    else:
        # while-else: all 5 attempts exhausted without a break.
        log.error( "(%s) All attempts to submit job failed" % galaxy_job_id )
        job_wrapper.fail( "Unable to run this job due to a cluster error, please retry it later" )
        return
    if pbs_queue_name is None:
        log.debug("(%s) queued in default queue as %s" % (galaxy_job_id, job_id) )
    else:
        log.debug("(%s) queued in %s queue as %s" % (galaxy_job_id, pbs_queue_name, job_id) )
    # persist destination
    job_wrapper.set_job_destination( job_destination, job_id )
    # Store PBS related state information for job
    job_state = AsynchronousJobState()
    job_state.job_wrapper = job_wrapper
    job_state.job_id = job_id
    job_state.job_file = job_file
    job_state.output_file = ofile
    job_state.error_file = efile
    job_state.exit_code_file = ecfile
    job_state.old_state = 'N'
    job_state.running = False
    job_state.job_destination = job_destination
    # Add to our 'queue' of jobs to monitor
    self.monitor_queue.put( job_state )