def run_outlier_model(job, name, sample_id, background_id, gene_id, args):
    # Process names with flexible extensions
    sample_ext = os.path.splitext(args.sample)[1]
    sample_name = 'sample_matrix{}'.format(sample_ext)
    bg_ext = os.path.splitext(args.background)[1]
    bg_name = 'bg_matrix{}'.format(bg_ext)

    # Read in input files from the jobStore
    job.fileStore.readGlobalFile(sample_id, os.path.join(job.tempDir, sample_name))
    job.fileStore.readGlobalFile(background_id, os.path.join(job.tempDir, bg_name))
    if gene_id:
        job.fileStore.readGlobalFile(gene_id, os.path.join(job.tempDir, 'gene-list.txt'))

    # Define parameters and call Docker container
    parameters = ['--sample', '/data/{}'.format(sample_name),
                  '--background', '/data/{}'.format(bg_name),
                  '--name', name,
                  '--out-dir', '/data',
                  '--group', args.group,
                  '--col-skip', str(args.col_skip),
                  '--num-backgrounds', str(args.num_backgrounds),
                  '--max-genes', str(args.max_genes),
                  '--num-training-genes', str(args.num_training_genes)]
    if gene_id:
        parameters.extend(['--gene-list', '/data/gene-list.txt'])
    image = 'jvivian/bayesian-outlier-model:1.0a4'
    apiDockerCall(job=job,
                  image=image,
                  working_dir=job.tempDir,
                  parameters=parameters,
                  user='******')
    _fixPermissions(tool=image, workDir=job.tempDir)
    out_dir = os.path.join(job.tempDir, name)
    shutil.move(out_dir, args.out_dir)
def _testDockerCleanFn(job,
                       working_dir,
                       detached=None,
                       rm=None,
                       deferParam=None,
                       containerName=None):
    """
    Test function for test docker_clean. Runs a container with given flags
    and then dies, leaving behind a zombie container.

    :param toil.job.Job job: job
    :param working_dir: See `work_dir=` in :func:`dockerCall`
    :param bool rm: See `rm=` in :func:`dockerCall`
    :param bool detached: See `detached=` in :func:`dockerCall`
    :param int deferParam: See `deferParam=` in :func:`dockerCall`
    :param str containerName: See `container_name=` in :func:`dockerCall`
    """
    def killSelf():
        test_file = os.path.join(working_dir, 'test.txt')
        # Kill the worker once we are sure the docker container is started
        while not os.path.exists(test_file):
            logger.debug('Waiting on the file created by spooky_container.')
            time.sleep(1)
        # By the time we reach here, we are sure the container is running.
        time.sleep(1)
        os.kill(os.getpid(), signal.SIGKILL)

    t = Thread(target=killSelf)
    # Make it a daemon thread so that thread failure doesn't hang tests.
    t.daemon = True
    t.start()
    apiDockerCall(job,
                  image='quay.io/ucsc_cgl/spooky_test',
                  working_dir=working_dir,
                  deferParam=deferParam,
                  containerName=containerName,
                  detach=detached,
                  remove=rm,
                  privileged=True)
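# Usage sketch (not from the source): a minimal Toil driver for the test
# function above. Assumes toil.lib.docker exposes the FORGO/STOP/RM deferral
# constants; the jobstore path and container name are illustrative.
from toil.common import Toil
from toil.job import Job
from toil.lib.docker import FORGO

def main():
    options = Job.Runner.getDefaultOptions('./docker-clean-jobstore')
    options.clean = 'always'
    root = Job.wrapJobFn(_testDockerCleanFn, '/tmp/docker-clean-test',
                         detached=True, rm=False, deferParam=FORGO,
                         containerName='spooky_container')
    with Toil(options) as workflow:
        workflow.start(root)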
def run(self, fileStore):
    fileStore.logToMaster("SalmonIndex")
    tempDir = fileStore.getLocalTempDir()
    try:
        os.makedirs(os.path.join(tempDir, 'execution'))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    fpath = fileStore.readGlobalFile(
        self.ref_txome,
        userPath=os.path.join(tempDir, os.path.basename(self.ref_txome)))
    # os.makedirs() returns None, so create the directory first and keep the
    # path in its own variable.
    index_fpath = os.path.join(tempDir, 'execution', 'index')
    os.makedirs(index_fpath)
    cmd = f'salmon index -t {fpath} --index "{index_fpath}"; tar -cvzf index.tar.gz {index_fpath}'
    generate_docker_bashscript_file(temp_dir=tempDir,
                                    docker_dir=tempDir,
                                    globs=[],
                                    cmd=cmd,
                                    job_name='SalmonIndex')
    # apiDockerCall() with demux=True returns a tuple of bytes objects (stdout, stderr).
    stdout, stderr = apiDockerCall(
        self,
        image='combinelab/salmon',
        working_dir=tempDir,
        parameters=[os.path.join(tempDir, "SalmonIndex_script.sh")],
        entrypoint="/bin/bash",
        stderr=True,
        demux=True,
        volumes={tempDir: {"bind": tempDir}})
    with open(os.path.join(current_working_dir, 'SalmonIndex.log'), 'wb') as f:
        if stdout:
            f.write(stdout)
        if stderr:
            f.write(stderr)
    output_file_id = fileStore.writeGlobalFile(
        os.path.join(tempDir, 'execution', 'index.tar.gz'))
    index_output_path = os.path.join(os.path.abspath(current_working_dir),
                                     'index.tar.gz')
    fileStore.exportFile(output_file_id, f'file://{index_output_path}')
    return {"index": output_file_id}
def run_pdb2pqr(pdb_file, whitespace=True, ff="amber", parameters=None,
                work_dir=None, docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    full_pdb_path = pdb_file
    pdb_path = os.path.basename(full_pdb_path)
    pqr_file = "{}.pqr".format(pdb_path)

    _parameters = list(parameters) if isinstance(parameters, (list, tuple)) else []
    _parameters.append("--ff={}".format(ff))
    if whitespace:
        _parameters.append("--whitespace")

    if docker and apiDockerCall is not None and job is not None:
        # Docker can only read from work_dir
        if not os.path.abspath(os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)

        _parameters += ["/data/{}".format(pdb_path), "/data/{}.pqr".format(pdb_path)]
        try:
            output = apiDockerCall(job,
                                   image='edraizen/pdb2pqr:latest',
                                   working_dir=work_dir,
                                   parameters=_parameters)
            pqr_file = os.path.join(work_dir, pqr_file)
        except (SystemExit, KeyboardInterrupt):
            raise
    else:
        pqr_file = os.path.join(work_dir, pqr_file)
        # Use the normalized _parameters list (parameters may be None)
        command = ["/usr/share/pdb2pqr/pdb2pqr.py"] + _parameters
        command += [full_pdb_path, pqr_file]
        try:
            with silence_stdout(), silence_stderr():
                subprocess.call(command)
        except (SystemExit, KeyboardInterrupt):
            raise

    assert os.path.isfile(pqr_file)
    return pqr_file
def run(self, fileStore):
    fileStore.logToMaster("FastQCtwo")
    tempDir = fileStore.getLocalTempDir()
    try:
        os.makedirs(os.path.join(tempDir, 'execution'))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    fpath = fileStore.readGlobalFile(
        self.reads,
        userPath=os.path.join(tempDir, os.path.basename(self.reads)))
    cmd = f'zcat "{fpath}" | fastqc stdin:readstwo'
    generate_docker_bashscript_file(temp_dir=tempDir,
                                    docker_dir=tempDir,
                                    globs=[],
                                    cmd=cmd,
                                    job_name='FastQCtwo')
    # apiDockerCall() with demux=True returns a tuple of bytes objects (stdout, stderr).
    stdout, stderr = apiDockerCall(
        self,
        image='pegi3s/fastqc',
        working_dir=tempDir,
        parameters=[os.path.join(tempDir, "FastQCtwo_script.sh")],
        entrypoint="/bin/bash",
        stderr=True,
        demux=True,
        volumes={tempDir: {"bind": tempDir}})
    with open(os.path.join(current_working_dir, 'FastQCtwo.log'), 'wb') as f:
        if stdout:
            f.write(stdout)
        if stderr:
            f.write(stderr)
    output_file_id = fileStore.writeGlobalFile(
        os.path.join(tempDir, 'execution', 'readstwo_fastqc.html'))
    fastqc_output_path = os.path.join(os.path.abspath(current_working_dir),
                                      'readstwo_fastqc.html')
    fileStore.exportFile(output_file_id, f'file://{fastqc_output_path}')
    return {"fastqc": output_file_id}
def analyze_haddock(analysis_dir, docker=True, job=None):
    if docker and apiDockerCall is not None and job is not None:
        oldcwd = os.getcwd()
        os.chdir(analysis_dir)
        try:
            out = apiDockerCall(
                job,
                'edraizen/haddock:latest',
                entrypoint="csh",
                working_dir="/data",
                volumes={analysis_dir: {"bind": "/data", "mode": "rw"}},
                parameters=["/opt/haddock2.2/tools/ana_structures.csh"])
            job.log(out)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            # The analysis script exits noisily when no RMSD reference is
            # available; only that failure is tolerated.
            if "RMSD: Undefined variable" not in str(e):
                raise
        os.chdir(oldcwd)
    else:
        try:
            oldcwd = os.getcwd()
            os.chdir(analysis_dir)
            out = subprocess.check_output(
                ["csh", "/opt/haddock2.2/tools/ana_structures.csh"])
            os.chdir(oldcwd)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            if "RMSD: Undefined variable" not in str(e):
                raise

    job.log("ANALYSIS_DIR: {}".format(os.listdir(analysis_dir)))

    results = pd.read_table(
        os.path.join(analysis_dir, "structures_haddock-sorted.stat"),
        nrows=1, delim_whitespace=True)
    results.columns = ["haddock_" + c for c in results.columns]
    results = results.iloc[0]
    return results
def run_cx(pdb_path, work_dir=None, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    if apiDockerCall is not None and job is not None:
        if not os.path.abspath(os.path.dirname(pdb_path)) == os.path.abspath(work_dir):
            shutil.copy(pdb_path, work_dir)
        parameters = [os.path.basename(pdb_path)]
        cx_f = apiDockerCall(job,
                             image='edraizen/cx:latest',
                             working_dir="/data",
                             volumes={work_dir: {"bind": "/data", "mode": "rw"}},
                             parameters=parameters)
    else:
        with open(pdb_path) as f:
            cx_f = subprocess.check_output("cx", stdin=f)

    # subprocess.check_output returns bytes under Python 3
    if isinstance(cx_f, bytes):
        cx_f = cx_f.decode()
    cx_f = iter(cx_f.splitlines())

    # Read in b-factor from PDB file. CX sometimes introduces invalid
    # characters so the Bio.PDB parser cannot be used.
    result = {}
    for l in cx_f:
        if l[:6].strip() in ["ATOM", "HETATM"]:
            try:
                result[int(l[6:11].strip())] = float(l[60:66].strip())
            except ValueError:
                pass
    return result
def docker_call(job, config, work_dir, params, image, tag, outfile=None):
    tagged_image = "{}:{}".format(image, tag)
    if DOCKER_LOGGING:
        log(job, "Running '{}' with parameters: {}".format(tagged_image, params),
            config.uuid, 'docker')
    if outfile is None:
        output = apiDockerCall(job, tagged_image, working_dir=work_dir,
                               parameters=params, user="******")
    else:
        output = dockerCall(job, tool=tagged_image, workDir=work_dir,
                            parameters=params, outfile=outfile)
    return output
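# Usage sketch (not from the source): calling the wrapper above from a Toil
# job function. The samtools image/tag, the flags, and the config object's
# uuid field are assumptions for illustration.
def count_mapped_reads(job, config, work_dir):
    # samtools view -c -F 4 counts mapped reads in a BAM
    params = ['view', '-c', '-F', '4', '/data/sample.bam']
    return docker_call(job, config, work_dir, params,
                       image='quay.io/ucsc_cgl/samtools', tag='1.9--1')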
def CNS(input_file, prefix, work_dir=None, docker=True, job=None,
        template=True, **template_kwds):
    work_dir = work_dir or os.getcwd()

    if docker and apiDockerCall is not None and job is not None:
        if not template and not os.path.abspath(
                os.path.dirname(input_file)) == os.path.abspath(work_dir):
            shutil.copy(input_file, work_dir)
            inp = input_file
        else:
            updated_templates = {}
            for k, v in template_kwds.items():
                if os.path.isfile(v) and not os.path.abspath(
                        os.path.dirname(v)) == os.path.abspath(work_dir):
                    shutil.copy(v, work_dir)
                updated_templates[k] = os.path.join("/data", os.path.basename(v))
            inp = generate_input(input_file, prefix, work_dir, **updated_templates)

        try:
            parameters = [os.path.join("/data", os.path.basename(inp))]
            output = apiDockerCall(job,
                                   image='edraizen/cns:latest',
                                   working_dir=work_dir,
                                   parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            # Fall back to a local CNS run if the Docker call fails
            return CNS(input_file, prefix, work_dir=work_dir, docker=False,
                       job=job, **template_kwds)
    else:
        load_cns_environment()
        inp = generate_input(input_file, prefix, work_dir, **template_kwds)
        with open(inp) as inp_f:
            output = subprocess.check_output(["cns"], stdin=inp_f)
    return output
def score_complex(pdb_file, chain, iteration=None, work_dir=None, docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    if docker and apiDockerCall is not None and job is not None:
        if not os.path.abspath(
                os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)
        try:
            parameters = ["score", os.path.basename(pdb_file), "--chain"] + list(chain)
            if isinstance(iteration, int) and iteration in range(3):
                parameters += ["--iteration", str(iteration)]
            score = apiDockerCall(
                job,
                "edraizen/haddock:latest",
                working_dir="/data",
                volumes={work_dir: {"bind": "/data", "mode": "rw"}},
                parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise

    results = pd.read_table(
        os.path.join(work_dir, pdb_file + ".haddock-sorted.stat"),
        nrows=1, delim_whitespace=True)
    results.columns = ["haddock_" + c for c in results.columns]
    results = results.iloc[0]
    return results
def call_with_docker(self, job, args, work_dir, outfile, errfile, check_output, tool_name):
    """
    Thin wrapper for docker_call that will use internal lookup to figure out
    the location of the docker file. Only exposes the docker_call parameters
    used so far.

    Expects args as a list of lists. If the (toplevel) list has size > 1,
    the piping interface is used.

    Does support redirecting output to outfile, unless check_output is used,
    in which case output is captured.
    """
    RealtimeLogger.info(
        truncate_msg("Docker Run: {}".format(" | ".join(" ".join(x) for x in args))))
    start_time = timeit.default_timer()

    # We use the first argument to look up the tool in the docker map,
    # but allow overriding of this with the tool_name parameter.
    name = tool_name if tool_name is not None else args[0][0]
    tool = self.docker_tool_map[name]

    # We keep an environment dict
    environment = {}
    # And an entry point override
    entrypoint = None
    # And a volumes dict for mounting
    volumes = {}
    # And a working directory override
    working_dir = None

    # Breaks Rscript. TODO: investigate how general this actually is
    if name != 'Rscript':
        # vg uses TMPDIR for temporary files.
        # This is particularly important for gcsa, which makes massive files.
        # We will default to keeping these in our working directory.
        environment['TMPDIR'] = '.'

    if name == 'Rscript':
        # The R dockers by default want to install packages in non-writable
        # directories. Sometimes. Make sure a writable directory which exists
        # is used.
        environment['R_LIBS'] = '/tmp'

    if name == 'vg':
        environment['VG_FULL_TRACEBACK'] = '1'

    # Ugly hack for platypus, as the default container doesn't have the
    # executable in its path.
    if tool == 'quay.io/biocontainers/platypus-variant:0.8.1.1--htslib1.7_1' and \
       args[0][0] == 'Platypus.py':
        args[0][0] = '/usr/local/share/platypus-variant-0.8.1.1-1/Platypus.py'

    # Force all dockers to run sort in a consistent way
    environment['LC_ALL'] = 'C'

    # Set our working directory map
    if work_dir is not None:
        volumes[os.path.abspath(work_dir)] = {'bind': '/data', 'mode': 'rw'}
        working_dir = '/data'

    if outfile is not None:
        # We need to send output to a file object
        assert not check_output

        # We can't just redirect stdout of the container from the API, so
        # we do something more complicated.

        # Now we need to populate an FD that spits out the container output.
        output_fd = None

        # We may be able to use a FIFO, or we may need a network connection.
        # FIFO sharing between host and container only works on Linux.
        use_fifo = (platform.system() == 'Linux')

        if use_fifo:
            # On a Linux host we can just use a FIFO from the container to
            # the host.

            # Set up a FIFO to receive it
            fifo_dir = tempfile.mkdtemp()
            fifo_host_path = os.path.join(fifo_dir, 'stdout.fifo')
            os.mkfifo(fifo_host_path)

            # Mount the FIFO in the container. The container doesn't actually
            # have to have the mountpoint directory in its filesystem.
            volumes[fifo_dir] = {'bind': '/control', 'mode': 'rw'}

            # Redirect the command output by tacking on another pipeline stage
            parameters = args + [['dd', 'of=/control/stdout.fifo']]

            # Open the FIFO in nonblocking mode. See
            # <https://stackoverflow.com/a/5749687> and
            # <http://shallowsky.com/blog/programming/python-read-characters.html>
            output_fd = os.open(fifo_host_path, os.O_RDONLY | os.O_NONBLOCK)
        else:
            # On a Mac host we can't, because of
            # https://github.com/docker/for-mac/issues/483
            # We need to go over the network instead.
            # Open an IPv4 TCP socket, since we know Docker uses IPv4 only
            listen_sock = socket.socket(socket.AF_INET)
            # Bind it to an OS-selected port on all interfaces, since we
            # can't determine the Docker interface.
            # TODO: socket.INADDR_ANY ought to work here but is rejected for
            # being an int.
            listen_sock.bind(('', 0))
            # Start listening
            listen_sock.listen(1)
            # Get the port we got given
            listen_port = listen_sock.getsockname()[1]

            # Generate a random security cookie. Since we can't really stop
            # Internet randos from connecting to our socket, we bail out on
            # any connection that doesn't start with this cookie and a newline.
            security_cookie = str(uuid.uuid4())

            # Redirect the command output to that port using Bash networking.
            # Your Docker needs to be 18.03+ to support host.docker.internal.
            # Your container needs to have bash with networking support.
            parameters = args + [[
                'bash', '-c',
                'exec 3<>/dev/tcp/host.docker.internal/{}; cat <(echo {}) - >&3'
                .format(listen_port, security_cookie)
            ]]

            RealtimeLogger.debug(
                "Listening on port {} for output from Docker container".format(
                    listen_port))

            # We can't populate the FD until we accept, which we can't do
            # until the Docker comes up and is trying to connect.

        RealtimeLogger.debug("Final Docker command: {}".format(
            " | ".join(" ".join(x) for x in parameters)))

        # Start the container detached so we don't wait on it
        container = apiDockerCall(job, tool, parameters,
                                  volumes=volumes,
                                  working_dir=working_dir,
                                  entrypoint=entrypoint,
                                  environment=environment,
                                  detach=True)

        RealtimeLogger.debug("Asked for container {}".format(container.id))

        if not use_fifo:
            # Try and accept a connection from the container.
            # Make sure there's a timeout so we don't accept forever.
            listen_sock.settimeout(10)
            for attempt in range(3):
                connection_sock, remote_address = listen_sock.accept()
                RealtimeLogger.info("Got connection from {}".format(remote_address))

                # Set a 10 second timeout for the cookie
                connection_sock.settimeout(10)

                # Check the security cookie. recv() returns bytes, so encode
                # the expected cookie before comparing.
                received_cookie_and_newline = connection_sock.recv(
                    len(security_cookie) + 1)
                if received_cookie_and_newline != (security_cookie + "\n").encode():
                    # Incorrect security cookie.
                    RealtimeLogger.warning(
                        "Received incorrect security cookie message from {}"
                        .format(remote_address))
                    continue
                else:
                    # This is the container we are looking for.
                    # Go into blocking mode; select() guards our reads below,
                    # so they won't stall.
                    connection_sock.setblocking(True)
                    # Set the FD
                    output_fd = connection_sock.fileno()
                    break

            if output_fd is None:
                # We can't get ahold of the Docker in time
                raise RuntimeError(
                    "Could not establish network connection for Docker output!")

        # If the Docker container goes badly enough, it may not even open
        # the other end of the connection. So we can't just wait for it to
        # EOF before checking on the Docker.

        # Now read ought to throw if there is no data. But
        # <https://stackoverflow.com/q/38843278> and some testing suggest
        # that this doesn't happen, and it just looks like EOF. So we will
        # watch out for that.

        try:
            # Prevent leaking FDs.
            # If this is set, and there is no data in the pipe, decide that
            # no data is coming.
            last_chance = False
            # If this is set, we have seen data in the pipe, so the other
            # end must have opened it and will eventually close it if it
            # doesn't run forever.
            saw_data = False

            while True:
                # While there still might be data in the pipe

                if output_fd is not None:
                    # Select on the pipe with a timeout, so we don't spin
                    # constantly waiting for data.
                    can_read, can_write, had_error = select.select(
                        [output_fd], [], [output_fd], 10)

                    if len(can_read) > 0 or len(had_error) > 0:
                        # There is data available or something else weird
                        # about our FIFO.
                        try:
                            # Do a nonblocking read. Since we checked with
                            # select we never should get b"" unless there's
                            # an EOF.
                            data = os.read(output_fd, 4096)

                            if data == b"":
                                # We didn't throw and we got nothing, so it
                                # must be EOF.
                                RealtimeLogger.debug("Got EOF")
                                break
                        except OSError as err:
                            if err.errno in [errno.EAGAIN, errno.EWOULDBLOCK]:
                                # There is no data right now
                                data = None
                            else:
                                # Something else has gone wrong
                                raise err
                    else:
                        # There is no data available. Don't even try to read.
                        # Treat it as if a read refused to block.
                        data = None

                if data is not None:
                    # Send our data to the outfile
                    outfile.write(data)
                    saw_data = True
                elif not saw_data:
                    # We timed out and there has never been any data.
                    # Maybe the container has died/never started?

                    if last_chance:
                        # The container has been dead for a while and nothing
                        # has arrived yet. Assume no data is coming.
                        RealtimeLogger.warning(
                            "Giving up on output from container {}".format(
                                container.id))
                        break

                    # Otherwise, check on it
                    container.reload()

                    if container.status not in ['created', 'restarting',
                                                'running', 'removing']:
                        # The container has stopped. So what are we doing
                        # waiting around for it?

                        # Wait one last time for any lingering data to
                        # percolate through the FIFO.
                        time.sleep(10)
                        last_chance = True
                        continue
        finally:
            # No matter what happens, close our end of the connection
            os.close(output_fd)

            if not use_fifo:
                # Also close the listening socket
                listen_sock.close()

        # Now our data is all sent.
        # Wait on the container and get its return code.
        return_code = container.wait()

        if use_fifo:
            # Clean up the FIFO files
            os.unlink(fifo_host_path)
            os.rmdir(fifo_dir)
    else:
        # No piping needed.
        if len(args) == 1:
            # Split off the first argument as the entrypoint (so we can be
            # oblivious as to whether that happens by default).
            parameters = [] if len(args[0]) == 1 else args[0][1:]
            entrypoint = args[0][0]
        else:
            # Can leave as-is for the piped interface, which takes a list of
            # args lists and doesn't worry about entrypoints since everything
            # goes through bash -c.
            # TODO: check we have a bash entrypoint!
            parameters = args

        # Run the container and dump the logs if it fails.
        container = apiDockerCall(job, tool, parameters,
                                  volumes=volumes,
                                  working_dir=working_dir,
                                  entrypoint=entrypoint,
                                  environment=environment,
                                  detach=True)

        # Wait on the container and get its return code.
        return_code = container.wait()

    # When we get here, the container has been run, and stdout is either in
    # the file object we sent it to or in the Docker logs. stderr is always
    # in the Docker logs.

    if return_code != 0:
        # What were we doing?
command = " | ".join(" ".join(x) for x in args) # Dump logs RealtimeLogger.error( "Docker container for command {} failed with code {}".format( command, return_code)) RealtimeLogger.error("Dumping stderr...") for line in container.logs(stderr=True, stdout=False, stream=True): # Trim trailing \n RealtimeLogger.error(line[:-1]) if not check_output and outfile is None: # Dump stdout as well, since it's not something the caller wanted as data RealtimeLogger.error("Dumping stdout...") for line in container.logs(stderr=False, stdout=True, stream=True): # Trim trailing \n RealtimeLogger.error(line[:-1]) # Raise an error if it's not sucess raise RuntimeError( "Docker container for command {} failed with code {}".format( command, return_code)) elif errfile: # user wants stderr even if no crash for line in container.logs(stderr=True, stdout=False, stream=True): errfile.write(line) if check_output: # We need to collect the output. We grab it from Docker's handy on-disk buffer. # TODO: Bad Things can happen if the container logs too much. captured_stdout = container.logs(stderr=False, stdout=True) end_time = timeit.default_timer() run_time = end_time - start_time RealtimeLogger.info("Successfully docker ran {} in {} seconds.".format( " | ".join(" ".join(x) for x in args), run_time)) if outfile: outfile.flush() os.fsync(outfile.fileno()) if check_output is True: return captured_stdout
def run_scwrl(pdb_file, output_prefix=None, framefilename=None,
              sequencefilename=None, paramfilename=None, in_cystal=False,
              remove_hydrogens=False, remove_h_n_term=False, work_dir=None,
              docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    full_pdb_path = pdb_file
    pdb_path = os.path.basename(full_pdb_path)
    output_file = "{}.scwrl".format(pdb_path)

    if output_prefix is None:
        output_prefix = os.path.splitext(full_pdb_path)[0]
    output_file = "{}.scwrl.pdb".format(output_prefix)

    _parameters = [p for p, use in [("-#", in_cystal),
                                    ("-h", remove_hydrogens),
                                    ("-t", remove_h_n_term)] if use]

    if docker and apiDockerCall is not None and job is not None:
        # Docker can only read from work_dir
        if not os.path.abspath(os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)

        parameters = ["-i", "/data/{}".format(os.path.basename(pdb_file))]
        parameters += ["-o", "/data/{}".format(os.path.basename(output_file))]
        parameters += _parameters

        if framefilename is not None and os.path.isfile(framefilename):
            if not os.path.abspath(os.path.dirname(framefilename)) == os.path.abspath(work_dir):
                shutil.copy(framefilename, work_dir)
            parameters += ["-f", "/data/{}".format(os.path.basename(framefilename))]
        if sequencefilename is not None and os.path.isfile(sequencefilename):
            if not os.path.abspath(os.path.dirname(sequencefilename)) == os.path.abspath(work_dir):
                shutil.copy(sequencefilename, work_dir)
            parameters += ["-s", "/data/{}".format(os.path.basename(sequencefilename))]
        if paramfilename is not None and os.path.isfile(paramfilename):
            if not os.path.abspath(os.path.dirname(paramfilename)) == os.path.abspath(work_dir):
                shutil.copy(paramfilename, work_dir)
            parameters += ["-p", "/data/{}".format(os.path.basename(paramfilename))]

        try:
            apiDockerCall(job,
                          image='edraizen/scwrl4:latest',
                          working_dir=work_dir,
                          parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise

        output_file = os.path.join(work_dir, os.path.basename(output_file))
    else:
        parameters = ["scwrl4", "-i", pdb_file, "-o", output_file] + _parameters
        if framefilename is not None and os.path.isfile(framefilename):
            parameters += ["-f", framefilename]
        if sequencefilename is not None and os.path.isfile(sequencefilename):
            parameters += ["-s", sequencefilename]
        if paramfilename is not None and os.path.isfile(paramfilename):
            parameters += ["-p", paramfilename]
        try:
            subprocess.call(parameters)
        except (SystemExit, KeyboardInterrupt):
            raise

    assert os.path.isfile(output_file)
    return output_file
def run_outlier_model(job, sample_info, args, cores=3, memory="15G"):
    # Unpack sample information and add sample-specific options
    name, sample_opts = sample_info
    if sample_opts:
        for key, value in sample_opts.items():
            setattr(args, key, value)

    # Check if output already exists and don't run if so
    output = os.path.join(args.out_dir, name)
    if os.path.exists(output):
        return 0

    # Process names with flexible extensions
    sample_ext = os.path.splitext(args.sample)[1]
    sample_name = "sample_matrix{}".format(sample_ext)
    bg_ext = os.path.splitext(args.background)[1]
    bg_name = "bg_matrix{}".format(bg_ext)

    # Copy input files to work directory
    shutil.copy(args.sample, os.path.join(job.tempDir, sample_name))
    shutil.copy(args.background, os.path.join(job.tempDir, bg_name))
    if args.gene_list:
        shutil.copy(args.gene_list, os.path.join(job.tempDir, "gene-list.txt"))

    # Define parameters and call Docker container
    parameters = [
        "--sample", "/data/{}".format(sample_name),
        "--background", "/data/{}".format(bg_name),
        "--name", name,
        "--out-dir", "/data",
        "--group", args.group,
        "--col-skip", str(args.col_skip),
        "--num-backgrounds", str(args.num_backgrounds),
        "--max-genes", str(args.max_genes),
        "--num-training-genes", str(args.num_training_genes),
        "--pval-convergence-cutoff", str(args.pval_convergence_cutoff),
        "--tune", str(args.tune),
    ]
    if args.disable_iter:
        parameters.append("--disable-iter")
    if args.save_model:
        parameters.append("--save-model")
    if args.gene_list:
        parameters.extend(["--gene-list", "/data/gene-list.txt"])
    image = "jvivian/gene-outlier-detection:0.13.0a"
    apiDockerCall(job=job,
                  image=image,
                  working_dir=job.tempDir,
                  parameters=parameters,
                  user="******")
    _fixPermissions(tool=image, workDir=job.tempDir)
    out_dir = os.path.join(job.tempDir, name)
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    shutil.move(out_dir, args.out_dir)
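# Usage sketch (not from the source): fanning run_outlier_model out over many
# samples from a Toil root job. The (name, opts) tuple layout matches the
# unpacking at the top of the function; everything else is illustrative.
def map_samples(job, sample_infos, args):
    for sample_info in sample_infos:
        job.addChildJobFn(run_outlier_model, sample_info, args,
                          cores=3, memory="15G")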
def run_maxcluster(*args, **kwds):
    work_dir = kwds.pop("work_dir", None)
    docker = kwds.pop("docker", True)
    job = kwds.pop("job", None)

    if work_dir is None:
        work_dir = os.getcwd()

    if "file_list" in kwds and "l" not in kwds:
        kwds["l"] = kwds.pop("file_list")
    else:
        kwds.pop("file_list", None)

    log = kwds.get("log", False)
    if log and not isinstance(log, str):
        f = tempfile.NamedTemporaryFile(dir=work_dir, suffix=".log", delete=False)
        f.close()
        kwds["log"] = f.name

    file_kwds = ["log", "e", "p", "l", "R", "Rl", "Ru", "F", "M"]
    in_file_kwds = ["e", "p", "l", "F", "M"]
    parameters = ["-" + a for a in args]
    for k, v in kwds.items():
        if k not in file_kwds:
            parameters += ["-{}".format(k), str(v)]
    job.log("ORIG PARAMS: {}".format(parameters))
    file_parameters = {k: v for k, v in kwds.items() if k in file_kwds}

    if docker and apiDockerCall is not None and job is not None:
        for k, f in file_parameters.items():
            if k in in_file_kwds and not os.path.abspath(
                    os.path.dirname(f)) == os.path.abspath(work_dir):
                shutil.copy(f, work_dir)
            job.log("BASENAMING: {}".format(os.path.basename(f)))
            parameters += ["-{}".format(k), os.path.basename(f)]

        oldcwd = os.getcwd()
        os.chdir(work_dir)
        try:
            out = apiDockerCall(
                job,
                'edraizen/maxcluster:latest',
                working_dir="/data",
                volumes={work_dir: {"bind": "/data", "mode": "rw"}},
                parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            job.log("FILE LIST IS [{}]".format(
                open(file_parameters["l"]).read()))
            raise
        os.chdir(oldcwd)
    else:
        file_args = []
        for k, f in file_parameters.items():
            parameters += ["-{}".format(k), f]
        args = [maxcluster_path] + file_args + parameters
        try:
            out = subprocess.check_output(args)
        except (SystemExit, KeyboardInterrupt):
            raise

    if "log" in kwds and os.path.isfile(kwds["log"]):
        return kwds["log"]
    return out
def run_haddock(dock_name, setup=False, work_dir=None, docker=True, toil=False, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    if job:
        print(str(os.listdir(work_dir)))

    assert any(os.path.isfile(os.path.join(work_dir, f))
               for f in ("new.html", "run.cns"))

    if toil:
        image = 'edraizen/haddock-toil:latest'
        parameters = [
            "--provisioner", "aws",
            "--nodeTypes", "t2.small,t2.small:0.0069",
            "--defaultCores", "1",
            "--maxCores", "1",
            "--maxNodes", "1,99",
            "--maxLocalJobs", "100",
            "--targetTime", "1",
            "--batchSystem", "mesos",
            "--defaultMemory", "997Mi",
            "--defaultDisk", "42121Mi",
            "--logFile", "{}.log".format(dock_name),
            "aws:us-east-1:haddock-{}".format(dock_name),
        ]
    else:
        image = 'edraizen/haddock:latest'
        parameters = []

    if docker and apiDockerCall is not None and job is not None:
        oldcwd = os.getcwd()
        os.chdir(work_dir)
        try:
            out = apiDockerCall(
                job,
                image,
                working_dir="/data",
                volumes={work_dir: {"bind": "/data", "mode": "rw"}},
                parameters=parameters,
                detach=True)
            for line in out.logs(stream=True):
                # stream=True makes this loop blocking; we will loop until
                # the container stops and there is no more output.
                RealtimeLogger.info(line)
        except (SystemExit, KeyboardInterrupt):
            raise
        os.chdir(oldcwd)
    else:
        try:
            out = subprocess.check_output([sys.executable, "RunHaddock.py"])
        except (SystemExit, KeyboardInterrupt):
            raise
    return out
def run_minimize(pdb_file, min_type="lbfgs_armijo_nonmonotone", min_tolerance=0.001,
                 ignore_zero_occupancy=False, parameters=None, work_dir=None,
                 job=None):
    # Note: a job argument is required for the Docker path; the original
    # referenced an undefined `job`, so it is now an explicit parameter.
    if work_dir is None:
        work_dir = os.getcwd()

    prefix = os.path.splitext(os.path.basename(pdb_file))[0]

    parameters = list(parameters) if isinstance(parameters, (list, tuple)) else []
    parameters += [
        "-run:min_type", min_type,
        "-run:min_tolerance", str(min_tolerance),
        "-ignore_zero_occupancy", str(ignore_zero_occupancy).lower()]

    if apiDockerCall is not None and job is not None:
        if not os.path.abspath(os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)
        minimized_file = os.path.join("/data", "{}.pdb_0001.pdb".format(prefix))
        score_file = os.path.join("/data", "{}.sc".format(prefix))
        pdb_file_in = os.path.join("/data", "{}.pdb".format(prefix))
        parameters += [
            "-s", pdb_file_in,
            "-out:file:scorefile", score_file,
            "-out:path:pdb", "/data",
            "-out:path:score", "/data"]
        try:
            apiDockerCall(job,
                          image='edraizen/minimize:latest',
                          working_dir=work_dir,
                          parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            # Fall back to a local run (job=None routes to the else branch)
            return run_minimize(pdb_file, min_type=min_type,
                                min_tolerance=min_tolerance,
                                ignore_zero_occupancy=ignore_zero_occupancy,
                                parameters=parameters, work_dir=work_dir)
    else:
        work_dir = os.path.dirname(os.path.abspath(pdb_file))
        minimized_file = os.path.join(work_dir, "{}.pdb_0001.pdb".format(prefix))
        score_file = os.path.join(work_dir, "{}.sc".format(prefix))
        command = ["minimize.static.linuxgccrelease"] + parameters
        command += [
            "-s", pdb_file,
            "-out:file:scorefile", score_file,
            "-out:path:pdb", work_dir,
            "-out:path:score", work_dir,
            # Pandas apply calls the first row twice, so this is needed
            "-overwrite", "false"]
        try:
            subprocess.check_output(command, stderr=subprocess.PIPE)
        except subprocess.CalledProcessError:
            raise RuntimeError("Unable to minimize file {}".format(pdb_file))
def run_apbs(pqr_file, input_file=None, keep_input=False, work_dir=None, docker=True, job=None):
    """Run APBS. Calculates the correct grid size using Psize and defaults
    from Chimera.
    """
    if work_dir is None:
        work_dir = os.getcwd()

    full_pqr_path = pqr_file
    pqr_path = os.path.basename(full_pqr_path)
    file_prefix = os.path.splitext(pqr_path)[0]
    output_prefix = os.path.join(work_dir, "{}.apbs_output".format(file_prefix))

    if input_file is not None and os.path.isfile(input_file):
        keep_input = True
    else:
        input_file_contents = make_apbs_input(
            full_pqr_path, "{}.apbs_output".format(file_prefix))

    if docker and apiDockerCall is not None and job is not None:
        input_file_name = os.path.join(work_dir, "{}.apbs_input".format(file_prefix))
        input_file_short = "{}.apbs_input".format(file_prefix)
        output_prefix = "{}.apbs_output".format(file_prefix)

        if input_file is not None:
            if not os.path.abspath(os.path.dirname(input_file)) == os.path.abspath(work_dir):
                shutil.copy(input_file, os.path.join(work_dir, input_file_short))
            else:
                input_file_short = os.path.basename(input_file)
        else:
            input_file = input_file_name
            with open(input_file, "w") as f:
                f.write(input_file_contents)

        if not os.path.abspath(os.path.dirname(pqr_file)) == os.path.abspath(work_dir):
            shutil.copy(pqr_file, work_dir)

        try:
            parameters = ["/data/{}".format(input_file_short)]
            apiDockerCall(job,
                          image='edraizen/apbs:latest',
                          working_dir="/data",
                          volumes={work_dir: {"bind": "/data", "mode": "rw"}},
                          parameters=parameters)
            output_prefix = os.path.join(work_dir, output_prefix)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            # Fall back to a local APBS run if the Docker call fails
            return run_apbs(full_pqr_path, input_file=input_file_name,
                            keep_input=keep_input, work_dir=work_dir,
                            docker=False)
    else:
        input_file = os.path.join(work_dir, "{}.apbs_input".format(file_prefix))
        output_prefix = os.path.join(work_dir, "{}.apbs_output".format(file_prefix))
        with open(input_file, "w") as f:
            f.write(input_file_contents)
        try:
            subprocess.call(["apbs", input_file])
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            raise RuntimeError("APBS failed because it was not found in path: {}".format(e))

    if not keep_input and input_file is not None and os.path.isfile(input_file):
        os.remove(input_file)

    out_file = output_prefix + ".txt"
    assert os.path.isfile(out_file), "Outfile not found: {}".format(os.listdir(work_dir))
    return out_file
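# Usage sketch (not from the source): chaining run_pdb2pqr and run_apbs, the
# two helpers defined above, to compute an electrostatics profile for one
# structure inside a Toil job.
def electrostatics(job, pdb_file, work_dir):
    pqr_file = run_pdb2pqr(pdb_file, work_dir=work_dir, docker=True, job=job)
    return run_apbs(pqr_file, work_dir=work_dir, docker=True, job=job)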
def align(fixed_file, fixed_chain, moving_file, moving_chain, method="tmalign",
          force_alignment=None, extract=True, docker=True, work_dir=None, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    # Debugging aid: save numbered copies of the inputs so failed alignments
    # can be inspected later.
    import shutil
    numalign = sum(1 for f in os.listdir("/root") if f.startswith("align")) // 3 + 1
    shutil.copy(fixed_file, os.path.join(
        "/root", "align{}_fixed_{}_{}.pdb".format(numalign, fixed_chain, moving_chain)))
    shutil.copy(moving_file, os.path.join(
        "/root", "align{}_moving_{}_{}.pdb".format(numalign, fixed_chain, moving_chain)))

    _chain1 = tempfile.NamedTemporaryFile(suffix=".pdb", dir=work_dir, delete=False)
    extract_chains(fixed_file, fixed_chain, rename="B" * len(fixed_chain),
                   new_file=_chain1.name)
    _chain1.close()

    job.log("MOVE CHAINS: {} {}".format(get_all_chains(moving_file), moving_chain))
    job.log("FIX CHAINS: {} {}".format(get_all_chains(fixed_file), fixed_chain))

    _chain2 = tempfile.NamedTemporaryFile(suffix=".pdb", dir=work_dir, delete=False)
    extract_chains(moving_file, moving_chain, rename="A" * len(moving_chain),
                   new_file=_chain2.name)
    _chain2.close()

    job.log("MOVE CHAIN A: {}".format(next(get_atom_lines(_chain2.name))))
    job.log("MOVE CHAIN A: {}".format(get_all_chains(_chain2.name)))
    job.log("MOVE CHAIN B: {}".format(next(get_atom_lines(_chain1.name))))
    job.log("MOVE CHAIN B: {}".format(get_all_chains(_chain1.name)))

    shutil.copy(_chain1.name, os.path.join(
        "/root", "align{}_seg_fixed_{}_{}.pdb".format(numalign, fixed_chain, moving_chain)))
    shutil.copy(_chain2.name, os.path.join(
        "/root", "align{}_seg_moving_{}_{}.pdb".format(numalign, fixed_chain, moving_chain)))

    _outf = tempfile.NamedTemporaryFile(dir=work_dir, delete=False)
    _outf.close()
    _outf = _outf.name

    if method in ["tmalign", "mmalign"]:
        image = "edraizen/{}:latest".format(method)
        parameters = [os.path.basename(_chain2.name),
                      os.path.basename(_chain1.name),
                      "-o", os.path.basename(_outf + ".sup")]
        if method == "tmalign":
            if force_alignment is not None:
                parameters += ["-I", os.path.basename(force_alignment)]
            else:
                parameters += ["-m", os.path.basename(_outf + ".matrix.txt")]
    elif method == "ce":
        image = "edraizen/ce:latest"
        # apiDockerCall expects a list of parameters, not a lambda
        parameters = ["--file1", os.path.basename(_chain1.name),
                      "--file2", os.path.basename(_chain2.name),
                      "-outputPDB", "-outFile", os.path.basename(_outf + ".sup")]

    if docker and apiDockerCall is not None and job is not None:
        try:
            stdout = apiDockerCall(
                job,
                image,
                working_dir="/data",
                volumes={work_dir: {"bind": "/data", "mode": "rw"}},
                parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            raise Exception(str(e))
    else:
        raise RuntimeError("Only docker works at the moment")

    job.log("OUTPUT: " + stdout)

    rmsd_re = re.compile(r"^Aligned length=.+, RMSD=(.+), Seq_ID=")
    tmscore_re = re.compile(r"^TM-score=(.+) \(if normalized by length of Chain_2\)")
    rmsd_tm_re = re.compile(r"^Aligned length=.+, RMSD=(.+), TM-score=(.+), ID=")
    rmsd = tm_score = -1.
    lines = iter(stdout.splitlines())
    for line in lines:
        job.log(line.rstrip())
        m = rmsd_re.match(line)
        if m:
            rmsd = float(m.group(1).strip())
            job.log("RMSD is {}".format(rmsd))
            continue
        m = rmsd_tm_re.match(line)
        if m:
            rmsd = float(m.group(1).strip())
            tm_score = float(m.group(2).strip())
            job.log("RMSD is {}".format(rmsd))
            job.log("TM-score is {}".format(tm_score))
        m = tmscore_re.match(line)
        if m:
            tm_score = float(m.group(1).strip())
            job.log("TM-score is {}".format(tm_score))
        if method == "mmalign" and "rotation matrix" in line:
            with open(_outf + ".matrix.txt", "w") as matfile:
                for i, mat_line in enumerate(lines):
                    if i > 4:
                        break
                    # splitlines() strips newlines, so add them back
                    matfile.write(mat_line + "\n")

    ending = ".sup_all_atm_lig" if method == "tmalign" else ".sup_all"
    _outfile = _outf + ending
    job.log("ALL chains: {}".format(get_all_chains(_outfile)))
    job.log("FIRST LINE: {}".format(next(get_atom_lines(_outfile))))
    job.log("ALL aligned files: {}".format(os.listdir(work_dir)))

    shutil.copy(_outfile, os.path.join(
        "/root", "align{}_raw_aligned_{}_{}.pdb".format(
            numalign, fixed_chain, moving_chain)))
    if method == "tmalign":
        shutil.copy(_outf + ".matrix.txt", os.path.join(
            "/root", "align{}_matrix_aligned_{}_{}.pdb".format(
                numalign, fixed_chain, moving_chain)))

    if extract:
        outfile = os.path.join(work_dir, "{}.aligned.pdb".format(
            os.path.splitext(os.path.basename(moving_file))[0]))
    else:
        outfile = os.path.join(work_dir, "{}__{}.aligned.pdb".format(
            os.path.splitext(os.path.basename(fixed_file))[0],
            os.path.splitext(os.path.basename(moving_file))[0]))

    if extract:
        # Chain A holds the moving_pdb rotation/translation
        _outfile = extract_chains(_outfile, "A")
        # Copy the updated XYZ coords into the moving_pdb file to ensure
        # chains are correct
        update_xyz(moving_file, _outfile, updated_pdb=outfile)
    else:
        # Remove extraneous lines by running pdb_tidy over the aligned file
        with open(outfile, "w") as out:
            subprocess.call(
                [sys.executable, os.path.join(PDB_TOOLS, "pdb_tidy.py"), _outfile],
                stdout=out)

    if force_alignment is None:
        matrix_file = outfile + ".matrix"
        assert os.path.isfile(_outf + ".matrix.txt")
        shutil.move(_outf + ".matrix.txt", matrix_file)
        assert os.path.isfile(matrix_file)
        with open(matrix_file) as f:
            print("MATRIX")
            print(f.read())
    else:
        matrix_file = force_alignment

    job.log("NEW chains: {}".format(get_all_chains(outfile)))

    for f in glob.glob(os.path.join(work_dir, _outf + "*")):
        try:
            os.remove(f)
        except OSError:
            pass

    assert os.path.isfile(outfile)
    return outfile, rmsd, tm_score, matrix_file
def run_modeller(pir, template, model, num_models=5, work_dir=None, docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    python_file = os.path.join(work_dir, "run_modeller.py")

    if docker and apiDockerCall is not None and job is not None:
        # Docker can only read from work_dir
        if not os.path.abspath(os.path.dirname(pir)) == os.path.abspath(work_dir):
            shutil.copy(pir, work_dir)
        if not os.path.abspath(os.path.dirname(template)) == os.path.abspath(work_dir):
            shutil.copy(template, work_dir)

        with open(python_file, "w") as f:
            f.write(modeller_file.format(
                pir=os.path.join("/data", os.path.basename(pir)),
                template=os.path.basename(template).rsplit(".", 1)[0],
                model=model,
                num_models=num_models,
                work_dir="/data"))
        print(open(python_file).read())

        parameters = [os.path.join("/data", os.path.basename(python_file))]
        try:
            outputs = apiDockerCall(job,
                                    image='edraizen/modeller:latest',
                                    working_dir=work_dir,
                                    parameters=parameters)
            job.log(outputs)
            outputs = outputs.splitlines()[-1]
        except (SystemExit, KeyboardInterrupt):
            raise
    else:
        with open(python_file, "w") as f:
            f.write(modeller_file.format(pir=pir,
                                         template=template,
                                         model=model,
                                         num_models=num_models,
                                         work_dir=work_dir))
        try:
            outputs = subprocess.check_output([sys.executable, python_file])
        except (SystemExit, KeyboardInterrupt):
            raise

    outputs = json.loads(outputs)
    best_pdb, best_dope = min(outputs.items(), key=lambda x: x[1])
    best_pdb = os.path.join(work_dir, best_pdb)
    assert os.path.isfile(best_pdb)

    # Remove every model except the best-scoring one
    for f in outputs.keys():
        path = os.path.join(work_dir, f)
        if path != best_pdb:
            try:
                os.remove(path)
            except OSError:
                pass
    return best_pdb
def run_zrank(complex_path, refinement=False, work_dir=None, docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    _parameters = ["-R"] if refinement else []

    if not isinstance(complex_path, (list, tuple)):
        complex_path = [complex_path]

    # zrank reads the structures to score from a list file
    listfile = tempfile.NamedTemporaryFile(mode="w", dir=work_dir,
                                           prefix="listfile", suffix=".txt",
                                           delete=False)
    for pdb in complex_path:
        listfile.write(os.path.basename(pdb) + "\n")
    listfile.close()

    if docker and apiDockerCall is not None and job is not None:
        parameters = _parameters + [os.path.basename(listfile.name)]
        try:
            out = apiDockerCall(
                job,
                image='edraizen/zrank:latest',
                working_dir="/data",
                volumes={work_dir: {"bind": "/data", "mode": "rw"}},
                parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            print("Error running docker for {} because {}".format(complex_path, e))
            # Fall back to a local zrank run if the Docker call fails
            return run_zrank(complex_path, refinement=refinement,
                             work_dir=work_dir, docker=False)
    else:
        cmd = [zrank_path] + _parameters + [listfile.name]
        try:
            subprocess.call(cmd)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            raise RuntimeError("Cannot run zrank for {}. Error: {}".format(
                complex_path, e))

    assert os.path.isfile(listfile.name + ".zr.out"), "No output for zrank"

    with open(listfile.name + ".zr.out") as f:
        scores = dict(line.rstrip().split() for line in f)

    if len(complex_path) == 1:
        scores = list(scores.values())[0]

    for f in (listfile.name, listfile.name + ".zr.out"):
        try:
            os.remove(f)
        except OSError:
            pass

    return scores
def _testDockerLogsFn(job, working_dir, script_file, stream=False, demux=False):
    """Return True if the test succeeds. Otherwise an Exception is raised."""
    # We write a script file because the redirection operator, '>&2', is
    # wrapped in quotes when passed as parameters.
    import textwrap
    bash_script = textwrap.dedent('''
    #!/bin/bash
    echo hello stdout ;
    echo hello stderr >&2 ;
    echo hello stdout ;
    echo hello stderr >&2 ;
    echo hello stdout ;
    echo hello stdout ;
    ''')
    with open(script_file, 'w') as file:
        file.write(bash_script)

    out = apiDockerCall(job,
                        image='quay.io/ucsc_cgl/ubuntu:20.04',
                        working_dir=working_dir,
                        parameters=[script_file],
                        volumes={working_dir: {'bind': working_dir, 'mode': 'rw'}},
                        entrypoint="/bin/bash",
                        stdout=True,
                        stderr=True,
                        stream=stream,
                        demux=demux)

    # We check the output length because order is not guaranteed.
    if stream:
        if demux:
            # A generator with tuples of (stdout, stderr)
            assert hasattr(out, '__iter__')
            for _ in range(6):
                stdout, stderr = next(out)
                if stdout:
                    # len('hello stdout\n') == 13
                    assert len(stdout) == 13
                elif stderr:
                    assert len(stderr) == 13
                else:
                    assert False
        else:
            # A generator with bytes
            assert hasattr(out, '__iter__')
            for _ in range(6):
                assert len(next(out)) == 13
    else:
        if demux:
            # A tuple of (stdout, stderr)
            stdout, stderr = out
            # len('hello stdout\n' * 4) == 52
            assert len(stdout) == 52
            # len('hello stderr\n' * 2) == 26
            assert len(stderr) == 26
        else:
            # A bytes object
            # len('hello stdout\n' * 4 + 'hello stderr\n' * 2) == 78
            assert len(out) == 78
    return True
def run(self, fileStore):
    fileStore.logToMaster("md5")
    tempDir = fileStore.getLocalTempDir()
    try:
        os.makedirs(os.path.join(tempDir, 'execution'))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    inputFile = process_and_read_file(
        abspath_file(self.id_inputFile,
                     asldijoiu23r8u34q89fho934t8u34fcurrentworkingdir),
        tempDir, fileStore, docker=True)

    try:
        # Intended to deal with "optional" inputs that may not exist
        # TODO: handle this better
        command0 = r'''
/bin/my_md5sum '''
    except:
        command0 = ''

    try:
        # Intended to deal with "optional" inputs that may not exist
        # TODO: handle this better
        command1 = str(
            inputFile if not isinstance(inputFile, tuple)
            else process_and_read_file(inputFile, tempDir, fileStore)).strip("\n")
    except:
        command1 = ''

    try:
        # Intended to deal with "optional" inputs that may not exist
        # TODO: handle this better
        command2 = r'''
'''
    except:
        command2 = ''

    cmd = command0 + command1 + command2
    cmd = textwrap.dedent(cmd.strip("\n"))
    generate_docker_bashscript_file(temp_dir=tempDir,
                                    docker_dir=tempDir,
                                    globs=[],
                                    cmd=cmd,
                                    job_name='md5')

    stdout = apiDockerCall(
        self,
        image='quay.io/briandoconnor/dockstore-tool-md5sum:1.0.4',
        working_dir=tempDir,
        parameters=[os.path.join(tempDir, "md5_script.sh")],
        entrypoint="/bin/bash",
        user='******',
        stderr=True,
        volumes={tempDir: {"bind": tempDir}})
    writetype = 'wb' if isinstance(stdout, bytes) else 'w'
    with open(os.path.join(asldijoiu23r8u34q89fho934t8u34fcurrentworkingdir,
                           'md5.log'), writetype) as f:
        f.write(stdout)

    # output-type: File
    output_filename = 'md5sum.txt'
    value = process_outfile(
        output_filename, fileStore, tempDir,
        '/home/lifeisaboutfishtacos/dockstore-workflow-md5sum')

    rvDict = {"value": value}
    return rvDict