def new_function(*args, **kwargs):
    last_error = None
    # Call backoff_times, overriding its parameters with anything passed
    # in kwargs
    for delay in backoff_times(retries=kwargs.get("retries", retries),
                               base_delay=kwargs.get("base_delay", base_delay)):
        # Keep looping until it works or our iterator raises a
        # BackoffError
        if delay > 0:
            # We have to wait before trying again
            RealtimeLogger.error("Retry after {} seconds".format(delay))
            time.sleep(delay)
        try:
            return original_function(*args, **kwargs)
        except Exception as err:
            # Stash the exception in a longer-lived variable: Python 3
            # unbinds the "as" target when the except block ends.
            last_error = err
            # Report the formatted underlying exception with traceback
            RealtimeLogger.error("{} failed due to: {}".format(
                original_function.__name__,
                "".join(traceback.format_exception(*sys.exc_info()))))
    # If we get here, the function we're calling never ran through before
    # we ran out of backoff times. Give an error.
    if last_error is None:
        raise BackoffError("Ran out of retries calling {}".format(
            original_function.__name__))
    else:
        raise last_error
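# NOTE (illustrative): the wrapper above consumes a backoff_times(retries,
# base_delay) generator and a BackoffError type defined elsewhere in this
# module. A minimal sketch of what such a pair might look like, assuming
# exponential backoff with jitter (the real implementation may differ):
import random

class BackoffError(RuntimeError):
    """Raised when a retried function never succeeds."""

def backoff_times(retries=3, base_delay=10):
    # The first attempt happens immediately, with no delay.
    yield 0
    delay = base_delay
    for _ in range(retries):
        # Add jitter so concurrent workers don't all retry in lockstep.
        yield delay + random.uniform(0, delay / 2.0)
        delay *= 2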
def read_input_file(self, input_path, local_path):
    """
    Get input from the filesystem.
    """

    RealtimeLogger.debug("Loading {} from FileIOStore in {} to {}".format(
        input_path, self.path_prefix, local_path))

    if os.path.exists(local_path):
        # Try deleting the existing item if it already exists
        try:
            os.unlink(local_path)
        except OSError:
            # Don't fail here; fail on the assertion below, which will be
            # more informative.
            pass

    # Make sure the path is clear for copying
    assert not os.path.exists(local_path)

    # Where is the file actually?
    real_path = os.path.abspath(os.path.join(self.path_prefix, input_path))

    if not os.path.exists(real_path):
        RealtimeLogger.error(
            "Can't find {} from FileIOStore in {}!".format(
                input_path, self.path_prefix))
        raise RuntimeError("File {} missing!".format(real_path))

    # Make a temporary file
    temp_handle, temp_path = tempfile.mkstemp(
        dir=os.path.dirname(local_path))
    os.close(temp_handle)

    # Copy to the temp file
    shutil.copy2(real_path, temp_path)

    # Rename the temp file to the right place, atomically
    RealtimeLogger.info("rename {} -> {}".format(temp_path, local_path))
    os.rename(temp_path, local_path)

    # Look at the file stats
    file_stats = os.stat(real_path)

    if (file_stats.st_uid == os.getuid() and
            file_stats.st_mode & stat.S_IWUSR):
        # We own this file and can write to it. We don't want the user
        # script messing up the original in the actual store (a risk if
        # the file was ever linked rather than copied).
        try:
            # Clear the user write bit, so the user can't accidentally
            # clobber the file in the actual store.
            os.chmod(real_path, file_stats.st_mode ^ stat.S_IWUSR)
        except OSError:
            # If something goes wrong here (like us not having permission
            # to change permissions), ignore it.
            pass
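# NOTE (illustrative): the temp-file-then-rename dance above is the standard
# POSIX recipe for making a copy appear atomic to concurrent readers. A
# distilled sketch of the pattern (atomic_copy is an illustrative name, not
# part of this module):
import os
import shutil
import tempfile

def atomic_copy(src, dst):
    # Create the temp file in dst's directory so the final os.rename stays
    # on one filesystem; cross-device renames fail with EXDEV on POSIX.
    handle, tmp = tempfile.mkstemp(dir=os.path.dirname(dst))
    os.close(handle)
    shutil.copy2(src, tmp)
    # os.rename atomically replaces dst within a single filesystem, so
    # readers see either the old file or the complete new one, never a
    # partial copy.
    os.rename(tmp, dst)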
def _runDeferredFunction(self, deferredFunction):
    """
    Run a deferred function (either our own or someone else's).

    Reports an error if it fails.
    """

    try:
        deferredFunction.invoke()
    except Exception as err:
        # Report this in real time, if enabled. Otherwise the only place
        # it ends up is the worker log.
        RealtimeLogger.error("Failed to run deferred function %s: %s",
                             repr(deferredFunction), str(err))
    except:
        # Also catch things that aren't Exception subclasses, where we
        # have no error object to show.
        RealtimeLogger.error("Failed to run deferred function %s",
                             repr(deferredFunction))
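# NOTE (illustrative): _runDeferredFunction only assumes its argument has an
# invoke() method. A hypothetical stand-in satisfying that interface (Toil
# has its own deferred-function machinery; this is just a sketch):
class DeferredFunction(object):
    """Wraps a callable and its arguments so it can be invoked later,
    e.g. during worker cleanup."""

    def __init__(self, function, *args, **kwargs):
        self.function = function
        self.args = args
        self.kwargs = kwargs

    def invoke(self):
        return self.function(*self.args, **self.kwargs)

    def __repr__(self):
        return 'DeferredFunction({})'.format(getattr(
            self.function, '__name__', repr(self.function)))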
def handle_reject(self, job, err):
    """
    Handle promise rejection.
    """

    Logger.error("Promise Rejected: {}".format(err))
    RealtimeLogger.error("Promise Rejected: {}".format(err))
    self.err = err

    # TODO: implement reject handlers.
    # Check if we have any reject handlers; if so, call them.
    # If not, raise an error that stops the workflow.
    raise err
def call_with_docker(self, job, args, work_dir, outfile, errfile,
                     check_output, tool_name):
    """
    Thin wrapper for docker_call that uses an internal lookup to figure
    out which Docker image to run. Only exposes the docker_call
    parameters used so far.

    Expects args as a list of lists. If the top-level list has more than
    one entry, the piping interface is used.

    Supports redirecting output to outfile, unless check_output is used,
    in which case output is captured and returned.
    """

    RealtimeLogger.info(truncate_msg("Docker Run: {}".format(
        " | ".join(" ".join(x) for x in args))))
    start_time = timeit.default_timer()

    # We use the first argument to look up the tool in the docker map,
    # but allow overriding this with the tool_name parameter
    name = tool_name if tool_name is not None else args[0][0]
    tool = self.docker_tool_map[name]

    # We keep an environment dict
    environment = {}
    # And an entry point override
    entrypoint = None
    # And a volumes dict for mounting
    volumes = {}
    # And a working directory override
    working_dir = None

    if name != 'Rscript':
        # vg uses TMPDIR for temporary files. This is particularly
        # important for GCSA, which makes massive files. We default to
        # keeping these in our working directory. Setting TMPDIR breaks
        # Rscript, though. TODO: investigate how general that actually is.
        environment['TMPDIR'] = '.'

    if name == 'Rscript':
        # The R Dockers sometimes default to installing packages in
        # non-writable directories. Make sure a writable directory that
        # exists is used.
        environment['R_LIBS'] = '/tmp'

    if name == 'vg':
        environment['VG_FULL_TRACEBACK'] = '1'

    # Ugly hack for Platypus, as the default container doesn't have the
    # executable on its PATH
    if tool == 'quay.io/biocontainers/platypus-variant:0.8.1.1--htslib1.7_1' and \
       args[0][0] == 'Platypus.py':
        args[0][0] = '/usr/local/share/platypus-variant-0.8.1.1-1/Platypus.py'

    # Force all Dockers to run sort in a consistent way
    environment['LC_ALL'] = 'C'

    # Set up our working directory mapping
    if work_dir is not None:
        volumes[os.path.abspath(work_dir)] = {'bind': '/data', 'mode': 'rw'}
        working_dir = '/data'

    if outfile is not None:
        # We need to send output to a file object
        assert not check_output

        # We can't just redirect stdout of the container from the API, so
        # we do something more complicated.

        # We need to populate an FD that spits out the container output.
        output_fd = None

        # We may be able to use a FIFO, or we may need a network
        # connection. FIFO sharing between host and container only works
        # on Linux.
        use_fifo = (platform.system() == 'Linux')

        if use_fifo:
            # On a Linux host we can just use a FIFO from the container
            # to the host.

            # Set up a FIFO to receive it
            fifo_dir = tempfile.mkdtemp()
            fifo_host_path = os.path.join(fifo_dir, 'stdout.fifo')
            os.mkfifo(fifo_host_path)

            # Mount the FIFO in the container. The container doesn't
            # actually have to have the mountpoint directory in its
            # filesystem.
            volumes[fifo_dir] = {'bind': '/control', 'mode': 'rw'}

            # Redirect the command output by tacking on another pipeline
            # stage
            parameters = args + [['dd', 'of=/control/stdout.fifo']]

            # Open the FIFO in nonblocking mode. See
            # <https://stackoverflow.com/a/5749687> and
            # <http://shallowsky.com/blog/programming/python-read-characters.html>
            output_fd = os.open(fifo_host_path, os.O_RDONLY | os.O_NONBLOCK)
        else:
            # On a Mac host we can't, because of
            # <https://github.com/docker/for-mac/issues/483>.
            # We need to go over the network instead.
            # Open an IPv4 TCP socket, since we know Docker uses IPv4 only
            listen_sock = socket.socket(socket.AF_INET)
            # Bind it to an OS-selected port on all interfaces, since we
            # can't determine the Docker interface
            # TODO: socket.INADDR_ANY ought to work here but is rejected
            # for being an int.
            listen_sock.bind(('', 0))
            # Start listening
            listen_sock.listen(1)
            # Get the port we were given
            listen_port = listen_sock.getsockname()[1]

            # Generate a random security cookie. Since we can't really
            # stop Internet randos from connecting to our socket, we bail
            # out on any connection that doesn't start with this cookie
            # and a newline.
            security_cookie = str(uuid.uuid4())

            # Redirect the command output to that port using Bash
            # networking. Your Docker needs to be 18.03+ to support
            # host.docker.internal, and your container needs to have bash
            # with networking support.
            parameters = args + [[
                'bash', '-c',
                ('exec 3<>/dev/tcp/host.docker.internal/{}; '
                 'cat <(echo {}) - >&3').format(listen_port, security_cookie)
            ]]

            RealtimeLogger.debug(
                "Listening on port {} for output from Docker "
                "container".format(listen_port))

            # We can't populate the FD until we accept, which we can't do
            # until the Docker comes up and is trying to connect.

        RealtimeLogger.debug("Final Docker command: {}".format(
            " | ".join(" ".join(x) for x in parameters)))

        # Start the container detached so we don't wait on it
        container = apiDockerCall(job, tool, parameters,
                                  volumes=volumes,
                                  working_dir=working_dir,
                                  entrypoint=entrypoint,
                                  environment=environment,
                                  detach=True)

        RealtimeLogger.debug("Asked for container {}".format(container.id))

        if not use_fifo:
            # Try to accept a connection from the container.
            # Make sure there's a timeout so we don't accept forever.
            listen_sock.settimeout(10)
            for attempt in range(3):
                connection_sock, remote_address = listen_sock.accept()

                RealtimeLogger.info(
                    "Got connection from {}".format(remote_address))

                # Set a 10 second timeout for the cookie
                connection_sock.settimeout(10)

                # Check the security cookie (recv gives us bytes, so
                # compare against the encoded cookie)
                received_cookie_and_newline = connection_sock.recv(
                    len(security_cookie) + 1)

                if received_cookie_and_newline != \
                        security_cookie.encode() + b"\n":
                    # Incorrect security cookie.
                    RealtimeLogger.warning(
                        "Received incorrect security cookie message "
                        "from {}".format(remote_address))
                    continue
                else:
                    # This is the container we are looking for.
                    # Go into nonblocking mode, which our read code expects
                    connection_sock.setblocking(False)
                    # Set the FD
                    output_fd = connection_sock.fileno()
                    break

            if output_fd is None:
                # We couldn't get hold of the Docker in time
                raise RuntimeError(
                    "Could not establish network connection for Docker "
                    "output!")

        # If the Docker container goes badly enough, it may not even open
        # the other end of the connection. So we can't just wait for it
        # to EOF before checking on the Docker.

        # Now read ought to throw if there is no data. But
        # <https://stackoverflow.com/q/38843278> and some testing suggest
        # that this doesn't happen, and it just looks like EOF. So we
        # will watch out for that.

        try:
            # Prevent leaking FDs.

            # If this is set, and there is no data in the pipe, decide
            # that no data is coming
            last_chance = False
            # If this is set, we have seen data in the pipe, so the other
            # end must have opened it and will eventually close it if it
            # doesn't run forever.
            saw_data = False

            while True:
                # While there still might be data in the pipe

                if output_fd is not None:
                    # Select on the pipe with a timeout, so we don't spin
                    # constantly waiting for data
                    can_read, can_write, had_error = select.select(
                        [output_fd], [], [output_fd], 10)

                if len(can_read) > 0 or len(had_error) > 0:
                    # There is data available or something else weird
                    # about our FIFO.
                    try:
                        # Do a nonblocking read. Since we checked with
                        # select, we never should get b"" unless there's
                        # an EOF.
                        data = os.read(output_fd, 4096)

                        if data == b"":
                            # We didn't throw and we got nothing, so it
                            # must be EOF.
                            RealtimeLogger.debug("Got EOF")
                            break
                    except OSError as err:
                        if err.errno in [errno.EAGAIN, errno.EWOULDBLOCK]:
                            # There is no data right now
                            data = None
                        else:
                            # Something else has gone wrong
                            raise err
                else:
                    # There is no data available. Don't even try to read.
                    # Treat it as if a read refused to block.
                    data = None

                if data is not None:
                    # Send our data to the outfile
                    outfile.write(data)
                    saw_data = True
                elif not saw_data:
                    # We timed out and there has never been any data.
                    # Maybe the container has died/never started?

                    if last_chance:
                        # The container has been dead for a while and
                        # nothing has arrived yet. Assume no data is
                        # coming.
                        RealtimeLogger.warning(
                            "Giving up on output from container {}".format(
                                container.id))
                        break

                    # Otherwise, check on it
                    container.reload()

                    if container.status not in ['created', 'restarting',
                                                'running', 'removing']:
                        # The container has stopped. So what are we doing
                        # waiting around for it?

                        # Wait one last time for any lingering data to
                        # percolate through the FIFO
                        time.sleep(10)
                        last_chance = True
                        continue
        finally:
            # No matter what happens, close our end of the connection
            os.close(output_fd)
            if not use_fifo:
                # Also close the listening socket
                listen_sock.close()

        # Now all our data has been sent.

        # Wait on the container and get its return code.
        return_code = container.wait()

        if use_fifo:
            # Clean up the FIFO files
            os.unlink(fifo_host_path)
            os.rmdir(fifo_dir)
    else:
        # No piping needed.
        if len(args) == 1:
            # Split off the first argument as the entrypoint (so we can
            # be oblivious as to whether that happens by default)
            parameters = [] if len(args[0]) == 1 else args[0][1:]
            entrypoint = args[0][0]
        else:
            # Can be left as-is for the piped interface, which takes a
            # list of args lists and doesn't worry about entrypoints
            # since everything goes through bash -c.
            # TODO: check that we have a bash entrypoint!
            parameters = args

        # Run the container and dump the logs if it fails.
        container = apiDockerCall(job, tool, parameters,
                                  volumes=volumes,
                                  working_dir=working_dir,
                                  entrypoint=entrypoint,
                                  environment=environment,
                                  detach=True)

        # Wait on the container and get its return code.
        return_code = container.wait()

    # When we get here, the container has been run, and stdout is either
    # in the file object we sent it to or in the Docker logs. stderr is
    # always in the Docker logs.

    if return_code != 0:
        # What were we doing?
command = " | ".join(" ".join(x) for x in args) # Dump logs RealtimeLogger.error( "Docker container for command {} failed with code {}".format( command, return_code)) RealtimeLogger.error("Dumping stderr...") for line in container.logs(stderr=True, stdout=False, stream=True): # Trim trailing \n RealtimeLogger.error(line[:-1]) if not check_output and outfile is None: # Dump stdout as well, since it's not something the caller wanted as data RealtimeLogger.error("Dumping stdout...") for line in container.logs(stderr=False, stdout=True, stream=True): # Trim trailing \n RealtimeLogger.error(line[:-1]) # Raise an error if it's not sucess raise RuntimeError( "Docker container for command {} failed with code {}".format( command, return_code)) elif errfile: # user wants stderr even if no crash for line in container.logs(stderr=True, stdout=False, stream=True): errfile.write(line) if check_output: # We need to collect the output. We grab it from Docker's handy on-disk buffer. # TODO: Bad Things can happen if the container logs too much. captured_stdout = container.logs(stderr=False, stdout=True) end_time = timeit.default_timer() run_time = end_time - start_time RealtimeLogger.info("Successfully docker ran {} in {} seconds.".format( " | ".join(" ".join(x) for x in args), run_time)) if outfile: outfile.flush() os.fsync(outfile.fileno()) if check_output is True: return captured_stdout
def run_calleval_plots(job, context, names, eval_results_dict, plot_sets):
    """
    Make and output calleval ROC plots.

    Takes a "names" list of all conditions. Condition names in the list
    (or in plot_sets) do not include an "-unclipped" tag; both clipped
    and unclipped plots are made if the data is available.

    Takes a nested dict by condition name, then clipping status
    ("clipped", "unclipped"), and then variant type ("snp", "non_snp",
    "weighted"). Eventual entries are ROC data file ids (.tsv.gz).

    plot_sets is a data structure of collections of conditions to plot
    against each other, as produced by parse_plot_sets.

    Returns a list of created plot file IDs.
    """

    # Make some ROC plots
    roc_plot_ids = []
    for roc_type in ['snp', 'non_snp', 'weighted']:
        # For each type of ROC to plot

        for mode in ['unclipped', 'clipped']:
            # For each clipping mode

            # What kind of ROC plot is it? It should be just the ROC
            # type, unless there is a clipped mode for this ROC and we
            # are the unclipped one.
            roc_kind = roc_type
            if mode == 'unclipped' and any('clipped' in eval_result for
                                           eval_result in eval_results_dict.values()):
                # We are the unclipped mode and there will be a clipped mode
                roc_kind += '-unclipped'

            # Get all the eval results for this mode, by condition name
            mode_results = {name: eval_result.get(mode)
                            for name, eval_result in eval_results_dict.items()}

            if None in mode_results.values():
                # We can't do this mode since it wasn't run
                continue

            # Extract all the stats file IDs for this ROC type, by
            # condition name. This dict is implicitly for the clipped or
            # unclipped mode, depending on which mode we are doing. The
            # condition name keys don't have clipping tags.
            roc_table_ids = {name: result.get(roc_type)
                             for name, result in mode_results.items()}

            for subset_number, plot_set in enumerate(plot_sets):
                # For each plot set specifier

                # Unpack the plot set
                plot_title, plot_conditions = plot_set

                if plot_conditions is None:
                    # Use all the conditions instead
                    plot_conditions = names

                # Add the ROC plot kind (snp, clipped, etc.) to the plot
                # title
                if plot_title is None:
                    plot_title = roc_kind
                else:
                    plot_title = '{} ({})'.format(plot_title, roc_kind)

                for name in plot_conditions:
                    if name not in names:
                        # Complain if any of the names in the subset
                        # isn't a condition that actually ran
                        message = 'Condition {} not found in list of available conditions {}'.format(
                            name, names)
                        RealtimeLogger.error(message)
                        raise RuntimeError(message)
                    if name not in roc_table_ids:
                        # Complain if any of the names in the subset
                        # lacks ROC data
                        message = 'Condition {} has no ROC data; data only available for {}'.format(
                            name, list(roc_table_ids.keys()))
                        RealtimeLogger.error(message)
                        raise RuntimeError(message)

                # Make a list of ROC table IDs for the subset, in
                # plot_conditions order
                subset_ids = [roc_table_ids[name] for name in plot_conditions]

                # Make the plot
                roc_plot_ids.append(job.addChildJobFn(
                    run_vcfeval_roc_plot, context, subset_ids,
                    names=plot_conditions, kind=roc_kind,
                    number=subset_number, title=plot_title).rv())

    return roc_plot_ids
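# NOTE (illustrative): the plot_sets structure consumed above, as produced
# by parse_plot_sets, pairs an optional title with an optional list of
# condition names. Hypothetical example values, with made-up conditions:
example_names = ['vg', 'vg-pe', 'bwa']
example_plot_sets = [
    # (title, conditions); conditions of None means "plot all conditions",
    # and a title of None means "use the ROC kind as the title".
    (None, None),
    ('vg variants', ['vg', 'vg-pe']),
]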
def run_bam_caller(job, context, fasta_file_id, bam_file_id, bam_idx_id,
                   sample_name, chrom, offset, out_name,
                   bam_caller, bam_caller_opts):
    """
    Run FreeBayes or Platypus to make a VCF.
    """

    assert bam_caller in ['freebayes', 'platypus']

    # Make a local work directory
    work_dir = job.fileStore.getLocalTempDir()

    # Download the input
    fasta_path = os.path.join(work_dir, 'ref.fa')
    bam_path = os.path.join(work_dir, 'alignment.bam')
    bam_idx_path = bam_path + '.bai'

    job.fileStore.readGlobalFile(fasta_file_id, fasta_path)
    job.fileStore.readGlobalFile(bam_file_id, bam_path)
    job.fileStore.readGlobalFile(bam_idx_id, bam_idx_path)

    # Output
    vcf_path = os.path.join(work_dir, '{}-raw.vcf'.format(out_name))

    if bam_caller == 'freebayes':
        fb_cmd = ['freebayes', '-f', os.path.basename(fasta_path),
                  os.path.basename(bam_path)]
        if bam_caller_opts:
            fb_cmd += bam_caller_opts
        if chrom:
            fb_cmd += ['-r', chrom]
        timer = TimeTracker('freebayes')
        with open(vcf_path, 'wb') as out_vcf:
            context.runner.call(job, fb_cmd, work_dir=work_dir,
                                outfile=out_vcf)
        timer.stop()
    elif bam_caller == 'platypus':
        context.runner.call(job, ['samtools', 'faidx', 'ref.fa'],
                            work_dir=work_dir)
        plat_cmd = ['Platypus.py', 'callVariants',
                    '--refFile', os.path.basename(fasta_path),
                    '--bamFiles', os.path.basename(bam_path),
                    '-o', os.path.basename(vcf_path)]
        if bam_caller_opts:
            plat_cmd += bam_caller_opts
        if chrom:
            plat_cmd += ['--regions', chrom]
        timer = TimeTracker('Platypus')
        context.runner.call(job, plat_cmd, work_dir=work_dir)
        timer.stop()
    else:
        assert False

    context.write_intermediate_file(job, vcf_path)

    vcf_fix_path = os.path.join(work_dir, '{}.vcf'.format(out_name))

    # Apply the offset and sample name
    vcf_reader = vcf.Reader(open(vcf_path))
    if sample_name:
        # FreeBayes always outputs "unknown" for the sample if the BAM
        # doesn't specify a name, and can't be convinced to do otherwise:
        # https://github.com/ekg/freebayes/issues/471
        # So we hack the vcf.Reader to think the sample names are what we
        # want them to be
        assert len(vcf_reader.samples) == 1
        # TODO: asserting that sample 0 is named "unknown" can fail. Why?
        RealtimeLogger.info('Correcting FreeBayes samples {} to [{}]'.format(
            vcf_reader.samples, sample_name))
        vcf_reader.samples = [sample_name]
        # Rebuild the secret sample index
        vcf_reader._sample_indexes = {sample_name: 0}
    vcf_writer = vcf.Writer(open(vcf_fix_path, 'w'), vcf_reader)

    # We insist that the caller produce at least one record, or we will
    # abort and dump files.
    have_records = False
    for record in vcf_reader:
        have_records = True
        if offset:
            record.POS += int(offset)
        vcf_writer.write_record(record)
    vcf_writer.flush()
    vcf_writer.close()

    if not have_records:
        # This is a problem. Stop here for debugging.
        RealtimeLogger.error('{} produced no calls. Dumping files...'.format(
            bam_caller))
        for dump_path in [fasta_path, bam_path, bam_idx_path]:
            if dump_path and os.path.isfile(dump_path):
                context.write_output_file(job, dump_path)
        raise RuntimeError('{} produced no calls'.format(bam_caller))

    context.runner.call(job, ['bgzip', os.path.basename(vcf_fix_path)],
                        work_dir=work_dir)
    context.runner.call(job, ['tabix', '-p', 'vcf',
                              os.path.basename(vcf_fix_path) + '.gz'],
                        work_dir=work_dir)

    return (context.write_output_file(job, vcf_fix_path + '.gz'),
            context.write_output_file(job, vcf_fix_path + '.gz.tbi'),
            timer)
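# NOTE (illustrative): run_bam_caller is a Toil job function, so it would be
# scheduled from a parent job roughly as sketched below. The function name,
# argument values, and file IDs are all hypothetical:
def example_schedule_bam_caller(parent_job, context, fasta_id, bam_id,
                                bam_idx_id):
    # Add the caller as a child job; Toil passes the child's own job
    # object as the first argument automatically.
    calls = parent_job.addChildJobFn(
        run_bam_caller, context, fasta_id, bam_id, bam_idx_id,
        'NA12878', 'chr20', 0, 'chr20', 'freebayes', None)
    # The child promises a (vcf_id, tbi_id, timer) tuple; rv() with an
    # index unpacks the corresponding element of the promised value.
    return calls.rv(0), calls.rv(1), calls.rv(2)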