import errno
import logging
import os
import platform
import select
import socket
import sys
import tempfile
import time
import timeit
import traceback
import uuid

from toil.lib.docker import apiDockerCall
from toil.realtimeLogger import RealtimeLogger


def download(filename):
    """
    Download each file
    """
    try:
        if (not options.overwrite) and out_store.exists(filename):
            # File exists. But make sure its size is correct.

            if not options.check_size:
                # Skip existing file. No need to check the length.
                RealtimeLogger.info("Skipped {}".format(filename))
                return

            out_size = out_store.get_size(filename)
            in_size = in_store.get_size(filename)
            if out_size != in_size:
                # Complain about size mismatch and copy
                RealtimeLogger.warning(
                    "Redownloading {}! Size was {} and not {}!".format(
                        filename, out_size, in_size))
            else:
                # Skip existing file
                RealtimeLogger.info("Skipped {}".format(filename))
                return

        # Make a temp file
        (handle, path) = tempfile.mkstemp(dir=job.fileStore.getLocalTempDir())
        os.close(handle)

        RealtimeLogger.debug("Download {}".format(filename))

        # Download
        in_store.read_input_file(filename, path)
        # Store
        out_store.write_output_file(path, filename)
        # Clean up
        os.unlink(path)

    except:
        # Put all exception text into an exception and raise that
        raise Exception("".join(
            traceback.format_exception(*sys.exc_info())))

    RealtimeLogger.info("Copied {}".format(filename))
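
# For illustration only: a minimal, self-contained sketch of the same
# skip-or-redownload policy download() applies, using plain local files.
# copy_if_needed and its parameters are hypothetical stand-ins for the
# in_store/out_store interface, not part of the pipeline.
def copy_if_needed(src, dst, overwrite=False, check_size=True):
    import shutil
    if not overwrite and os.path.exists(dst):
        if not check_size:
            return False  # Skip: destination exists; length not checked
        if os.path.getsize(dst) == os.path.getsize(src):
            return False  # Skip: destination exists with the right size
        # Size mismatch: fall through and copy again
    shutil.copyfile(src, dst)
    return True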
def call_with_docker(self, job, args, work_dir, outfile, errfile, check_output, tool_name):
    """
    Thin wrapper for docker_call that will use internal lookup to
    figure out the location of the docker file. Only exposes docker_call
    parameters used so far.

    Expects args as a list of lists. If the top-level list has more than
    one entry, the piping interface is used.

    Supports redirecting output to outfile, unless check_output is used,
    in which case output is captured and returned.
    """

    RealtimeLogger.info(
        truncate_msg("Docker Run: {}".format(" | ".join(" ".join(x) for x in args))))
    start_time = timeit.default_timer()

    # We use the first argument to look up the tool in the docker map,
    # but allow overriding of this with the tool_name parameter.
    name = tool_name if tool_name is not None else args[0][0]
    tool = self.docker_tool_map[name]

    # We keep an environment dict
    environment = {}

    # And an entry point override
    entrypoint = None

    # And a volumes dict for mounting
    volumes = {}

    # And a working directory override
    working_dir = None

    # Setting TMPDIR breaks Rscript. TODO: investigate how general this actually is.
    if name != 'Rscript':
        # vg uses TMPDIR for temporary files.
        # This is particularly important for gcsa, which makes massive files.
        # We will default to keeping these in our working directory.
        environment['TMPDIR'] = '.'

    if name == 'Rscript':
        # The R dockers sometimes default to installing packages in
        # non-writable directories. Make sure a writable directory which
        # exists is used.
        environment['R_LIBS'] = '/tmp'

    if name == 'vg':
        environment['VG_FULL_TRACEBACK'] = '1'

    # Ugly hack for Platypus, as the default container doesn't have the
    # executable in its PATH.
    if tool == 'quay.io/biocontainers/platypus-variant:0.8.1.1--htslib1.7_1' and \
       args[0][0] == 'Platypus.py':
        args[0][0] = '/usr/local/share/platypus-variant-0.8.1.1-1/Platypus.py'

    # Force all dockers to run sort in a consistent way
    environment['LC_ALL'] = 'C'

    # Set our working directory map
    if work_dir is not None:
        volumes[os.path.abspath(work_dir)] = {
            'bind': '/data',
            'mode': 'rw'
        }
        working_dir = '/data'

    if outfile is not None:
        # We need to send output to a file object
        assert (not check_output)

        # We can't just redirect stdout of the container from the API, so
        # we do something more complicated.

        # Now we need to populate an FD that spits out the container output.
        output_fd = None

        # We may be able to use a FIFO, or we may need a network connection.
        # FIFO sharing between host and container only works on Linux.
        use_fifo = (platform.system() == 'Linux')

        if use_fifo:
            # On a Linux host we can just use a FIFO from the container to the host.

            # Set up a FIFO to receive it
            fifo_dir = tempfile.mkdtemp()
            fifo_host_path = os.path.join(fifo_dir, 'stdout.fifo')
            os.mkfifo(fifo_host_path)

            # Mount the FIFO in the container.
            # The container doesn't actually have to have the mountpoint
            # directory in its filesystem.
            volumes[fifo_dir] = {'bind': '/control', 'mode': 'rw'}

            # Redirect the command output by tacking on another pipeline stage
            parameters = args + [['dd', 'of=/control/stdout.fifo']]

            # Open the FIFO in nonblocking mode. See
            # <https://stackoverflow.com/a/5749687> and
            # <http://shallowsky.com/blog/programming/python-read-characters.html>
            output_fd = os.open(fifo_host_path, os.O_RDONLY | os.O_NONBLOCK)
        else:
            # On a Mac host we can't, because of
            # https://github.com/docker/for-mac/issues/483
            # We need to go over the network instead.
            # Open an IPv4 TCP socket, since we know Docker uses IPv4 only
            listen_sock = socket.socket(socket.AF_INET)
            # Bind it to an OS-selected port on all interfaces, since we
            # can't determine the Docker interface.
            # TODO: socket.INADDR_ANY ought to work here but is rejected
            # for being an int.
            listen_sock.bind(('', 0))

            # Start listening
            listen_sock.listen(1)

            # Get the port we were given
            listen_port = listen_sock.getsockname()[1]

            # Generate a random security cookie. Since we can't really stop
            # Internet randos from connecting to our socket, we bail out on
            # any connection that doesn't start with this cookie and a newline.
            security_cookie = str(uuid.uuid4())

            # Redirect the command output to that port using Bash networking.
            # Your Docker needs to be 18.03+ to support host.docker.internal.
            # Your container needs to have bash with networking support.
            parameters = args + [[
                'bash', '-c',
                'exec 3<>/dev/tcp/host.docker.internal/{}; cat <(echo {}) - >&3'
                .format(listen_port, security_cookie)
            ]]

            RealtimeLogger.debug(
                "Listening on port {} for output from Docker container".format(
                    listen_port))

            # We can't populate the FD until we accept, which we can't do
            # until the Docker comes up and is trying to connect.

        RealtimeLogger.debug("Final Docker command: {}".format(
            " | ".join(" ".join(x) for x in parameters)))

        # Start the container detached so we don't wait on it
        container = apiDockerCall(job, tool, parameters,
                                  volumes=volumes,
                                  working_dir=working_dir,
                                  entrypoint=entrypoint,
                                  environment=environment,
                                  detach=True)

        RealtimeLogger.debug("Asked for container {}".format(container.id))

        if not use_fifo:
            # Try and accept a connection from the container.
            # Make sure there's a timeout so we don't accept forever.
            listen_sock.settimeout(10)
            for attempt in range(3):
                connection_sock, remote_address = listen_sock.accept()

                RealtimeLogger.info(
                    "Got connection from {}".format(remote_address))

                # Set a 10 second timeout for the cookie
                connection_sock.settimeout(10)

                # Check the security cookie. recv gives bytes, so encode
                # the expected cookie for the comparison.
                received_cookie_and_newline = connection_sock.recv(
                    len(security_cookie) + 1)

                if received_cookie_and_newline != (security_cookie + "\n").encode('utf8'):
                    # Incorrect security cookie.
                    RealtimeLogger.warning(
                        "Received incorrect security cookie message from {}"
                        .format(remote_address))
                    continue
                else:
                    # This is the container we are looking for.
                    # Go into nonblocking mode, which our read code expects.
                    connection_sock.setblocking(False)

                    # Set the FD
                    output_fd = connection_sock.fileno()
                    break

            if output_fd is None:
                # We can't get ahold of the Docker in time
                raise RuntimeError(
                    "Could not establish network connection for Docker output!")

        # If the Docker container goes badly enough, it may not even open
        # the other end of the connection. So we can't just wait for it to
        # EOF before checking on the Docker.

        # Now read ought to throw if there is no data. But
        # <https://stackoverflow.com/q/38843278> and some testing suggest
        # that this doesn't happen, and it just looks like EOF. So we will
        # watch out for that.

        try:
            # Prevent leaking FDs

            # If this is set, and there is no data in the pipe, decide that
            # no data is coming
            last_chance = False
            # If this is set, we have seen data in the pipe, so the other
            # end must have opened it and will eventually close it if it
            # doesn't run forever.
            saw_data = False

            while True:
                # While there still might be data in the pipe

                if output_fd is not None:
                    # Select on the pipe with a timeout, so we don't spin
                    # constantly waiting for data.
                    can_read, can_write, had_error = select.select(
                        [output_fd], [], [output_fd], 10)

                    if len(can_read) > 0 or len(had_error) > 0:
                        # There is data available or something else weird
                        # about our FIFO.

                        try:
                            # Do a nonblocking read. Since we checked with
                            # select we never should get b"" unless there's
                            # an EOF.
                            data = os.read(output_fd, 4096)

                            if data == b"":
                                # We didn't throw and we got nothing, so it
                                # must be EOF.
                                RealtimeLogger.debug("Got EOF")
                                break

                        except OSError as err:
                            if err.errno in [errno.EAGAIN, errno.EWOULDBLOCK]:
                                # There is no data right now
                                data = None
                            else:
                                # Something else has gone wrong
                                raise err

                    else:
                        # There is no data available. Don't even try to
                        # read. Treat it as if a read refused to block.
                        data = None

                    if data is not None:
                        # Send our data to the outfile
                        outfile.write(data)
                        saw_data = True
                    elif not saw_data:
                        # We timed out and there has never been any data.
                        # Maybe the container has died/never started?

                        if last_chance:
                            # The container has been dead for a while and
                            # nothing has arrived yet. Assume no data is
                            # coming.
                            RealtimeLogger.warning(
                                "Giving up on output from container {}".format(
                                    container.id))
                            break

                        # Otherwise, check on it
                        container.reload()

                        if container.status not in [
                                'created', 'restarting', 'running', 'removing'
                        ]:
                            # The container has stopped. So what are we
                            # doing waiting around for it?

                            # Wait one last time for any lingering data to
                            # percolate through the FIFO
                            time.sleep(10)
                            last_chance = True
                            continue
        finally:
            # No matter what happens, close our end of the connection
            os.close(output_fd)
            if not use_fifo:
                # Also close the listening socket
                listen_sock.close()

        # Now our data is all sent.
        # Wait on the container and get its return code.
        return_code = container.wait()

        if use_fifo:
            # Clean up the FIFO files
            os.unlink(fifo_host_path)
            os.rmdir(fifo_dir)

    else:
        # No piping needed.
        if len(args) == 1:
            # Split off the first argument as the entrypoint (so we can be
            # oblivious as to whether that happens by default).
            parameters = [] if len(args[0]) == 1 else args[0][1:]
            entrypoint = args[0][0]
        else:
            # Can leave as is for the piped interface, which takes a list
            # of args lists and doesn't worry about entrypoints, since
            # everything goes through bash -c.
            # TODO: check we have a bash entrypoint!
            parameters = args

        # Run the container and dump the logs if it fails.
        container = apiDockerCall(job, tool, parameters,
                                  volumes=volumes,
                                  working_dir=working_dir,
                                  entrypoint=entrypoint,
                                  environment=environment,
                                  detach=True)

        # Wait on the container and get its return code.
        return_code = container.wait()

    # When we get here, the container has been run, and stdout is either
    # in the file object we sent it to or in the Docker logs.
    # stderr is always in the Docker logs.

    if return_code != 0:
        # What were we doing?
        command = " | ".join(" ".join(x) for x in args)

        # Dump logs
        RealtimeLogger.error(
            "Docker container for command {} failed with code {}".format(
                command, return_code))
        RealtimeLogger.error("Dumping stderr...")
        for line in container.logs(stderr=True, stdout=False, stream=True):
            # Trim trailing \n
            RealtimeLogger.error(line[:-1])

        if not check_output and outfile is None:
            # Dump stdout as well, since it's not something the caller
            # wanted as data.
            RealtimeLogger.error("Dumping stdout...")
            for line in container.logs(stderr=False, stdout=True, stream=True):
                # Trim trailing \n
                RealtimeLogger.error(line[:-1])

        # Raise an error since it's not success
        raise RuntimeError(
            "Docker container for command {} failed with code {}".format(
                command, return_code))
    elif errfile:
        # The user wants stderr even if there was no crash
        for line in container.logs(stderr=True, stdout=False, stream=True):
            errfile.write(line)

    if check_output:
        # We need to collect the output. We grab it from Docker's handy
        # on-disk buffer.
        # TODO: Bad Things can happen if the container logs too much.
        captured_stdout = container.logs(stderr=False, stdout=True)

    end_time = timeit.default_timer()
    run_time = end_time - start_time
    RealtimeLogger.info("Successfully ran {} in Docker in {} seconds.".format(
        " | ".join(" ".join(x) for x in args), run_time))

    if outfile:
        outfile.flush()
        os.fsync(outfile.fileno())

    if check_output is True:
        return captured_stdout
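
# For illustration only: a minimal, self-contained sketch of the
# nonblocking-FIFO capture pattern call_with_docker() uses on Linux hosts,
# with a plain subprocess standing in for the Docker container. The
# saw_data flag mirrors the logic above: an empty read from a FIFO can
# mean "no writer has opened it yet" as well as real EOF, so we only
# treat it as EOF once data has been seen or the writer has exited.
def fifo_capture_sketch():
    import subprocess

    fifo_dir = tempfile.mkdtemp()
    fifo_path = os.path.join(fifo_dir, 'stdout.fifo')
    os.mkfifo(fifo_path)

    # Stand-in for the container: any process that writes to the FIFO.
    writer = subprocess.Popen(['bash', '-c', 'echo hello > ' + fifo_path])

    # Open our end in nonblocking mode so open() doesn't wait for a writer.
    fd = os.open(fifo_path, os.O_RDONLY | os.O_NONBLOCK)
    saw_data = False
    try:
        while True:
            can_read, _, _ = select.select([fd], [], [fd], 10)
            if not can_read:
                continue
            try:
                data = os.read(fd, 4096)
            except OSError as err:
                if err.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                    continue  # No data right now
                raise
            if data == b"":
                # Real EOF, or no writer has opened the FIFO yet.
                if saw_data or writer.poll() is not None:
                    break
                time.sleep(0.1)
                continue
            saw_data = True
            sys.stdout.write(data.decode())
    finally:
        os.close(fd)
        writer.wait()
        os.unlink(fifo_path)
        os.rmdir(fifo_dir)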
def run_chunk_alignment(job, context, gam_input_reads, bam_input_reads, sample_name,
                        interleaved, mapper, chunk_filename_ids, chunk_id,
                        indexes, bam_output=False, gbwt_penalty=None,
                        always_check_population=True, validate=False,
                        fasta_dict_id=None):
    """
    Align a chunk of reads.

    Takes a dict from index type to index file ID. Some indexes are extra
    and specifying them will change mapping behavior.
    """

    RealtimeLogger.info("Starting {} alignment on {} chunk {}".format(
        mapper, sample_name, chunk_id))

    # How long did the alignment take to run, in seconds?
    run_time = None

    # Define work directory for docker calls
    work_dir = job.fileStore.getLocalTempDir()

    # Download local input files from the remote storage container
    graph_file = os.path.join(work_dir, "graph.vg")

    # Work out what index files we need
    index_files = {}
    index_files['xg'] = graph_file + ".xg"
    if mapper == 'map' or mapper == 'mpmap':
        index_files['gcsa'] = graph_file + ".gcsa"
        index_files['lcp'] = index_files['gcsa'] + ".lcp"

        if 'gbwt' in indexes:
            # We have a GBWT haplotype index available.
            index_files['gbwt'] = graph_file + ".gbwt"

    if mapper == 'mpmap':
        if 'snarls' in indexes:
            # mpmap knows how to use the snarls, and we have them, so we
            # should use them.

            # Note that passing them will affect mapping, if using multiple
            # tracebacks. Since we only run single path mode, if multiple
            # tracebacks aren't used, mpmap will ignore the snarls.
            index_files['snarls'] = graph_file + ".snarls"

    if mapper == 'giraffe':
        index_files['minimizer'] = graph_file + ".min"
        index_files['distance'] = graph_file + ".dist"
        index_files['gbwt'] = graph_file + ".gbwt"
        if 'ggbwt' in indexes:
            index_files['ggbwt'] = graph_file + ".gg"

    for index_type in list(index_files.keys()):
        # Download each index file
        job.fileStore.readGlobalFile(indexes[index_type],
                                     index_files[index_type])

    # We need the sample reads (fastq(s) or gam) for alignment
    reads_files = []
    reads_ext = 'gam' if gam_input_reads else 'bam' if bam_input_reads else 'fq.gz'
    for j, chunk_filename_id in enumerate(chunk_filename_ids):
        reads_file = os.path.join(work_dir, 'reads_chunk_{}_{}.{}'.format(
            chunk_id, j, reads_ext))
        job.fileStore.readGlobalFile(chunk_filename_id, reads_file)
        reads_files.append(reads_file)

    # And a temp file for our aligner output
    if bam_output is False:
        output_file = os.path.join(work_dir, "{}_{}.gam".format(sample_name, chunk_id))
    else:
        output_file = os.path.join(work_dir, "{}_{}.bam".format(sample_name, chunk_id))

    # Open the file stream for writing
    with open(output_file, 'wb') as alignment_file:

        # Start the aligner and have it write to the file

        # Plan out what to run
        vg_parts = []

        if mapper == 'mpmap':
            vg_parts += ['vg', 'mpmap']
            vg_parts += context.config.mpmap_opts
            if ('-F' not in vg_parts and '--output-fmt' not in vg_parts) or \
               'GAM' not in vg_parts:
                RealtimeLogger.warning(
                    'Adding --output-fmt GAM to mpmap options as only GAM '
                    'output is supported')
                vg_parts += ['--output-fmt', 'GAM']
        elif mapper == 'map':
            vg_parts += ['vg', 'map']
            vg_parts += context.config.map_opts
        elif mapper == 'giraffe':
            vg_parts += ['vg', 'giraffe']
            vg_parts += context.config.giraffe_opts
        else:
            raise RuntimeError('Unimplemented mapper "{}"'.format(mapper))

        for reads_file in reads_files:
            input_flag = '-G' if gam_input_reads else '-b' if bam_input_reads else '-f'
            vg_parts += [input_flag, os.path.basename(reads_file)]

        vg_parts += ['-t', str(context.config.alignment_cores)]
        vg_parts += ['-R', 'SM:{}'.format(sample_name)]

        # Override the -i flag in args with the --interleaved command-line flag
        if (interleaved is True and '-i' not in vg_parts
                and '--interleaved' not in vg_parts):
            vg_parts += ['-i']
        elif interleaved is False and '-i' in vg_parts:
            del vg_parts[vg_parts.index('-i')]
        if interleaved is False and '--interleaved' in vg_parts:
            del vg_parts[vg_parts.index('--interleaved')]

        # Override the --surject-to option
        if bam_output is True and '--surject-to' not in vg_parts and mapper != 'giraffe':
            vg_parts += ['--surject-to', 'bam']
        elif bam_output is True and '--output-format' not in vg_parts and mapper == 'giraffe':
            vg_parts += ['--output-format', 'BAM']
        elif bam_output is False and '--surject-to' in vg_parts:
            # Remove the flag and its value
            sidx = vg_parts.index('--surject-to')
            del vg_parts[sidx]
            del vg_parts[sidx]

        # Turn indexes into options
        type_to_option = {
            'gbwt': '--gbwt-name',
            'xg': '-x',
            'gcsa': '-g',
            'lcp': None,
            'distance': '-d',
            'minimizer': '-m',
            'ggbwt': '--graph-name',
            'snarls': '--snarls'
        }
        for index_type, index_file in list(index_files.items()):
            if type_to_option[index_type] is not None:
                vg_parts += [type_to_option[index_type],
                             os.path.basename(index_file)]

        if 'gbwt' in index_files:
            # We may have a GBWT recombination rate/penalty override
            if gbwt_penalty is not None:
                # We have a recombination penalty value to apply
                if '--recombination-penalty' in vg_parts:
                    # Make sure to strip out the penalty if it is in args already
                    sidx = vg_parts.index('--recombination-penalty')
                    del vg_parts[sidx]
                    del vg_parts[sidx]

                # Both map and mpmap take this option
                vg_parts += ['--recombination-penalty', str(gbwt_penalty)]

            if mapper == 'mpmap' and always_check_population:
                # Always try to population-score even unambiguous reads.
                # mpmap can do this.
                vg_parts += ['--always-check-population']

        if fasta_dict_id is not None and bam_output is True:
            fasta_dict_file = os.path.join(work_dir, 'fasta.dict')
            job.fileStore.readGlobalFile(fasta_dict_id, fasta_dict_file)
            vg_parts += ['--ref-paths', os.path.basename(fasta_dict_file)]

        RealtimeLogger.info(
            "Running VG for {} against {}: {}".format(
                sample_name, graph_file, " ".join(vg_parts)))

        # Mark when we start the alignment
        start_time = timeit.default_timer()
        command = vg_parts
        try:
            context.runner.call(job, command, work_dir=work_dir,
                                outfile=alignment_file)
            end_time = timeit.default_timer()
            if validate:
                alignment_file.flush()
                context.runner.call(job, ['vg', 'validate',
                                          os.path.basename(index_files['xg']),
                                          '--gam', os.path.basename(output_file)],
                                    work_dir=work_dir)
        except:
            # Dump everything we need to replicate the alignment
            end_time = timeit.default_timer()
            logging.error("Mapping failed. Dumping files.")

            for index_file in list(index_files.values()):
                context.write_output_file(job, index_file)

            for reads_file in reads_files:
                context.write_output_file(job, reads_file)

            raise

    # Mark when it's done
    run_time = end_time - start_time

    paired_end = '-i' in vg_parts or '--interleaved' in vg_parts or \
        len(chunk_filename_ids) > 1
    RealtimeLogger.info("Aligned {}. Process took {} seconds with {} vg-{}".format(
        output_file, run_time, 'paired-end' if paired_end else 'single-end', mapper))

    if 'id_ranges' in indexes and bam_output is False:
        # Break GAM into multiple chunks at the end. So we need the file
        # defining those chunks.
        id_ranges_file = os.path.join(work_dir, 'id_ranges.tsv')
        job.fileStore.readGlobalFile(indexes['id_ranges'], id_ranges_file)

        # Chunk the gam up by chromosome
        gam_chunks = split_gam_into_chroms(job, work_dir, context,
                                           index_files['xg'], id_ranges_file,
                                           output_file)

        # Write gam_chunks to store
        gam_chunk_ids = []
        for gam_chunk in gam_chunks:
            gam_chunk_ids.append(context.write_intermediate_file(job, gam_chunk))

        return gam_chunk_ids, run_time
    else:
        # We can just report one chunk of everything
        return [context.write_intermediate_file(job, output_file)], run_time
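
# For illustration only: a hedged sketch of how a parent Toil job might fan
# run_chunk_alignment() out over read chunks and collect its promised
# ([chunk file IDs], run_time) results. run_fanout, chunk_id_lists, and the
# argument values are hypothetical; the project's real wiring may differ.
def run_fanout(job, context, chunk_id_lists, indexes, sample_name):
    chunk_results = []
    for chunk_id, chunk_filename_ids in enumerate(chunk_id_lists):
        child = job.addChildJobFn(run_chunk_alignment, context,
                                  False,        # gam_input_reads
                                  False,        # bam_input_reads
                                  sample_name,
                                  True,         # interleaved
                                  'giraffe',    # mapper
                                  chunk_filename_ids, chunk_id, indexes,
                                  cores=context.config.alignment_cores)
        # Each .rv() is a promise resolved when the child job finishes
        chunk_results.append(child.rv())
    return chunk_results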