def run_outlier_model(job, name, sample_id, background_id, gene_id, args):
    # Process names with flexible extensions
    sample_ext = os.path.splitext(args.sample)[1]
    sample_name = 'sample_matrix{}'.format(sample_ext)
    bg_ext = os.path.splitext(args.background)[1]
    bg_name = 'bg_matrix{}'.format(bg_ext)

    # Read in input file from jobStore
    job.fileStore.readGlobalFile(sample_id,
                                 os.path.join(job.tempDir, sample_name))
    job.fileStore.readGlobalFile(background_id,
                                 os.path.join(job.tempDir, bg_name))
    if gene_id:
        job.fileStore.readGlobalFile(
            gene_id, os.path.join(job.tempDir, 'gene-list.txt'))

    # Define parameters and call Docker container
    parameters = [
        '--sample', '/data/{}'.format(sample_name), '--background',
        '/data/{}'.format(bg_name), '--name', name, '--out-dir', '/data',
        '--group', args.group, '--col-skip',
        str(args.col_skip), '--num-backgrounds',
        str(args.num_backgrounds), '--max-genes',
        str(args.max_genes), '--num-training-genes',
        str(args.num_training_genes)
    ]
    if gene_id:
        parameters.extend(['--gene-list', '/data/gene-list.txt'])
    image = 'jvivian/bayesian-outlier-model:1.0a4'
    apiDockerCall(job=job,
                  image=image,
                  working_dir=job.tempDir,
                  parameters=parameters,
                  user='******')
    _fixPermissions(tool=image, workDir=job.tempDir)

    out_dir = os.path.join(job.tempDir, name)
    shutil.move(out_dir, args.out_dir)
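A minimal sketch of how this job function might be launched from a Toil workflow, assuming `args` is this script's argparse namespace; the jobstore path and sample name are placeholders:

from toil.common import Toil
from toil.job import Job

def main(args):
    options = Job.Runner.getDefaultOptions('./jobstore')  # placeholder jobstore
    with Toil(options) as workflow:
        # Import inputs into the jobstore so workers can read them back out
        sample_id = workflow.importFile('file://' + os.path.abspath(args.sample))
        background_id = workflow.importFile('file://' + os.path.abspath(args.background))
        gene_id = (workflow.importFile('file://' + os.path.abspath(args.gene_list))
                   if args.gene_list else None)
        root = Job.wrapJobFn(run_outlier_model, 'sample-1', sample_id,
                             background_id, gene_id, args)
        workflow.start(root)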
Example #2
def _testDockerCleanFn(job,
                       working_dir,
                       detached=None,
                       rm=None,
                       deferParam=None,
                       containerName=None):
    """
    Test function for docker_clean.  Runs a container with the given flags and
    then dies, leaving behind a zombie container.
    :param toil.job.Job job: job
    :param working_dir: See `work_dir=` in :func:`dockerCall`
    :param bool rm: See `rm=` in :func:`dockerCall`
    :param bool detached: See `detached=` in :func:`dockerCall`
    :param int deferParam: See `deferParam=` in :func:`dockerCall`
    :param str containerName: See `container_name=` in :func:`dockerCall`
    """
    def killSelf():
        test_file = os.path.join(working_dir, 'test.txt')
        # Kill the worker once we are sure the docker container is started
        while not os.path.exists(test_file):
            logger.debug('Waiting on the file created by spooky_container.')
            time.sleep(1)
        # By the time we reach here, we are sure the container is running.
        time.sleep(1)
        os.kill(os.getpid(), signal.SIGKILL)

    t = Thread(target=killSelf)
    # Make it a daemon thread so that thread failure doesn't hang tests.
    t.daemon = True
    t.start()
    apiDockerCall(job,
                  image='quay.io/ucsc_cgl/spooky_test',
                  working_dir=working_dir,
                  deferParam=deferParam,
                  containerName=containerName,
                  detach=detached,
                  remove=rm,
                  privileged=True)
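A hedged sketch of scheduling this test helper; the deferParam constant and the options object are assumptions based on Toil's docker helpers:

from toil.common import Toil
from toil.job import Job
from toil.lib.docker import FORGO

def launch_clean_test(options, working_dir):
    root = Job.wrapJobFn(_testDockerCleanFn,
                         working_dir,
                         detached=False,
                         rm=True,
                         deferParam=FORGO,
                         containerName='spooky_test_container')  # illustrative name
    with Toil(options) as workflow:
        workflow.start(root)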
Example #3
    def run(self, fileStore):
        fileStore.logToMaster("SalmonIndex")
        tempDir = fileStore.getLocalTempDir()

        try:
            os.makedirs(os.path.join(tempDir, 'execution'))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        fpath = fileStore.readGlobalFile(self.ref_txome,
                                         userPath=os.path.join(
                                             tempDir,
                                             os.path.basename(self.ref_txome)))

        # os.makedirs() returns None, so build the path first, then create it
        index_fpath = os.path.join(tempDir, 'execution', 'index')
        os.makedirs(index_fpath)

        cmd = f'salmon index -t {fpath} --index "{index_fpath}"; tar -cvzf index.tar.gz {index_fpath}'

        generate_docker_bashscript_file(temp_dir=tempDir,
                                        docker_dir=tempDir,
                                        globs=[],
                                        cmd=cmd,
                                        job_name='SalmonIndex')

        # apiDockerCall() with demux=True returns a tuple of bytes objects (stdout, stderr).
        stdout, stderr = \
            apiDockerCall(self,
                          image='combinelab/salmon',
                          working_dir=tempDir,
                          parameters=[os.path.join(tempDir, "SalmonIndex_script.sh")],
                          entrypoint="/bin/bash",
                          stderr=True,
                          demux=True,
                          volumes={tempDir: {"bind": tempDir}})

        with open(os.path.join(current_working_dir, 'SalmonIndex.log'),
                  'wb') as f:
            if stdout:
                f.write(stdout)
            if stderr:
                f.write(stderr)

        output_file_id = fileStore.writeGlobalFile(
            os.path.join(tempDir, 'execution', 'index.tar.gz'))
        index_output_path = os.path.join(os.path.abspath(current_working_dir),
                                         'index.tar.gz')
        fileStore.exportFile(output_file_id, f'file://{index_output_path}')

        return {"index": output_file_id}
Example #4
def run_pdb2pqr(pdb_file, whitespace=True, ff="amber", parameters=None, work_dir=None, docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    full_pdb_path = pdb_file
    pdb_path = os.path.basename(full_pdb_path)
    pqr_file = "{}.pqr".format(pdb_path)

    _parameters = list(parameters) if isinstance(parameters, (list, tuple)) else []
    _parameters.append("--ff={}".format(ff))
    if whitespace:
        _parameters.append("--whitespace")

    if docker and apiDockerCall is not None and job is not None:
        #Docker can only read from work_dir
        if not os.path.abspath(os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)

        _parameters += ["/data/{}".format(pdb_path), "/data/{}.pqr".format(pdb_path)]
        try:
            output = apiDockerCall(job,
                                   image='edraizen/pdb2pqr:latest',
                                   working_dir=work_dir,
                                   parameters=_parameters)
            pqr_file = os.path.join(work_dir, pqr_file)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise
            # Alternatively, fall back to a local run:
            # return run_pdb2pqr(pdb_file, whitespace=whitespace, ff=ff,
            #     parameters=parameters, work_dir=work_dir, docker=False)

    else:
        pqr_file = os.path.join(work_dir, pqr_file)
        command = ["/usr/share/pdb2pqr/pdb2pqr.py"]+parameters
        command += [full_pdb_path, pqr_file]

        try:
            with silence_stdout(), silence_stderr():
                subprocess.call(command)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise

    assert os.path.isfile(pqr_file)
    return pqr_file
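A hedged usage sketch: run_pdb2pqr is expected to be called from inside a Toil job function so a job object is available and the Docker branch is taken; the input path here is a placeholder:

def pqr_job(job):
    pqr = run_pdb2pqr('/inputs/structure.pdb',  # placeholder path
                      whitespace=True,
                      ff='amber',
                      work_dir=job.tempDir,
                      docker=True,
                      job=job)
    job.fileStore.logToMaster('PQR written to {}'.format(pqr))
    return pqr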
Example #5
    def run(self, fileStore):
        fileStore.logToMaster("FastQCtwo")
        tempDir = fileStore.getLocalTempDir()

        try:
            os.makedirs(os.path.join(tempDir, 'execution'))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        fpath = fileStore.readGlobalFile(self.reads,
                                         userPath=os.path.join(
                                             tempDir,
                                             os.path.basename(self.reads)))
        cmd = f'zcat "{fpath}" | fastqc stdin:readstwo'

        generate_docker_bashscript_file(temp_dir=tempDir,
                                        docker_dir=tempDir,
                                        globs=[],
                                        cmd=cmd,
                                        job_name='FastQCtwo')

        # apiDockerCall() with demux=True returns a tuple of bytes objects (stdout, stderr).
        stdout, stderr = \
            apiDockerCall(self,
                          image='pegi3s/fastqc',
                          working_dir=tempDir,
                          parameters=[os.path.join(tempDir, "FastQCtwo_script.sh")],
                          entrypoint="/bin/bash",
                          stderr=True,
                          demux=True,
                          volumes={tempDir: {"bind": tempDir}})

        with open(os.path.join(current_working_dir, 'FastQCtwo.log'),
                  'wb') as f:
            if stdout:
                f.write(stdout)
            if stderr:
                f.write(stderr)

        output_file_id = fileStore.writeGlobalFile(
            os.path.join(tempDir, 'execution', 'readstwo_fastqc.html'))
        fastqc_output_path = os.path.join(os.path.abspath(current_working_dir),
                                          'readstwo_fastqc.html')
        fileStore.exportFile(output_file_id, f'file://{fastqc_output_path}')

        return {"fastqc": output_file_id}
Example #6
def analyze_haddock(analysis_dir, docker=True, job=None):
    if docker and apiDockerCall is not None and job is not None:
        oldcwd = os.getcwd()
        os.chdir(analysis_dir)
        try:
            out = apiDockerCall(
                job,
                'edraizen/haddock:latest',
                entrypoint="csh",
                working_dir="/data",
                volumes={analysis_dir: {
                    "bind": "/data",
                    "mode": "rw"
                }},
                parameters=["/opt/haddock2.2/tools/ana_structures.csh"],
            )
            job.log(out)

        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            if "RMSD: Undefined variable" not in str(e):
                raise
        os.chdir(oldcwd)
    else:
        try:
            oldcwd = os.getcwd()
            os.chdir(analysis_dir)
            out = subprocess.check_output(
                ["csh", "/opt/haddock2.2/tools/ana_structures.csh"])
            os.chdir(oldcwd)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            if "RMSD: Undefined variable" not in str(e):
                raise
            #raise RuntimeError("APBS failed becuase it was not found in path: {}".format(e))

    job.log("ANALYSIS_DIR: {}".format(os.listdir(analysis_dir)))
    results = pd.read_table(os.path.join(analysis_dir,
                                         "structures_haddock-sorted.stat"),
                            nrows=1,
                            delim_whitespace=True)
    results.columns = ["haddock_" + c for c in results.columns]
    results = results.iloc[0]

    return results
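A hedged usage sketch, assuming a Toil job and a completed HADDOCK run directory (path illustrative):

def analysis_job(job):
    results = analyze_haddock('/scratch/haddock_run1', docker=True, job=job)
    # results is a pandas Series whose labels carry a "haddock_" prefix
    job.log("HADDOCK stats: {}".format(results.to_dict()))
    return results.to_dict()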
Example #7
def run_cx(pdb_path, work_dir=None, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    if apiDockerCall is not None and job is not None:
        if not os.path.abspath(
                os.path.dirname(pdb_path)) == os.path.abspath(work_dir):
            shutil.copy(pdb_path, work_dir)
        parameters = [os.path.basename(pdb_path)]
        print "BEFORE", os.listdir(work_dir)
        with open(pdb_path) as f:
            print "IT EXISTS", next(f)
        cx_f = apiDockerCall(
            job,
            image='edraizen/cx:latest',
            working_dir="/data",
            volumes={work_dir: {
                "bind": "/data",
                "mode": "rw"
            }},
            parameters=parameters)
        print "DONE RUNINGF CX", cx_f
        # cx_f = open(os.path.join(work_dir, os.path.basename(pdb_path)+".cx"))
        # delCx = os.path.join(work_dir, os.path.basename(pdb_path)+".cx")
    else:
        with open(pdb_path) as f:
            # Decode so the line parsing below works on str under Python 3
            cx_f = subprocess.check_output("cx", stdin=f).decode("utf-8")
    cx_f = iter(cx_f.splitlines())

    #Read in b-factor from PDB file. CX sometimes introduces invalid characters
    #so the Bio.PDB parser cannot be used
    result = {}
    for l in cx_f:
        if l[:6].strip() in ["ATOM", "HETATM"]:
            try:
                result[int(l[6:11].strip())] = float(l[60:66].strip())
            except ValueError:
                pass

    # if delCx is not None:
    #     cx_f.close()
    #     os.remove(delCx)

    return result
Example #8
def docker_call(job, config, work_dir, params, image, tag, outfile=None):
    tagged_image = "{}:{}".format(image, tag)
    if DOCKER_LOGGING:
        log(job,
            "Running '{}' with parameters: {}".format(tagged_image, params),
            config.uuid, 'docker')
    if outfile is None:
        output = apiDockerCall(job,
                               tagged_image,
                               working_dir=work_dir,
                               parameters=params,
                               user="******")
    else:
        output = dockerCall(job,
                            tool=tagged_image,
                            workDir=work_dir,
                            parameters=params,
                            outfile=outfile)
    return output
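A short usage sketch for this dispatcher; the image, tag, and parameters are illustrative, and config is assumed to expose the uuid attribute used by log():

def flagstat_job(job, config, work_dir):
    # Passing outfile routes through dockerCall instead of apiDockerCall
    with open(os.path.join(work_dir, 'flagstat.txt'), 'w') as out:
        docker_call(job, config, work_dir,
                    params=['samtools', 'flagstat', '/data/sample.bam'],
                    image='quay.io/ucsc_cgl/samtools',  # illustrative image
                    tag='latest',
                    outfile=out)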
Example #9
def CNS(input_file,
        prefix,
        work_dir=None,
        docker=True,
        job=None,
        template=True,
        **template_kwds):
    work_dir = work_dir or os.getcwd()
    if docker and apiDockerCall is not None and job is not None:
        if not template and not os.path.abspath(
                os.path.dirname(input_file)) == os.path.abspath(work_dir):
            shutil.copy(input_file, work_dir)
            inp = input_file
        else:
            updated_templates = {}
            for k, v in template_kwds.items():
                if os.path.isfile(v) and not os.path.abspath(
                        os.path.dirname(v)) == os.path.abspath(work_dir):
                    shutil.copy(v, work_dir)
                updated_templates[k] = os.path.join("/data",
                                                    os.path.basename(v))
            inp = generate_input(input_file, prefix, work_dir,
                                 **updated_templates)

        try:
            parameters = [os.path.join("/data", os.path.basename(inp))]
            output = apiDockerCall(job,
                                   image='edraizen/cns:latest',
                                   working_dir=work_dir,
                                   parameters=parameters)

        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise
            # Alternatively, fall back to a local run:
            # return CNS(input_file, prefix, work_dir=work_dir, docker=False, job=job)
    else:
        load_cns_environment()
        inp = generate_input(input_file, prefix, work_dir, **template_kwds)
        with open(input_file) as inp:
            output = subprocess.check_output(["cns"], stdin=inp)
    return output
Example #10
def score_complex(pdb_file,
                  chain,
                  iteration=None,
                  work_dir=None,
                  docker=True,
                  job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    if docker and apiDockerCall is not None and job is not None:
        if not os.path.abspath(
                os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)
        try:
            parameters = ["score",
                          os.path.basename(pdb_file), "--chain"] + list(chain)
            if isinstance(iteration, int) and iteration in range(3):
                parameters += ["--iteration", str(iteration)]
            score = apiDockerCall(
                job,
                "edraizen/haddock:latest",
                working_dir="/data",
                volumes={work_dir: {
                    "bind": "/data",
                    "mode": "rw"
                }},
                parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise

    results = pd.read_table(os.path.join(work_dir,
                                         pdb_file + ".haddock-sorted.stat"),
                            nrows=1,
                            delim_whitespace=True)
    results.columns = ["haddock_" + c for c in results.columns]
    results = results.iloc[0]

    return results
Example #11
    def call_with_docker(self, job, args, work_dir, outfile, errfile,
                         check_output, tool_name):
        """
        
        Thin wrapper for docker_call that will use internal lookup to
        figure out the location of the docker file.  Only exposes docker_call
        parameters used so far.  expect args as list of lists.  if (toplevel)
        list has size > 1, then piping interface used
        
        Does support redirecting output to outfile, unless check_output is
        used, in which case output is captured.
        
        """

        RealtimeLogger.info(
            truncate_msg("Docker Run: {}".format(" | ".join(" ".join(x)
                                                            for x in args))))
        start_time = timeit.default_timer()

        # we use the first argument to look up the tool in the docker map
        # but allow overriding of this with the tool_name parameter
        name = tool_name if tool_name is not None else args[0][0]
        tool = self.docker_tool_map[name]

        # We keep an environment dict
        environment = {}

        # And an entry point override
        entrypoint = None

        # And a volumes dict for mounting
        volumes = {}

        # And a working directory override
        working_dir = None

        # Setting TMPDIR breaks Rscript.  TODO: investigate how general this actually is
        if name != 'Rscript':
            # vg uses TMPDIR for temporary files
            # this is particularly important for gcsa, which makes massive files.
            # we will default to keeping these in our working directory
            environment['TMPDIR'] = '.'

        if name == 'Rscript':
            # The R dockers by default want to install packages in non-writable directories. Sometimes.
            # Make sure a writable directory which exists is used.
            environment['R_LIBS'] = '/tmp'

        if name == 'vg':
            environment['VG_FULL_TRACEBACK'] = '1'

        # ugly hack for platypus, as default container doesn't have executable in path
        if tool == 'quay.io/biocontainers/platypus-variant:0.8.1.1--htslib1.7_1' and \
           args[0][0] == 'Platypus.py':
            args[0][
                0] = '/usr/local/share/platypus-variant-0.8.1.1-1/Platypus.py'

        # Force all dockers to run sort in a consistent way
        environment['LC_ALL'] = 'C'

        # set our working directory map
        if work_dir is not None:
            volumes[os.path.abspath(work_dir)] = {
                'bind': '/data',
                'mode': 'rw'
            }
            working_dir = '/data'

        if outfile is not None:
            # We need to send output to a file object

            assert (not check_output)

            # We can't just redirect stdout of the container from the API, so
            # we do something more complicated.

            # Now we need to populate an FD that spits out the container output.
            output_fd = None

            # We may be able to use a FIFO, or we may need a network connection.
            # FIFO sharing between host and container only works on Linux.
            use_fifo = (platform.system() == 'Linux')

            if use_fifo:
                # On a Linux host we can just use a FIFO from the container to the host

                # Set up a FIFO to receive it
                fifo_dir = tempfile.mkdtemp()
                fifo_host_path = os.path.join(fifo_dir, 'stdout.fifo')
                os.mkfifo(fifo_host_path)

                # Mount the FIFO in the container.
                # The container doesn't actually have to have the mountpoint directory in its filesystem.
                volumes[fifo_dir] = {'bind': '/control', 'mode': 'rw'}

                # Redirect the command output by tacking on another pipeline stage
                parameters = args + [['dd', 'of=/control/stdout.fifo']]

                # Open the FIFO into nonblocking mode. See
                # <https://stackoverflow.com/a/5749687> and
                # <http://shallowsky.com/blog/programming/python-read-characters.html>
                output_fd = os.open(fifo_host_path,
                                    os.O_RDONLY | os.O_NONBLOCK)

            else:
                # On a Mac host we can't because of https://github.com/docker/for-mac/issues/483
                # We need to go over the network instead.

                # Open an IPv4 TCP socket, since we know Docker uses IPv4 only
                listen_sock = socket.socket(socket.AF_INET)
                # Bind it to an OS-selected port on all interfaces, since we can't determine the Docker interface
                # TODO: socket.INADDR_ANY ought to work here but is rejected for being an int.
                listen_sock.bind(('', 0))

                # Start listening
                listen_sock.listen(1)

                # Get the port we got given
                listen_port = listen_sock.getsockname()[1]

                # Generate a random security cookie. Since we can't really stop
                # Internet randos from connecting to our socket, we bail out on
                # any connection that doesn't start with this cookie and a newline.
                security_cookie = str(uuid.uuid4())

                # Redirect the command output to that port using Bash networking
                # Your Docker needs to be 18.03+ to support host.docker.internal
                # Your container needs to have bash with networking support
                parameters = args + [[
                    'bash', '-c',
                    'exec 3<>/dev/tcp/host.docker.internal/{}; cat <(echo {}) - >&3'
                    .format(listen_port, security_cookie)
                ]]

                RealtimeLogger.debug(
                    "Listening on port {} for output from Docker container".
                    format(listen_port))

                # We can't populate the FD until we accept, which we can't do
                # until the Docker comes up and is trying to connect.

            RealtimeLogger.debug("Final Docker command: {}".format(" | ".join(
                " ".join(x) for x in parameters)))

            # Start the container detached so we don't wait on it
            container = apiDockerCall(job,
                                      tool,
                                      parameters,
                                      volumes=volumes,
                                      working_dir=working_dir,
                                      entrypoint=entrypoint,
                                      environment=environment,
                                      detach=True)

            RealtimeLogger.debug("Asked for container {}".format(container.id))

            if not use_fifo:
                # Try and accept a connection from the container.
                # Make sure there's a timeout so we don't accept forever
                listen_sock.settimeout(10)

                for attempt in range(3):

                    connection_sock, remote_address = listen_sock.accept()

                    RealtimeLogger.info(
                        "Got connection from {}".format(remote_address))

                    # Set a 10 second timeout for the cookie
                    connection_sock.settimeout(10)

                    # Check the security cookie
                    received_cookie_and_newline = connection_sock.recv(
                        len(security_cookie) + 1)

                    if received_cookie_and_newline != (security_cookie + "\n").encode():
                        # Incorrect security cookie.
                        RealtimeLogger.warning(
                            "Received incorrect security cookie message from {}"
                            .format(remote_address))
                        continue
                    else:
                        # This is the container we are looking for.
                        # Go into nonblocking mode, which our read code expects.
                        connection_sock.setblocking(False)
                        # Set the FD
                        output_fd = connection_sock.fileno()
                        break

                if output_fd is None:
                    # We can't get ahold of the Docker in time
                    raise RuntimeError(
                        "Could not establish network connection for Docker output!"
                    )

            # If the Docker container goes badly enough, it may not even open
            # the other end of the connection. So we can't just wait for it to
            # EOF before checking on the Docker.

            # Now read ought to throw if there is no data. But
            # <https://stackoverflow.com/q/38843278> and some testing suggest
            # that this doesn't happen, and it just looks like EOF. So we will
            # watch out for that.

            try:
                # Prevent leaking FDs

                # If this is set, and there is no data in the pipe, decide that no data is coming
                last_chance = False
                # If this is set, we have seen data in the pipe, so the other
                # end must have opened it and will eventually close it if it
                # doesn't run forever.
                saw_data = False

                while True:
                    # While there still might be data in the pipe

                    # Select on the pipe with a timeout, so we don't spin
                    # constantly waiting for data. By this point output_fd is
                    # guaranteed to be set (FIFO opened or socket accepted).
                    can_read, can_write, had_error = select.select(
                        [output_fd], [], [output_fd], 10)

                    if len(can_read) > 0 or len(had_error) > 0:
                        # There is data available or something else weird about our FIFO.

                        try:
                            # Do a nonblocking read. Since we checked with select we never should get "" unless there's an EOF.
                            data = os.read(output_fd, 4096)

                            if data == "":
                                # We didn't throw and we got nothing, so it must be EOF.
                                RealtimeLogger.debug("Got EOF")
                                break

                        except OSError as err:
                            if err.errno in [errno.EAGAIN, errno.EWOULDBLOCK]:
                                # There is no data right now
                                data = None
                            else:
                                # Something else has gone wrong
                                raise err

                    else:
                        # There is no data available. Don't even try to read. Treat it as if a read refused to block.
                        data = None

                    if data is not None:
                        # Send our data to the outfile
                        outfile.write(data)
                        saw_data = True
                    elif not saw_data:
                        # We timed out and there has never been any data. Maybe the container has died/never started?

                        if last_chance:
                            # The container has been dead for a while and nothing has arrived yet. Assume no data is coming.
                            RealtimeLogger.warning(
                                "Giving up on output from container {}".format(
                                    container.id))
                            break

                        # Otherwise, check on it
                        container.reload()

                        if container.status not in [
                                'created', 'restarting', 'running', 'removing'
                        ]:
                            # The container has stopped. So what are we doing waiting around for it?

                            # Wait one last time for any lingering data to percolate through the FIFO
                            time.sleep(10)
                            last_chance = True
                            continue

            finally:
                # No matter what happens, close our end of the connection
                os.close(output_fd)

                if not use_fifo:
                    # Also close the listening socket
                    listen_sock.close()

            # Now our data is all sent.
            # Wait on the container and get its return code.
            return_code = container.wait()

            if use_fifo:
                # Clean up the FIFO files
                os.unlink(fifo_host_path)
                os.rmdir(fifo_dir)

        else:
            # No piping needed.

            if len(args) == 1:
                # split off first argument as entrypoint (so we can be oblivious as to whether
                # that happens by default)
                parameters = [] if len(args[0]) == 1 else args[0][1:]
                entrypoint = args[0][0]
            else:
                # can leave as is for piped interface which takes list of args lists
                # and doesn't worry about entrypoints since everything goes through bash -c
                # todo: check we have a bash entrypoint!
                parameters = args

            # Run the container and dump the logs if it fails.
            container = apiDockerCall(job,
                                      tool,
                                      parameters,
                                      volumes=volumes,
                                      working_dir=working_dir,
                                      entrypoint=entrypoint,
                                      environment=environment,
                                      detach=True)

            # Wait on the container and get its return code.
            return_code = container.wait()

        # When we get here, the container has been run, and stdout is either in the file object we sent it to or in the Docker logs.
        # stderr is always in the Docker logs.

        if return_code != 0:
            # What were we doing?
            command = " | ".join(" ".join(x) for x in args)

            # Dump logs
            RealtimeLogger.error(
                "Docker container for command {} failed with code {}".format(
                    command, return_code))
            RealtimeLogger.error("Dumping stderr...")
            for line in container.logs(stderr=True, stdout=False, stream=True):
                # Trim trailing \n
                RealtimeLogger.error(line[:-1])

            if not check_output and outfile is None:
                # Dump stdout as well, since it's not something the caller wanted as data
                RealtimeLogger.error("Dumping stdout...")
                for line in container.logs(stderr=False,
                                           stdout=True,
                                           stream=True):
                    # Trim trailing \n
                    RealtimeLogger.error(line[:-1])

            # Raise an error if it's not success
            raise RuntimeError(
                "Docker container for command {} failed with code {}".format(
                    command, return_code))
        elif errfile:
            # user wants stderr even if no crash
            for line in container.logs(stderr=True, stdout=False, stream=True):
                errfile.write(line)

        if check_output:
            # We need to collect the output. We grab it from Docker's handy on-disk buffer.
            # TODO: Bad Things can happen if the container logs too much.
            captured_stdout = container.logs(stderr=False, stdout=True)

        end_time = timeit.default_timer()
        run_time = end_time - start_time
        RealtimeLogger.info("Successfully docker ran {} in {} seconds.".format(
            " | ".join(" ".join(x) for x in args), run_time))

        if outfile:
            outfile.flush()
            os.fsync(outfile.fileno())

        if check_output is True:
            return captured_stdout
Example #12
def run_scwrl(pdb_file, output_prefix=None, framefilename=None, sequencefilename=None,
  paramfilename=None, in_crystal=False, remove_hydrogens=False, remove_h_n_term=False,
  work_dir=None, docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    full_pdb_path = pdb_file
    pdb_path = os.path.basename(full_pdb_path)

    if output_prefix is None:
        output_prefix = os.path.splitext(full_pdb_path)[0]

    output_file = "{}.scwrl.pdb".format(output_prefix)

    _parameters = [p for p, use in [("-#", in_crystal), ("-h", remove_hydrogens), \
        ("-t", remove_h_n_term)] if use]

    if docker and apiDockerCall is not None and job is not None:
        #Docker can only read from work_dir
        if not os.path.abspath(os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)

        parameters = ["-i", "/data/{}".format(os.path.basename(pdb_file))]
        parameters += ["-o", "/data/{}".format(os.path.basename(output_file))]
        parameters += _parameters

        if framefilename is not None and os.path.isfile(framefilename):
            if not os.path.abspath(os.path.dirname(framefilename)) == os.path.abspath(work_dir):
                shutil.copy(framefilename, work_dir)
            parameters += ["-f", "/data/{}".format(os.path.basename(framefilename))]

        if sequencefilename is not None and os.path.isfile(sequencefilename):
            if not os.path.abspath(os.path.dirname(sequencefilename)) == os.path.abspath(work_dir):
                shutil.copy(sequencefilename, work_dir)
            parameters += ["-s", "/data/{}".format(os.path.basename(sequencefilename))]

        if paramfilename is not None and os.path.isfile(paramfilename):
            if not os.path.abspath(os.path.dirname(paramfilename)) == os.path.abspath(work_dir):
                shutil.copy(paramfilename, work_dir)
            parameters += ["-p", "/data/{}".format(os.path.basename(paramfilename))]

        try:
            apiDockerCall(job,
                          image='edraizen/scwrl4:latest',
                          working_dir=work_dir,
                          parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise
            # Alternatively, fall back to a local run:
            # return run_scwrl(pdb_file, output_prefix=output_prefix, framefilename=framefilename,
            #     sequencefilename=sequencefilename, paramfilename=paramfilename, in_crystal=in_crystal,
            #     remove_hydrogens=remove_hydrogens, remove_h_n_term=remove_h_n_term,
            #     work_dir=work_dir, docker=False)

        output_file = os.path.join(work_dir, os.path.basename(output_file))
        #if not os.path.abspath(os.path.dirname(output_file)) == os.path.abspath(work_dir):
        #    shutil.move(os.path.join(work_dir, os.path.basename(output_file)),
        #        os.path.abspath(os.path.dirname(output_file)))

    else:
        parameters = ["scwrl4", "-i", pdb_file, "-o", output_file]+_parameters
        if framefilename is not None and os.path.isfile(framefilename):
            parameters += ["-f", framefilename]
        if sequencefilename is not None and os.path.isfile(sequencefilename):
            parameters += ["-s", sequencefilename]
        if paramfilename is not None and os.path.isfile(paramfilename):
            parameters += ["-p", paramfilename]

        try:
            subprocess.call(parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise

    assert os.path.isfile(output_file)
    return output_file
Example #13
def run_outlier_model(job, sample_info, args, cores=3, memory="15G"):
    # Unpack sample information and add sample specific options
    name, sample_opts = sample_info
    if sample_opts:
        for key, value in sample_opts.items():
            setattr(args, key, value)

    # Check if output already exists and don't run if so
    output = os.path.join(args.out_dir, name)
    if os.path.exists(output):
        return 0

    # Process names with flexible extensions
    sample_ext = os.path.splitext(args.sample)[1]
    sample_name = "sample_matrix{}".format(sample_ext)
    bg_ext = os.path.splitext(args.background)[1]
    bg_name = "bg_matrix{}".format(bg_ext)

    # Copy input files to work directory
    shutil.copy(args.sample, os.path.join(job.tempDir, sample_name))
    shutil.copy(args.background, os.path.join(job.tempDir, bg_name))
    if args.gene_list:
        shutil.copy(args.gene_list, os.path.join(job.tempDir, "gene-list.txt"))

    # Define parameters and call Docker container
    parameters = [
        "--sample",
        "/data/{}".format(sample_name),
        "--background",
        "/data/{}".format(bg_name),
        "--name",
        name,
        "--out-dir",
        "/data",
        "--group",
        args.group,
        "--col-skip",
        str(args.col_skip),
        "--num-backgrounds",
        str(args.num_backgrounds),
        "--max-genes",
        str(args.max_genes),
        "--num-training-genes",
        str(args.num_training_genes),
        "--pval-convergence-cutoff",
        str(args.pval_convergence_cutoff),
        "--tune",
        str(args.tune),
    ]
    if args.disable_iter:
        parameters.append("--disable-iter")
    if args.save_model:
        parameters.append("--save-model")
    if args.gene_list:
        parameters.extend(["--gene-list", "/data/gene-list.txt"])
    image = "jvivian/gene-outlier-detection:0.13.0a"
    apiDockerCall(
        job=job,
        image=image,
        working_dir=job.tempDir,
        parameters=parameters,
        user="******",
    )
    _fixPermissions(tool=image, workDir=job.tempDir)

    out_dir = os.path.join(job.tempDir, name)
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    shutil.move(out_dir, args.out_dir)
Example #14
def run_maxcluster(*args, **kwds):
    work_dir = kwds.pop("work_dir", None)
    docker = kwds.pop("docker", True)
    job = kwds.pop("job", None)

    if work_dir is None:
        work_dir = os.getcwd()

    if "file_list" in kwds and not "l" in kwds:
        kwds["l"] = kwds.pop("file_list")
    else:
        kwds.pop("file_list", None)

    log = kwds.get("log", False)
    if log and not isinstance(log, str):
        f = tempfile.NamedTemporaryFile(dir=work_dir,
                                        suffix=".log",
                                        delete=False)
        f.close()
        kwds["log"] = f.name

    file_kwds = ["log", "e", "p", "l", "R", "Rl", "Ru", "F", "M"]
    in_file_kwds = ["e", "p", "l", "F", "M"]
    parameters = ["-" + a for a in args]
    for k, v in kwds.items():
        if k not in file_kwds:
            parameters += ["-{}".format(k), str(v)]
    job.log("ORIG PARAMS: {}".format(parameters))
    file_parameters = {k: v for k, v in kwds.items() if k in file_kwds}

    if docker and apiDockerCall is not None and job is not None:
        for k, f in file_parameters.items():
            if k in in_file_kwds and not os.path.abspath(
                    os.path.dirname(f)) == os.path.abspath(work_dir):
                shutil.copy(f, work_dir)
            job.log("BASENAMING: {}".format(os.path.basename(f)))
            parameters += ["-{}".format(k), os.path.basename(f)]

        oldcwd = os.getcwd()
        os.chdir(work_dir)
        try:
            out = apiDockerCall(
                job,
                'edraizen/maxcluster:latest',
                working_dir="/data",
                volumes={work_dir: {
                    "bind": "/data",
                    "mode": "rw"
                }},
                parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            job.log("FILE LIST IS [{}]".format(
                open(file_parameters["l"]).read()))
            raise
        os.chdir(oldcwd)
    else:
        for k, f in file_parameters.items():
            parameters += ["-{}".format(k), f]
        args = [maxcluster_path] + parameters
        try:
            out = subprocess.check_output(args)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise

    if "log" in kwds and os.path.isfile(kwds["log"]):
        return kwds["log"]
    return out
Example #15
def run_haddock(dock_name,
                setup=False,
                work_dir=None,
                docker=True,
                toil=False,
                job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    if job:
        print(os.listdir(work_dir))
    assert any(
        os.path.isfile(os.path.join(work_dir, f))
        for f in ("new.html", "run.cns"))

    if toil:
        image = 'edraizen/haddock-toil:latest'
        parameters = [
            "--provisioner",
            "aws",
            "--nodeTypes",
            "t2.small,t2.small:0.0069",
            "--defaultCores",
            "1",
            "--maxCores",
            "1",
            "--maxNodes",
            "1,99",
            "--maxLocalJobs",
            "100",
            "--targetTime",
            "1",
            "--batchSystem",
            "mesos",
            "--defaultMemory",
            "997Mi",
            "--defaultDisk",
            "42121Mi",
            "--logFile",
            "{}.log".format(dock_name),
            "aws:us-east-1:haddock-{}".format(dock_name),
        ]
    else:
        image = 'edraizen/haddock:latest'
        parameters = []

    if docker and apiDockerCall is not None and job is not None:
        oldcwd = os.getcwd()
        os.chdir(work_dir)
        try:
            out = apiDockerCall(
                job,
                image,
                working_dir="/data",
                volumes={work_dir: {
                    "bind": "/data",
                    "mode": "rw"
                }},
                parameters=parameters,
                detach=True)
            # if not setup:
            for line in out.logs(stream=True):
                # stream=True makes this loop blocking; we will loop until
                # the container stops and there is no more output.
                RealtimeLogger.info(line)
            # else:
            #     job.log(out)

        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise
        os.chdir(oldcwd)
    else:
        try:
            out = subprocess.check_output([sys.executable, "RunHaddock.py"])
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise
    #job.log(out)
    return out
Example #16
def run_minimize(pdb_file, min_type="lbfgs_armijo_nonmonotone", min_tolerance=0.001,
  ignore_zero_occupancy=False, parameters=None, work_dir=None, docker=True, job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    prefix = os.path.splitext(os.path.basename(pdb_file))[0]
    parameters = list(parameters) if isinstance(parameters, (list, tuple)) else []
    parameters += [
        "-run:min_type", min_type,
        "-run:min_tolerance", str(min_tolerance),
        "-ignore_zero_occupancy", str(ignore_zero_occupancy).lower(),
        "-overwrite", "false"]  #Pandas apply calls first row twice so this is needed

    if docker and apiDockerCall is not None and job is not None:
        if not os.path.abspath(os.path.dirname(pdb_file)) == os.path.abspath(work_dir):
            shutil.copy(pdb_file, work_dir)

        minimized_file = os.path.join(work_dir, "{}.pdb_0001.pdb".format(prefix))
        score_file = "/data/{}.sc".format(prefix)
        pdb_file_in = "/data/{}.pdb".format(prefix)

        parameters += [
            "-s", pdb_file_in,
            "-out:file:scorefile", score_file,
            "-out:path:pdb", "/data",
            "-out:path:score", "/data"]

        try:
            apiDockerCall(job,
                          image='edraizen/minimize:latest',
                          working_dir=work_dir,
                          parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            # Fall back to a local run
            return run_minimize(pdb_file, min_type=min_type,
              min_tolerance=min_tolerance,
              ignore_zero_occupancy=ignore_zero_occupancy,
              work_dir=work_dir, docker=False, job=job)

    else:
        work_dir = os.path.dirname(os.path.abspath(pdb_file))
        minimized_file = os.path.join(work_dir, "{}.pdb_0001.pdb".format(prefix))
        score_file = os.path.join(work_dir, "{}.sc".format(prefix))

        parameters += [
            "-s", pdb_file,
            "-out:file:scorefile", score_file,
            "-out:path:pdb", work_dir,
            "-out:path:score", work_dir]

        try:
            subprocess.check_output(["minimize.static.linuxgccrelease"] + parameters,
                stderr=subprocess.PIPE)
        except subprocess.CalledProcessError:
            raise RuntimeError("Unable to minimize file {}".format(pdb_file))

    return minimized_file, score_file
Example #17
def run_apbs(pqr_file, input_file=None, keep_input=False, work_dir=None, docker=True, job=None):
    """Run APBS. Calculates correct size using Psize and defualt from Chimera
    """
    if work_dir is None:
        work_dir = os.getcwd()

    full_pqr_path = pqr_file
    pqr_path = os.path.basename(full_pqr_path)
    file_prefix = os.path.splitext(pqr_path)[0]
    output_prefix = os.path.join(work_dir, "{}.apbs_output".format(file_prefix))
    if input_file is not None and os.path.isfile(input_file):
        keep_input = True
    else:
        input_file_contents = make_apbs_input(full_pqr_path, "{}.apbs_output".format(file_prefix))


    if docker and apiDockerCall is not None and job is not None:
        input_file_name = os.path.join(work_dir, "{}.apbs_input".format(file_prefix))
        input_file_short = "{}.apbs_input".format(file_prefix)
        output_prefix = "{}.apbs_output".format(file_prefix)

        if input_file is not None:
            if not os.path.abspath(os.path.dirname(input_file)) == os.path.abspath(work_dir):
                shutil.copy(input_file, os.path.join(work_dir, input_file_short))
            else:
                input_file_short = os.path.basename(input_file)
        else:
            input_file = input_file_name
            with open(input_file, "w") as f:
                f.write(input_file_contents)

        if not os.path.abspath(os.path.dirname(pqr_file)) == os.path.abspath(work_dir):
            shutil.copy(pqr_file, work_dir)

        try:
            parameters = ["/data/{}".format(input_file_short)]
            apiDockerCall(job,
                          image='edraizen/apbs:latest',
                          working_dir="/data",
                          volumes={work_dir:{"bind":"/data", "mode":"rw"}},
                          parameters=parameters)
            output_prefix = os.path.join(work_dir, output_prefix)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise
            # Alternatively, fall back to a local run:
            # return run_apbs(full_pqr_path, input_file=input_file_name,
            #     keep_input=keep_input, work_dir=work_dir, docker=False)

    else:
        input_file = os.path.join(work_dir, "{}.apbs_input".format(file_prefix))
        output_prefix = os.path.join(work_dir, "{}.apbs_output".format(file_prefix))
        with open(input_file, "w") as f:
            f.write(input_file_contents)

        try:
            subprocess.call(["apbs", input_file])
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            raise RuntimeError("APBS failed becuase it was not found in path: {}".format(e))

    if not keep_input and input_file is not None and os.path.isfile(input_file):
        os.remove(input_file)

    out_file = output_prefix+".txt"
    assert os.path.isfile(out_file), "Outfile not found: {}".format(os.listdir(work_dir))
    return out_file
Example #18
def align(fixed_file,
          fixed_chain,
          moving_file,
          moving_chain,
          method="tmalign",
          force_alignment=None,
          extract=True,
          docker=True,
          work_dir=None,
          job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    import shutil
    numalign = int(sum(
        1 for f in os.listdir("/root") if f.startswith("align")) / 3) + 1
    shutil.copy(
        fixed_file,
        os.path.join(
            "/root", "align{}_fixed_{}_{}.pdb".format(numalign, fixed_chain,
                                                      moving_chain)))
    shutil.copy(
        moving_file,
        os.path.join(
            "/root", "align{}_moving_{}_{}.pdb".format(numalign, fixed_chain,
                                                       moving_chain)))

    _chain1 = tempfile.NamedTemporaryFile(suffix=".pdb",
                                          dir=work_dir,
                                          delete=False)
    extract_chains(fixed_file,
                   fixed_chain,
                   rename="B" * len(fixed_chain),
                   new_file=_chain1.name)
    _chain1.close()

    job.log("MOVE CHAINS: {} {}".format(get_all_chains(moving_file),
                                        moving_chain))
    job.log("FIX CHAINS: {} {}".format(get_all_chains(fixed_file),
                                       fixed_chain))

    _chain2 = tempfile.NamedTemporaryFile(suffix=".pdb",
                                          dir=work_dir,
                                          delete=False)
    extract_chains(moving_file,
                   moving_chain,
                   rename="A" * len(moving_chain),
                   new_file=_chain2.name)
    _chain2.close()

    job.log("MOVE CHAIN A: {}".format(next(get_atom_lines(_chain2.name))))
    job.log("MOVE CHAIN A: {}".format(get_all_chains(_chain2.name)))
    job.log("MOVE CHAIN B: {}".format(next(get_atom_lines(_chain1.name))))
    job.log("MOVE CHAIN B: {}".format(get_all_chains(_chain1.name)))

    shutil.copy(
        _chain1.name,
        os.path.join(
            "/root",
            "align{}_seg_fixed_{}_{}.pdb".format(numalign, fixed_chain,
                                                 moving_chain)))
    shutil.copy(
        _chain2.name,
        os.path.join(
            "/root",
            "align{}_seg_moving_{}_{}.pdb".format(numalign, fixed_chain,
                                                  moving_chain)))

    _outf = tempfile.NamedTemporaryFile(dir=work_dir, delete=False)
    _outf.close()
    _outf = _outf.name

    if method in ["tmalign", "mmalign"]:
        image = "edraizen/{}:latest".format(method)
        parameters = [
            os.path.basename(_chain2.name),
            os.path.basename(_chain1.name), "-o",
            os.path.basename(_outf + ".sup")
        ]
        if method == "tmalign":
            if force_alignment is not None:
                parameters += ["-I", os.path.basename(force_alignment)]
            else:
                parameters += ["-m", os.path.basename(_outf + ".matrix.txt")]
    elif method == "ce":
        image = "edraizen/ce:latest"
        parameters = lambda f, m, o: [
            "--file1",
            os.path.basename(_chain1.name), "--file2",
            os.path.basename(_chain2.name), "-outputPDB", "-outFile",
            os.path.basename(_outf + ".sup")
        ]

    if docker and apiDockerCall is not None and job is not None:
        try:
            stdout = apiDockerCall(
                job,
                image,
                working_dir="/data",
                volumes={work_dir: {
                    "bind": "/data",
                    "mode": "rw"
                }},
                parameters=parameters)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            print(dir(e))
            raise Exception(str(e))

    else:
        raise RuntimeError("Only docker works at the moment")
    job.log("OUTPUT: " + stdout)

    rmsd_re = re.compile(r"^Aligned length=.+, RMSD=(.+), Seq_ID=")
    tmscore_re = re.compile(
        r"^TM-score=(.+) \(if normalized by length of Chain_2\)")
    rmsd_tm_re = re.compile(
        r"^Aligned length=.+, RMSD=(.+), TM-score=(.+), ID=")

    rmsd = tm_score = -1.
    lines = iter(stdout.splitlines())
    for line in lines:
        job.log(line.rstrip())
        m = rmsd_re.match(line)
        if m:
            rmsd = float(m.group(1).strip())
            job.log("RMSD is {}".format(rmsd))
            continue
        m = rmsd_tm_re.match(line)
        if m:
            rmsd = float(m.group(1).strip())
            tm_score = float(m.group(2).strip())
            job.log("RMSD is {}".format(rmsd))
            job.log("TM-score is {}".format(tm_score))
        m = tmscore_re.match(line)
        if m:
            tm_score = float(m.group(1).strip())
            job.log("TM-score is {}".format(tm_score))
        if method == "mmalign" and "rotation matrix" in line:
            with open(_outf + ".matrix.txt", "w") as matfile:
                for i, mat_line in enumerate(lines):
                    if i > 4: break
                    matfile.write(mat_line)

    ending = ".sup_all_atm_lig" if method == "tmalign" else ".sup_all"
    _outfile = _outf + ending

    job.log("ALL chains: {}".format(get_all_chains(_outfile)))

    job.log("FIRST LINE: {}".format(next(get_atom_lines(_outfile))))

    job.log("ALL alinged files: {}".format(os.listdir(work_dir)))

    shutil.copy(
        _outfile,
        os.path.join(
            "/root",
            "align{}_raw_aligned_{}_{}.pdb".format(numalign, fixed_chain,
                                                   moving_chain)))

    if method == "tmalign":
        shutil.copy(
            _outf + ".matrix.txt",
            os.path.join(
                "/root", "align{}_matrix_aligned_{}_{}.pdb".format(
                    numalign, fixed_chain, moving_chain)))

    if extract:
        outfile = os.path.join(
            work_dir, "{}.aligned.pdb".format(
                os.path.splitext(os.path.basename(moving_file))[0]))
    else:
        outfile = os.path.join(
            work_dir, "{}__{}.aligned.pdb".format(
                os.path.splitext(os.path.basename(fixed_file))[0],
                os.path.splitext(os.path.basename(moving_file))[0]))

    if extract:
        #Chain A had the moving_pdb rottrans
        _outfile = extract_chains(_outfile, "A")

        #Copy the updated XYZ coords into moving_pdb file to ensure chains are correct
        update_xyz(moving_file, _outfile, updated_pdb=outfile)
    else:
        #Remove extraneous lines
        with open(outfile, "w") as out:
            subprocess.call(
                [sys.executable,
                 os.path.join(PDB_TOOLS, "pdb_tidy.py"), _outfile],
                stdout=out)

    if force_alignment is None:
        matrix_file = outfile + ".matrix"
        assert os.path.isfile(_outf + ".matrix.txt")
        shutil.move(_outf + ".matrix.txt", matrix_file)
        assert os.path.isfile(matrix_file)
        with open(matrix_file) as f:
            print "MATRIX"
            print f.read()
    else:
        matrix_file = force_alignment

    job.log("NEW chains: {}".format(get_all_chains(outfile)))

    for f in glob.glob(os.path.join(work_dir, _outf + "*")):
        try:
            os.remove(f)
        except OSError:
            pass

    assert os.path.isfile(outfile)
    return outfile, rmsd, tm_score, matrix_file
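
# A minimal sketch (not part of the original example) of consuming the saved
# matrix file. It assumes the conventional TM-align/MM-align layout, where the
# three data rows start with "1", "2", "3" and hold t(m) followed by
# u(m,1..3), and applies x' = t + U.x to one coordinate. Treat the helper name
# and the layout as assumptions, not a definitive parser.
import numpy as np

def apply_alignment_matrix(matrix_file, xyz):
    """Rotate/translate a single (x, y, z) coordinate using a saved matrix."""
    t = np.zeros(3)
    u = np.zeros((3, 3))
    with open(matrix_file) as f:
        for line in f:
            fields = line.split()
            # Data rows are indexed 1-3 in the first column.
            if fields and fields[0] in ("1", "2", "3"):
                row = int(fields[0]) - 1
                t[row] = float(fields[1])
                u[row] = [float(v) for v in fields[2:5]]
    return t + u.dot(np.asarray(xyz, dtype=float))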
Example #19
0
def run_modeller(pir,
                 template,
                 model,
                 num_models=5,
                 work_dir=None,
                 docker=True,
                 job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    python_file = os.path.join(work_dir, "run_modeller.py")

    if docker and apiDockerCall is not None and job is not None:
        #Docker can only read from work_dir
        if not os.path.abspath(
                os.path.dirname(pir)) == os.path.abspath(work_dir):
            shutil.copy(pir, work_dir)

        if not os.path.abspath(
                os.path.dirname(template)) == os.path.abspath(work_dir):
            shutil.copy(template, work_dir)

        with open(python_file, "w") as f:
            f.write(
                modeller_file.format(
                    pir=os.path.join("/data", os.path.basename(pir)),
                    template=os.path.basename(template).rsplit(".", 1)[0],
                    model=model,
                    num_models=num_models,
                    work_dir="/data"))
        with open(python_file) as f:
            print(f.read())
        parameters = [os.path.join("/data", os.path.basename(python_file))]

        try:
            outputs = apiDockerCall(job,
                                    image='edraizen/modeller:latest',
                                    working_dir=work_dir,
                                    parameters=parameters)
            job.log(outputs)
            outputs = outputs.splitlines()[-1]
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise

    else:
        with open(python_file, "w") as f:
            f.write(
                modeller_file.format(pir=pir,
                                     template=template,
                                     model=model,
                                     num_models=num_models,
                                     work_dir=work_dir))

        try:
            outputs = subprocess.check_output([sys.executable, python_file])
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            raise
    outputs = json.loads(outputs)
    best_pdb, best_dope = min(outputs.items(), key=lambda x: x[1])
    best_pdb = os.path.join(work_dir, best_pdb)
    assert os.path.isfile(best_pdb)
    for f in outputs.keys():
        path = os.path.join(work_dir, f)
        if path != best_pdb:
            try:
                os.remove(path)
            except OSError:
                pass
    return best_pdb
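
# A minimal usage sketch for run_modeller (all paths and names below are
# hypothetical), written as a Toil job function so that `job` can be forwarded
# to apiDockerCall:
def modeller_job(job):
    best_pdb = run_modeller(pir="/path/to/alignment.pir",
                            template="/path/to/template.pdb",
                            model="query_sequence",
                            num_models=5,
                            work_dir=job.tempDir,
                            docker=True,
                            job=job)
    # run_modeller keeps only the lowest-DOPE model and deletes the rest.
    job.log("Best model by DOPE score: {}".format(best_pdb))
    return best_pdb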
Example #20
0
def run_zrank(complex_path,
              refinement=False,
              work_dir=None,
              docker=True,
              job=None):
    if work_dir is None:
        work_dir = os.getcwd()

    _parameters = ["-R"] if refinement else []

    if not isinstance(complex_path, (list, tuple)):
        complex_path = [complex_path]

    # Text mode so we can write str lines under Python 3.
    listfile = tempfile.NamedTemporaryFile(mode="w",
                                           dir=work_dir,
                                           prefix="listfile",
                                           suffix=".txt",
                                           delete=False)
    for pdb in complex_path:
        listfile.write(os.path.basename(pdb) + "\n")
    listfile.close()

    with open(listfile.name) as f:
        print(f.read())

    if docker and apiDockerCall is not None and job is not None:
        parameters = _parameters + [os.path.basename(listfile.name)]
        try:
            out = apiDockerCall(
                job,
                image='edraizen/zrank:latest',
                working_dir="/data",
                volumes={work_dir: {
                    "bind": "/data",
                    "mode": "rw"
                }},
                parameters=parameters)
            print(out)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            print "Error running docker for {} becuase {}".format(
                complex_path, e)
            raise
            return run_zrank(complex_path,
                             refinement=refinement,
                             work_dir=work_dir,
                             docker=False)
    else:
        #Local run: the listfile already lives in work_dir on the host, so
        #use its real path rather than the container's /data mount.
        cmd = [zrank_path] + _parameters + [listfile.name]
        try:
            subprocess.call(cmd)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as e:
            raise RuntimeError("Cannot run zrank for {}. Error: {}".format(
                complex_path, e))

    assert os.path.isfile(listfile.name + ".zr.out"), "No output for zrank"
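    # Illustrative .zr.out layout -- one "<decoy-name> <score>" pair per line,
    # which is all the dict() construction below relies on:
    #   complex.1.pdb  -234.517
    #   complex.2.pdb  -198.042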

    with open(listfile.name + ".zr.out") as f:
        scores = dict(line.rstrip().split() for line in f)

    if len(complex_path) == 1:
        scores = next(iter(scores.values()))

    for f in (listfile.name, listfile.name + ".zr.out"):
        try:
            os.remove(f)
        except OSError:
            pass

    return scores
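
# A minimal usage sketch for run_zrank (hypothetical decoy paths). With a list
# input the return value is a {basename: score} dict of strings; with a single
# path it collapses to one score string.
def zrank_job(job):
    decoys = ["/path/to/decoy1.pdb", "/path/to/decoy2.pdb"]  # hypothetical
    scores = run_zrank(decoys,
                       refinement=False,
                       work_dir=os.path.dirname(decoys[0]),
                       docker=True,
                       job=job)
    best = min(scores, key=lambda name: float(scores[name]))
    job.log("Best-scoring decoy: {} ({})".format(best, scores[best]))
    return best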
Example #21
0
def _testDockerLogsFn(job,
                      working_dir,
                      script_file,
                      stream=False,
                      demux=False):
    """Return True if the test succeeds. Otherwise Exception is raised."""

    # we write a script file because the redirection operator '>&2' would be
    # wrapped in quotes (and passed literally) if given directly as a parameter.
    import textwrap
    bash_script = textwrap.dedent('''
    #!/bin/bash
    echo hello stdout ;
    echo hello stderr >&2 ;
    echo hello stdout ;
    echo hello stderr >&2 ;
    echo hello stdout ;
    echo hello stdout ;
    ''')

    with open(script_file, 'w') as file:
        file.write(bash_script)

    out = apiDockerCall(
        job,
        image='quay.io/ucsc_cgl/ubuntu:20.04',
        working_dir=working_dir,
        parameters=[script_file],
        volumes={working_dir: {
            'bind': working_dir,
            'mode': 'rw'
        }},
        entrypoint="/bin/bash",
        stdout=True,
        stderr=True,
        stream=stream,
        demux=demux)

    # we check the output length because order is not guaranteed.
    if stream:
        if demux:
            # a generator with tuples of (stdout, stderr)
            assert hasattr(out, '__iter__')
            for _ in range(6):
                stdout, stderr = next(out)
                if stdout:
                    # len('hello stdout\n') == 13
                    assert len(stdout) == 13
                elif stderr:
                    assert len(stderr) == 13
                else:
                    assert False
        else:
            # a generator with bytes
            assert hasattr(out, '__iter__')
            for _ in range(6):
                assert len(next(out)) == 13
    else:
        if demux:
            # a tuple of (stdout, stderr)
            stdout, stderr = out
            # len('hello stdout\n' * 4) == 52
            assert len(stdout) == 52
            # len('hello stderr\n' * 2) == 26
            assert len(stderr) == 26
        else:
            # a bytes object
            # len('hello stdout\n' * 4 + 'hello stderr\n' * 2) == 78
            assert len(out) == 78

    return True
Example #22
0
    def run(self, fileStore):
        fileStore.logToMaster("md5")
        tempDir = fileStore.getLocalTempDir()

        try:
            os.makedirs(os.path.join(tempDir, 'execution'))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        inputFile = process_and_read_file(abspath_file(
            self.id_inputFile,
            asldijoiu23r8u34q89fho934t8u34fcurrentworkingdir),
                                          tempDir,
                                          fileStore,
                                          docker=True)

        try:
            # Intended to deal with "optional" inputs that may not exist
            # TODO: handle this better
            command0 = r'''
            /bin/my_md5sum '''
        except:
            command0 = ''

        try:
            # Intended to deal with "optional" inputs that may not exist
            # TODO: handle this better
            command1 = str(inputFile if not isinstance(inputFile, tuple) else
                           process_and_read_file(inputFile, tempDir, fileStore
                                                 )).strip("\n")
        except:
            command1 = ''

        try:
            # Intended to deal with "optional" inputs that may not exist
            # TODO: handle this better
            command2 = r'''
          '''
        except:
            command2 = ''

        cmd = command0 + command1 + command2
        cmd = textwrap.dedent(cmd.strip("\n"))
        generate_docker_bashscript_file(temp_dir=tempDir,
                                        docker_dir=tempDir,
                                        globs=[],
                                        cmd=cmd,
                                        job_name='md5')

        stdout = apiDockerCall(
            self,
            image='quay.io/briandoconnor/dockstore-tool-md5sum:1.0.4',
            working_dir=tempDir,
            parameters=[os.path.join(tempDir, "md5_script.sh")],
            entrypoint="/bin/bash",
            user='******',
            stderr=True,
            volumes={tempDir: {
                "bind": tempDir
            }})
        writetype = 'wb' if isinstance(stdout, bytes) else 'w'
        with open(
                os.path.join(asldijoiu23r8u34q89fho934t8u34fcurrentworkingdir,
                             'md5.log'), writetype) as f:
            f.write(stdout)

        # output-type: File
        output_filename = 'md5sum.txt'
        value = process_outfile(
            output_filename, fileStore, tempDir,
            '/home/lifeisaboutfishtacos/dockstore-workflow-md5sum')

        rvDict = {"value": value}
        return rvDict