예제 #1
0
def combine_alignments(file_list, output):
    """Merge several alignment files into ``output`` through Picard.

    Every entry of ``file_list`` must satisfy ``__is_mapped``; otherwise an
    ``AssertionError`` is raised before anything is executed.

    Args:
        file_list: Paths of the alignment files; each becomes an ``I=`` option.
        output: Path handed to Picard as the ``O=`` option.
    """
    mapped_flags = [__is_mapped(path) for path in file_list]
    assert all(mapped_flags)

    # One "I=<path> " option per input, trailing space included.
    input_options = "".join("I={} ".format(path) for path in file_list)

    # NOTE(review): "-T MergeSAMFiles" and the unexpanded "~" paths are kept
    # exactly as found; confirm they match the installed Picard and the way
    # `bash` expands paths.
    template = ("java -Xmx{xmx} -Xms{xms} -Djava.io.tmpdir={tmp} -jar {picard} "
                "-T MergeSAMFiles {input_files} O={output_files}")
    command = template.format(
        xmx="12G",
        xms="10G",
        tmp="~/tmp",
        picard="~/.prog/picard-tools-2.5.0/picard.jar",
        input_files=input_options,
        output_files=output,
    )
    bash(command)
예제 #2
0
    def __merge_bam(self, files, combined_name):
        """Merge ``files`` into ``combined_name`` with ``samtools merge``.

        Nothing happens on a dry run. An existing ``combined_name`` is reused
        instead of being rebuilt (reported on stderr when verbose).

        Args:
            files: Paths of the files to merge.
            combined_name: Path of the merged output file.

        Raises:
            IOError, OSError: Re-raised after being reported on stderr.
        """
        if self.dry_run:
            return

        try:
            if isfile(combined_name):
                if self.verbose:
                    notice = "{} already exists, using it...".format(
                        combined_name)
                    print(notice, file=stderr)
                return

            inputs = " ".join(files)
            bash("samtools merge {} {}".format(combined_name, inputs))
        except (IOError, OSError) as err:
            print("Error while combining files: {}".format(err), file=stderr)
            raise err
예제 #3
0
    def __merge_bam_files(self):
        """Merge every ``.bam`` entry of ``self.files`` into a single BAM.

        The merged file is ``self.all_reads_name`` inside ``self.input_root``.
        When that file already exists it is reused and nothing is executed.

        Returns:
            The path of the merged BAM file.

        Raises:
            Exception: Any error raised while building or running the merge
                command, re-raised after being reported on stderr.
        """
        bam_files = [f for f in self.files if f.endswith(".bam")]
        output_bam = join(self.input_root, self.all_reads_name)

        if isfile(output_bam):
            print("Output BAM already exists, using it...", file=stderr)
            return output_bam

        try:
            # Never ask samtools for more than 8 threads.
            threads = str(min(self.get_threads(), 8))

            # The template must be formatted BEFORE it is handed to bash; the
            # previous code called .format() on bash's return value instead.
            # "merge" is the samtools subcommand that combines BAM files; the
            # per-thread memory flag (-m) belongs to "samtools sort" and is
            # therefore not passed here.
            command = "samtools merge -@ {t} {o_bam} {i_bams}".format(
                t=threads,
                o_bam=output_bam,
                i_bams=" ".join(bam_files))

            (_, err) = bash(command)

            if err:
                print(err, file=stderr)

        except Exception as error:
            print("Error while merging bam files: {}".format(error),
                  file=stderr)
            raise

        return output_bam
예제 #4
0
def sbatch(command, *args):
    """Pipe ``command`` (wrapped in a bash script) into ``sbatch``.

    Args:
        command: Text placed after the shebang line of the generated script.
        *args: Extra options appended, space separated, after ``sbatch``.

    Returns:
        Whatever ``bash`` returns for the assembled command line.
    """
    pieces = ["echo '#!/usr/bin/env bash\n{}' | sbatch".format(command)]
    pieces.extend(" {}".format(option) for option in args)
    return bash("".join(pieces))
예제 #5
0
def qsub(command, *args):
    """Pipe ``command`` (wrapped in a bash script) into ``qsub``.

    Args:
        command: Text placed after the shebang line of the generated script.
        *args: Extra options appended, space separated, after ``qsub``.

    Returns:
        Whatever ``bash`` returns for the assembled command line.
    """
    pieces = ["echo '#!/usr/bin/env bash\n{}' | qsub".format(command)]
    pieces.extend(" {}".format(option) for option in args)
    return bash("".join(pieces))
def get_barcode(filename):
    """Return the most frequent 10th ':'-separated field of the '@' lines.

    Only the first 10000 lines of ``filename`` are inspected; files ending in
    ``.gz`` are streamed through ``gunzip -c``, everything else through
    ``cat``.

    Args:
        filename: Path of the read file to inspect.

    Returns:
        The first element of the result of ``bash`` for the pipeline.
    """
    if filename.endswith(".gz"):
        reader = "gunzip -c {}".format(filename)
    else:
        reader = "cat {}".format(filename)

    # Shell pipeline: count each distinct field value, keep the top one and
    # strip the count that `uniq -c` prepends.
    stages = [
        reader,
        "head -n 10000",
        "grep ^@",
        "cut -d':' -f10",
        "tr -d ' '",
        "sort",
        "uniq -c",
        "sort -nr",
        "head -1",
        "sed -e 's/^[[:space:]]*//'",
        "cut -d ' ' -f2",
    ]
    result = bash(" | ".join(stages))
    return result[0]
예제 #7
0
def main(reference="reference.fa", job_prefix="Map_Tests"):
    """Launch one ``map.py`` run per combination of the test settings.

    Every combination of speed, modulo flag, stats flag and read count is
    turned into a command line, run through ``bash``, and its output echoed
    (stdout to stdout, stderr to stderr).

    Args:
        reference: Value passed to ``map.py`` as ``--reference``.
        job_prefix: Prefix of each generated job name.
    """
    speeds = ["vfast", "fast", "normal", "slow", "vslow"]
    modulos = ["--usemodulo"]
    stats = ["--stats", ""]
    reads = ["100"]
    mem = "300G"

    # NOTE(review): the "[email protected]" token looks like a redacted e-mail
    # option; it is kept verbatim -- restore the real option before use.
    command = ("map.py --verbose --partition=bigmemm --memory={mem} --cpus=14 "
               "[email protected] --extension=.fastq.gz$ "
               "--email_options=FAIL,END --input_root=ErrCorrect_Repair.1 "
               "--output_root={outroot} --job_name={jobname} {modulo} --pigz "
               "--speed={speed} {stats} --num_reads={reads} "
               "--reference={reference} --read_groups")

    for speed in speeds:
        for j, modulo in enumerate(modulos):
            # The first entry of each option list is the "enabled" variant.
            modulo_label = "modulo" if j == 0 else "no_modulo"

            for k, stat in enumerate(stats):
                stats_label = "stats" if k == 0 else "no_stats"

                for read in reads:
                    labels = {'s': speed, 'm': modulo_label,
                              'st': stats_label, 'r': read}

                    jobname = job_prefix + ".{s}.{m}.{st}.{r}.".format(
                        **labels)
                    outroot = "Map_Tests/Map_Test.{s}.{m}.{st}.{r}".format(
                        **labels)

                    args = split(command.format(
                        speed=speed,
                        modulo=modulo,
                        stats=stat,
                        reads=read,
                        mem=mem,
                        jobname=jobname,
                        outroot=outroot,
                        # Was the undefined name `ref`; the function's own
                        # `reference` parameter is what belongs here.
                        reference=reference
                    ))

                    (out, err) = bash(*args)
                    print(out)
                    print(err, file=stderr)
예제 #8
0
def main(fastq, mapped):
    """Tabulate FASTQ and SAM file sizes per sample, then plot them.

    Sizes come from ``du -sb --apparent-size`` run over the ``*q.gz`` files
    below ``fastq`` and the ``*001.sam`` files below ``mapped``. One
    tab-separated row per SAM sample (name, R1 size, R2 size, SAM size) is
    written to the file ``filesizes``, after which ``plot_map_sizes.R`` is
    run and its stdout printed.

    Args:
        fastq: Directory searched for the FASTQ files.
        mapped: Directory searched for the SAM files.

    Raises:
        KeyError: If a SAM sample has no matching R1 or R2 entry.
    """
    fastq_command = ("find -L {f}/ -iname *q.gz | "
                     "parallel --gnu -j4  \"du -sb --apparent-size "
                     "{{}}\"").format(f=fastq.rstrip('/'))
    (fastq_sizes, err) = bash(fastq_command)

    sam_command = ("find -L {f}/ -iname *001.sam | parallel "
                   "--gnu -j4 \"du -sb --apparent-size {{}}\"").format(
                       f=mapped.rstrip('/'))
    (map_sizes, err2) = bash(sam_command)

    # Sample name -> size, one dictionary per read direction.
    forward_sizes = {}
    reverse_sizes = {}
    for record in fastq_sizes.splitlines():
        fields = record.split()
        path = fields[1]
        sample = basename(path).split("_R")[0]
        size = fields[0]
        if "_R1" in path:
            forward_sizes[sample] = size
        if "_R2" in path:
            reverse_sizes[sample] = size

    sam_sizes = {}
    for record in map_sizes.splitlines():
        fields = record.split()
        sample = basename(fields[1]).split("_pe")[0]
        sam_sizes[sample] = fields[0]

    # Build the whole table first so a missing sample fails before the
    # output file is created.
    table = {
        sample: [forward_sizes[sample], reverse_sizes[sample], sam_size]
        for sample, sam_size in sam_sizes.items()
    }

    with open("filesizes", "w") as fh:
        for sample, sizes in table.items():
            fh.write("{}\t{}\t{}\t{}\n".format(sample, *sizes))

    (plot_out, _) = bash("plot_map_sizes.R")
    print(plot_out)
0
    def rgpu(self, filename):
        """Build a ``<barcode>.<lane>`` identifier for a read file.

        The barcode is the most frequent 10th ':'-separated field among the
        '@' lines in the first 10000 lines of the file. The lane is taken
        from an ``_L<digits>`` token in the file name when one precedes an
        ``_R`` read marker; otherwise it is the most frequent 4th field of
        the same lines. Detection is best effort: a failure is reported on
        stderr and the fallback value ("unknown" for the barcode, "1" for
        the lane) is used instead.

        Args:
            filename: Path of the read file; ``.gz`` files are read through
                ``gunzip -c``, everything else through ``cat``.

        Returns:
            The string ``"<barcode>.<lane>"``.
        """
        lane = "1"
        # Must be bound up front: without a fallback a failed detection left
        # `barcode` undefined and the final format() raised UnboundLocalError.
        barcode = "unknown"

        if filename.endswith(".gz"):
            d_filename = "gunzip -c {}".format(filename)
        else:
            d_filename = "cat {}".format(filename)

        # Most frequent value of one ':'-separated field of the '@' lines.
        most_common_field = (
            "{reader} | head -n 10000 | grep ^@ | cut -d':' -f{field} "
            "| tr -d ' ' | sort | uniq -c | sort -nr | head -1 "
            "| sed -e 's/^[[:space:]]*//' | cut -d ' ' -f2")

        try:
            barcode = bash(most_common_field.format(reader=d_filename,
                                                    field=10))[0].strip()
            if self.verbose:
                print("Barcode: {bar}".format(bar=barcode), file=stderr)
        except Exception:  # best effort; let KeyboardInterrupt propagate
            print("Could not determine barcode", file=stderr)

        # NOTE(review): "[1|2]" also matches a literal '|'; "[12]" was
        # probably intended. The pattern is left unchanged.
        lane_match = search("(?<=_L)[0-9]{1,3}(?=.*_R[1|2])", filename)

        if lane_match is not None:
            lane = int(lane_match.group(0))
        else:
            # No lane in the file name: fall back to the read headers.
            try:
                lane = bash(most_common_field.format(reader=d_filename,
                                                     field=4))[0].strip()
                if self.verbose:
                    print("Lane: {lane}".format(lane=lane))
            except Exception:  # best effort; keep the default lane
                print("Could not determine lane number", file=stderr)

        return "{}.{}".format(barcode, lane)
예제 #10
0
def scontrol(*args):
    """Run ``scontrol`` with ``args`` through ``bash`` and return its result."""
    result = bash("scontrol", *args)
    return result
예제 #11
0
def qstat(*args):
    """Run ``qstat`` with ``args`` through ``bash`` and return its result."""
    result = bash("qstat", *args)
    return result
예제 #12
0
def squeue(*args):
    """Run ``squeue`` with ``args`` through ``bash`` and return its result."""
    result = bash("squeue", *args)
    return result
예제 #13
0
def scancel(*args):
    """Run ``scancel`` with ``args`` through ``bash`` and return its result."""
    result = bash("scancel", *args)
    return result
예제 #14
0
def qjob(job):
    """Run ``qstat -j`` for ``job`` through ``bash`` and return its result."""
    result = bash("qstat -j", job)
    return result
예제 #15
0
def submit_job(command_str, verbose=False, dry_run=False, **kwargs):
    """
    Wrap a command in a bash script and submit it to the active scheduler.

    Anticipated positional args:
        command_str - The command to be wrapped for submission to scheduler

    Anticipated keyword args:
        memory - The memory to be allocated to this job
        nodes - The nodes to be allocated
        cpus - The cpus **per node** to request
        partition -  The queue name or partition name for the submitted job
        job_name - The name of the job
        depends_on - The dependencies (as comma separated list of job numbers)
        email_address -  The email address to use for notifications
        email_options - Email options: START|BEGIN,END|FINISH,FAIL|ABORT
        time - time to request from the scheduler
        bash -  The bash shebang line to use in the script
        input - The input filename for the job
        output - The output filename for the job
        error - The error filename for the job

    Other args:
        verbose - When true, echo the submission command line to stderr
        dry_run - When true, build the command line but do not run it

    Returns:
        The job ID parsed from the scheduler's output, or "" on a dry run or
        when no ID could be captured.
    """
    shebang_line = kwargs.get("bash", "#!/usr/bin/env bash")
    script = "{shebang_line}\n{command}".format(shebang_line=shebang_line,
                                                command=command_str)

    # NOTE(review): the script is embedded in single quotes, so a command
    # that itself contains a single quote will break the echo.
    sub_command = "echo '{}' | {}"
    sub_script = ""  # Will hold the entire string that is sent to bash

    backend = get_backend()
    if backend == "slurm":  # Format with slurm options
        sub_script = sub_command.format(script, __submit_slurm(**kwargs))
    elif backend == "torque":  # Format with torque options
        sub_script = sub_command.format(script, __submit_torque(**kwargs))

    if verbose:
        print(sub_script, file=sys.stderr)

    if dry_run:
        return ""

    # Locals are deliberately not named stdout/stderr: the old local `stderr`
    # string ended up being passed to print() as its `file` argument.
    (job_out, job_err) = bash(sub_script)  # Actually call the script

    # Report scheduler messages before parsing; this used to sit after the
    # return statements and was never reached for slurm/torque.
    if job_err:
        print(job_err, file=sys.stderr)

    try:  # To parse the output based on expected successful submission result
        for chunk in job_out.split(" "):
            candidate = chunk.strip()
            if any(char.isdigit() for char in candidate):
                return candidate  # First try to grab IDs from sentences

        if backend == "slurm":  # If still here, try common output formats
            # Successfully submitted job <Job ID>
            return job_out.split(" ")[-1].strip("\n")
        if backend == "torque":
            # <Job ID>.hostname.etc.etc
            return job_out.split(".")[0]

    except (ValueError, IndexError) as err:
        print("Could not capture Job ID! Dependency checks may fail!")
        print("Err: {}".format(err))

    # Unknown backend or unparsable output: same "no ID" value as a dry run.
    return ""
예제 #16
0
def qdel(*args):
    """Run ``qdel`` with ``args`` through ``bash`` and return its result."""
    result = bash("qdel", *args)
    return result
예제 #17
0
def qalter(*args):
    """Run ``qalter`` with ``args`` through ``bash`` and return its result."""
    result = bash("qalter", *args)
    return result
예제 #18
0
def qresub(*args):
    """Run ``qresub`` with ``args`` through ``bash`` and return its result."""
    result = bash("qresub", *args)
    return result