Esempio n. 1
0
def _run_fastqc(global_config, sample_config, sorted_libraries_by_insert):
    """Run FastQC on the read files of every library.

    A ``fastqc`` folder is created under the current working directory.
    For each library the command is printed and appended to
    ``sample_config["commands"]``. It is executed only when this is not a
    dry run and ``<fastqc folder>/<read1 name>_fastqc.zip`` does not exist
    yet, where ``<read1 name>`` is the basename of read 1 cut at
    ``.fastq.gz``. The tool's stdout/stderr are appended to
    ``<library>_fastqc.stdout`` / ``<library>_fastqc.stderr`` in that folder.

    Args:
        global_config: configuration providing
            ``["Tools"]["fastqc"]["bin"]`` and ``["Tools"]["fastqc"]["options"]``.
        sample_config: sample configuration; must already contain a
            ``"commands"`` string. Its ``"fastqc"`` entry is set to the
            ``fastqc`` folder path.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"`` and ``"pair2"``
            (``None`` when there is no second read file).

    Returns:
        The updated ``sample_config``.
    """
    FastqcFolder = os.path.join(os.getcwd(), "fastqc")
    if not os.path.exists(FastqcFolder):
        os.makedirs(FastqcFolder)

    program = global_config["Tools"]["fastqc"]["bin"]
    program_options = global_config["Tools"]["fastqc"]["options"]
    for library, libraryInfo in sorted_libraries_by_insert:
        read1 = libraryInfo["pair1"]
        read2 = libraryInfo["pair2"]
        command = [program] + list(program_options) + [read1]
        if read2 is not None:
            command.append(read2)
        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        # Skip the run on dry runs or when the report archive already exists.
        report_prefix = os.path.join(
            FastqcFolder, os.path.basename(read1).split(".fastq.gz")[0])
        if common.check_dryrun(sample_config) or \
                os.path.exists("{}_fastqc.zip".format(report_prefix)):
            continue

        stdout_path = os.path.join(
            FastqcFolder, "{}_fastqc.stdout".format(library))
        stderr_path = os.path.join(
            FastqcFolder, "{}_fastqc.stderr".format(library))
        # Context managers make sure the log files are flushed and closed
        # once the tool has finished (they used to stay open).
        with open(stdout_path, "a") as fastq_stdOut, \
                open(stderr_path, "a") as fastq_stdErr:
            returnValue = subprocess.call(
                command, stdout=fastq_stdOut, stderr=fastq_stdErr)
        if returnValue != 0:
            # Report the failure instead of silently dropping the exit code.
            print("error while running command: {}".format(command))
    sample_config["fastqc"] = FastqcFolder
    return sample_config
Esempio n. 2
0
def _run_fastqc(global_config, sample_config, sorted_libraries_by_insert):
    """Run FastQC on the read files of every library.

    A ``fastqc`` folder is created under the current working directory.
    For each library the command is printed and appended to
    ``sample_config["commands"]``. It is executed only when this is not a
    dry run and ``<fastqc folder>/<read1 name>_fastqc.zip`` does not exist
    yet, where ``<read1 name>`` is the basename of read 1 cut at
    ``.fastq.gz``. The tool's stdout/stderr are appended to
    ``<library>_fastqc.stdout`` / ``<library>_fastqc.stderr`` in that folder.

    Args:
        global_config: configuration providing
            ``["Tools"]["fastqc"]["bin"]`` and ``["Tools"]["fastqc"]["options"]``.
        sample_config: sample configuration; must already contain a
            ``"commands"`` string. Its ``"fastqc"`` entry is set to the
            ``fastqc`` folder path.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"`` and ``"pair2"``
            (``None`` when there is no second read file).

    Returns:
        The updated ``sample_config``.
    """
    FastqcFolder = os.path.join(os.getcwd(), "fastqc")
    if not os.path.exists(FastqcFolder):
        os.makedirs(FastqcFolder)

    program = global_config["Tools"]["fastqc"]["bin"]
    program_options = global_config["Tools"]["fastqc"]["options"]
    for library, libraryInfo in sorted_libraries_by_insert:
        read1 = libraryInfo["pair1"]
        read2 = libraryInfo["pair2"]
        command = [program] + list(program_options) + [read1]
        if read2 is not None:
            command.append(read2)
        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        # Skip the run on dry runs or when the report archive already exists.
        report_prefix = os.path.join(
            FastqcFolder, os.path.basename(read1).split(".fastq.gz")[0])
        if common.check_dryrun(sample_config) or \
                os.path.exists("{}_fastqc.zip".format(report_prefix)):
            continue

        stdout_path = os.path.join(
            FastqcFolder, "{}_fastqc.stdout".format(library))
        stderr_path = os.path.join(
            FastqcFolder, "{}_fastqc.stderr".format(library))
        # Context managers make sure the log files are flushed and closed
        # once the tool has finished (they used to stay open).
        with open(stdout_path, "a") as fastq_stdOut, \
                open(stderr_path, "a") as fastq_stdErr:
            returnValue = subprocess.call(
                command, stdout=fastq_stdOut, stderr=fastq_stdErr)
        if returnValue != 0:
            # Report the failure instead of silently dropping the exit code.
            print("error while running command: {}".format(command))
    sample_config["fastqc"] = FastqcFolder
    return sample_config
Esempio n. 3
0
def _run_abyss(global_config, sample_config, sorted_libraries_by_insert):
    """Run ABYSS-P to produce a kmer coverage histogram and plot it.

    Works inside an ``abyss_kmer`` folder created under the current working
    directory and always returns to the starting directory, also when an
    error is raised. The ``mpirun`` command is printed and appended to
    ``sample_config["commands"]``; it is executed only when this is not a
    dry run and ``histogram.hist`` does not exist yet. When the command
    succeeds, ``preUnitgs.fa`` is deleted and the coverage plots are drawn
    with ``_plotKmerFixed`` and ``_plotKmer``.

    Args:
        global_config: configuration providing
            ``["Tools"]["abyss"]["bin"]`` and ``["Tools"]["abyss"]["options"]``.
        sample_config: sample configuration; must contain ``"kmer"`` and a
            ``"commands"`` string, may contain ``"threads"``. Its ``"abyss"``
            entry is set to the ``abyss_kmer`` folder path.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"``, ``"pair2"`` and
            ``"orientation"``.

    Returns:
        The updated ``sample_config``.

    Raises:
        SystemExit: if ``"kmer"`` is missing from ``sample_config``.
    """
    mainDir = os.getcwd()
    ABySS_Kmer_Folder = os.path.join(mainDir, "abyss_kmer")
    if "kmer" not in sample_config:
        # Adjacent literals instead of a backslash inside the string, which
        # embedded the source indentation in the message.
        sys.exit("error in _run_abyss QCcontrol: kmer must be present in "
                 "sample_config.yaml")

    kmer = sample_config["kmer"]
    if not os.path.exists(ABySS_Kmer_Folder):
        os.makedirs(ABySS_Kmer_Folder)

    os.chdir(ABySS_Kmer_Folder)
    try:
        program = global_config["Tools"]["abyss"]["bin"]
        # ABYSS-P is looked up next to the configured abyss binary.
        program = os.path.join(os.path.dirname(program), "ABYSS-P")
        # NOTE(review): the options are resolved here but never added to the
        # command below. Also, "abyss" in sample_config is overwritten with
        # the output folder at the end of this function -- confirm intent.
        program_options = global_config["Tools"]["abyss"]["options"]
        if "abyss" in sample_config:
            program_options = sample_config["abyss"]

        threads = 16  # default for UPPMAX
        if "threads" in sample_config:
            threads = sample_config["threads"]

        command = "mpirun -np {} {} ".format(threads, program)
        command += "-k {} ".format(kmer)
        command += "--coverage-hist=histogram.hist -o preUnitgs.fa"
        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            orientation = libraryInfo["orientation"]
            if orientation in ("innie", "outtie"):
                command += " {} ".format(read1)
                if read2 is not None:
                    command += " {} ".format(read2)
            elif orientation == "none":
                command += " {} ".format(read1)

        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        if not common.check_dryrun(sample_config) and not \
                os.path.exists("histogram.hist"):
            # Context managers close the log files once the run is over.
            with open("ABySS_Kmer_Folder.stdOut", "a") as ABySS_Kmer_stdOut, \
                    open("ABySS_Kmer_Folder.stdErr", "a") as ABySS_Kmer_stdErr:
                returnValue = subprocess.call(
                    command, shell=True,
                    stdout=ABySS_Kmer_stdOut, stderr=ABySS_Kmer_stdErr)
            # Any non-zero code is a failure; subprocess reports a process
            # killed by a signal as a negative value.
            if returnValue != 0:
                print("ABySS kmer plotting failed: unkwnown reason")
            else:
                try:
                    os.remove("preUnitgs.fa")
                except OSError as err:
                    # Best effort: a failed cleanup must not stop plotting.
                    print("could not remove preUnitgs.fa: {}".format(err))
                _plotKmerFixed(1, 200, kmer, "kmer_coverage_1_200.png")
                _plotKmerFixed(1, 500, kmer, "kmer_coverage_1_500.png")
                _plotKmerFixed(15, 200, kmer, "kmer_coverage_15_200.png")
                _plotKmerFixed(15, 500, kmer, "kmer_coverage_15_500.png")
                _plotKmer(kmer, "kmer_coverage.png")
    finally:
        # Go back to where we started even if something above raised.
        os.chdir(mainDir)

    sample_config["abyss"] = ABySS_Kmer_Folder
    return sample_config
Esempio n. 4
0
def _run_abyss(global_config, sample_config, sorted_libraries_by_insert):
    """Run ABYSS-P to produce a kmer coverage histogram and plot it.

    Works inside an ``abyss_kmer`` folder created under the current working
    directory and always returns to the starting directory, also when an
    error is raised. The ``mpirun`` command is printed and appended to
    ``sample_config["commands"]``; it is executed only when this is not a
    dry run and ``histogram.hist`` does not exist yet. When the command
    succeeds, ``preUnitgs.fa`` is deleted and the coverage plots are drawn
    with ``_plotKmerFixed`` and ``_plotKmer``.

    Args:
        global_config: configuration providing
            ``["Tools"]["abyss"]["bin"]`` and ``["Tools"]["abyss"]["options"]``.
        sample_config: sample configuration; must contain ``"kmer"`` and a
            ``"commands"`` string, may contain ``"threads"``. Its ``"abyss"``
            entry is set to the ``abyss_kmer`` folder path.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"``, ``"pair2"`` and
            ``"orientation"``.

    Returns:
        The updated ``sample_config``.

    Raises:
        SystemExit: if ``"kmer"`` is missing from ``sample_config``.
    """
    mainDir = os.getcwd()
    ABySS_Kmer_Folder = os.path.join(mainDir, "abyss_kmer")
    if "kmer" not in sample_config:
        # Adjacent literals instead of a backslash inside the string, which
        # embedded the source indentation in the message.
        sys.exit("error in _run_abyss QCcontrol: kmer must be present in "
                 "sample_config.yaml")

    kmer = sample_config["kmer"]
    if not os.path.exists(ABySS_Kmer_Folder):
        os.makedirs(ABySS_Kmer_Folder)

    os.chdir(ABySS_Kmer_Folder)
    try:
        program = global_config["Tools"]["abyss"]["bin"]
        # ABYSS-P is looked up next to the configured abyss binary.
        program = os.path.join(os.path.dirname(program), "ABYSS-P")
        # NOTE(review): the options are resolved here but never added to the
        # command below. Also, "abyss" in sample_config is overwritten with
        # the output folder at the end of this function -- confirm intent.
        program_options = global_config["Tools"]["abyss"]["options"]
        if "abyss" in sample_config:
            program_options = sample_config["abyss"]

        threads = 16  # default for UPPMAX
        if "threads" in sample_config:
            threads = sample_config["threads"]

        command = "mpirun -np {} {} ".format(threads, program)
        command += "-k {} ".format(kmer)
        command += "--coverage-hist=histogram.hist -o preUnitgs.fa"
        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            orientation = libraryInfo["orientation"]
            if orientation in ("innie", "outtie"):
                command += " {} ".format(read1)
                if read2 is not None:
                    command += " {} ".format(read2)
            elif orientation == "none":
                command += " {} ".format(read1)
        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        if not common.check_dryrun(sample_config) and not \
                os.path.exists("histogram.hist"):
            # Context managers close the log files once the run is over.
            with open("ABySS_Kmer_Folder.stdOut", "a") as ABySS_Kmer_stdOut, \
                    open("ABySS_Kmer_Folder.stdErr", "a") as ABySS_Kmer_stdErr:
                returnValue = subprocess.call(
                    command, shell=True,
                    stdout=ABySS_Kmer_stdOut, stderr=ABySS_Kmer_stdErr)
            # Any non-zero code is a failure; subprocess reports a process
            # killed by a signal as a negative value.
            if returnValue != 0:
                print("ABySS kmer plotting failed: unkwnown reason")
            else:
                try:
                    os.remove("preUnitgs.fa")
                except OSError as err:
                    # Best effort: a failed cleanup must not stop plotting.
                    print("could not remove preUnitgs.fa: {}".format(err))
                _plotKmerFixed(1, 200, kmer, "kmer_coverage_1_200.png")
                _plotKmerFixed(1, 500, kmer, "kmer_coverage_1_500.png")
                _plotKmerFixed(15, 200, kmer, "kmer_coverage_15_200.png")
                _plotKmerFixed(15, 500, kmer, "kmer_coverage_15_500.png")
                _plotKmer(kmer, "kmer_coverage.png")
    finally:
        # Go back to where we started even if something above raised.
        os.chdir(mainDir)

    sample_config["abyss"] = ABySS_Kmer_Folder
    return sample_config
Esempio n. 5
0
def _run_kmergenie(global_config, sample_config, sorted_libraries_by_insert):
    """Runs kmergenie to establish a recommended kmer size for assembly

    Works inside a ``kmergenie`` folder created under the current working
    directory and always returns to the starting directory, also when an
    error is raised. The command is printed and appended to
    ``sample_config["commands"]``. Unless this is a dry run, the read files
    of all libraries are listed one per line in
    ``<output>kmerinput.txt``, the program is run with stdout/stderr
    written to ``kmergenie.stdOut`` / ``kmergenie.stdErr``, and on success
    ``_kmergenie_plot("histograms.dat")`` is called.

    Args:
        global_config: configuration providing
            ``["Tools"]["kmergenie"]["bin"]`` and
            ``["Tools"]["kmergenie"]["options"]``.
        sample_config: sample configuration; must contain a ``"commands"``
            string, may contain ``"output"``, ``"threads"`` and
            ``"kmergenie"`` (options overriding the global ones). Its
            ``"kmergenie"`` entry is set to the ``kmergenie`` folder path.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"`` and ``"pair2"``
            (``None`` when there is no second read file).

    Returns:
        The updated ``sample_config``.
    """
    maindir = os.getcwd()
    kmerdir = os.path.join(maindir, "kmergenie")
    if not os.path.exists(kmerdir):
        os.makedirs(kmerdir)
    os.chdir(kmerdir)
    try:
        # Write a list of input fastq files for kmergenie
        kmer_input = os.path.join(
            kmerdir, "{}kmerinput.txt".format(sample_config.get("output", "")))

        program = global_config["Tools"]["kmergenie"]["bin"]
        program_options = global_config["Tools"]["kmergenie"]["options"]
        # Could be useful to add --diploid if sample is highly heterozygous
        # NOTE(review): "kmergenie" in sample_config is read as options here
        # but overwritten with the output folder below -- confirm intent.
        if "kmergenie" in sample_config:
            program_options = sample_config["kmergenie"]

        # Kmergenie will spawn number_of_cores - 1 threads by default
        threads = ""
        if "threads" in sample_config:
            threads = sample_config["threads"]

        cmd_list = [program, kmer_input]
        cmd_list.extend(option for option in program_options if option)
        if threads:
            # The list is executed without a shell, so the flag and its
            # value must be separate argv entries; the joined string below
            # is unchanged.
            cmd_list.extend(["-t", "{}".format(threads)])
        command = " ".join(cmd_list)
        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        if not common.check_dryrun(sample_config):
            with open(kmer_input, "w") as f:
                for lib, lib_info in sorted_libraries_by_insert:
                    f.write(lib_info["pair1"] + "\n")
                    # pair2 is None when there is no second read file;
                    # concatenating it used to raise a TypeError.
                    if lib_info["pair2"] is not None:
                        f.write(lib_info["pair2"] + "\n")

            # Context managers close the log files once the run is over.
            with open("kmergenie.stdOut", "w") as stdOut, \
                    open("kmergenie.stdErr", "w") as stdErr:
                returnValue = subprocess.call(
                    cmd_list, stdout=stdOut, stderr=stdErr)
            if returnValue != 0:
                print("error while running command: {}".format(command))
            else:
                _kmergenie_plot("histograms.dat")
    finally:
        # Go back to where we started even if something above raised.
        os.chdir(maindir)
    sample_config["kmergenie"] = kmerdir
    return sample_config
Esempio n. 6
0
def _run_kmergenie(global_config, sample_config, sorted_libraries_by_insert):
    """Runs kmergenie to establish a recommended kmer size for assembly

    Works inside a ``kmergenie`` folder created under the current working
    directory and always returns to the starting directory, also when an
    error is raised. The command is printed and appended to
    ``sample_config["commands"]``. Unless this is a dry run, the read files
    of all libraries are listed one per line in
    ``<output>kmerinput.txt``, the program is run with stdout/stderr
    written to ``kmergenie.stdOut`` / ``kmergenie.stdErr``, and on success
    ``_kmergenie_plot("histograms.dat")`` is called.

    Args:
        global_config: configuration providing
            ``["Tools"]["kmergenie"]["bin"]`` and
            ``["Tools"]["kmergenie"]["options"]``.
        sample_config: sample configuration; must contain a ``"commands"``
            string, may contain ``"output"``, ``"threads"`` and
            ``"kmergenie"`` (options overriding the global ones). Its
            ``"kmergenie"`` entry is set to the ``kmergenie`` folder path.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"`` and ``"pair2"``
            (``None`` when there is no second read file).

    Returns:
        The updated ``sample_config``.
    """
    maindir = os.getcwd()
    kmerdir = os.path.join(maindir, "kmergenie")
    if not os.path.exists(kmerdir):
        os.makedirs(kmerdir)
    os.chdir(kmerdir)
    try:
        # Write a list of input fastq files for kmergenie
        kmer_input = os.path.join(
            kmerdir, "{}kmerinput.txt".format(sample_config.get("output", "")))

        program = global_config["Tools"]["kmergenie"]["bin"]
        program_options = global_config["Tools"]["kmergenie"]["options"]
        # Could be useful to add --diploid if sample is highly heterozygous
        # NOTE(review): "kmergenie" in sample_config is read as options here
        # but overwritten with the output folder below -- confirm intent.
        if "kmergenie" in sample_config:
            program_options = sample_config["kmergenie"]

        # Kmergenie will spawn number_of_cores - 1 threads by default
        threads = ""
        if "threads" in sample_config:
            threads = sample_config["threads"]

        cmd_list = [program, kmer_input]
        cmd_list.extend(option for option in program_options if option)
        if threads:
            # The list is executed without a shell, so the flag and its
            # value must be separate argv entries; the joined string below
            # is unchanged.
            cmd_list.extend(["-t", "{}".format(threads)])
        command = " ".join(cmd_list)
        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        if not common.check_dryrun(sample_config):
            with open(kmer_input, "w") as f:
                for lib, lib_info in sorted_libraries_by_insert:
                    f.write(lib_info["pair1"] + "\n")
                    # pair2 is None when there is no second read file;
                    # concatenating it used to raise a TypeError.
                    if lib_info["pair2"] is not None:
                        f.write(lib_info["pair2"] + "\n")

            # Context managers close the log files once the run is over.
            with open("kmergenie.stdOut", "w") as stdOut, \
                    open("kmergenie.stdErr", "w") as stdErr:
                returnValue = subprocess.call(
                    cmd_list, stdout=stdOut, stderr=stdErr)
            if returnValue != 0:
                print("error while running command: {}".format(command))
            else:
                _kmergenie_plot("histograms.dat")
    finally:
        # Go back to where we started even if something above raised.
        os.chdir(maindir)
    sample_config["kmergenie"] = kmerdir
    return sample_config
Esempio n. 7
0
def _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert):
    """Run Trimmomatic in paired-end mode on every library with two reads.

    Works inside a ``Trimmomatic`` folder created under the current working
    directory and always returns to the starting directory, also when an
    error is raised. Libraries whose ``"pair2"`` is ``None`` are left
    untouched. For the others the command is printed and appended to
    ``sample_config["commands"]``; it is executed only when this is not a
    dry run and the paired output for read 1 does not exist yet. The
    library's ``"pair1"`` / ``"pair2"`` entries are then pointed at the
    paired outputs and ``"trimmomatic"`` at the stderr log path, whether or
    not the command was actually run.

    Args:
        global_config: configuration providing
            ``["Tools"]["trimmomatic"]["bin"]`` (passed to ``java -jar``).
        sample_config: sample configuration; must contain ``"adapters"``
            (path to an existing file) and a ``"commands"`` string, may
            contain ``"threads"`` (default 8).
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"`` and ``"pair2"`` and
            is modified in place.

    Returns:
        The updated ``sample_config``.

    Raises:
        SystemExit: if ``"adapters"`` is missing from ``sample_config`` or
            the adapter file does not exist.
    """
    program = global_config["Tools"]["trimmomatic"]["bin"]
    if "adapters" not in sample_config:
        # Trailing spaces keep the concatenated literals from running
        # together ("trimmingare", "themin").
        sys.exit("running MP pipeline, adapters file to be used in trimming "
                 "are needed for Trimmomatic. Please specify them "
                 "in the sample configuration file and rerun")
    adapterFile = sample_config["adapters"]
    if not os.path.exists(adapterFile):
        sys.exit("Trimmomatic cannot be run as adapter file is not specified "
                 "or points to unknown position: {}".format(adapterFile))

    mainDirectory = os.getcwd()
    trimmomaticDir = os.path.join(mainDirectory, "Trimmomatic")
    if not os.path.exists(trimmomaticDir):
        os.makedirs(trimmomaticDir)
    os.chdir(trimmomaticDir)
    try:
        # now I am in running dir, I need to process one by one the libraries
        threads = 8
        if "threads" in sample_config:
            threads = sample_config["threads"]

        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            if read2 is None:
                # Only libraries with two read files are trimmed.
                continue

            # Output names derive from the input file name up to its
            # first dot.
            read1_baseName = os.path.split(read1)[1].split(".")[0]
            read2_baseName = os.path.split(read2)[1].split(".")[0]
            output_read1_pair = os.path.join(
                trimmomaticDir, "{}.fastq.gz".format(read1_baseName))
            output_read1_sing = os.path.join(
                trimmomaticDir, "{}_u.fastq.gz".format(read1_baseName))
            output_read2_pair = os.path.join(
                trimmomaticDir, "{}.fastq.gz".format(read2_baseName))
            output_read2_sing = os.path.join(
                trimmomaticDir, "{}_u.fastq.gz".format(read2_baseName))
            command = [
                "java", "-jar", program, "PE", "-threads",
                "{}".format(threads), "-phred33", read1, read2,
                output_read1_pair, output_read1_sing, output_read2_pair,
                output_read2_sing,
                "ILLUMINACLIP:{}:2:30:10".format(adapterFile),
                "LEADING:3", "TRAILING:3", "SLIDINGWINDOW:4:15",
                "MINLEN:30",
            ]
            common.print_command(command)
            sample_config["commands"] += "\n" + common.get_command_str(command)

            # do not execute if files have been already generated
            if not common.check_dryrun(sample_config) and not \
                    os.path.exists(output_read1_pair):
                # Context managers close the log files once the run is over.
                with open("{}_trimmomatic.stdOut".format(read1_baseName),
                          "w") as stdOut, \
                        open("{}_trimmomatic.stdErr".format(read1_baseName),
                             "w") as stdErr:
                    returnValue = subprocess.call(
                        command, stdout=stdOut, stderr=stdErr)
                if returnValue != 0:
                    print("error while running command: {}".format(command))
            libraryInfo["pair1"] = output_read1_pair
            libraryInfo["pair2"] = output_read2_pair
            libraryInfo["trimmomatic"] = os.path.join(
                trimmomaticDir, "{}_trimmomatic.stdErr".format(read1_baseName))
    finally:
        # Go back to where we started even if something above raised.
        os.chdir(mainDirectory)
    return sample_config
Esempio n. 8
0
def _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert):
    """Run Trimmomatic in paired-end mode on every library with two reads.

    Works inside a ``Trimmomatic`` folder created under the current working
    directory and always returns to the starting directory, also when an
    error is raised. Libraries whose ``"pair2"`` is ``None`` are left
    untouched. For the others the command is printed and appended to
    ``sample_config["commands"]``; it is executed only when this is not a
    dry run and the paired output for read 1 does not exist yet. The
    library's ``"pair1"`` / ``"pair2"`` entries are then pointed at the
    paired outputs and ``"trimmomatic"`` at the stderr log path, whether or
    not the command was actually run.

    Args:
        global_config: configuration providing
            ``["Tools"]["trimmomatic"]["bin"]`` (passed to ``java -jar``).
        sample_config: sample configuration; must contain ``"adapters"``
            (path to an existing file) and a ``"commands"`` string, may
            contain ``"threads"`` (default 8).
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` provides ``"pair1"`` and ``"pair2"`` and
            is modified in place.

    Returns:
        The updated ``sample_config``.

    Raises:
        SystemExit: if ``"adapters"`` is missing from ``sample_config`` or
            the adapter file does not exist.
    """
    program = global_config["Tools"]["trimmomatic"]["bin"]
    if "adapters" not in sample_config:
        # Trailing spaces keep the concatenated literals from running
        # together ("trimmingare", "themin").
        sys.exit("running MP pipeline, adapters file to be used in trimming "
                 "are needed for Trimmomatic. Please specify them "
                 "in the sample configuration file and rerun")
    adapterFile = sample_config["adapters"]
    if not os.path.exists(adapterFile):
        sys.exit("Trimmomatic cannot be run as adapter file is not specified "
                 "or points to unknown position: {}".format(adapterFile))

    mainDirectory = os.getcwd()
    trimmomaticDir = os.path.join(mainDirectory, "Trimmomatic")
    if not os.path.exists(trimmomaticDir):
        os.makedirs(trimmomaticDir)
    os.chdir(trimmomaticDir)
    try:
        # now I am in running dir, I need to process one by one the libraries
        threads = 8
        if "threads" in sample_config:
            threads = sample_config["threads"]

        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            if read2 is None:
                # Only libraries with two read files are trimmed.
                continue

            # Output names derive from the input file name up to its
            # first dot.
            read1_baseName = os.path.split(read1)[1].split(".")[0]
            read2_baseName = os.path.split(read2)[1].split(".")[0]
            output_read1_pair = os.path.join(
                trimmomaticDir, "{}.fastq.gz".format(read1_baseName))
            output_read1_sing = os.path.join(
                trimmomaticDir, "{}_u.fastq.gz".format(read1_baseName))
            output_read2_pair = os.path.join(
                trimmomaticDir, "{}.fastq.gz".format(read2_baseName))
            output_read2_sing = os.path.join(
                trimmomaticDir, "{}_u.fastq.gz".format(read2_baseName))
            command = [
                "java", "-jar", program, "PE", "-threads",
                "{}".format(threads), "-phred33", read1, read2,
                output_read1_pair, output_read1_sing, output_read2_pair,
                output_read2_sing,
                "ILLUMINACLIP:{}:2:30:10".format(adapterFile), "LEADING:3",
                "TRAILING:3", "SLIDINGWINDOW:4:15", "MINLEN:30"
            ]
            common.print_command(command)
            sample_config["commands"] += "\n" + common.get_command_str(command)

            # do not execute if files have been already generated
            if not common.check_dryrun(sample_config) and not \
                    os.path.exists(output_read1_pair):
                # Context managers close the log files once the run is over.
                with open("{}_trimmomatic.stdOut".format(read1_baseName),
                          "w") as stdOut, \
                        open("{}_trimmomatic.stdErr".format(read1_baseName),
                             "w") as stdErr:
                    returnValue = subprocess.call(command,
                                                  stdout=stdOut,
                                                  stderr=stdErr)
                if returnValue != 0:
                    print("error while running command: {}".format(command))
            libraryInfo["pair1"] = output_read1_pair
            libraryInfo["pair2"] = output_read2_pair
            libraryInfo["trimmomatic"] = os.path.join(
                trimmomaticDir, "{}_trimmomatic.stdErr".format(read1_baseName))
    finally:
        # Go back to where we started even if something above raised.
        os.chdir(mainDirectory)
    return sample_config