def _run_fastqc(global_config, sample_config, sorted_libraries_by_insert):
    """Run FastQC on every library and record where the results live.

    For each ``(library, libraryInfo)`` pair the command
    ``[bin, *options, pair1[, pair2]]`` is built from
    ``global_config["Tools"]["fastqc"]``, printed through ``common`` and
    appended to ``sample_config["commands"]``.  The tool is only executed
    when this is not a dry run and ``<fastqc>/<read1 name>_fastqc.zip`` does
    not exist yet, so finished libraries are skipped on re-runs.

    Args:
        global_config: dict providing ``["Tools"]["fastqc"]["bin"]`` and
            ``["Tools"]["fastqc"]["options"]``.
        sample_config: dict with a ``"commands"`` string; it is updated in
            place.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` has ``"pair1"`` and ``"pair2"`` (the
            latter may be ``None``).

    Returns:
        ``sample_config`` with ``sample_config["fastqc"]`` set to the
        ``fastqc`` folder under the current working directory.
    """
    fastqc_folder = os.path.join(os.getcwd(), "fastqc")
    if not os.path.exists(fastqc_folder):
        os.makedirs(fastqc_folder)

    program = global_config["Tools"]["fastqc"]["bin"]
    program_options = global_config["Tools"]["fastqc"]["options"]

    for library, libraryInfo in sorted_libraries_by_insert:
        command = [program]
        command.extend(program_options)
        read1 = libraryInfo["pair1"]
        read2 = libraryInfo["pair2"]
        command.append(read1)
        if read2 is not None:
            command.append(read2)

        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        # The archive checked here is named after read1 with its
        # ".fastq.gz" suffix stripped.
        folder_output_name = os.path.join(
            fastqc_folder, os.path.basename(read1).split(".fastq.gz")[0])
        already_done = os.path.exists(
            "{}_fastqc.zip".format(folder_output_name))
        if not common.check_dryrun(sample_config) and not already_done:
            stdout_path = os.path.join(
                fastqc_folder, "{}_fastqc.stdout".format(library))
            stderr_path = os.path.join(
                fastqc_folder, "{}_fastqc.stderr".format(library))
            # Context managers make sure the log handles are closed once the
            # child process has finished (they used to be leaked).
            with open(stdout_path, "a") as fastq_stdout, \
                    open(stderr_path, "a") as fastq_stderr:
                subprocess.call(command, stdout=fastq_stdout,
                                stderr=fastq_stderr)

    sample_config["fastqc"] = fastqc_folder
    return sample_config
def _run_fastqc(global_config, sample_config, sorted_libraries_by_insert):
    """Run FastQC on every library and record where the results live.

    For each ``(library, libraryInfo)`` pair the command
    ``[bin, *options, pair1[, pair2]]`` is built from
    ``global_config["Tools"]["fastqc"]``, printed through ``common`` and
    appended to ``sample_config["commands"]``.  The tool is only executed
    when this is not a dry run and ``<fastqc>/<read1 name>_fastqc.zip`` does
    not exist yet, so finished libraries are skipped on re-runs.

    Args:
        global_config: dict providing ``["Tools"]["fastqc"]["bin"]`` and
            ``["Tools"]["fastqc"]["options"]``.
        sample_config: dict with a ``"commands"`` string; it is updated in
            place.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs; ``libraryInfo`` has ``"pair1"`` and ``"pair2"`` (the
            latter may be ``None``).

    Returns:
        ``sample_config`` with ``sample_config["fastqc"]`` set to the
        ``fastqc`` folder under the current working directory.
    """
    fastqc_folder = os.path.join(os.getcwd(), "fastqc")
    if not os.path.exists(fastqc_folder):
        os.makedirs(fastqc_folder)

    program = global_config["Tools"]["fastqc"]["bin"]
    program_options = global_config["Tools"]["fastqc"]["options"]

    for library, libraryInfo in sorted_libraries_by_insert:
        command = [program]
        command.extend(program_options)
        read1 = libraryInfo["pair1"]
        read2 = libraryInfo["pair2"]
        command.append(read1)
        if read2 is not None:
            command.append(read2)

        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        # The archive checked here is named after read1 with its
        # ".fastq.gz" suffix stripped.
        folder_output_name = os.path.join(
            fastqc_folder, os.path.basename(read1).split(".fastq.gz")[0])
        already_done = os.path.exists(
            "{}_fastqc.zip".format(folder_output_name))
        if not common.check_dryrun(sample_config) and not already_done:
            stdout_path = os.path.join(
                fastqc_folder, "{}_fastqc.stdout".format(library))
            stderr_path = os.path.join(
                fastqc_folder, "{}_fastqc.stderr".format(library))
            # Context managers make sure the log handles are closed once the
            # child process has finished (they used to be leaked).
            with open(stdout_path, "a") as fastq_stdout, \
                    open(stderr_path, "a") as fastq_stderr:
                subprocess.call(command, stdout=fastq_stdout,
                                stderr=fastq_stderr)

    sample_config["fastqc"] = fastqc_folder
    return sample_config
def _run_abyss(global_config, sample_config, sorted_libraries_by_insert):
    """Run ABYSS-P to produce a k-mer coverage histogram and plot it.

    The command ``mpirun -np <threads> <ABYSS-P> -k <kmer>
    --coverage-hist=histogram.hist -o preUnitgs.fa <reads...>`` is built,
    printed through ``common`` and appended to ``sample_config["commands"]``.
    It is executed inside an ``abyss_kmer`` folder under the current working
    directory, unless this is a dry run or ``histogram.hist`` already exists.
    On success ``preUnitgs.fa`` is removed and the coverage plots are drawn.

    Args:
        global_config: dict providing ``["Tools"]["abyss"]["bin"]`` and
            ``["Tools"]["abyss"]["options"]``; ``ABYSS-P`` is looked up in
            the directory of ``bin``.
        sample_config: dict that must contain ``"kmer"`` and ``"commands"``;
            ``"threads"`` is optional (default 16).  Updated in place.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs with ``"pair1"``, ``"pair2"`` and ``"orientation"``.

    Returns:
        ``sample_config`` with ``sample_config["abyss"]`` set to the
        ``abyss_kmer`` folder.

    Raises:
        SystemExit: if ``"kmer"`` is missing from ``sample_config``.
    """
    main_dir = os.getcwd()
    abyss_kmer_folder = os.path.join(main_dir, "abyss_kmer")
    if "kmer" not in sample_config:
        # The message used to carry a backslash continuation (and the
        # following indentation) inside the literal.
        sys.exit("error in _run_abyss QCcontrol: kmer must be present in "
                 "sample_config.yaml")
    kmer = sample_config["kmer"]

    if not os.path.exists(abyss_kmer_folder):
        os.makedirs(abyss_kmer_folder)
    os.chdir(abyss_kmer_folder)
    try:
        program = global_config["Tools"]["abyss"]["bin"]
        program = os.path.join(os.path.dirname(program), "ABYSS-P")
        # NOTE(review): the options are looked up but never added to the
        # command line below; kept so a missing key still fails as before.
        # Also note sample_config["abyss"] is overwritten with the output
        # folder at the end of this function -- confirm intent.
        program_options = global_config["Tools"]["abyss"]["options"]
        if "abyss" in sample_config:
            program_options = sample_config["abyss"]

        threads = 16  # default for UPPMAX
        if "threads" in sample_config:
            threads = sample_config["threads"]

        command = "mpirun -np {} {} ".format(threads, program)
        command += "-k {} ".format(kmer)
        command += "--coverage-hist=histogram.hist -o preUnitgs.fa"
        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            orientation = libraryInfo["orientation"]
            if orientation in ("innie", "outtie"):
                command += " {} ".format(read1)
                if read2 is not None:
                    command += " {} ".format(read2)
            if orientation == "none":
                command += " {} ".format(read1)

        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        if not common.check_dryrun(sample_config) and not \
                os.path.exists("histogram.hist"):
            # Close the log files as soon as the child process is done.
            with open("ABySS_Kmer_Folder.stdOut", "a") as abyss_stdout, \
                    open("ABySS_Kmer_Folder.stdErr", "a") as abyss_stderr:
                return_value = subprocess.call(
                    command, shell=True,
                    stdout=abyss_stdout, stderr=abyss_stderr)
            # A negative code means the process was killed by a signal,
            # which is a failure too (the old ``> 0`` test missed it).
            if return_value != 0:
                print("ABySS kmer plotting failed: unkwnown reason")
            else:
                subprocess.call(("rm", "preUnitgs.fa"))
                _plotKmerFixed(1, 200, kmer, "kmer_coverage_1_200.png")
                _plotKmerFixed(1, 500, kmer, "kmer_coverage_1_500.png")
                _plotKmerFixed(15, 200, kmer, "kmer_coverage_15_200.png")
                _plotKmerFixed(15, 500, kmer, "kmer_coverage_15_500.png")
                _plotKmer(kmer, "kmer_coverage.png")
    finally:
        # Always go back to where we started, even if something above raised.
        os.chdir(main_dir)

    sample_config["abyss"] = abyss_kmer_folder
    return sample_config
def _run_abyss(global_config, sample_config, sorted_libraries_by_insert):
    """Run ABYSS-P to produce a k-mer coverage histogram and plot it.

    The command ``mpirun -np <threads> <ABYSS-P> -k <kmer>
    --coverage-hist=histogram.hist -o preUnitgs.fa <reads...>`` is built,
    printed through ``common`` and appended to ``sample_config["commands"]``.
    It is executed inside an ``abyss_kmer`` folder under the current working
    directory, unless this is a dry run or ``histogram.hist`` already exists.
    On success ``preUnitgs.fa`` is removed and the coverage plots are drawn.

    Args:
        global_config: dict providing ``["Tools"]["abyss"]["bin"]`` and
            ``["Tools"]["abyss"]["options"]``; ``ABYSS-P`` is looked up in
            the directory of ``bin``.
        sample_config: dict that must contain ``"kmer"`` and ``"commands"``;
            ``"threads"`` is optional (default 16).  Updated in place.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs with ``"pair1"``, ``"pair2"`` and ``"orientation"``.

    Returns:
        ``sample_config`` with ``sample_config["abyss"]`` set to the
        ``abyss_kmer`` folder.

    Raises:
        SystemExit: if ``"kmer"`` is missing from ``sample_config``.
    """
    main_dir = os.getcwd()
    abyss_kmer_folder = os.path.join(main_dir, "abyss_kmer")
    if "kmer" not in sample_config:
        # The message used to carry a backslash continuation (and the
        # following indentation) inside the literal.
        sys.exit("error in _run_abyss QCcontrol: kmer must be present in "
                 "sample_config.yaml")
    kmer = sample_config["kmer"]

    if not os.path.exists(abyss_kmer_folder):
        os.makedirs(abyss_kmer_folder)
    os.chdir(abyss_kmer_folder)
    try:
        program = global_config["Tools"]["abyss"]["bin"]
        program = os.path.join(os.path.dirname(program), "ABYSS-P")
        # NOTE(review): the options are looked up but never added to the
        # command line below; kept so a missing key still fails as before.
        # Also note sample_config["abyss"] is overwritten with the output
        # folder at the end of this function -- confirm intent.
        program_options = global_config["Tools"]["abyss"]["options"]
        if "abyss" in sample_config:
            program_options = sample_config["abyss"]

        threads = 16  # default for UPPMAX
        if "threads" in sample_config:
            threads = sample_config["threads"]

        command = "mpirun -np {} {} ".format(threads, program)
        command += "-k {} ".format(kmer)
        command += "--coverage-hist=histogram.hist -o preUnitgs.fa"
        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            orientation = libraryInfo["orientation"]
            if orientation in ("innie", "outtie"):
                command += " {} ".format(read1)
                if read2 is not None:
                    command += " {} ".format(read2)
            if orientation == "none":
                command += " {} ".format(read1)

        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        if not common.check_dryrun(sample_config) and not \
                os.path.exists("histogram.hist"):
            # Close the log files as soon as the child process is done.
            with open("ABySS_Kmer_Folder.stdOut", "a") as abyss_stdout, \
                    open("ABySS_Kmer_Folder.stdErr", "a") as abyss_stderr:
                return_value = subprocess.call(
                    command, shell=True,
                    stdout=abyss_stdout, stderr=abyss_stderr)
            # A negative code means the process was killed by a signal,
            # which is a failure too (the old ``> 0`` test missed it).
            if return_value != 0:
                print("ABySS kmer plotting failed: unkwnown reason")
            else:
                subprocess.call(("rm", "preUnitgs.fa"))
                _plotKmerFixed(1, 200, kmer, "kmer_coverage_1_200.png")
                _plotKmerFixed(1, 500, kmer, "kmer_coverage_1_500.png")
                _plotKmerFixed(15, 200, kmer, "kmer_coverage_15_200.png")
                _plotKmerFixed(15, 500, kmer, "kmer_coverage_15_500.png")
                _plotKmer(kmer, "kmer_coverage.png")
    finally:
        # Always go back to where we started, even if something above raised.
        os.chdir(main_dir)

    sample_config["abyss"] = abyss_kmer_folder
    return sample_config
def _run_kmergenie(global_config, sample_config, sorted_libraries_by_insert):
    """Run kmergenie to establish a recommended kmer size for assembly.

    Works inside a ``kmergenie`` folder under the current working directory.
    The read files of every library are listed, one per line, in
    ``<output>kmerinput.txt`` (``<output>`` is ``sample_config["output"]`` or
    empty), and that list is handed to kmergenie.  The command is printed
    through ``common`` and appended to ``sample_config["commands"]``; it is
    executed only when this is not a dry run.  On success the histogram plot
    is generated from ``histograms.dat``.

    Args:
        global_config: dict providing ``["Tools"]["kmergenie"]["bin"]`` and
            ``["Tools"]["kmergenie"]["options"]``.
        sample_config: dict with a ``"commands"`` string; optional keys
            ``"output"``, ``"kmergenie"`` (overrides the global options) and
            ``"threads"``.  Updated in place.
        sorted_libraries_by_insert: iterable of ``(library, lib_info)`` pairs
            with ``"pair1"`` and ``"pair2"`` (the latter may be ``None``).

    Returns:
        ``sample_config`` with ``sample_config["kmergenie"]`` set to the
        ``kmergenie`` folder.
    """
    maindir = os.getcwd()
    kmerdir = os.path.join(maindir, "kmergenie")
    if not os.path.exists(kmerdir):
        os.makedirs(kmerdir)
    os.chdir(kmerdir)
    try:
        # File that will list the input fastq files for kmergenie
        kmer_input = os.path.join(
            kmerdir,
            "{}kmerinput.txt".format(sample_config.get("output", "")))

        program = global_config["Tools"]["kmergenie"]["bin"]
        program_options = global_config["Tools"]["kmergenie"]["options"]
        # Could be useful to add --diploid if sample is highly heterozygous
        if "kmergenie" in sample_config:
            program_options = sample_config["kmergenie"]

        # Kmergenie will spawn number_of_cores - 1 threads by default
        threads = sample_config.get("threads", "")

        cmd_list = [program, kmer_input]
        cmd_list.extend(filter(None, program_options))
        if threads:
            # Flag and value must be separate argv entries when the command
            # is run from a list; "-t N" as one token is a single argument.
            cmd_list.extend(["-t", "{}".format(threads)])
        command = " ".join(cmd_list)
        common.print_command(command)
        sample_config["commands"] += "\n" + common.get_command_str(command)

        if not common.check_dryrun(sample_config):
            with open(kmer_input, "w") as f:
                for lib, lib_info in sorted_libraries_by_insert:
                    f.write(lib_info["pair1"] + "\n")
                    # Single-end libraries have no second read file.
                    if lib_info["pair2"] is not None:
                        f.write(lib_info["pair2"] + "\n")

            # Close the log files as soon as the child process is done.
            with open("kmergenie.stdOut", "w") as std_out, \
                    open("kmergenie.stdErr", "w") as std_err:
                return_value = subprocess.call(
                    cmd_list, stdout=std_out, stderr=std_err)
            if return_value != 0:
                print("error while running command: {}".format(command))
            else:
                _kmergenie_plot("histograms.dat")
    finally:
        # Always go back to where we started, even if something above raised.
        os.chdir(maindir)

    sample_config["kmergenie"] = kmerdir
    return sample_config
def _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert):
    """Trim every paired library with Trimmomatic (PE mode).

    Works inside a ``Trimmomatic`` folder under the current working
    directory.  For each library that has a second read file, a
    ``java -jar <bin> PE ...`` command is built, printed through ``common``
    and appended to ``sample_config["commands"]``.  It is executed unless
    this is a dry run or the paired output for read 1 already exists.  The
    library entry is then pointed at the trimmed files, and
    ``libraryInfo["trimmomatic"]`` is set to the path of the stderr log.
    Libraries without a second read file are left untouched.

    Args:
        global_config: dict providing ``["Tools"]["trimmomatic"]["bin"]``
            (the jar passed to ``java -jar``).
        sample_config: dict with ``"adapters"`` (path to an existing adapter
            file) and a ``"commands"`` string; ``"threads"`` is optional
            (default 8).  Updated in place.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs with ``"pair1"`` and ``"pair2"``; ``libraryInfo`` is
            updated in place.

    Returns:
        ``sample_config``.

    Raises:
        SystemExit: if ``"adapters"`` is missing from ``sample_config`` or
            the file it names does not exist.
    """
    program = global_config["Tools"]["trimmomatic"]["bin"]
    if "adapters" not in sample_config:
        # Adjacent literals are joined verbatim, so each piece needs its own
        # trailing space (the words used to run together).
        sys.exit("running MP pipeline, adapters file to be used in trimming "
                 "are needed for Trimmomatic. Please specify them "
                 "in the sample configuration file and rerun")
    adapter_file = sample_config["adapters"]
    if not os.path.exists(adapter_file):
        sys.exit("Trimmomatic cannot be run as adapter file is not specified "
                 "or points to unknown position: {}".format(adapter_file))

    main_directory = os.getcwd()
    trimmomatic_dir = os.path.join(main_directory, "Trimmomatic")
    if not os.path.exists(trimmomatic_dir):
        os.makedirs(trimmomatic_dir)
    os.chdir(trimmomatic_dir)
    try:
        # Now in the running dir: process the libraries one by one.
        threads = 8
        if "threads" in sample_config:
            threads = sample_config["threads"]

        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            if read2 is None:
                # Only paired libraries are trimmed here.
                continue

            # Output names are derived from the input file name up to its
            # first dot.
            read1_base = os.path.split(read1)[1].split(".")[0]
            read2_base = os.path.split(read2)[1].split(".")[0]
            output_read1_pair = os.path.join(
                trimmomatic_dir, "{}.fastq.gz".format(read1_base))
            output_read1_sing = os.path.join(
                trimmomatic_dir, "{}_u.fastq.gz".format(read1_base))
            output_read2_pair = os.path.join(
                trimmomatic_dir, "{}.fastq.gz".format(read2_base))
            output_read2_sing = os.path.join(
                trimmomatic_dir, "{}_u.fastq.gz".format(read2_base))
            command = [
                "java", "-jar", program, "PE",
                "-threads", "{}".format(threads), "-phred33",
                read1, read2,
                output_read1_pair, output_read1_sing,
                output_read2_pair, output_read2_sing,
                "ILLUMINACLIP:{}:2:30:10".format(adapter_file),
                "LEADING:3", "TRAILING:3", "SLIDINGWINDOW:4:15", "MINLEN:30",
            ]
            common.print_command(command)
            sample_config["commands"] += "\n" + \
                common.get_command_str(command)

            # Do not execute if the files have already been generated.
            if not common.check_dryrun(sample_config) and not \
                    os.path.exists(output_read1_pair):
                # Close the log files once the child process is done.
                with open("{}_trimmomatic.stdOut".format(read1_base),
                          "w") as std_out, \
                        open("{}_trimmomatic.stdErr".format(read1_base),
                             "w") as std_err:
                    return_value = subprocess.call(
                        command, stdout=std_out, stderr=std_err)
                if return_value != 0:
                    print("error while running command: {}".format(command))

            # Later steps should read the trimmed, still-paired reads.
            libraryInfo["pair1"] = output_read1_pair
            libraryInfo["pair2"] = output_read2_pair
            libraryInfo["trimmomatic"] = os.path.join(
                trimmomatic_dir, "{}_trimmomatic.stdErr".format(read1_base))
    finally:
        # Always go back to where we started, even if something above raised.
        os.chdir(main_directory)
    return sample_config
def _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert):
    """Trim every paired library with Trimmomatic (PE mode).

    Works inside a ``Trimmomatic`` folder under the current working
    directory.  For each library that has a second read file, a
    ``java -jar <bin> PE ...`` command is built, printed through ``common``
    and appended to ``sample_config["commands"]``.  It is executed unless
    this is a dry run or the paired output for read 1 already exists.  The
    library entry is then pointed at the trimmed files, and
    ``libraryInfo["trimmomatic"]`` is set to the path of the stderr log.
    Libraries without a second read file are left untouched.

    Args:
        global_config: dict providing ``["Tools"]["trimmomatic"]["bin"]``
            (the jar passed to ``java -jar``).
        sample_config: dict with ``"adapters"`` (path to an existing adapter
            file) and a ``"commands"`` string; ``"threads"`` is optional
            (default 8).  Updated in place.
        sorted_libraries_by_insert: iterable of ``(library, libraryInfo)``
            pairs with ``"pair1"`` and ``"pair2"``; ``libraryInfo`` is
            updated in place.

    Returns:
        ``sample_config``.

    Raises:
        SystemExit: if ``"adapters"`` is missing from ``sample_config`` or
            the file it names does not exist.
    """
    program = global_config["Tools"]["trimmomatic"]["bin"]
    if "adapters" not in sample_config:
        # Adjacent literals are joined verbatim, so each piece needs its own
        # trailing space (the words used to run together).
        sys.exit("running MP pipeline, adapters file to be used in trimming "
                 "are needed for Trimmomatic. Please specify them "
                 "in the sample configuration file and rerun")
    adapter_file = sample_config["adapters"]
    if not os.path.exists(adapter_file):
        sys.exit("Trimmomatic cannot be run as adapter file is not specified "
                 "or points to unknown position: {}".format(adapter_file))

    main_directory = os.getcwd()
    trimmomatic_dir = os.path.join(main_directory, "Trimmomatic")
    if not os.path.exists(trimmomatic_dir):
        os.makedirs(trimmomatic_dir)
    os.chdir(trimmomatic_dir)
    try:
        # Now in the running dir: process the libraries one by one.
        threads = 8
        if "threads" in sample_config:
            threads = sample_config["threads"]

        for library, libraryInfo in sorted_libraries_by_insert:
            read1 = libraryInfo["pair1"]
            read2 = libraryInfo["pair2"]
            if read2 is None:
                # Only paired libraries are trimmed here.
                continue

            # Output names are derived from the input file name up to its
            # first dot.
            read1_base = os.path.split(read1)[1].split(".")[0]
            read2_base = os.path.split(read2)[1].split(".")[0]
            output_read1_pair = os.path.join(
                trimmomatic_dir, "{}.fastq.gz".format(read1_base))
            output_read1_sing = os.path.join(
                trimmomatic_dir, "{}_u.fastq.gz".format(read1_base))
            output_read2_pair = os.path.join(
                trimmomatic_dir, "{}.fastq.gz".format(read2_base))
            output_read2_sing = os.path.join(
                trimmomatic_dir, "{}_u.fastq.gz".format(read2_base))
            command = [
                "java", "-jar", program, "PE",
                "-threads", "{}".format(threads), "-phred33",
                read1, read2,
                output_read1_pair, output_read1_sing,
                output_read2_pair, output_read2_sing,
                "ILLUMINACLIP:{}:2:30:10".format(adapter_file),
                "LEADING:3", "TRAILING:3", "SLIDINGWINDOW:4:15", "MINLEN:30",
            ]
            common.print_command(command)
            sample_config["commands"] += "\n" + \
                common.get_command_str(command)

            # Do not execute if the files have already been generated.
            if not common.check_dryrun(sample_config) and not \
                    os.path.exists(output_read1_pair):
                # Close the log files once the child process is done.
                with open("{}_trimmomatic.stdOut".format(read1_base),
                          "w") as std_out, \
                        open("{}_trimmomatic.stdErr".format(read1_base),
                             "w") as std_err:
                    return_value = subprocess.call(
                        command, stdout=std_out, stderr=std_err)
                if return_value != 0:
                    print("error while running command: {}".format(command))

            # Later steps should read the trimmed, still-paired reads.
            libraryInfo["pair1"] = output_read1_pair
            libraryInfo["pair2"] = output_read2_pair
            libraryInfo["trimmomatic"] = os.path.join(
                trimmomatic_dir, "{}_trimmomatic.stdErr".format(read1_base))
    finally:
        # Always go back to where we started, even if something above raised.
        os.chdir(main_directory)
    return sample_config