def main():
    """Create the TE-locate taxonomy GFF from working copies of the inputs.

    The reference GFF and taxonomy TSV are copied into the current working
    directory, the perl script named by ``snakemake.input.script`` is run on
    those copies, and ``telocate_locations_HL.gff`` is copied to
    ``snakemake.output[0]``. Any failure prints the traceback and exits
    with status 1.
    """
    run_log = snakemake.params.log
    scratch_dir = snakemake.params.tmp_dir
    mccutils.mkdir(scratch_dir + "/telocate")
    mccutils.log("processing", "making TE-locate taxonomy file", log=run_log)

    try:
        # Work on local copies so the script's output lands in the cwd.
        mccutils.run_command(
            ["cp", snakemake.input.ref_gff, "telocate_locations.gff"])
        mccutils.run_command(
            ["cp", snakemake.input.taxonomy, "telocate_taxonomy.tsv"])

        perl_call = ["perl", snakemake.input.script]
        perl_call += ["telocate_locations.gff", "telocate_taxonomy.tsv", "Alias"]
        mccutils.run_command(perl_call, log=run_log)

        mccutils.run_command(
            ["cp", "telocate_locations_HL.gff", snakemake.output[0]])
        mccutils.check_file_exists(snakemake.output[0])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...unable to produce TE-locate taxonomy file using",
              snakemake.input.script, file=sys.stderr)
        sys.exit(1)

    mccutils.log("processing", "TE-locate taxonomy file created")
def discover_variants(ref_name, bam, split_bam, te_bed, out, threads=1, log=None):
    """Run ``tepid-discover`` from inside *out* and make sure outputs exist.

    After the tool finishes, ``snakemake.output[0]`` and
    ``snakemake.output[2]`` are created as empty files if they are missing,
    and ``snakemake.output[1]`` is checked with
    ``mccutils.check_file_exists``. Any exception prints the traceback and
    exits with status 1.
    """
    try:
        os.chdir(out)

        tepid_call = ["tepid-discover", "-p", str(threads), "-n", ref_name]
        tepid_call += ["-c", bam, "-s", split_bam, "-t", te_bed]
        mccutils.run_command(tepid_call, log=log)

        # Outputs 0 and 2 are optional: create empty placeholders if absent.
        for idx in (0, 2):
            if not os.path.exists(snakemake.output[idx]):
                mccutils.run_command(["touch", snakemake.output[idx]])

        mccutils.check_file_exists(snakemake.output[1])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...Failed to run TEPID discover step...exiting...",
              file=sys.stderr)
        sys.exit(1)
def run_trim_galore(fq1, run_id, log, out, fq2=None, cores=1, flags=None):
    """Run ``trim_galore`` and return the path(s) of the trimmed FASTQ file(s).

    Args:
        fq1: first (or only) FASTQ file.
        run_id: unused here; kept so existing callers keep working.
        log: log destination forwarded to ``mccutils.run_command``.
        out: base output directory; results go to ``out/results/trimgalore``.
        fq2: second FASTQ file for paired-end data, or ``None``.
        cores: value passed to ``trim_galore -j``.
        flags: extra ``trim_galore`` arguments (any iterable of strings);
            ``None`` means no extra arguments.

    Returns:
        The trimmed FASTQ path for single-end input, or a
        ``(trimmed_1, trimmed_2)`` tuple for paired-end input. A path is the
        empty string when no matching file was produced; that value is still
        handed to ``mccutils.check_file_exists``.
    """
    # None instead of a mutable [] default; copy so the caller's list is
    # never aliased.
    extra_flags = [] if flags is None else list(flags)

    results_dir = out + "/results/"
    trim_dir = results_dir + "trimgalore"
    mccutils.mkdir(results_dir)

    command = ['trim_galore'] + extra_flags + ["-j", str(cores), "-o", trim_dir]
    if fq2 is None:
        command.append(fq1)
    else:
        command += [fq1, fq2]
    mccutils.run_command(command, log=log)

    if fq2 is None:
        outfq = ""
        for name in os.listdir(trim_dir):
            if "_trimmed.fq" in name:
                outfq = trim_dir + "/" + name
        mccutils.check_file_exists(outfq)
        return outfq

    outfq1 = ""
    outfq2 = ""
    for name in os.listdir(trim_dir):
        if "_val_1.fq" in name:
            outfq1 = trim_dir + "/" + name
        elif "_val_2.fq" in name:
            outfq2 = trim_dir + "/" + name
    mccutils.check_file_exists(outfq1)
    mccutils.check_file_exists(outfq2)
    return outfq1, outfq2
def map_reads(fq1, fq2, ref_name, median_insert_size, out, threads=1, paired=True, log=None):
    """Map reads with ``tepid-map`` (paired) or ``tepid-map-se`` (single-end).

    The command is run from inside *out*, using the index files named after
    *ref_name* in that directory.

    Args:
        fq1: first (or only) FASTQ file.
        fq2: second FASTQ file; only used when *paired* is true.
        ref_name: reference name, used as index prefix and output prefix.
        median_insert_size: value for ``tepid-map -s``; may be a number or a
            string.
        out: working/output directory.
        threads: value for ``-p``.
        paired: choose the paired-end or single-end mapper.
        log: log destination forwarded to ``mccutils.run_command``.

    Returns:
        ``(bam, split_bam)``: paths ``out/<ref_name>.bam`` and
        ``out/<ref_name>.split.bam``, both checked with
        ``mccutils.check_file_exists``.
    """
    os.chdir(out)

    prefix = out + "/" + ref_name
    second_index = prefix + ".X15_01_65525S"

    if paired:
        command = [
            "tepid-map",
            "-x", prefix,
            "-y", second_index,
            "-p", str(threads),
            # Stringified like `threads`: argv entries must be strings even
            # when the caller supplies a numeric insert size.
            "-s", str(median_insert_size),
            "-n", ref_name,
            "-1", fq1,
            "-2", fq2,
        ]
    else:
        command = [
            "tepid-map-se",
            "-x", prefix,
            "-y", second_index,
            "-p", str(threads),
            "-n", ref_name,
            "-q", fq1,
        ]

    mccutils.run_command(command, log=log)

    bam = prefix + ".bam"
    split_bam = prefix + ".split.bam"
    mccutils.check_file_exists(bam)
    mccutils.check_file_exists(split_bam)
    return bam, split_bam
def main():
    """Prepare PopoolationTE2 inputs: index, format, map and build the BAM.

    On success ``COMPLETED`` is written to the status log. On any exception
    the traceback goes to stderr and the run log, ``FAILED`` is written to
    the status log, and the expected BAM output is touched.
    """
    mccutils.log("popoolationte2", "setting up for PopoolationTE2")

    ref_fasta = snakemake.input.ref_fasta
    fq1 = snakemake.input.fq1
    fq2 = snakemake.input.fq2
    jar = snakemake.params.jar
    log = snakemake.params.log
    out_dir = snakemake.params.out_dir
    threads = snakemake.threads
    status_log = snakemake.params.status_log

    try:
        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        tmp_dir = out_dir + "/tmp"
        mccutils.mkdir(tmp_dir)

        index_fasta(ref_fasta, log=log)
        fq1 = format_fastq(fq1, out_dir + "/reads_1.fastq", log=log)
        fq2 = format_fastq(fq2, out_dir + "/reads_2.fastq", log=log)

        sam1 = map_reads(ref_fasta, fq1, out_dir + "/mapped_1.sam",
                         threads=threads, log=log)
        sam2 = map_reads(ref_fasta, fq2, out_dir + "/mapped_2.sam",
                         threads=threads, log=log)
        sam_to_bam(jar, fq1, fq2, sam1, sam2, snakemake.output.bam, out_dir,
                   threads=threads, log=log)

        mccutils.remove(tmp_dir)
        mccutils.check_file_exists(snakemake.output.bam)

        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("popoolationte2", "PopoolationTE2 preprocessing complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("popoolationte2", "popoolationte2 preprocessing failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        # Placeholder output so downstream rules still find the file.
        mccutils.run_command(["touch", snakemake.output.bam])
def run_workflow(args, sample_name, run_id, debug=False):
    """Assemble and run the snakemake command for one McClintock run.

    Args:
        args: parsed arguments; reads ``out``, ``proc``, ``clean`` and
            ``methods``.
        sample_name: substituted for ``config.SAMPLE_NAME`` in output paths.
        run_id: identifies the per-run snakemake directory and config file.
        debug: pass ``--reason`` to snakemake instead of ``--quiet``.

    The process changes directory into ``<out>/snakemake/<run_id>``. If
    snakemake fails or the summary report is missing, the traceback and a
    support message are printed and the process exits with status 1.
    """
    results_dir = args.out + "/results/"
    input_dir = args.out + "/method_input/"

    # Resolve placeholders into a fresh dict. The original rewrote
    # config.OUT_PATHS in place, so a second call in the same process would
    # find no placeholders left and silently reuse the first call's paths.
    out_files = {}
    for key, template in config.OUT_PATHS.items():
        resolved = template.replace(config.INPUT_DIR, input_dir)
        resolved = resolved.replace(config.RESULTS_DIR, results_dir)
        resolved = resolved.replace(config.SAMPLE_NAME, sample_name)
        out_files[key] = resolved

    path = os.path.dirname(os.path.abspath(__file__))
    mccutils.mkdir(args.out + "/snakemake")
    snakemake_path = args.out + "/snakemake/" + str(run_id)
    mccutils.mkdir(snakemake_path)
    mccutils.run_command(["cp", path + "/Snakefile", snakemake_path])
    os.chdir(snakemake_path)

    command = [
        "snakemake", "--use-conda", "--conda-prefix",
        path + "/install/envs/conda"
    ]
    command.append("--reason" if debug else "--quiet")
    command += [
        "--configfile",
        args.out + "/snakemake/config/config_" + str(run_id) + ".json"
    ]
    command += ["--cores", str(args.proc)]

    if args.clean:
        # Same base command, but asks snakemake to delete prior outputs.
        mccutils.run_command(command + ["--delete-all-output"])
        mccutils.remove(args.out + "/input")

    summary_report = args.out + "/results/summary/summary_report.txt"
    command.extend(out_files[method] for method in args.methods)
    command.append(summary_report)

    try:
        mccutils.run_command(command)
        mccutils.check_file_exists(summary_report)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(
            "McClintock Pipeline Failed... please open an issue at https://github.com/bergmanlab/mcclintock/issues if you are having trouble using McClintock",
            file=sys.stderr)
        sys.exit(1)

    mccutils.remove(args.out + "/tmp")
def repeat_mask(reference, te_fasta, chromosomes, procs, run_id, log, out):
    """Run RepeatMasker with *te_fasta* as library and reformat its GFF.

    RepeatMasker writes into ``out/tmp/repeatmasker_<run_id>``. Its
    ``.out.gff`` is then rewritten to ``out/tmp/<run_id>tmpreferenceTEs.gff``:
    lines containing ``#`` are skipped, and records whose first column is in
    *chromosomes* get the TE name as feature type and
    ``ID=/Name=/Alias=`` attributes.

    Returns:
        Path of the reformatted GFF. On any exception the traceback and an
        error message are printed and the process exits with status 1.
    """
    try:
        outdir = out + "/tmp/repeatmasker_" + run_id
        mccutils.mkdir(outdir)
        os.chdir(outdir)

        rm_call = ["RepeatMasker", "-pa", str(procs), "-lib", te_fasta]
        rm_call += ["-dir", outdir, "-s", "-gff", "-nolow", "-no_is", reference]
        mccutils.run_command(rm_call, log=log)
        os.chdir(out)

        # RepeatMasker appears to override the custom database names during
        # ProcessRepeats; the replacements below change them back. More rules
        # may be needed for other reference genomes.
        ref_name = os.path.basename(reference)
        repeatmasker_gff = outdir + "/" + ref_name + ".out.gff"
        formatted_ref_tes = out + "/tmp/" + run_id + "tmpreferenceTEs.gff"

        with open(repeatmasker_gff, "r") as rm_in:
            with open(formatted_ref_tes, "w") as gff_out:
                for record in rm_in:
                    if "#" in record:
                        continue
                    record = record.replace("McClintock-int", "McClintock")
                    record = record.replace("POGON1", "pogo")
                    fields = record.split("\t")
                    attributes = fields[8]
                    if fields[0] not in chromosomes:
                        continue
                    # Second space-separated token, quotes removed, text
                    # after the first ':' is taken as the TE name.
                    family = attributes.split(" ")[1]
                    family = family.replace('"', '').split(":")[1]
                    fields[2] = family
                    fields[8] = ";".join(
                        ["ID=" + family, "Name=" + family, "Alias=" + family])
                    gff_out.write("\t".join(fields) + '\n')

        masked_fasta = outdir + "/" + ref_name + ".masked"
        fix_fasta.fix_fasta_lines(masked_fasta, 80)
        mccutils.check_file_exists(formatted_ref_tes)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...Failed to run repeatmasker on: ", reference,
              "with lib:", te_fasta, "check file formatting...exiting...",
              file=sys.stderr)
        sys.exit(1)

    return formatted_ref_tes
def main():
    """Run the TEMP2 insertion and absence steps for one sample.

    Input paths are recorded in the run log and *out_dir* is emptied before
    the run. On success ``COMPLETED`` is written to the status log; on any
    exception the traceback is logged, ``FAILED`` is written, and both
    expected outputs are touched.
    """
    fq1 = snakemake.input.fq1
    fq2 = snakemake.input.fq2
    bam = snakemake.input.bam
    reference = snakemake.input.reference
    twobit = snakemake.input.twobit
    consensus = snakemake.input.consensus
    ref_te_bed = snakemake.input.ref_te_bed
    taxonomy = snakemake.input.taxonomy
    median_insert_size_file = snakemake.input.median_insert_size
    log = snakemake.params.log

    with open(log, "a") as run_log:
        run_log.write("BAM: " + bam + "\n")
        run_log.write("2bit: " + twobit + "\n")
        run_log.write("consensus fasta: " + consensus + "\n")
        run_log.write("reference TE BED: " + ref_te_bed + "\n")
        run_log.write("Taxonomy TSV: " + taxonomy + "\n")

    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    script_dir = snakemake.params.script_dir
    sample_name = snakemake.params.sample_name
    status_log = snakemake.params.status_log

    # ensures intermediate files from previous runs are removed
    for leftover in os.listdir(out_dir):
        mccutils.remove(out_dir + "/" + leftover)

    mccutils.log("temp2", "running TEMP2 Module")

    try:
        median_insert_size = get_median_insert_size(median_insert_size_file)

        run_temp2_insertion(fq1, fq2, bam, median_insert_size, reference,
                            script_dir, consensus, ref_te_bed, threads,
                            out_dir, config, log)
        run_temp2_absence(script_dir, bam, twobit, ref_te_bed,
                          median_insert_size, threads, out_dir + "/absence",
                          config, log)

        absence_summary = (out_dir + '/absence/' + sample_name
                           + ".absence.refined.bp.summary")
        mccutils.run_command(["cp", absence_summary, out_dir], log=log)

        mccutils.check_file_exists(snakemake.output[0])
        mccutils.check_file_exists(snakemake.output[1])
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("temp2", "TEMP2 run complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("temp2", "TEMP2 run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", snakemake.output[0]])
        mccutils.run_command(["touch", snakemake.output[1]])
def main():
    """Run TEFLoN if the status log reports that earlier steps succeeded.

    When ``mccutils.check_status_file`` returns a false value, the output is
    touched and nothing else runs. Otherwise TEFLoN is run with thresholds
    taken from ``config.PARAMS``; success writes ``COMPLETED`` and failure
    logs the traceback, writes ``FAILED`` and touches the output.
    """
    mccutils.log("teflon", "Running TEFLoN")

    consensus = snakemake.input.consensus
    reference_genome = snakemake.input.reference_genome
    ref_bed = snakemake.input.ref_bed
    teflon_taxonomy = snakemake.input.teflon_taxonomy
    bam = snakemake.input.bam
    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    script_dir = snakemake.params.script_dir
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    if not mccutils.check_status_file(status_log):
        # Earlier step failed: only leave a placeholder output behind.
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        sample_table = make_sample_table(out_dir, bam)
        params = config.PARAMS
        run_teflon(
            script_dir,
            out_dir,
            sample_table,
            threads=threads,
            log=log,
            quality_threshold=params['-q'],
            stdev=params['-sd'],
            cov=params['-cov'],
            te_support1=params['-n1'],
            te_support2=params['-n2'],
            read_count_lower_threshold=params['-lt'],
            read_count_higher_threshold=params['-ht'],
        )

        mccutils.check_file_exists(snakemake.output[0])
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("teflon", "teflon run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Map reads to the reference with ``bwa mem``, writing ``snakemake.output[0]``.

    ``-C`` is added when the ``save_comments`` config value is truthy, and
    the second FASTQ is added unless ``config['in']['fq2']`` is the string
    ``"None"``. Any failure prints the traceback plus the reference and
    reads involved, then exits with status 1.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    mccutils.log("processing", "mapping reads to reference", log=snakemake.log[0])
    single_end = snakemake.config['in']['fq2'] == "None"

    try:
        command = ["bwa", "mem"]

        # The flag arrives as a string (e.g. "True"/"False"). literal_eval
        # parses Python literals only, so unlike eval() it cannot execute
        # arbitrary code that ends up in the config file.
        if ast.literal_eval(snakemake.config['args']['save_comments']):
            command.append("-C")

        read_group = ("@RG\\tID:" + snakemake.params.sample
                      + "\\tSM:" + snakemake.params.sample)
        command += [
            "-t", str(snakemake.threads),
            "-R", read_group,
            snakemake.input.ref,
            snakemake.input.fq1,
        ]
        if not single_end:
            command.append(snakemake.input.fq2)

        mccutils.run_command_stdout(command, snakemake.output[0],
                                    log=snakemake.log[0])
        mccutils.check_file_exists(snakemake.output[0])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        reads = [snakemake.input.fq1]
        if not single_end:
            reads.append(snakemake.input.fq2)
        print("ERROR...unable to map reads (bwa mem) using reference fasta:",
              snakemake.input.ref, "and reads:", *reads, file=sys.stderr)
        sys.exit(1)

    mccutils.log("processing", "read mapping complete")
def main():
    """Index the reference FASTA with ``samtools faidx`` and ``bwa index``.

    Every path in ``snakemake.output`` is checked afterwards. Any failure
    prints the traceback and an error message and exits with status 1.
    """
    try:
        run_log = snakemake.params.log
        mccutils.log(
            "processing",
            "making samtools and bwa index files for reference fasta",
            log=run_log)

        reference = snakemake.input.ref
        mccutils.run_command(["samtools", "faidx", reference], log=run_log)
        mccutils.run_command(["bwa", "index", reference], log=run_log)

        for expected in snakemake.output:
            mccutils.check_file_exists(expected)

        mccutils.log(
            "processing",
            "samtools and bwa index files for reference fasta created")

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...unable to index (samtools, bwa) reference fasta, please check the formatting of:",
              snakemake.input.ref, file=sys.stderr)
        sys.exit(1)
def index_ref(fasta, ref_name, out, log=None):
    """Copy *fasta* into *out* and index it with ``bowtie2-build`` and ``yaha``.

    Args:
        fasta: path to the reference FASTA.
        ref_name: index name passed to ``bowtie2-build``.
        out: working directory; the process changes into it.
        log: log destination forwarded to ``mccutils.run_command``.

    The file ``out/<ref_name>.X15_01_65525S`` is checked afterwards. Any
    failure prints the traceback and an error message and exits with
    status 1.
    """
    # Bound before the try block: the handler reports this path, and the
    # original raised UnboundLocalError there (hiding the real error) when
    # os.chdir failed before the copy path had been computed.
    fasta_copy = fasta
    try:
        os.chdir(out)
        fasta_no_path = fasta.split("/")[-1]
        fasta_copy = out + "/" + fasta_no_path

        mccutils.run_command(["cp", fasta, fasta_copy])
        mccutils.run_command(["bowtie2-build", fasta_copy, ref_name], log=log)
        mccutils.run_command(["yaha", "-g", fasta_copy], log=log)

        mccutils.check_file_exists(out + "/" + ref_name + ".X15_01_65525S")

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        # str() so a non-string `fasta` cannot break the error report itself.
        print("ERROR...Failed to index reference fasta:" + str(fasta_copy)
              + " ...exiting...", file=sys.stderr)
        sys.exit(1)
def main():
    """Prepare TEFLoN inputs: reference BED, taxonomy, annotations, mapping.

    Success writes ``COMPLETED`` to the status log. On any exception the
    traceback is logged, ``FAILED`` is written, and the first three
    snakemake outputs are touched.
    """
    mccutils.log("teflon","setting up for TEFLoN")

    te_gff = snakemake.input.te_gff
    taxonomy = snakemake.input.taxonomy
    consensus = snakemake.input.consensus
    reference_genome = snakemake.input.reference_genome
    fq1 = snakemake.input.fq1
    fq2 = snakemake.input.fq2
    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    script_dir = snakemake.params.script_dir
    log = snakemake.params.log
    ref_bed = snakemake.output.ref_bed
    teflon_taxonomy = snakemake.output.teflon_taxonomy
    status_log = snakemake.params.status_log

    try:
        make_reference_bed(te_gff, ref_bed)
        make_taxonomy_file(taxonomy, teflon_taxonomy)
        prep_annotations(script_dir, out_dir, ref_bed, teflon_taxonomy,
                         consensus, reference_genome, log=log)
        map_reads(out_dir, fq1, fq2, threads=threads, log=log)

        mccutils.check_file_exists(snakemake.output[0])
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("teflon","setup for TEFLoN complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("teflon","teflon preprocessing failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        # Leave empty placeholders for outputs 0, 1 and 2.
        for idx in range(3):
            mccutils.run_command(["touch", snakemake.output[idx]])
def discover_variants(ref_name, bam, split_bam, te_bed, out, threads=1, log=None):
    """Run ``tepid-discover`` from inside *out* and make sure outputs exist.

    ``snakemake.output[0]`` and ``snakemake.output[2]`` are created as empty
    files when the tool did not produce them; ``snakemake.output[1]`` is
    checked with ``mccutils.check_file_exists``. Exceptions propagate to
    the caller.
    """
    os.chdir(out)

    tepid_call = ["tepid-discover", "-p", str(threads), "-n", ref_name]
    tepid_call += ["-c", bam, "-s", split_bam, "-t", te_bed]
    mccutils.run_command(tepid_call, log=log)

    # Outputs 0 and 2 are optional: create empty placeholders if absent.
    for idx in (0, 2):
        if not os.path.exists(snakemake.output[idx]):
            mccutils.run_command(["touch", snakemake.output[idx]])

    mccutils.check_file_exists(snakemake.output[1])
def main():
    """Create the TE-locate taxonomy file by running the perl script in place.

    The script named by ``snakemake.input.script`` receives the reference
    GFF, the taxonomy file and the literal ``Alias``. ``snakemake.output[0]``
    is checked afterwards; any failure prints the traceback and exits with
    status 1.
    """
    run_log = snakemake.params.log
    mccutils.log("processing", "making TE-locate taxonomy file", log=run_log)

    try:
        perl_call = ["perl", snakemake.input.script]
        perl_call += [snakemake.input.ref_gff, snakemake.input.taxonomy, "Alias"]
        mccutils.run_command(perl_call, log=run_log)
        mccutils.check_file_exists(snakemake.output[0])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...unable to produce TE-locate taxonomy file using",
              snakemake.input.script, file=sys.stderr)
        sys.exit(1)

    mccutils.log("processing", "TE-locate taxonomy file created")
def map_reads(fq1, fq2, ref_name, median_insert_size, out, threads=1, paired=True, log=None):
    """Map reads with ``tepid-map`` (paired) or ``tepid-map-se`` (single-end).

    The command is run from inside *out*, using the index files named after
    *ref_name* in that directory.

    Args:
        fq1: first (or only) FASTQ file.
        fq2: second FASTQ file; only used when *paired* is true.
        ref_name: reference name, used as index prefix and output prefix.
        median_insert_size: value for ``tepid-map -s``; may be a number or a
            string.
        out: working/output directory.
        threads: value for ``-p``.
        paired: choose the paired-end or single-end mapper.
        log: log destination forwarded to ``mccutils.run_command``.

    Returns:
        ``(bam, split_bam)``: paths ``out/<ref_name>.bam`` and
        ``out/<ref_name>.split.bam``. Any exception prints the traceback
        and exits with status 1 instead of returning.
    """
    try:
        os.chdir(out)

        prefix = out + "/" + ref_name
        second_index = prefix + ".X15_01_65525S"

        if paired:
            command = [
                "tepid-map",
                "-x", prefix,
                "-y", second_index,
                "-p", str(threads),
                # Stringified like `threads`: argv entries must be strings
                # even when the caller supplies a numeric insert size.
                "-s", str(median_insert_size),
                "-n", ref_name,
                "-1", fq1,
                "-2", fq2,
            ]
        else:
            command = [
                "tepid-map-se",
                "-x", prefix,
                "-y", second_index,
                "-p", str(threads),
                "-n", ref_name,
                "-q", fq1,
            ]

        mccutils.run_command(command, log=log)

        bam = prefix + ".bam"
        split_bam = prefix + ".split.bam"
        mccutils.check_file_exists(bam)
        mccutils.check_file_exists(split_bam)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...Failed to run TEPID mapping step...exiting...",
              file=sys.stderr)
        sys.exit(1)

    return bam, split_bam
def mask_reference(reference, ref_tes_gff, run_id, log, out):
    """Mask the intervals of *ref_tes_gff* in *reference* with ``bedtools maskfasta``.

    Returns:
        Path of the masked FASTA, ``out/tmp/<run_id>tmpmaskedreference.fasta``.
        Any failure prints the traceback and an error message and exits
        with status 1.
    """
    try:
        masked_reference = out + "/tmp/" + run_id + "tmpmaskedreference.fasta"

        bedtools_call = ["bedtools", "maskfasta"]
        bedtools_call += ["-fi", reference]
        bedtools_call += ["-fo", masked_reference]
        bedtools_call += ["-bed", ref_tes_gff]
        mccutils.run_command(bedtools_call, log=log)

        mccutils.check_file_exists(masked_reference)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(
            "ERROR...Failed to mask repeats (bedtools maskfasta) in reference: ",
            reference,
            " using repeat file:",
            ref_tes_gff,
            "check file formatting...exiting...",
            file=sys.stderr)
        sys.exit(1)

    return masked_reference
def get_ref_te_fasta(reference, ref_tes_gff, run_id, log, out):
    """Extract the sequences annotated in *ref_tes_gff* with ``bedtools getfasta``.

    Returns:
        Path of the extracted FASTA, ``out/tmp/<run_id>tmpreferencetes.fasta``.
        Any failure prints the traceback and an error message and exits
        with status 1.
    """
    try:
        ref_te_fasta = out + "/tmp/" + run_id + "tmpreferencetes.fasta"

        bedtools_call = ["bedtools", "getfasta", "-name"]
        bedtools_call += ["-fi", reference]
        bedtools_call += ["-bed", ref_tes_gff]
        bedtools_call += ["-fo", ref_te_fasta]
        mccutils.run_command(bedtools_call, log=log)

        mccutils.check_file_exists(ref_te_fasta)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(
            "ERROR...Failed to create TE fasta (bedtools getfasta) using reference:",
            reference,
            " and TE annotations:",
            ref_tes_gff,
            "check file formatting...exiting...",
            file=sys.stderr)
        sys.exit(1)

    return ref_te_fasta
def main():
    """Convert the SAM to BAM, then sort, index and flagstat it with samtools.

    The four steps run in sequence, each in its own try block. A failing
    step prints the traceback and a step-specific message and exits with
    status 1, so later steps never run on a broken file.
    """
    log = snakemake.params.log
    mccutils.log("processing","Converting sam to bam", log=log)

    # Step 1: SAM -> unsorted BAM, captured from stdout.
    try:
        view_call = ["samtools","view", "-@", str(snakemake.threads), "-Sb",
                     "-t", snakemake.input.ref_idx, snakemake.input.sam]
        mccutils.run_command_stdout(view_call, snakemake.output.tmp_bam, log=log)
        mccutils.check_file_exists(snakemake.output.tmp_bam)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...unable convert sam to bam using SAMtools...sam file:",
              snakemake.input.sam, file=sys.stderr)
        sys.exit(1)

    # Step 2: sort; the last argument is the output path with ".bam" removed.
    try:
        sort_call = ["samtools", "sort", "-@", str(snakemake.threads),
                     snakemake.output.tmp_bam,
                     snakemake.output.bam.replace(".bam", "")]
        mccutils.run_command(sort_call, log=log)
        mccutils.check_file_exists(snakemake.output.bam)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...falied to sort the bam file using samtools sort...bam file:",
              snakemake.output.tmp_bam, file=sys.stderr)
        sys.exit(1)

    # Step 3: index the sorted BAM.
    try:
        mccutils.run_command(["samtools", "index", snakemake.output.bam], log=log)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...falied to index the bam file using samtools index...bam file:",
              snakemake.output.bam, file=sys.stderr)
        sys.exit(1)

    # Step 4: flagstat report, captured from stdout.
    try:
        flagstat_call = ["samtools", "flagstat", snakemake.output.bam]
        mccutils.run_command_stdout(flagstat_call, snakemake.output.flagstat, log=log)
        mccutils.check_file_exists(snakemake.output.flagstat)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...falied to generate flagstat file using samtools flagstat...bam file:",
              snakemake.output.bam, file=sys.stderr)
        sys.exit(1)

    mccutils.log("processing","sam to bam converted")
def main():
    """Stage inputs as symlinks and run RelocaTE2 for one sample.

    *out_dir* is emptied, an ``input/`` tree is created with symlinks to the
    reference, TE sequences, RepeatMasker output and FASTQ file(s), and the
    ``relocaTE2.py`` script found on PATH is run under ``python2`` with the
    options from ``config.PARAMS``. Success writes ``COMPLETED`` to the
    status log; any exception logs the traceback, writes ``FAILED`` and
    touches both expected outputs.
    """
    sample_name = snakemake.params.sample_name
    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    median_insert_size_file = snakemake.input.median_insert_size
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    try:
        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        is_paired = snakemake.params.raw_fq2 != "None"

        input_dir = snakemake.params.out_dir + "/input/"
        mccutils.remove(input_dir)
        mccutils.mkdir(input_dir)
        fq_dir = snakemake.params.out_dir + "/input/fastq/"
        mccutils.mkdir(fq_dir)

        reference = input_dir + "reference.fasta"
        te_seqs = input_dir + "consensus.fasta"
        rm_out = input_dir + "repeatmasker.out"
        os.symlink(snakemake.input.reference, reference)
        os.symlink(snakemake.input.te_seqs, te_seqs)
        os.symlink(snakemake.input.rm_out, rm_out)

        # The FASTQ suffixes here must match the -1/-2/-u values below.
        if is_paired:
            fq1 = fq_dir + sample_name + "_1.fq"
            fq2 = fq_dir + sample_name + "_2.fq"
            os.symlink(snakemake.input.fq1, fq1)
            os.symlink(snakemake.input.fq2, fq2)
        else:
            fq1 = fq_dir + sample_name + ".unPaired.fq"
            os.symlink(snakemake.input.fq1, fq1)

        median_insert_size = get_median_insert_size(median_insert_size_file)

        # communicate() waits for `which` and closes both pipes; the original
        # read stdout only and never reaped the child process.
        lookup = subprocess.Popen(["which", "relocaTE2.py"],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        lookup_stdout, _ = lookup.communicate()
        script = lookup_stdout.decode().replace("\n", "")
        if not script:
            # Fail with a clear reason instead of running `python2 ""`.
            raise FileNotFoundError("relocaTE2.py was not found on PATH")

        mccutils.log("relocate2", "running RelocaTE2", log=log)
        command = [
            "python2", script,
            "-t", te_seqs,
            "-g", reference,
            "-r", rm_out,
            "-o", out_dir,
            "-s", str(median_insert_size),
            "--run",
            "-v", "4",
            "-c", str(threads),
            "-d", fq_dir,
        ]
        for param, value in config.PARAMS.items():
            command.append(param)
            command.append(str(value))

        if is_paired:
            command += ["-1", "_1", "-2", "_2"]
        else:
            command += ["-u", ".unPaired"]

        mccutils.run_command(command, log=log)

        mccutils.check_file_exists(snakemake.output[0])
        mccutils.check_file_exists(snakemake.output[1])
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("relocate2", "RelocaTE2 run complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("relocate2", "RelocaTE2 run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", snakemake.output[0]])
        mccutils.run_command(["touch", snakemake.output[1]])
def main():
    """Run the PopoolationTE2 steps if preprocessing succeeded.

    When ``mccutils.check_status_file`` returns a false value, the output is
    touched and nothing else runs. Otherwise the ppileup, subsample,
    signature, strand, frequency and pairup helpers are called in order.
    Success writes ``COMPLETED``; any exception logs the traceback, writes
    ``FAILED`` and touches the output.
    """
    mccutils.log("popoolationte2", "running PopoolationTE2")

    ref_fasta = snakemake.input.ref_fasta
    bam = snakemake.input.bam
    taxonomy = snakemake.input.taxonomy
    jar = snakemake.params.jar
    out_dir = snakemake.params.out_dir
    sample_name = snakemake.params.sample_name
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    if not mccutils.check_status_file(status_log):
        # Preprocessing failed: only leave a placeholder output behind.
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        tmp_dir = out_dir + "/tmp"
        mccutils.mkdir(tmp_dir)
        params = config.PARAMS

        taxonomy = format_taxonomy(taxonomy, out_dir)
        ppileup = popoolationte2_ppileup(
            jar, params["ppileup"], bam, taxonomy, out_dir, log=log)
        ppileup = popoolationte2_subsample(
            jar, params["subsampleppileup"], ppileup, out_dir, log=log)
        signatures = popoolationte2_signatures(
            jar, params["identifySignatures"], ppileup, out_dir, log=log)
        signatures = popoolationte2_strand(
            jar, params["updateStrand"], signatures, bam, taxonomy, out_dir,
            log=log)
        signatures = popoolationte2_frequency(
            jar, ppileup, signatures, out_dir, log=log)
        popoolationte2_pairup(
            jar, params["pairupSignatures"], signatures, ref_fasta, taxonomy,
            out_dir, log=log)

        mccutils.remove(tmp_dir)
        mccutils.check_file_exists(snakemake.output[0])
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("popoolationte2", "popoolationte2 run complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("popoolationte2", "popoolationte2 run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Run the TE-locate perl script on the sample SAM file.

    *out_dir* is emptied, the SAM is symlinked into ``out_dir/sam/``, and
    the script is run from its own directory with thresholds taken from
    ``config.PARAMS``. The ``*_reads3_acc1.info`` result is copied to
    ``te-locate-raw.info``. Success writes ``COMPLETED``; any exception
    logs the traceback, writes ``FAILED`` and touches the output.
    """
    te_gff = snakemake.input.te_gff
    sam = snakemake.input.sam
    ref_fasta = snakemake.input.ref
    median_insert_size_file = snakemake.input.median_insert_size
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    mccutils.log("te-locate", "running TE-Locate", log=log)
    with open(log, "a") as run_log:
        run_log.write("TE GFF: " + te_gff + "\n")
        run_log.write("SAM: " + sam + "\n")
        run_log.write("reference fasta: " + ref_fasta + "\n")

    telocate = snakemake.params.run_script
    out_dir = snakemake.params.out_dir

    try:
        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        sam_dir = out_dir + "/sam/"
        mccutils.mkdir(sam_dir)
        te_locate_sam = sam_dir + "te-locate.sam"
        if os.path.exists(te_locate_sam):
            os.remove(te_locate_sam)
        os.symlink(sam, te_locate_sam)

        # The script is launched from its own directory.
        os.chdir(os.path.dirname(telocate))

        median_insert_size = mccutils.get_median_insert_size(
            median_insert_size_file)
        params = config.PARAMS
        distance = (median_insert_size * params["min_distance"])

        command = [
            "perl", telocate,
            str(params["max_mem"]),
            sam_dir,
            te_gff,
            ref_fasta,
            out_dir,
            str(distance),
            str(params["min_support_reads"]),
            str(params["min_support_individuals"]),
        ]
        mccutils.run_command(command, log=log)

        raw_info = out_dir + "_" + str(distance) + "_reads3_acc1.info"
        mccutils.check_file_exists(raw_info)
        mccutils.run_command(["cp", raw_info, out_dir + "te-locate-raw.info"])

        mccutils.log("te-locate", "TE-Locate complete")
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("telocate", "TE-locate run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Run the PoPoolationTE workflow if preprocessing succeeded.

    Input paths are recorded in the run log first. When
    ``mccutils.check_status_file`` returns a false value, the output is
    touched and nothing else runs. Otherwise read length, median insert
    size and the known-insert file are computed and handed to
    ``run_popoolationte``. Success writes ``COMPLETED``; any exception logs
    the traceback, writes ``FAILED`` and touches the output.
    """
    mccutils.log("popoolationte", "running PopoolationTE")

    ref_fasta = snakemake.input.ref_fasta
    taxonomy = snakemake.input.taxonomy
    te_gff = snakemake.input.te_gff
    fq1 = snakemake.input.fq1
    fq2 = snakemake.input.fq2
    sam = snakemake.input.sam
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    with open(log, "a") as run_log:
        run_log.write("reference fasta: " + ref_fasta + "\n")
        run_log.write("Taxonomy TSV: " + taxonomy + "\n")
        run_log.write("TE GFF: " + te_gff + "\n")
        run_log.write("fastq1: " + fq1 + '\n')
        run_log.write("fastq2: " + fq2 + "\n")
        run_log.write("SAM: " + sam + "\n")

    out_dir = snakemake.params.out_dir
    sample_name = snakemake.params.sample_name
    script_dir = snakemake.params.script_dir

    if not mccutils.check_status_file(status_log):
        # Preprocessing failed: only leave a placeholder output behind.
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        mccutils.log("popoolationte", "getting read length")
        read_length = get_read_length(fq1, fq2)

        mccutils.log("popoolationte", "calculating median insert size")
        median_insert_size = get_median_insert_size(sam)
        max_dist = int(median_insert_size * 3) + read_length

        mccutils.log("popoolationte",
                     "converting TE gff to PoPoolationTE known TE file")
        known_inserts = make_known_insert_file(te_gff, out_dir)

        mccutils.log("popoolationte",
                     "running the PoPoolationTE workflow scripts")
        run_popoolationte(sam, ref_fasta, taxonomy, read_length,
                          median_insert_size, max_dist, known_inserts,
                          script_dir, out_dir, config.PARAMS, log=log)

        mccutils.check_file_exists(snakemake.output[0])
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("popoolationte", "popoolationte run complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Run ``ngs_te_mapper2.py`` for one sample.

    Input paths are recorded in the run log and *out_dir* is emptied before
    the run. Options from ``config.PARAMS`` are appended to the command,
    followed by ``-f`` and the FASTQ file(s), comma-joined when paired.
    Success writes ``COMPLETED``; any exception logs the traceback, writes
    ``FAILED`` and touches both BED outputs.
    """
    consensus_fasta = snakemake.input.consensus_fasta
    reference_fasta = snakemake.input.reference_fasta
    fastq1 = snakemake.input.fastq1
    fastq2 = snakemake.input.fastq2
    locations = snakemake.input.locations
    log = snakemake.params.log

    with open(log, "a") as run_log:
        run_log.write("consensus fasta: " + consensus_fasta + "\n")
        run_log.write("reference fasta: " + reference_fasta + "\n")
        run_log.write("fastq1: " + fastq1 + "\n")
        run_log.write("fastq2: " + fastq2 + "\n")

    threads = snakemake.threads
    sample_name = snakemake.params.sample_name
    script_dir = snakemake.params.script_dir
    out_dir = snakemake.params.out_dir
    status_log = snakemake.params.status_log
    out_bed_nonref = snakemake.output[0]
    out_bed_ref = snakemake.output[1]

    try:
        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        is_paired = snakemake.params.raw_fq2 != "None"

        command = ['python', script_dir + "/ngs_te_mapper2.py"]
        command += ["-r", reference_fasta]
        command += ["-l", consensus_fasta]
        command += ["-t", str(threads)]
        command += ["-o", out_dir]
        command += ["--keep_files"]
        command += ["-p", sample_name]
        command += ["-a", locations]

        for key, value in config.PARAMS.items():
            command.extend([key, str(value)])

        command.append("-f")
        command.append(fastq1 + "," + fastq2 if is_paired else fastq1)

        mccutils.log("ngs_te_mapper2", "running ngs_te_mapper2", log=log)
        mccutils.run_command(command, log=log)

        mccutils.check_file_exists(out_bed_ref)
        mccutils.check_file_exists(out_bed_nonref)
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run complete", log=log)
        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run complete")

    except Exception:
        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run failed", log=log)
        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run failed")
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", out_bed_ref])
        mccutils.run_command(["touch", out_bed_nonref])
def main():
    """Stage inputs as symlinks and run ``relocaTE.pl`` for one sample.

    *out_dir* is emptied and an ``input/`` tree is created with symlinks to
    the consensus FASTA, TE GFF, reference FASTA and FASTQ file(s). FASTQ
    link names carry a random five-digit id that is re-drawn until it does
    not occur in the FASTQ directory path. The script is run from *out_dir*
    and its GFFs are combined into the output. Success writes
    ``COMPLETED``; any exception logs the traceback, writes ``FAILED`` and
    touches the output.
    """
    sample_name = snakemake.params.sample_name
    log = snakemake.params.log
    raw_fq2 = snakemake.params.raw_fq2
    is_paired = raw_fq2 != "None"

    script_dir = snakemake.params.script_dir
    out_dir = snakemake.params.out_dir
    status_log = snakemake.params.status_log
    out_gff = snakemake.output[0]

    try:
        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        mccutils.log("relocate", "running RelocaTE", log=log)

        input_dir = snakemake.params.out_dir + "/input/"
        mccutils.remove(input_dir)
        mccutils.mkdir(input_dir)
        fq_dir = input_dir + "fastq/"
        mccutils.mkdir(fq_dir)

        consensus_fasta = input_dir + "consensus.fasta"
        te_gff = input_dir + "te.gff"
        reference_fasta = input_dir + "reference.fasta"

        # Draw an id that does not already appear in the FASTQ directory path.
        uniq_id = str(random.randint(10000, 99999))
        while uniq_id in fq_dir:
            mccutils.log(
                "relocate",
                "unique id: " + uniq_id + " occurs in file path... selecting a new one...",
                log=log)
            uniq_id = str(random.randint(10000, 99999))

        fq1_uniq_id = uniq_id + "_mcc_relocate_1"
        fq2_uniq_id = uniq_id + "_mcc_relocate_2"
        unpaired_id = uniq_id + "_unPaired"

        os.symlink(snakemake.input.consensus_fasta, consensus_fasta)
        os.symlink(snakemake.input.te_gff, te_gff)
        os.symlink(snakemake.input.reference_fasta, reference_fasta)

        link_prefix = fq_dir + sample_name + "."
        if is_paired:
            os.symlink(snakemake.input.fq1, link_prefix + fq1_uniq_id + ".fq")
            os.symlink(snakemake.input.fq2, link_prefix + fq2_uniq_id + ".fq")
        else:
            os.symlink(snakemake.input.fq1, link_prefix + unpaired_id + ".fq")

        annotation = make_annotation_file(te_gff, out_dir)
        os.chdir(out_dir)

        command = ["perl", script_dir + "/relocaTE.pl"]
        command += ["-t", consensus_fasta]
        command += ["-d", fq_dir]
        command += ["-g", reference_fasta]
        command += ["-o", "."]
        command += ["-r", annotation]

        for param, value in config.PARAMS.items():
            command.extend([param, str(value)])

        if is_paired:
            command += ["-1", fq1_uniq_id, "-2", fq2_uniq_id]
        else:
            command += ["-u", unpaired_id]

        mccutils.run_command(command, log=log)
        combine_gffs(out_dir, out_gff)
        mccutils.check_file_exists(out_gff)

        mccutils.log("relocate", "RelocaTE run complete")
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("relocate", "RelocaTE run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Run ``ngs_te_mapper.R`` for one sample and copy out its insertions BED.

    Input paths are recorded in the run log and *out_dir* is emptied before
    the run. The last file in ``out_dir/bed_tsd/`` whose name contains
    ``insertions.bed`` is copied to ``snakemake.output[0]``. Success writes
    ``COMPLETED``; any exception logs the traceback, writes ``FAILED``,
    creates ``out_dir/bed_tsd/`` and touches the output.
    """
    consensus_fasta = snakemake.input.consensus_fasta
    reference_fasta = snakemake.input.reference_fasta
    fastq1 = snakemake.input.fastq1
    fastq2 = snakemake.input.fastq2
    status_log = snakemake.params.status_log
    log = snakemake.params.log

    # Bound before the try block because the failure handler needs them; the
    # original assigned them inside the try, so an early exception (e.g.
    # while writing the log header) raised UnboundLocalError in the handler.
    out_dir = snakemake.params.out_dir
    out_bed = snakemake.output[0]

    try:
        with open(log, "a") as run_log:
            run_log.write("consensus fasta: " + consensus_fasta + "\n")
            run_log.write("reference fasta: " + reference_fasta + "\n")
            run_log.write("fastq1: " + fastq1 + "\n")
            run_log.write("fastq2: " + fastq2 + "\n")

        threads = snakemake.threads
        script_dir = snakemake.params.script_dir

        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        is_paired = snakemake.params.raw_fq2 != "None"

        command = [
            'Rscript', "--vanilla", script_dir + "/ngs_te_mapper.R",
            "genome=" + reference_fasta,
            "teFile=" + consensus_fasta,
            "tsd=" + str(config.PARAMS["tsd="]),
            "thread=" + str(threads),
            "output=" + out_dir,
            "sourceCodeFolder=" + script_dir,
        ]
        if is_paired:
            command.append("sample=" + fastq1 + ";" + fastq2)
        else:
            command.append("sample=" + fastq1)

        mccutils.log("ngs_te_mapper", "running ngs_te_mapper", log=log)
        mccutils.run_command(command, log=log)
        mccutils.log("ngs_te_mapper", "ngs_te_mapper run complete", log=log)

        # Keep the last matching file; an empty path is still passed to
        # check_file_exists when nothing matched.
        bed_dir = out_dir + "/bed_tsd/"
        raw_bed = ""
        for name in os.listdir(bed_dir):
            if "insertions.bed" in name:
                raw_bed = bed_dir + name

        mccutils.check_file_exists(raw_bed)
        mccutils.run_command(["cp", raw_bed, out_bed])
        mccutils.log("ngs_te_mapper", "ngs_te_mapper run complete")
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as run_log:
            print(track, file=run_log)
        mccutils.log("ngs_te_mapper", "ngs_te_mapper run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")
        mccutils.mkdir(out_dir + "/bed_tsd/")
        mccutils.run_command(["touch", out_bed])