def main():
    """Build the TE-locate taxonomy GFF for the current Snakemake rule.

    Copies the reference GFF and the taxonomy TSV to fixed file names in the
    current working directory, runs the Perl script given by
    ``snakemake.input.script`` on those copies, then copies
    ``telocate_locations_HL.gff`` to ``snakemake.output[0]`` and checks that
    it exists. On any failure the traceback and an error message are printed
    to stderr and the process exits with status 1.
    """
    log = snakemake.params.log
    # NOTE(review): this directory is created, but the working copies below
    # are written relative to the current working directory -- confirm intent.
    mccutils.mkdir(snakemake.params.tmp_dir + "/telocate")

    mccutils.log("processing", "making TE-locate taxonomy file", log=log)

    gff_copy = "telocate_locations.gff"
    taxonomy_copy = "telocate_taxonomy.tsv"
    try:
        mccutils.run_command(["cp", snakemake.input.ref_gff, gff_copy])
        mccutils.run_command(["cp", snakemake.input.taxonomy, taxonomy_copy])

        perl_call = ["perl", snakemake.input.script, gff_copy, taxonomy_copy, "Alias"]
        mccutils.run_command(perl_call, log=log)

        # presumably the "_HL" file is what the Perl script writes -- verify
        mccutils.run_command(["cp", "telocate_locations_HL.gff", snakemake.output[0]])
        mccutils.check_file_exists(snakemake.output[0])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...unable to produce TE-locate taxonomy file using",
              snakemake.input.script,
              file=sys.stderr)
        sys.exit(1)

    mccutils.log("processing", "TE-locate taxonomy file created")
Example #2
0
def discover_variants(ref_name,
                      bam,
                      split_bam,
                      te_bed,
                      out,
                      threads=1,
                      log=None):
    """Run ``tepid-discover`` from inside *out* and verify its outputs.

    Changes the working directory to *out*, runs ``tepid-discover`` with the
    given reference name, BAM, split BAM and TE BED, then creates empty
    placeholders for ``snakemake.output[0]`` and ``snakemake.output[2]`` if
    they are missing and checks that ``snakemake.output[1]`` exists.

    Any exception is reported on stderr (traceback plus an error line) and
    the process exits with status 1.
    """
    try:
        os.chdir(out)
        tepid_call = ["tepid-discover", "-p", str(threads), "-n", ref_name]
        tepid_call += ["-c", bam, "-s", split_bam, "-t", te_bed]
        mccutils.run_command(tepid_call, log=log)

        # outputs 0 and 2 may legitimately be absent: touch empty stand-ins
        for position in (0, 2):
            if not os.path.exists(snakemake.output[position]):
                mccutils.run_command(["touch", snakemake.output[position]])

        # output 1 is required
        mccutils.check_file_exists(snakemake.output[1])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...Failed to run TEPID discover step...exiting...",
              file=sys.stderr)
        sys.exit(1)
Example #3
0
def run_trim_galore(fq1, run_id, log, out, fq2=None, cores=1, flags=None):
    """Run ``trim_galore`` and return the path(s) of the trimmed FASTQ file(s).

    Args:
        fq1: first (or only) FASTQ file.
        run_id: accepted for interface compatibility; not used here.
        log: log file handed to ``mccutils.run_command``.
        out: base output directory; results go to ``out/results/trimgalore``.
        fq2: second FASTQ file for paired-end data, or ``None`` for
            single-end data.
        cores: value passed to ``trim_galore -j``.
        flags: extra command-line flags inserted right after the program
            name. ``None`` (the default) means no extra flags.

    Returns:
        The trimmed FASTQ path for single-end input, or a
        ``(trimmed_fq1, trimmed_fq2)`` tuple for paired-end input.
    """
    # A ``None`` default avoids sharing one mutable list across calls.
    extra_flags = [] if flags is None else list(flags)
    trim_dir = out + "/results/trimgalore"

    mccutils.mkdir(out + "/results/")
    command = ["trim_galore"] + extra_flags + ["-j", str(cores), "-o", trim_dir]
    command += [fq1] if fq2 is None else [fq1, fq2]

    mccutils.run_command(command, log=log)

    if fq2 is None:
        outfq = ""
        # if several files match, the last one listed wins
        for name in os.listdir(trim_dir):
            if "_trimmed.fq" in name:
                outfq = trim_dir + "/" + name

        mccutils.check_file_exists(outfq)
        return outfq

    outfq1 = ""
    outfq2 = ""
    for name in os.listdir(trim_dir):
        if "_val_1.fq" in name:
            outfq1 = trim_dir + "/" + name
        elif "_val_2.fq" in name:
            outfq2 = trim_dir + "/" + name

    mccutils.check_file_exists(outfq1)
    mccutils.check_file_exists(outfq2)
    return outfq1, outfq2
Example #4
0
def map_reads(fq1,
              fq2,
              ref_name,
              median_insert_size,
              out,
              threads=1,
              paired=True,
              log=None):
    """Map reads with ``tepid-map`` (paired) or ``tepid-map-se`` (single).

    The working directory is changed to *out* before the command is run.

    Args:
        fq1: first (or only) FASTQ file.
        fq2: second FASTQ file; only used when *paired* is true.
        ref_name: reference name; ``out/ref_name`` and
            ``out/ref_name.X15_01_65525S`` are passed as ``-x`` and ``-y``.
        median_insert_size: value for ``-s``; only used when *paired* is
            true. May be a number or a string.
        out: directory to run in; the BAM files are expected there.
        threads: value for ``-p``.
        paired: selects the paired-end or single-end command.
        log: log file handed to ``mccutils.run_command``.

    Returns:
        ``(bam, split_bam)``: ``out/ref_name.bam`` and
        ``out/ref_name.split.bam``, both checked with
        ``mccutils.check_file_exists``.
    """
    os.chdir(out)
    index_prefix = out + "/" + ref_name
    yaha_index = index_prefix + ".X15_01_65525S"

    if paired:
        command = [
            "tepid-map", "-x", index_prefix, "-y", yaha_index, "-p",
            str(threads),
            # argv entries must be strings; the insert size may arrive as a number
            "-s", str(median_insert_size),
            "-n", ref_name, "-1", fq1, "-2", fq2
        ]
    else:
        command = [
            "tepid-map-se", "-x", index_prefix, "-y", yaha_index, "-p",
            str(threads), "-n", ref_name, "-q", fq1
        ]

    mccutils.run_command(command, log=log)

    bam = index_prefix + ".bam"
    split_bam = index_prefix + ".split.bam"
    mccutils.check_file_exists(bam)
    mccutils.check_file_exists(split_bam)

    return bam, split_bam
def main():
    """Prepare the inputs for PopoolationTE2 (Snakemake script entry point).

    Empties ``out_dir``, then calls ``index_fasta``, ``format_fastq`` (once
    per read file), ``map_reads`` (once per read file) and ``sam_to_bam`` in
    that order. On success ``COMPLETED`` is written to the status log.

    On any exception the traceback goes to stderr and is appended to the
    log, ``FAILED`` is written to the status log, and the BAM output is
    touched; the exception is not re-raised.
    """
    mccutils.log("popoolationte2", "setting up for PopoolationTE2")

    ref_fasta = snakemake.input.ref_fasta
    fq1 = snakemake.input.fq1
    fq2 = snakemake.input.fq2
    jar = snakemake.params.jar
    log = snakemake.params.log
    out_dir = snakemake.params.out_dir
    threads = snakemake.threads
    status_log = snakemake.params.status_log

    try:
        # start from a clean slate: drop leftovers from earlier runs
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        scratch = out_dir + "/tmp"
        mccutils.mkdir(scratch)
        index_fasta(ref_fasta, log=log)

        fq1 = format_fastq(fq1, out_dir + "/reads_1.fastq", log=log)
        fq2 = format_fastq(fq2, out_dir + "/reads_2.fastq", log=log)

        # each read file is mapped on its own
        sam1 = map_reads(ref_fasta, fq1, out_dir + "/mapped_1.sam",
                         threads=threads, log=log)
        sam2 = map_reads(ref_fasta, fq2, out_dir + "/mapped_2.sam",
                         threads=threads, log=log)

        sam_to_bam(jar, fq1, fq2, sam1, sam2, snakemake.output.bam, out_dir,
                   threads=threads, log=log)
        mccutils.remove(scratch)

        mccutils.check_file_exists(snakemake.output.bam)
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")

        mccutils.log("popoolationte2", "PopoolationTE2 preprocessing complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(track, file=log_handle)
        mccutils.log("popoolationte2", "popoolationte2 preprocessing failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")

        # touch the BAM output even though the step failed
        mccutils.run_command(["touch", snakemake.output.bam])
Example #6
0
def run_workflow(args, sample_name, run_id, debug=False):
    """Assemble and run the ``snakemake`` command for one pipeline run.

    Args:
        args: parsed arguments; ``out``, ``proc``, ``clean`` and ``methods``
            are read here.
        sample_name: substituted for ``config.SAMPLE_NAME`` in output paths.
        run_id: used to name the per-run Snakemake directory and config file.
        debug: when true pass ``--reason`` to snakemake, otherwise ``--quiet``.

    If snakemake fails or the summary report is missing, the traceback and a
    help message are printed to stderr and the process exits with status 1.
    """
    results_dir = args.out + "/results/"
    input_dir = args.out + "/method_input/"

    # NOTE: this is the config dict itself, not a copy -- the placeholder
    # substitution below rewrites config.OUT_PATHS in place.
    out_files = config.OUT_PATHS
    for key in out_files:
        resolved = out_files[key].replace(config.INPUT_DIR, input_dir)
        resolved = resolved.replace(config.RESULTS_DIR, results_dir)
        out_files[key] = resolved.replace(config.SAMPLE_NAME, sample_name)

    # resolve the install location before changing directory below
    path = os.path.dirname(os.path.abspath(__file__))

    mccutils.mkdir(args.out + "/snakemake")
    snakemake_path = args.out + "/snakemake/" + str(run_id)
    mccutils.mkdir(snakemake_path)
    mccutils.run_command(["cp", path + "/Snakefile", snakemake_path])
    os.chdir(snakemake_path)

    command = [
        "snakemake", "--use-conda", "--conda-prefix",
        path + "/install/envs/conda"
    ]
    command.append("--reason" if debug else "--quiet")
    command += [
        "--configfile",
        args.out + "/snakemake/config/config_" + str(run_id) + ".json",
        "--cores",
        str(args.proc),
    ]

    if args.clean:
        # the clean pass reuses the base options, before any targets are added
        mccutils.run_command(command + ["--delete-all-output"])
        mccutils.remove(args.out + "/input")

    summary_report = args.out + "/results/summary/summary_report.txt"
    for method in args.methods:
        command.append(out_files[method])
    command.append(summary_report)

    try:
        mccutils.run_command(command)
        mccutils.check_file_exists(summary_report)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(
            "McClintock Pipeline Failed... please open an issue at https://github.com/bergmanlab/mcclintock/issues if you are having trouble using McClintock",
            file=sys.stderr)
        sys.exit(1)

    mccutils.remove(args.out + "/tmp")
Example #7
0
def repeat_mask(reference, te_fasta, chromosomes, procs, run_id, log, out):
    """Run RepeatMasker on *reference* and write a reformatted TE GFF.

    RepeatMasker is run in ``out/tmp/repeatmasker_<run_id>`` with *te_fasta*
    as its library. Its ``.out.gff`` is then filtered and rewritten to
    ``out/tmp/<run_id>tmpreferenceTEs.gff``:

    * lines containing ``#`` anywhere are dropped;
    * only records whose first column is in *chromosomes* are kept;
    * the TE name taken from column 9 replaces column 3 and becomes the
      ``ID``/``Name``/``Alias`` attributes.

    Returns:
        Path of the reformatted GFF. On any exception the traceback and an
        error message go to stderr and the process exits with status 1.
    """
    try:
        work_dir = out + "/tmp/repeatmasker_" + run_id
        mccutils.mkdir(work_dir)
        os.chdir(work_dir)
        mccutils.run_command(
            ["RepeatMasker", "-pa", str(procs), "-lib", te_fasta,
             "-dir", work_dir, "-s", "-gff", "-nolow", "-no_is", reference],
            log=log)
        os.chdir(out)

        # RepeatMasker appears to rewrite the custom database names during
        # ProcessRepeats; the replacements below restore them. Other
        # reference genomes may need additional rules.
        ref_name = os.path.basename(reference)
        formatted_ref_tes = out + "/tmp/" + run_id + "tmpreferenceTEs.gff"
        with open(work_dir + "/" + ref_name + ".out.gff", "r") as rmgff, \
                open(formatted_ref_tes, "w") as outgff:
            for record in rmgff:
                if "#" in record:
                    continue

                record = record.replace("McClintock-int", "McClintock")
                record = record.replace("POGON1", "pogo")
                columns = record.split("\t")
                attributes = columns[8]
                if columns[0] not in chromosomes:
                    continue

                # second space-separated token, quotes stripped, text after ":"
                family = attributes.split(" ")[1].replace('"', '').split(":")[1]
                columns[2] = family
                columns[8] = ";".join(
                    ["ID=" + family, "Name=" + family, "Alias=" + family])
                outgff.write("\t".join(columns) + '\n')

        # NOTE(review): the return value is not used in this function
        fix_fasta.fix_fasta_lines(work_dir + "/" + ref_name + ".masked", 80)

        mccutils.check_file_exists(formatted_ref_tes)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...Failed to run repeatmasker on: ",
              reference,
              "with lib:",
              te_fasta,
              "check file formatting...exiting...",
              file=sys.stderr)
        sys.exit(1)

    return formatted_ref_tes
Example #8
0
def main():
    """Run the TEMP2 module (Snakemake script entry point).

    Records the input paths in the log, empties ``out_dir``, then calls
    ``run_temp2_insertion`` and ``run_temp2_absence`` and copies the absence
    summary into ``out_dir``. On success ``COMPLETED`` is written to the
    status log.

    On any exception in the run itself the traceback goes to stderr and the
    log, ``FAILED`` is written to the status log, and both outputs are
    touched; the exception is not re-raised.
    """
    inputs = snakemake.input
    fq1 = inputs.fq1
    fq2 = inputs.fq2
    bam = inputs.bam
    reference = inputs.reference
    twobit = inputs.twobit
    consensus = inputs.consensus
    ref_te_bed = inputs.ref_te_bed
    taxonomy = inputs.taxonomy
    median_insert_size_file = inputs.median_insert_size
    log = snakemake.params.log

    with open(log,"a") as log_handle:
        for label, value in (("BAM: ", bam),
                             ("2bit: ", twobit),
                             ("consensus fasta: ", consensus),
                             ("reference TE BED: ", ref_te_bed),
                             ("Taxonomy TSV: ", taxonomy)):
            log_handle.write(label+value+"\n")

    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    script_dir = snakemake.params.script_dir
    sample_name = snakemake.params.sample_name
    status_log = snakemake.params.status_log

    # drop intermediate files left behind by earlier runs
    for leftover in os.listdir(out_dir):
        mccutils.remove(out_dir+"/"+leftover)

    mccutils.log("temp2","running TEMP2 Module")

    try:
        median_insert_size = get_median_insert_size(median_insert_size_file)
        run_temp2_insertion(fq1, fq2, bam, median_insert_size, reference,
                            script_dir, consensus, ref_te_bed, threads,
                            out_dir, config, log)
        run_temp2_absence(script_dir, bam, twobit, ref_te_bed,
                          median_insert_size, threads, out_dir+"/absence",
                          config, log)

        absence_summary = out_dir+'/absence/'+sample_name+".absence.refined.bp.summary"
        mccutils.run_command(["cp", absence_summary, out_dir], log=log)

        for required in (0, 1):
            mccutils.check_file_exists(snakemake.output[required])
        with open(status_log,"w") as status:
            status.write("COMPLETED\n")
        mccutils.log("temp2","TEMP2 run complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log,"a") as log_handle:
            print(track, file=log_handle)
        mccutils.log("temp2","TEMP2 run failed")
        with open(status_log,"w") as status:
            status.write("FAILED\n")

        # touch both outputs even though the step failed
        for position in (0, 1):
            mccutils.run_command(["touch", snakemake.output[position]])
Example #9
0
def main():
    """Run TEFLoN (Snakemake script entry point).

    If ``mccutils.check_status_file(status_log)`` is falsy, the output file
    is touched and nothing else happens. Otherwise a sample table is built
    and ``run_teflon`` is called with the thresholds from ``config.PARAMS``;
    on success ``COMPLETED`` is written to the status log.

    On any exception the traceback goes to stderr and the log, ``FAILED`` is
    written to the status log, and the output is touched; the exception is
    not re-raised.
    """
    mccutils.log("teflon", "Running TEFLoN")

    # these inputs are read here but not used further in this function
    consensus = snakemake.input.consensus
    reference_genome = snakemake.input.reference_genome
    ref_bed = snakemake.input.ref_bed
    teflon_taxonomy = snakemake.input.teflon_taxonomy

    bam = snakemake.input.bam
    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    script_dir = snakemake.params.script_dir
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    if not mccutils.check_status_file(status_log):
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        params = config.PARAMS
        sample_table = make_sample_table(out_dir, bam)
        run_teflon(script_dir,
                   out_dir,
                   sample_table,
                   threads=threads,
                   log=log,
                   quality_threshold=params['-q'],
                   stdev=params['-sd'],
                   cov=params['-cov'],
                   te_support1=params['-n1'],
                   te_support2=params['-n2'],
                   read_count_lower_threshold=params['-lt'],
                   read_count_higher_threshold=params['-ht'])

        mccutils.check_file_exists(snakemake.output[0])
        with open(status_log, "w") as status:
            status.write("COMPLETED\n")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(track, file=log_handle)
        mccutils.log("teflon", "teflon run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])
Example #10
0
def main():
    """Map reads to the reference with ``bwa mem`` (Snakemake script).

    The command's stdout is written to ``snakemake.output[0]``. ``-C`` is
    added when the ``save_comments`` config value evaluates to true, and the
    second FASTQ is added unless the ``fq2`` config value is the string
    ``"None"``. On failure the traceback and an error message naming the
    inputs are printed to stderr and the process exits with status 1.
    """
    run_log = snakemake.log[0]
    mccutils.log("processing", "mapping reads to reference", log=run_log)

    try:
        command = ["bwa", "mem"]
        # NOTE(review): eval() runs whatever string the config holds; if the
        # config can come from an untrusted source this should be replaced
        # with an explicit string comparison.
        if eval(snakemake.config['args']['save_comments']):
            command.append("-C")

        sample = snakemake.params.sample
        read_group = "@RG\\tID:" + sample + "\\tSM:" + sample
        command += ["-t", str(snakemake.threads), "-R", read_group,
                    snakemake.input.ref, snakemake.input.fq1]

        if snakemake.config['in']['fq2'] != "None":
            command.append(snakemake.input.fq2)

        mccutils.run_command_stdout(command, snakemake.output[0], log=run_log)
        mccutils.check_file_exists(snakemake.output[0])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)

        # name only the read files that were actually part of the run
        reads = [snakemake.input.fq1]
        if snakemake.config['in']['fq2'] != "None":
            reads.append(snakemake.input.fq2)
        print("ERROR...unable to map reads (bwa mem) using reference fasta:",
              snakemake.input.ref,
              "and reads:",
              *reads,
              file=sys.stderr)
        sys.exit(1)

    mccutils.log("processing", "read mapping complete")
def main():
    """Create the samtools and bwa index files for the reference FASTA.

    Runs ``samtools faidx`` and ``bwa index`` on ``snakemake.input.ref`` and
    checks that every declared Snakemake output exists. On failure the
    traceback and an error message are printed to stderr and the process
    exits with status 1.
    """
    try:
        log = snakemake.params.log
        mccutils.log("processing","making samtools and bwa index files for reference fasta", log=log)

        for indexer in (["samtools", "faidx"], ["bwa", "index"]):
            mccutils.run_command(indexer + [snakemake.input.ref], log=log)

        for expected in snakemake.output:
            mccutils.check_file_exists(expected)

        mccutils.log("processing","samtools and bwa index files for reference fasta created")

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...unable to index (samtools, bwa) reference fasta, please check the formatting of:", snakemake.input.ref, file=sys.stderr)
        sys.exit(1)
Example #12
0
def index_ref(fasta, ref_name, out, log=None):
    """Copy *fasta* into *out* and build the bowtie2 and yaha indexes there.

    Args:
        fasta: path of the reference FASTA.
        ref_name: index name passed to ``bowtie2-build``.
        out: directory to work in; it becomes the current working directory.
        log: log file handed to ``mccutils.run_command`` for the index
            commands.

    After indexing, ``out/<ref_name>.X15_01_65525S`` must exist. On any
    failure the traceback and an error message are printed to stderr and the
    process exits with status 1.
    """
    # Built before the ``try`` so the error message can always name the file,
    # even when the very first step (the chdir) is the one that fails.
    fasta_copy = out + "/" + fasta.split("/")[-1]

    try:
        os.chdir(out)
        mccutils.run_command(["cp", fasta, fasta_copy])

        mccutils.run_command(["bowtie2-build", fasta_copy, ref_name], log=log)
        mccutils.run_command(["yaha", "-g", fasta_copy], log=log)

        mccutils.check_file_exists(out + "/" + ref_name + ".X15_01_65525S")

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...Failed to index reference fasta:" + fasta_copy +
              " ...exiting...",
              file=sys.stderr)
        sys.exit(1)
Example #13
0
def main():
    """Prepare the inputs for TEFLoN (Snakemake script entry point).

    Calls ``make_reference_bed``, ``make_taxonomy_file``,
    ``prep_annotations`` and ``map_reads`` in that order, then checks the
    first output and writes ``COMPLETED`` to the status log.

    On any exception the traceback goes to stderr and the log, ``FAILED`` is
    written to the status log, and the first three outputs are touched; the
    exception is not re-raised.
    """
    mccutils.log("teflon","setting up for TEFLoN")

    inputs = snakemake.input
    te_gff = inputs.te_gff
    taxonomy = inputs.taxonomy
    consensus = inputs.consensus
    reference_genome = inputs.reference_genome
    fq1 = inputs.fq1
    fq2 = inputs.fq2

    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    script_dir = snakemake.params.script_dir
    log = snakemake.params.log

    ref_bed = snakemake.output.ref_bed
    teflon_taxonomy = snakemake.output.teflon_taxonomy
    status_log = snakemake.params.status_log

    try:
        make_reference_bed(te_gff, ref_bed)
        make_taxonomy_file(taxonomy, teflon_taxonomy)
        prep_annotations(script_dir, out_dir, ref_bed, teflon_taxonomy,
                         consensus, reference_genome, log=log)
        map_reads(out_dir, fq1, fq2, threads=threads, log=log)

        mccutils.check_file_exists(snakemake.output[0])
        with open(status_log,"w") as status:
            status.write("COMPLETED\n")

        mccutils.log("teflon","setup for TEFLoN complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log,"a") as log_handle:
            print(track, file=log_handle)
        mccutils.log("teflon","teflon preprocessing failed")
        with open(status_log,"w") as status:
            status.write("FAILED\n")

        # touch the first three outputs even though the step failed
        for position in range(3):
            mccutils.run_command(["touch", snakemake.output[position]])
Example #14
0
def discover_variants(ref_name,
                      bam,
                      split_bam,
                      te_bed,
                      out,
                      threads=1,
                      log=None):
    """Run ``tepid-discover`` from inside *out* and verify its outputs.

    Changes the working directory to *out*, runs ``tepid-discover``, creates
    empty placeholders for ``snakemake.output[0]`` and
    ``snakemake.output[2]`` if they are missing, and checks that
    ``snakemake.output[1]`` exists. Exceptions are not handled here.
    """
    os.chdir(out)

    tepid_call = ["tepid-discover", "-p", str(threads), "-n", ref_name]
    tepid_call += ["-c", bam, "-s", split_bam, "-t", te_bed]
    mccutils.run_command(tepid_call, log=log)

    # outputs 0 and 2 may legitimately be absent: touch empty stand-ins
    for position in (0, 2):
        if not os.path.exists(snakemake.output[position]):
            mccutils.run_command(["touch", snakemake.output[position]])

    # output 1 is required
    mccutils.check_file_exists(snakemake.output[1])
Example #15
0
def main():
    """Build the TE-locate taxonomy file by running the Perl helper script.

    Runs ``perl <script> <ref_gff> <taxonomy> Alias`` using the Snakemake
    inputs and checks that ``snakemake.output[0]`` exists afterwards. On
    failure the traceback and an error message are printed to stderr and the
    process exits with status 1.
    """
    log = snakemake.params.log
    mccutils.log("processing", "making TE-locate taxonomy file", log=log)

    try:
        inputs = snakemake.input
        perl_call = ["perl", inputs.script, inputs.ref_gff, inputs.taxonomy, "Alias"]
        mccutils.run_command(perl_call, log=log)
        mccutils.check_file_exists(snakemake.output[0])

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...unable to produce TE-locate taxonomy file using",
              snakemake.input.script,
              file=sys.stderr)
        sys.exit(1)

    mccutils.log("processing", "TE-locate taxonomy file created")
Example #16
0
def map_reads(fq1,
              fq2,
              ref_name,
              median_insert_size,
              out,
              threads=1,
              paired=True,
              log=None):
    """Map reads with ``tepid-map`` (paired) or ``tepid-map-se`` (single).

    The working directory is changed to *out* before the command is run.

    Args:
        fq1: first (or only) FASTQ file.
        fq2: second FASTQ file; only used when *paired* is true.
        ref_name: reference name; ``out/ref_name`` and
            ``out/ref_name.X15_01_65525S`` are passed as ``-x`` and ``-y``.
        median_insert_size: value for ``-s``; only used when *paired* is
            true. May be a number or a string.
        out: directory to run in; the BAM files are expected there.
        threads: value for ``-p``.
        paired: selects the paired-end or single-end command.
        log: log file handed to ``mccutils.run_command``.

    Returns:
        ``(bam, split_bam)``: ``out/ref_name.bam`` and
        ``out/ref_name.split.bam``. On any failure the traceback and an
        error message are printed to stderr and the process exits with
        status 1.
    """
    try:
        os.chdir(out)
        index_prefix = out + "/" + ref_name
        yaha_index = index_prefix + ".X15_01_65525S"

        if paired:
            command = [
                "tepid-map", "-x", index_prefix, "-y", yaha_index, "-p",
                str(threads),
                # argv entries must be strings; the insert size may arrive
                # as a number
                "-s", str(median_insert_size),
                "-n", ref_name, "-1", fq1, "-2", fq2
            ]
        else:
            command = [
                "tepid-map-se", "-x", index_prefix, "-y", yaha_index, "-p",
                str(threads), "-n", ref_name, "-q", fq1
            ]

        mccutils.run_command(command, log=log)

        bam = index_prefix + ".bam"
        split_bam = index_prefix + ".split.bam"
        mccutils.check_file_exists(bam)
        mccutils.check_file_exists(split_bam)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print("ERROR...Failed to run TEPID mapping step...exiting...",
              file=sys.stderr)
        sys.exit(1)

    return bam, split_bam
Example #17
0
def mask_reference(reference, ref_tes_gff, run_id, log, out):
    """Mask the intervals in *ref_tes_gff* within *reference*.

    Runs ``bedtools maskfasta`` and writes the result to
    ``out/tmp/<run_id>tmpmaskedreference.fasta``.

    Returns:
        Path of the masked FASTA. On any failure the traceback and an error
        message are printed to stderr and the process exits with status 1.
    """
    try:
        masked_reference = out + "/tmp/" + run_id + "tmpmaskedreference.fasta"
        bedtools_call = ["bedtools", "maskfasta"]
        bedtools_call += ["-fi", reference]
        bedtools_call += ["-fo", masked_reference]
        bedtools_call += ["-bed", ref_tes_gff]
        mccutils.run_command(bedtools_call, log=log)

        mccutils.check_file_exists(masked_reference)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(
            "ERROR...Failed to mask repeats (bedtools maskfasta) in reference: ",
            reference,
            " using repeat file:",
            ref_tes_gff,
            "check file formatting...exiting...",
            file=sys.stderr)
        sys.exit(1)

    return masked_reference
Example #18
0
def get_ref_te_fasta(reference, ref_tes_gff, run_id, log, out):
    """Extract the sequences of the annotated reference TEs.

    Runs ``bedtools getfasta -name`` on *reference* with *ref_tes_gff* and
    writes the result to ``out/tmp/<run_id>tmpreferencetes.fasta``.

    Returns:
        Path of the TE FASTA. On any failure the traceback and an error
        message are printed to stderr and the process exits with status 1.
    """
    try:
        ref_te_fasta = out + "/tmp/" + run_id + "tmpreferencetes.fasta"
        bedtools_call = ["bedtools", "getfasta", "-name"]
        bedtools_call += ["-fi", reference]
        bedtools_call += ["-bed", ref_tes_gff]
        bedtools_call += ["-fo", ref_te_fasta]
        mccutils.run_command(bedtools_call, log=log)
        mccutils.check_file_exists(ref_te_fasta)

    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
        print(
            "ERROR...Failed to create TE fasta (bedtools getfasta) using reference:",
            reference,
            " and TE annotations:",
            ref_tes_gff,
            "check file formatting...exiting...",
            file=sys.stderr)
        sys.exit(1)

    return ref_te_fasta
Example #19
0
def main():
    """Convert the SAM input to a sorted, indexed BAM and write flagstat.

    Four samtools steps run in sequence: ``view`` (SAM to temporary BAM),
    ``sort``, ``index`` and ``flagstat``. Each step has its own error
    message; a failure in any of them prints the traceback and that message
    to stderr and exits with status 1.
    """
    log = snakemake.params.log
    mccutils.log("processing","Converting sam to bam", log=log)

    def abort(message, path):
        # only call this from inside an ``except`` block: it prints the
        # traceback of the exception currently being handled
        print(traceback.format_exc(), file=sys.stderr)
        print(message, path, file=sys.stderr)
        sys.exit(1)

    # step 1: SAM -> unsorted BAM
    try:
        view_call = ["samtools","view", "-@", str(snakemake.threads), "-Sb", "-t", snakemake.input.ref_idx, snakemake.input.sam]
        mccutils.run_command_stdout(view_call, snakemake.output.tmp_bam, log=log)
        mccutils.check_file_exists(snakemake.output.tmp_bam)
    except Exception:
        abort("ERROR...unable convert sam to bam using SAMtools...sam file:", snakemake.input.sam)

    # step 2: sort; the last argument is the output name without ".bam"
    try:
        sort_call = ["samtools", "sort", "-@", str(snakemake.threads), snakemake.output.tmp_bam, snakemake.output.bam.replace(".bam", "")]
        mccutils.run_command(sort_call, log=log)
        mccutils.check_file_exists(snakemake.output.bam)
    except Exception:
        abort("ERROR...falied to sort the bam file using samtools sort...bam file:", snakemake.output.tmp_bam)

    # step 3: index the sorted BAM
    try:
        mccutils.run_command(["samtools", "index", snakemake.output.bam], log=log)
    except Exception:
        abort("ERROR...falied to index the bam file using samtools index...bam file:", snakemake.output.bam)

    # step 4: flagstat summary
    try:
        flagstat_call = ["samtools", "flagstat", snakemake.output.bam]
        mccutils.run_command_stdout(flagstat_call, snakemake.output.flagstat, log=log)
        mccutils.check_file_exists(snakemake.output.flagstat)
    except Exception:
        abort("ERROR...falied to generate flagstat file using samtools flagstat...bam file:", snakemake.output.bam)

    mccutils.log("processing","sam to bam converted")
Example #20
0
def main():
    """Run RelocaTE2 (Snakemake script entry point).

    Empties ``out_dir``, symlinks the reference, TE sequences, RepeatMasker
    output and FASTQ file(s) into ``out_dir/input/``, locates
    ``relocaTE2.py`` with ``which`` and runs it under ``python2`` with the
    extra options from ``config.PARAMS``. On success ``COMPLETED`` is written
    to the status log.

    On any exception the traceback goes to stderr and the log, ``FAILED`` is
    written to the status log, and both outputs are touched; the exception
    is not re-raised.
    """
    sample_name = snakemake.params.sample_name
    threads = snakemake.threads
    out_dir = snakemake.params.out_dir
    median_insert_size_file = snakemake.input.median_insert_size
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    try:
        # ensures intermediate files from previous runs are removed
        for f in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + f)

        # the string "None" marks a missing second read file
        is_paired = snakemake.params.raw_fq2 != "None"

        input_dir = snakemake.params.out_dir + "/input/"
        mccutils.remove(input_dir)
        mccutils.mkdir(input_dir)
        fq_dir = snakemake.params.out_dir + "/input/fastq/"
        mccutils.mkdir(fq_dir)

        reference = input_dir + "reference.fasta"
        te_seqs = input_dir + "consensus.fasta"
        rm_out = input_dir + "repeatmasker.out"

        os.symlink(snakemake.input.reference, reference)
        os.symlink(snakemake.input.te_seqs, te_seqs)
        os.symlink(snakemake.input.rm_out, rm_out)

        # the link names carry the suffixes passed via -1/-2/-u below
        if is_paired:
            fq1 = fq_dir + sample_name + "_1.fq"
            fq2 = fq_dir + sample_name + "_2.fq"
            os.symlink(snakemake.input.fq1, fq1)
            os.symlink(snakemake.input.fq2, fq2)
        else:
            fq1 = fq_dir + sample_name + ".unPaired.fq"
            os.symlink(snakemake.input.fq1, fq1)

        median_insert_size = get_median_insert_size(median_insert_size_file)

        # subprocess.run waits for ``which`` and closes both pipes; a bare
        # Popen whose stdout is read once leaves the child unreaped and the
        # pipes open. A non-zero exit status is deliberately not an error
        # here: the script path is then simply empty.
        located = subprocess.run(["which", "relocaTE2.py"],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        script = located.stdout.decode().replace("\n", "")

        mccutils.log("relocate2", "running RelocaTE2", log=log)
        command = [
            "python2", script, "-t", te_seqs, "-g", reference, "-r", rm_out,
            "-o", out_dir, "-s",
            str(median_insert_size), "--run", "-v", "4", "-c",
            str(threads), "-d", fq_dir
        ]

        # each config entry becomes a "<flag> <value>" pair
        for param, value in config.PARAMS.items():
            command.append(param)
            command.append(str(value))

        if is_paired:
            command += ["-1", "_1", "-2", "_2"]
        else:
            command += ["-u", ".unPaired"]

        mccutils.run_command(command, log=log)

        mccutils.check_file_exists(snakemake.output[0])
        mccutils.check_file_exists(snakemake.output[1])
        with open(status_log, "w") as l:
            l.write("COMPLETED\n")
        mccutils.log("relocate2", "RelocaTE2 run complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as l:
            print(track, file=l)
        mccutils.log("relocate2", "RelocaTE2 run failed")
        with open(status_log, "w") as l:
            l.write("FAILED\n")

        # touch both outputs even though the step failed
        mccutils.run_command(["touch", snakemake.output[0]])
        mccutils.run_command(["touch", snakemake.output[1]])
def main():
    """Run PopoolationTE2 (Snakemake script entry point).

    If ``mccutils.check_status_file(status_log)`` is falsy, the output file
    is touched and nothing else happens. Otherwise the ``popoolationte2_*``
    helpers are called in sequence (ppileup, subsample, signatures, strand,
    frequency, pairup), each configured from ``config.PARAMS`` where a
    section is passed, and ``COMPLETED`` is written to the status log.

    On any exception the traceback goes to stderr and the log, ``FAILED`` is
    written to the status log, and the output is touched; the exception is
    not re-raised.
    """
    mccutils.log("popoolationte2", "running PopoolationTE2")

    ref_fasta = snakemake.input.ref_fasta
    bam = snakemake.input.bam
    taxonomy = snakemake.input.taxonomy
    jar = snakemake.params.jar
    out_dir = snakemake.params.out_dir
    sample_name = snakemake.params.sample_name  # read but not used here
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    if not mccutils.check_status_file(status_log):
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        scratch = out_dir + "/tmp"
        mccutils.mkdir(scratch)
        taxonomy = format_taxonomy(taxonomy, out_dir)

        ppileup = popoolationte2_ppileup(
            jar, config.PARAMS["ppileup"], bam, taxonomy, out_dir, log=log)
        ppileup = popoolationte2_subsample(
            jar, config.PARAMS["subsampleppileup"], ppileup, out_dir, log=log)

        signatures = popoolationte2_signatures(
            jar, config.PARAMS["identifySignatures"], ppileup, out_dir,
            log=log)
        signatures = popoolationte2_strand(
            jar, config.PARAMS["updateStrand"], signatures, bam, taxonomy,
            out_dir, log=log)
        signatures = popoolationte2_frequency(
            jar, ppileup, signatures, out_dir, log=log)

        # return value is not used in this function
        popoolationte2_pairup(
            jar, config.PARAMS["pairupSignatures"], signatures, ref_fasta,
            taxonomy, out_dir, log=log)

        mccutils.remove(scratch)
        mccutils.check_file_exists(snakemake.output[0])

        with open(status_log, "w") as status:
            status.write("COMPLETED\n")
        mccutils.log("popoolationte2", "popoolationte2 run complete")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(track, file=log_handle)
        mccutils.log("popoolationte2", "popoolationte2 run failed")
        with open(status_log, "w") as status:
            status.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])
Example #22
0
def main():
    """Run TE-Locate on the sample SAM file and stage its raw output.

    All inputs and parameters are read from the global ``snakemake`` object.
    The function empties ``out_dir``, links the SAM file into a ``sam/``
    sub-directory, changes into the directory holding the TE-Locate script,
    runs the script through ``perl`` and copies the produced ``.info`` file
    to ``te-locate-raw.info``.

    ``COMPLETED`` is written to the status log on success. On any exception
    the traceback is printed to stderr and appended to the log, ``FAILED`` is
    written to the status log and ``snakemake.output[0]`` is touched.
    """
    te_gff = snakemake.input.te_gff
    sam = snakemake.input.sam
    ref_fasta = snakemake.input.ref
    median_insert_size_file = snakemake.input.median_insert_size
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    mccutils.log("te-locate", "running TE-Locate", log=log)
    with open(log, "a") as log_handle:
        log_handle.write("TE GFF: " + te_gff + "\n")
        log_handle.write("SAM: " + sam + "\n")
        log_handle.write("reference fasta: " + ref_fasta + "\n")

    telocate = snakemake.params.run_script
    out_dir = snakemake.params.out_dir

    try:
        # ensures intermediate files from previous runs are removed
        for f in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + f)

        sam_dir = out_dir + "/sam/"
        mccutils.mkdir(sam_dir)
        te_locate_sam = sam_dir + "te-locate.sam"
        # lexists() is also true for a dangling symlink, which exists()
        # reports as missing; without this a stale link whose target is gone
        # would make os.symlink() fail with FileExistsError.
        if os.path.lexists(te_locate_sam):
            os.remove(te_locate_sam)
        os.symlink(sam, te_locate_sam)

        # The script is launched from its own directory.
        os.chdir(os.path.dirname(telocate))

        median_insert_size = mccutils.get_median_insert_size(
            median_insert_size_file)

        distance = median_insert_size * config.PARAMS["min_distance"]

        command = [
            "perl", telocate,
            str(config.PARAMS["max_mem"]), sam_dir, te_gff, ref_fasta, out_dir,
            str(distance),
            str(config.PARAMS["min_support_reads"]),
            str(config.PARAMS["min_support_individuals"])
        ]

        mccutils.run_command(command, log=log)

        # NOTE(review): the "reads3_acc1" suffix is hard-coded while the
        # support thresholds passed to the script come from config.PARAMS;
        # confirm the output name does not depend on those values.
        raw_info = out_dir + "_" + str(distance) + "_reads3_acc1.info"
        mccutils.check_file_exists(raw_info)
        mccutils.run_command(["cp", raw_info, out_dir + "te-locate-raw.info"])

        mccutils.log("te-locate", "TE-Locate complete")
        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(track, file=log_handle)
        mccutils.log("telocate", "TE-locate run failed")
        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Run the PoPoolationTE workflow for one sample.

    Inputs and parameters come from the global ``snakemake`` object. The
    input paths are appended to the log file first. When
    ``mccutils.check_status_file(status_log)`` is falsy the function only
    touches ``snakemake.output[0]`` and returns.

    Otherwise it derives the read length, the median insert size and the
    maximum distance, builds the known-insert file, runs the workflow
    scripts, checks that ``snakemake.output[0]`` exists and writes
    ``COMPLETED`` to the status log. On any exception the traceback is
    printed to stderr and appended to the log, ``FAILED`` is written to the
    status log and ``snakemake.output[0]`` is touched.
    """
    mccutils.log("popoolationte", "running PopoolationTE")
    ref_fasta = snakemake.input.ref_fasta
    taxonomy = snakemake.input.taxonomy
    te_gff = snakemake.input.te_gff
    fq1 = snakemake.input.fq1
    fq2 = snakemake.input.fq2
    sam = snakemake.input.sam
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    # Header describing the inputs, written in this fixed order.
    logged_inputs = (
        ("reference fasta: ", ref_fasta),
        ("Taxonomy TSV: ", taxonomy),
        ("TE GFF: ", te_gff),
        ("fastq1: ", fq1),
        ("fastq2: ", fq2),
        ("SAM: ", sam),
    )
    with open(log, "a") as log_handle:
        for label, path in logged_inputs:
            log_handle.write(label + path + "\n")

    out_dir = snakemake.params.out_dir
    sample_name = snakemake.params.sample_name  # read here but not used below
    script_dir = snakemake.params.script_dir

    # Nothing to run when the status check is falsy: just touch the output.
    if not mccutils.check_status_file(status_log):
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        mccutils.log("popoolationte", "getting read length")
        read_length = get_read_length(fq1, fq2)

        mccutils.log("popoolationte", "calculating median insert size")
        median_insert_size = get_median_insert_size(sam)
        max_dist = int(median_insert_size * 3) + read_length

        mccutils.log("popoolationte",
                     "converting TE gff to PoPoolationTE known TE file")
        known_inserts = make_known_insert_file(te_gff, out_dir)

        mccutils.log("popoolationte",
                     "running the PoPoolationTE workflow scripts")
        run_popoolationte(
            sam,
            ref_fasta,
            taxonomy,
            read_length,
            median_insert_size,
            max_dist,
            known_inserts,
            script_dir,
            out_dir,
            config.PARAMS,
            log=log,
        )

        mccutils.check_file_exists(snakemake.output[0])

        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")
        mccutils.log("popoolationte", "popoolationte run complete")

    except Exception:
        failure_trace = traceback.format_exc()
        print(failure_trace, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(failure_trace, file=log_handle)
        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Run ngs_te_mapper2 for one sample and record the outcome.

    Inputs, parameters, the thread count and the two output BED paths are
    taken from the global ``snakemake`` object. The input paths are appended
    to the log file, ``out_dir`` is emptied, and ``ngs_te_mapper2.py`` is
    invoked with every key/value pair of ``config.PARAMS`` appended to the
    command line, followed by ``-f`` and the fastq path(s).

    ``COMPLETED`` is written to the status log once both output BED files
    are confirmed to exist. On any exception the failure is logged, the
    traceback is printed to stderr and appended to the log, ``FAILED`` is
    written to the status log and both output BED files are touched.
    """
    consensus_fasta = snakemake.input.consensus_fasta
    reference_fasta = snakemake.input.reference_fasta
    fastq1 = snakemake.input.fastq1
    fastq2 = snakemake.input.fastq2
    locations = snakemake.input.locations

    log = snakemake.params.log
    with open(log, "a") as log_handle:
        for label, path in (
            ("consensus fasta: ", consensus_fasta),
            ("reference fasta: ", reference_fasta),
            ("fastq1: ", fastq1),
            ("fastq2: ", fastq2),
        ):
            log_handle.write(label + path + "\n")

    threads = snakemake.threads
    sample_name = snakemake.params.sample_name
    script_dir = snakemake.params.script_dir
    out_dir = snakemake.params.out_dir
    status_log = snakemake.params.status_log
    out_bed_nonref = snakemake.output[0]
    out_bed_ref = snakemake.output[1]

    try:
        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        # The literal string "None" marks a single-end run.
        is_paired = snakemake.params.raw_fq2 != "None"

        command = [
            'python', script_dir + "/ngs_te_mapper2.py",
            "-r", reference_fasta,
            "-l", consensus_fasta,
            "-t", str(threads),
            "-o", out_dir,
            "--keep_files",
            "-p", sample_name,
            "-a", locations,
        ]

        # Each config key is passed through as a flag followed by its value.
        for flag, value in config.PARAMS.items():
            command += [flag, str(value)]

        command.append("-f")
        command.append(fastq1 + "," + fastq2 if is_paired else fastq1)

        mccutils.log("ngs_te_mapper2", "running ngs_te_mapper2", log=log)
        mccutils.run_command(command, log=log)
        mccutils.check_file_exists(out_bed_ref)
        mccutils.check_file_exists(out_bed_nonref)
        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")

        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run complete", log=log)
        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run complete")

    except Exception:
        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run failed", log=log)
        mccutils.log("ngs_te_mapper2", "ngs_te_mapper2 run failed")

        failure_trace = traceback.format_exc()
        print(failure_trace, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(failure_trace, file=log_handle)

        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.run_command(["touch", out_bed_ref])
        mccutils.run_command(["touch", out_bed_nonref])
Example #25
0
def main():
    """Run RelocaTE for one sample and combine its GFF output.

    Parameters and inputs come from the global ``snakemake`` object. The
    function empties ``out_dir``, creates an ``input/`` directory holding
    symlinks to the consensus fasta, TE GFF, reference fasta and fastq
    file(s), builds the annotation file, changes into ``out_dir`` and runs
    ``relocaTE.pl`` with every key/value pair of ``config.PARAMS`` appended
    to the command line. The fastq symlinks are named with a random
    five-digit id that does not occur in the fastq directory path.

    ``COMPLETED`` is written to the status log after the combined GFF is
    confirmed to exist. On any exception the traceback is printed to stderr
    and appended to the log, ``FAILED`` is written to the status log and
    ``snakemake.output[0]`` is touched.
    """

    sample_name = snakemake.params.sample_name
    log = snakemake.params.log
    raw_fq2 = snakemake.params.raw_fq2
    # The literal string "None" marks a single-end run.
    is_paired = raw_fq2 != "None"

    script_dir = snakemake.params.script_dir
    out_dir = snakemake.params.out_dir
    status_log = snakemake.params.status_log
    out_gff = snakemake.output[0]

    try:
        # ensures intermediate files from previous runs are removed
        for leftover in os.listdir(out_dir):
            mccutils.remove(out_dir + "/" + leftover)

        mccutils.log("relocate", "running RelocaTE", log=log)
        input_dir = snakemake.params.out_dir + "/input/"
        mccutils.remove(input_dir)
        mccutils.mkdir(input_dir)
        fq_dir = input_dir + "fastq/"
        mccutils.mkdir(fq_dir)

        consensus_fasta = input_dir + "consensus.fasta"
        te_gff = input_dir + "te.gff"
        reference_fasta = input_dir + "reference.fasta"

        # Redraw until the id is not a substring of the fastq directory path.
        uniq_id = str(random.randint(10000, 99999))
        while uniq_id in fq_dir:
            mccutils.log("relocate",
                         "unique id: " + uniq_id +
                         " occurs in file path... selecting a new one...",
                         log=log)
            uniq_id = str(random.randint(10000, 99999))

        fq1_uniq_id = uniq_id + "_mcc_relocate_1"
        fq2_uniq_id = uniq_id + "_mcc_relocate_2"
        unpaired_id = uniq_id + "_unPaired"

        os.symlink(snakemake.input.consensus_fasta, consensus_fasta)
        os.symlink(snakemake.input.te_gff, te_gff)
        os.symlink(snakemake.input.reference_fasta, reference_fasta)

        # Fastq links are named <sample_name>.<id>.fq inside fq_dir.
        fq_prefix = fq_dir + sample_name + "."
        if is_paired:
            os.symlink(snakemake.input.fq1, fq_prefix + fq1_uniq_id + ".fq")
            os.symlink(snakemake.input.fq2, fq_prefix + fq2_uniq_id + ".fq")
        else:
            os.symlink(snakemake.input.fq1, fq_prefix + unpaired_id + ".fq")

        annotation = make_annotation_file(te_gff, out_dir)
        os.chdir(out_dir)

        command = [
            "perl", script_dir + "/relocaTE.pl",
            "-t", consensus_fasta,
            "-d", fq_dir,
            "-g", reference_fasta,
            "-o", ".",
            "-r", annotation,
        ]

        # Each config key is passed through as a flag followed by its value.
        for flag, value in config.PARAMS.items():
            command.extend([flag, str(value)])

        if is_paired:
            command.extend(["-1", fq1_uniq_id, "-2", fq2_uniq_id])
        else:
            command.extend(["-u", unpaired_id])

        mccutils.run_command(command, log=log)
        combine_gffs(out_dir, out_gff)

        mccutils.check_file_exists(out_gff)
        mccutils.log("relocate", "RelocaTE run complete")
        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")

    except Exception:
        failure_trace = traceback.format_exc()
        print(failure_trace, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(failure_trace, file=log_handle)
        mccutils.log("relocate", "RelocaTE run failed")
        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])
def main():
    """Run ngs_te_mapper for one sample and stage its raw BED output.

    Inputs and parameters are read from the global ``snakemake`` object.
    The input paths are appended to the log file, ``out_dir`` is emptied,
    ``ngs_te_mapper.R`` is run through ``Rscript`` and the file under
    ``<out_dir>/bed_tsd/`` whose name contains ``insertions.bed`` is copied
    to ``snakemake.output[0]``.

    ``COMPLETED`` is written to the status log on success. On any exception
    the traceback is printed to stderr and appended to the log, ``FAILED``
    is written to the status log, ``<out_dir>/bed_tsd/`` is created and the
    output BED file is touched.
    """
    consensus_fasta = snakemake.input.consensus_fasta
    reference_fasta = snakemake.input.reference_fasta
    fastq1 = snakemake.input.fastq1
    fastq2 = snakemake.input.fastq2
    status_log = snakemake.params.status_log

    log = snakemake.params.log

    # Bound before the try block because the failure handler uses both; when
    # they were assigned inside it, an early exception left them unbound and
    # the handler itself died with UnboundLocalError.
    out_dir = snakemake.params.out_dir
    out_bed = snakemake.output[0]

    try:
        with open(log, "a") as log_handle:
            log_handle.write("consensus fasta: "+consensus_fasta+"\n")
            log_handle.write("reference fasta: "+reference_fasta+"\n")
            log_handle.write("fastq1: "+fastq1+"\n")
            log_handle.write("fastq2: "+fastq2+"\n")

        threads = snakemake.threads
        sample_name = snakemake.params.sample_name  # read here but not used below
        script_dir = snakemake.params.script_dir

        # ensures intermediate files from previous runs are removed
        for f in os.listdir(out_dir):
            mccutils.remove(out_dir+"/"+f)

        # The literal string "None" marks a single-end run.
        is_paired = snakemake.params.raw_fq2 != "None"

        command = [
            'Rscript', "--vanilla", script_dir+"/ngs_te_mapper.R",
            "genome="+reference_fasta,
            "teFile="+consensus_fasta,
            "tsd="+str(config.PARAMS["tsd="]),
            "thread="+str(threads),
            "output="+out_dir,
            "sourceCodeFolder="+script_dir,
        ]

        if is_paired:
            command.append("sample="+fastq1+";"+fastq2)
        else:
            command.append("sample="+fastq1)

        mccutils.log("ngs_te_mapper", "running ngs_te_mapper", log=log)
        mccutils.run_command(command, log=log)
        mccutils.log("ngs_te_mapper", "ngs_te_mapper run complete", log=log)

        # The last directory entry containing "insertions.bed" is used;
        # raw_bed stays empty when none is found, and that empty path is
        # what gets handed to check_file_exists().
        raw_bed = ""
        for f in os.listdir(out_dir+"/bed_tsd/"):
            if "insertions.bed" in f:
                raw_bed = out_dir+"/bed_tsd/"+f

        mccutils.check_file_exists(raw_bed)
        mccutils.run_command(["cp", raw_bed, out_bed])

        mccutils.log("ngs_te_mapper", "ngs_te_mapper run complete")
        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")

    except Exception:
        track = traceback.format_exc()
        print(track, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(track, file=log_handle)
        mccutils.log("ngs_te_mapper", "ngs_te_mapper run failed")
        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.mkdir(out_dir+"/bed_tsd/")
        mccutils.run_command(["touch", out_bed])