예제 #1
0
def main():
    """Convert RetroSeq calls into redundant/non-redundant BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. When the
    status file check does not pass, or when no insertions are read, the two
    BED paths are only ``touch``ed and no VCF is written.
    """
    mccutils.log("retroseq","processing RetroSeq results")

    inputs = snakemake.input
    params = snakemake.params

    retroseq_out = inputs.retroseq_out
    reference_fasta = inputs.reference_fasta
    out_dir = params.out_dir
    ref_name = params.ref_name  # read for parity with the rule's params; not used below
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log

    # An empty list stands in for "nothing to report" when earlier steps failed.
    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = read_insertions(
            retroseq_out,
            sample_name,
            chromosomes,
            support_threshold=config.PARAMS["read_support_threshold"],
            breakpoint_threshold=config.PARAMS["breakpoint_confidence_threshold"],
        )

    if len(insertions) >= 1:
        redundant = output.make_redundant_bed(insertions, sample_name, out_dir, method="retroseq")
        nonredundant = output.make_nonredundant_bed(redundant, sample_name, out_dir, method="retroseq")
        output.write_vcf(nonredundant, reference_fasta, sample_name, "retroseq", out_dir)
    else:
        for bed_suffix in ("_retroseq_redundant.bed", "_retroseq_nonredundant.bed"):
            mccutils.run_command(["touch", out_dir+"/"+sample_name+bed_suffix])

    mccutils.log("retroseq","RetroSeq post processing complete")
예제 #2
0
def main():
    """Convert ngs_te_mapper2 predictions into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. If the
    status file check does not pass, the two BED paths are ``touch``ed and
    the function returns without logging. Otherwise insertions are read; when
    there are any, the redundant BED, the non-redundant BED and the VCF are
    written, and when there are none the two BED paths are ``touch``ed.
    """
    ref_bed = snakemake.input.ref_bed
    nonref_bed = snakemake.input.nonref_bed
    reference_fasta = snakemake.input.reference_fasta

    log = snakemake.params.log
    sample_name = snakemake.params.sample_name
    out_dir = snakemake.params.out_dir
    chromosomes = snakemake.params.chromosomes.split(",")
    status_log = snakemake.params.status_log

    placeholder_beds = [
        out_dir+"/"+sample_name+"_ngs_te_mapper2_redundant.bed",
        out_dir+"/"+sample_name+"_ngs_te_mapper2_nonredundant.bed",
    ]

    if not mccutils.check_status_file(status_log):
        for bed_path in placeholder_beds:
            mccutils.run_command(["touch", bed_path])
        return

    mccutils.log("ngs_te_mapper2","processing ngs_te_mapper2 results", log=log)
    insertions = read_insertions(ref_bed, nonref_bed, chromosomes, sample_name, out_dir)
    if len(insertions) > 0:
        insertions = output.make_redundant_bed(insertions, sample_name, out_dir, method="ngs_te_mapper2")
        # The non-redundant result must be rebound to `insertions`: it was
        # previously stored under a misspelled name, so the VCF was built
        # from the redundant list instead.
        insertions = output.make_nonredundant_bed(insertions, sample_name, out_dir, method="ngs_te_mapper2")
        output.write_vcf(insertions, reference_fasta, sample_name, "ngs_te_mapper2", out_dir)
    else:
        for bed_path in placeholder_beds:
            mccutils.run_command(["touch", bed_path])

    mccutils.log("ngs_te_mapper2","ngs_te_mapper2 postprocessing complete")
예제 #3
0
def main():
    """Convert TE-Locate output into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. Insertions
    are read and passed through ``filter_by_reference`` only when the status
    file check passes. When insertions remain, the redundant BED, the
    non-redundant BED and the VCF are written; otherwise the two BED paths
    are ``touch``ed.
    """
    mccutils.log("te-locate","processing TE-Locate results")
    telocate_raw = snakemake.input.telocate_raw
    te_gff = snakemake.input.te_gff
    reference_fasta = snakemake.input.reference_fasta

    out_dir = snakemake.params.out_dir
    sample_name = snakemake.params.sample_name
    chromosomes = snakemake.params.chromosomes.split(",")
    status_log = snakemake.params.status_log

    # An empty list routes a failed upstream step to the placeholder branch.
    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = read_insertions(telocate_raw, sample_name, chromosomes, rp_threshold=config.PARAMS['read_pair_support_threshold'])
        insertions = filter_by_reference(insertions, te_gff)

    if len(insertions) > 0:
        insertions = output.make_redundant_bed(insertions, sample_name, out_dir, method="telocate")
        # The non-redundant result must be rebound to `insertions`: it was
        # previously stored under a misspelled name, so the VCF was built
        # from the redundant list instead.
        insertions = output.make_nonredundant_bed(insertions, sample_name, out_dir, method="telocate")
        output.write_vcf(insertions, reference_fasta, sample_name, "telocate", out_dir)
    else:
        for bed_suffix in ("_telocate_redundant.bed", "_telocate_nonredundant.bed"):
            mccutils.run_command(["touch", out_dir+"/"+sample_name+bed_suffix])

    mccutils.log("te-locate", "TE-Locate post processing complete")
예제 #4
0
def main():
    """Convert tebreak output into redundant/non-redundant BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. When the
    status file check does not pass, or no insertions are read, the two BED
    paths are only ``touch``ed and no VCF is written.
    """
    mccutils.log("tebreak","running tebreak post processing")

    inputs = snakemake.input
    params = snakemake.params

    tebreak_out = inputs.tebreak_out
    ref_fasta = inputs.ref_fasta
    out_dir = params.out_dir
    ref_name = params.ref_name  # read for parity with the rule's params; not used below
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log

    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = read_insertions(tebreak_out, sample_name, chromosomes, config)

    if len(insertions) > 0:
        redundant = output.make_redundant_bed(insertions, sample_name, out_dir, method="tebreak")
        nonredundant = output.make_nonredundant_bed(redundant, sample_name, out_dir, method="tebreak")
        output.write_vcf(nonredundant, ref_fasta, sample_name, "tebreak", out_dir)
    else:
        for bed_suffix in ("_tebreak_redundant.bed", "_tebreak_nonredundant.bed"):
            mccutils.run_command(["touch", out_dir+"/"+sample_name+bed_suffix])

    mccutils.log("tebreak","tebreak postprocessing complete")
예제 #5
0
def main():
    """Convert Jitterbug predictions into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object; the read
    filters come from ``config.FILTER``. When the status file check does not
    pass, or no insertions are read, the two BED paths are only ``touch``ed
    and no VCF is written.
    """
    mccutils.log("jitterbug", "jitterbug postprocessing")

    inputs = snakemake.input
    params = snakemake.params

    jitterbug_out = inputs.jitterbug_out
    te_taxonomy = inputs.taxonomy
    reference_fasta = inputs.reference_fasta

    out_dir = params.out_dir
    log = params.log  # not used below
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log

    out = snakemake.output.out  # not used below

    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = read_insertions(
            jitterbug_out,
            te_taxonomy,
            chromosomes,
            sample_name,
            min_fwd_read_support=config.FILTER['MIN_FWD_READ_SUPPORT'],
            min_rev_read_support=config.FILTER['MIN_REV_READ_SUPPORT'],
            min_sr_support=config.FILTER['MIN_SPLIT_READ_SUPPORT'],
            min_zygosity=config.FILTER['MIN_ZYGOSITY'],
        )

    if len(insertions) >= 1:
        redundant = output.make_redundant_bed(
            insertions, sample_name, out_dir, method="jitterbug")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="jitterbug")
        output.write_vcf(
            nonredundant, reference_fasta, sample_name, "jitterbug", out_dir)
    else:
        for bed_suffix in ("_jitterbug_redundant.bed",
                           "_jitterbug_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])
예제 #6
0
def get_failed_runs(methods, status_files):
    """Return the methods whose status file check did not pass.

    Args:
        methods: iterable of method names to inspect, in order.
        status_files: mapping from method name to its status file; methods
            without an entry are skipped.

    Returns:
        A list of the method names, in ``methods`` order, for which
        ``mccutils.check_status_file`` returned a falsy value.
    """
    return [
        method
        for method in methods
        if method in status_files.keys()
        and not mccutils.check_status_file(status_files[method])
    ]
예제 #7
0
def main():
    """Convert PopoolationTE2 predictions into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. Reference
    TEs are loaded and insertions read only when the status file check
    passes. With no insertions (or a failed check) the two BED paths are only
    ``touch``ed and no VCF is written.
    """
    mccutils.log("popoolationte2", "processing PopoolationTE2 results")

    inputs = snakemake.input
    params = snakemake.params

    te_predictions = inputs.popoolationte2_out
    te_gff = inputs.te_gff
    taxonomy = inputs.taxonomy
    reference_fasta = inputs.reference_fasta

    out_dir = params.out_dir
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    log = params.log  # not used below
    status_log = params.status_log

    insertions = []
    if mccutils.check_status_file(status_log):
        ref_tes = get_ref_tes(te_gff, taxonomy, chromosomes)
        insertions = read_insertions(
            te_predictions,
            ref_tes,
            chromosomes,
            sample_name,
            both_end_support_needed=config.PARAMS["require_both_end_support"],
            support_threshold=config.PARAMS["frequency_threshold"],
        )

    if len(insertions) >= 1:
        redundant = output.make_redundant_bed(
            insertions, sample_name, out_dir, method="popoolationte2")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="popoolationte2")
        output.write_vcf(
            nonredundant, reference_fasta, sample_name, "popoolationte2",
            out_dir)
    else:
        for bed_suffix in ("_popoolationte2_redundant.bed",
                           "_popoolationte2_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])

    mccutils.log("popoolationte2", "PopoolationTE2 postprocessing complete")
예제 #8
0
def main():
    """Convert TEFLoN output into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. Reference
    TEs are loaded and insertions read only when the status file check
    passes. With no insertions (or a failed check) the two BED paths are only
    ``touch``ed and no VCF is written.
    """
    mccutils.log("teflon", "TEFLoN postprocessing")

    inputs = snakemake.input
    params = snakemake.params

    teflon_raw = inputs.teflon_out
    ref_te_bed = inputs.ref_bed
    reference_fasta = inputs.reference_fasta

    out_dir = params.out_dir
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log

    out = snakemake.output.out  # not used below

    insertions = []
    if mccutils.check_status_file(status_log):
        ref_tes = get_ref_tes(ref_te_bed)
        insertions = read_insertions(
            teflon_raw,
            chromosomes,
            sample_name,
            ref_tes,
            min_presence=config.PARAMS['min_presence_reads'],
            max_absence=config.PARAMS['max_absence_reads'],
            min_presence_fraction=config.PARAMS['min_presence_fraction'],
            require_tsd=config.PARAMS['require_tsd'],
            require_both_breakpoints=config.PARAMS['require_both_breakpoints'],
        )

    if len(insertions) >= 1:
        redundant = output.make_redundant_bed(
            insertions, sample_name, out_dir, method="teflon")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="teflon")
        output.write_vcf(
            nonredundant, reference_fasta, sample_name, "teflon", out_dir)
    else:
        for bed_suffix in ("_teflon_redundant.bed", "_teflon_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])
예제 #9
0
def main():
    """Run TEFLoN and record the outcome in the status log.

    Inputs and parameters are read from the ``snakemake`` object. If the
    status file check does not pass, the first output is ``touch``ed and
    nothing else happens. Otherwise TEFLoN is run; on success ``COMPLETED``
    is written to the status log, and on any exception the traceback is
    printed to stderr and appended to the log file, ``FAILED`` is written to
    the status log and the first output is ``touch``ed.
    """
    mccutils.log("teflon", "Running TEFLoN")

    inputs = snakemake.input
    params = snakemake.params

    # Several inputs are read but not referenced again in this function.
    consensus = inputs.consensus
    reference_genome = inputs.reference_genome
    ref_bed = inputs.ref_bed
    teflon_taxonomy = inputs.teflon_taxonomy
    bam = inputs.bam

    threads = snakemake.threads
    out_dir = params.out_dir
    script_dir = params.script_dir
    log = params.log
    status_log = params.status_log

    if not mccutils.check_status_file(status_log):
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        sample_table = make_sample_table(out_dir, bam)
        run_teflon(
            script_dir,
            out_dir,
            sample_table,
            threads=threads,
            log=log,
            quality_threshold=config.PARAMS['-q'],
            stdev=config.PARAMS['-sd'],
            cov=config.PARAMS['-cov'],
            te_support1=config.PARAMS['-n1'],
            te_support2=config.PARAMS['-n2'],
            read_count_lower_threshold=config.PARAMS['-lt'],
            read_count_higher_threshold=config.PARAMS['-ht'],
        )

        mccutils.check_file_exists(snakemake.output[0])
        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")

    except Exception:
        # Record the failure instead of propagating it, and still produce
        # the expected output path.
        trace_text = traceback.format_exc()
        print(trace_text, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(trace_text, file=log_handle)
        mccutils.log("teflon", "teflon run failed")
        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])
예제 #10
0
def main():
    """Convert PopoolationTE predictions into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. Insertions
    are read only when the status file check passes. With no insertions (or a
    failed check) the two BED paths are only ``touch``ed and no VCF is
    written.
    """
    mccutils.log("popoolationte", "processing PopoolationTE results")

    inputs = snakemake.input
    params = snakemake.params

    popoolationte_out = inputs.popoolationte_out
    genome_fasta = inputs.ref

    out_dir = params.out_dir
    sample_name = params.sample_name
    log = params.log  # not used below
    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log

    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = read_insertions(
            popoolationte_out,
            sample_name,
            chromosomes,
            require_both_end_support=config.PARAMS["require_both_end_support"],
            percent_read_support_threshold=config.PARAMS[
                "percent_read_support_threshold"],
        )

    if len(insertions) >= 1:
        redundant = output.make_redundant_bed(
            insertions, sample_name, out_dir, method="popoolationte")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="popoolationte")
        output.write_vcf(
            nonredundant, genome_fasta, sample_name, "popoolationte", out_dir)
    else:
        for bed_suffix in ("_popoolationte_redundant.bed",
                           "_popoolationte_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])

    mccutils.log("popoolationte", "PopoolationTE postprocessing complete")
예제 #11
0
def main():
    """Convert TEMP insertion/absence summaries into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. When the
    status file check passes, insertions from the insertion summary are
    combined with the reference TEs returned by ``get_non_absent_ref_tes``
    and then filtered. With no insertions left (or a failed check) the two
    BED paths are only ``touch``ed and no VCF is written.
    """
    mccutils.log("temp", "running TEMP post processing")

    inputs = snakemake.input
    params = snakemake.params

    insert_summary = inputs.insert_summary
    absence_summary = inputs.absence_summary
    te_gff = inputs.te_gff
    reference_fasta = inputs.reference_fasta
    log = params.log
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    out_dir = params.out_dir
    status_log = params.status_log

    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = read_insertion_summary(insert_summary, sample_name)
        absence_bed = make_absence_bed(absence_summary, sample_name, out_dir)
        insertions += get_non_absent_ref_tes(
            te_gff, absence_bed, sample_name, out_dir, log)
        # NOTE: `frequency_theshold` is the keyword this call has always
        # used; it is kept verbatim because the callee is not visible here.
        insertions = filter_insertions(
            insertions,
            chromosomes,
            acceptable_classes=config.PARAMS[
                "acceptable_insertion_support_classes"],
            frequency_theshold=config.PARAMS["frequency_threshold"],
        )

    if len(insertions) > 0:
        redundant = output.make_redundant_bed(
            insertions, sample_name, out_dir, method="temp")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="temp")
        output.write_vcf(
            nonredundant, reference_fasta, sample_name, "temp", out_dir)
    else:
        for bed_suffix in ("_temp_redundant.bed", "_temp_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])

    mccutils.log("temp", "TEMP postprocessing complete")
예제 #12
0
def main():
    """Convert TEMP2 insertion/absence output into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. When the
    status file check passes, insertions read from the insertion BED are
    combined with the reference TEs returned by ``get_non_absent_ref_tes``.
    With no insertions (or a failed check) the two BED paths are only
    ``touch``ed and no VCF is written.
    """
    mccutils.log("temp2", "running TEMP2 post processing")

    inputs = snakemake.input
    params = snakemake.params

    insert_bed = inputs.insert_bed
    absence_summary = inputs.absence_summary
    te_gff = inputs.te_gff
    reference_fasta = inputs.reference_fasta
    log = params.log
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    out_dir = params.out_dir
    status_log = params.status_log

    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = read_insertions(
            insert_bed, sample_name, chromosomes, config)
        absence_bed = make_absence_bed(absence_summary, sample_name, out_dir)
        insertions += get_non_absent_ref_tes(
            te_gff, absence_bed, sample_name, chromosomes, out_dir, log)

    if len(insertions) > 0:
        redundant = output.make_redundant_bed(
            insertions, sample_name, out_dir, method="temp2")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="temp2")
        output.write_vcf(
            nonredundant, reference_fasta, sample_name, "temp2", out_dir)
    else:
        for bed_suffix in ("_temp2_redundant.bed", "_temp2_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])

    mccutils.log("temp2", "TEMP2 postprocessing complete")
예제 #13
0
def main():
    """Convert RelocaTE GFF output into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. When the
    status file check passes, insertions are read from the GFF and passed
    through ``set_ref_orientations``. With no insertions (or a failed check)
    the two BED paths are only ``touch``ed and no VCF is written.
    """
    inputs = snakemake.input
    params = snakemake.params

    relocate_gff = inputs.relocate_gff
    te_gff = inputs.te_gff
    reference_fasta = inputs.reference_fasta

    out_dir = params.out_dir
    log = params.log  # not used below
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log

    mccutils.log("relocate","processing RelocaTE results")

    insertions = []
    if mccutils.check_status_file(status_log):
        insertions = get_insertions(
            relocate_gff,
            sample_name,
            chromosomes,
            ref_l_threshold=config.PARAMS["ref_left_threshold"],
            ref_r_threshold=config.PARAMS["ref_right_threshold"],
            nonref_l_threshold=config.PARAMS["nonref_left_threshold"],
            nonref_r_threshold=config.PARAMS["nonref_right_threshold"],
        )
        insertions = set_ref_orientations(insertions, te_gff)

    if len(insertions) >= 1:
        redundant = output.make_redundant_bed(insertions, sample_name, out_dir, method="relocate")
        nonredundant = output.make_nonredundant_bed(redundant, sample_name, out_dir, method="relocate")
        output.write_vcf(nonredundant, reference_fasta, sample_name, "relocate", out_dir)
    else:
        for bed_suffix in ("_relocate_redundant.bed", "_relocate_nonredundant.bed"):
            mccutils.run_command(["touch",out_dir+"/"+sample_name+bed_suffix])

    mccutils.log("relocate","RelocaTE postprocessing complete")
예제 #14
0
def main():
    """Convert TEPID insertion/deletion calls into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. When the
    status file check passes, non-reference insertions and the reference TEs
    returned by ``get_non_absent_ref_tes`` are combined. With no insertions
    (or a failed check) the two BED paths are only ``touch``ed and no VCF is
    written.
    """
    inputs = snakemake.input
    params = snakemake.params

    insertions_bed = inputs.insertions_bed
    deletions_bed = inputs.deletions_bed
    insertions_support = inputs.insertions_support
    deletions_support = inputs.deletions_support
    te_gff = inputs.te_gff
    te_taxonomy = inputs.te_taxonomy
    reference_fasta = inputs.reference_fasta

    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log
    sample_name = params.sample_name
    out_dir = params.out_dir

    # The status check runs before the log call, as it always has.
    prev_steps_succeeded = mccutils.check_status_file(status_log)

    mccutils.log("tepid", "running TEPID post processing")

    insertions = []
    if prev_steps_succeeded:
        te_to_family = get_te_family_map(te_taxonomy)
        te_pos_to_family = get_te_pos_family_map(te_gff, te_to_family)

        insertions = read_insertions(
            insertions_bed, te_to_family, sample_name, te_pos_to_family,
            chromosomes, reference=False)
        insertions = add_support(
            insertions, insertions_support,
            threshold=config.READ_SUPPORT_THRESHOLD)

        deletions = read_insertions(
            deletions_bed, te_to_family, sample_name, te_pos_to_family,
            chromosomes, reference=True)
        deletions = add_support(
            deletions, deletions_support,
            threshold=config.READ_SUPPORT_THRESHOLD)

        insertions += get_non_absent_ref_tes(
            deletions, te_gff, te_to_family, sample_name)

    if len(insertions) > 0:
        redundant = output.make_redundant_bed(
            insertions, sample_name, out_dir, method="tepid")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="tepid")
        output.write_vcf(
            nonredundant, reference_fasta, sample_name, "tepid", out_dir)
    else:
        for bed_suffix in ("_tepid_redundant.bed", "_tepid_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])

    mccutils.log("tepid", "TEPID post processing complete")
예제 #15
0
def main():
    """Run the PopoolationTE2 steps and record the outcome in the status log.

    Inputs and parameters are read from the ``snakemake`` object. If the
    status file check does not pass, the first output is ``touch``ed and
    nothing else happens. Otherwise the steps run in sequence: ppileup,
    subsample, signatures, strand, frequency, pairup. On success
    ``COMPLETED`` is written to the status log. On any exception the
    traceback is printed to stderr and appended to the log file, ``FAILED``
    is written to the status log and the first output is ``touch``ed.
    """
    mccutils.log("popoolationte2", "running PopoolationTE2")

    inputs = snakemake.input
    params = snakemake.params

    ref_fasta = inputs.ref_fasta
    bam = inputs.bam
    taxonomy = inputs.taxonomy
    jar = params.jar
    out_dir = params.out_dir
    sample_name = params.sample_name  # not used below
    log = params.log
    status_log = params.status_log

    if not mccutils.check_status_file(status_log):
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        mccutils.mkdir(out_dir + "/tmp")
        taxonomy = format_taxonomy(taxonomy, out_dir)

        ppileup = popoolationte2_ppileup(
            jar, config.PARAMS["ppileup"], bam, taxonomy, out_dir, log=log)
        ppileup = popoolationte2_subsample(
            jar, config.PARAMS["subsampleppileup"], ppileup, out_dir, log=log)

        signatures = popoolationte2_signatures(
            jar, config.PARAMS["identifySignatures"], ppileup, out_dir,
            log=log)
        signatures = popoolationte2_strand(
            jar, config.PARAMS["updateStrand"], signatures, bam, taxonomy,
            out_dir, log=log)
        signatures = popoolationte2_frequency(
            jar, ppileup, signatures, out_dir, log=log)

        te_insertions = popoolationte2_pairup(
            jar, config.PARAMS["pairupSignatures"], signatures, ref_fasta,
            taxonomy, out_dir, log=log)

        mccutils.remove(out_dir + "/tmp")
        mccutils.check_file_exists(snakemake.output[0])

        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")
        mccutils.log("popoolationte2", "popoolationte2 run complete")

    except Exception:
        # Record the failure instead of propagating it, and still produce
        # the expected output path.
        trace_text = traceback.format_exc()
        print(trace_text, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(trace_text, file=log_handle)
        mccutils.log("popoolationte2", "popoolationte2 run failed")
        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])
예제 #16
0
def main():
    """Convert RelocaTE2 reference/non-reference GFFs into BED files and a VCF.

    Inputs and parameters are read from the ``snakemake`` object. When the
    status file check passes, reference and non-reference insertions are read
    with their own thresholds, the reference ones go through
    ``fix_ref_te_names``, and the two lists are concatenated. With no
    insertions (or a failed check) the two BED paths are only ``touch``ed and
    no VCF is written.
    """
    inputs = snakemake.input
    params = snakemake.params

    nonref_gff = inputs.nonref_gff
    ref_gff = inputs.ref_gff
    rm_out = inputs.rm_out
    reference_fasta = inputs.reference_fasta

    log = params.log  # not used below
    out_dir = params.out_dir
    sample_name = params.sample_name
    chromosomes = params.chromosomes.split(",")
    status_log = params.status_log

    # The status check runs before the log call, as it always has.
    prev_steps_succeeded = mccutils.check_status_file(status_log)

    mccutils.log("relocate2", "processing RelocaTE2 results")

    all_insertions = []
    if prev_steps_succeeded:
        ref_insertions = get_insertions(
            ref_gff,
            sample_name,
            chromosomes,
            insert_type="ref",
            l_support_threshold=config.PARAMS["ref_left_support_threshold"],
            r_support_threshold=config.PARAMS["ref_right_support_threshold"],
            l_junction_threshold=config.PARAMS["ref_left_junction_threshold"],
            r_junction_threshold=config.PARAMS["ref_right_junction_threshold"],
        )

        nonref_insertions = get_insertions(
            nonref_gff,
            sample_name,
            chromosomes,
            insert_type="nonref",
            l_support_threshold=config.PARAMS["nonref_left_support_threshold"],
            r_support_threshold=config.PARAMS[
                "nonref_right_support_threshold"],
            l_junction_threshold=config.PARAMS[
                "nonref_left_junction_threshold"],
            r_junction_threshold=config.PARAMS[
                "nonref_right_junction_threshold"],
        )

        ref_insertions = fix_ref_te_names(ref_insertions, rm_out, sample_name)
        all_insertions = ref_insertions + nonref_insertions

    if len(all_insertions) >= 1:
        redundant = output.make_redundant_bed(
            all_insertions, sample_name, out_dir, method="relocate2")
        nonredundant = output.make_nonredundant_bed(
            redundant, sample_name, out_dir, method="relocate2")
        output.write_vcf(
            nonredundant, reference_fasta, sample_name, "relocate2", out_dir)
    else:
        for bed_suffix in ("_relocate2_redundant.bed",
                           "_relocate2_nonredundant.bed"):
            mccutils.run_command(
                ["touch", out_dir + "/" + sample_name + bed_suffix])

    mccutils.log("relocate2", "RelocaTE2 postprocessing complete")
예제 #17
0
def main():
    """Run the PopoolationTE workflow and record the outcome in the status log.

    Inputs and parameters are read from the ``snakemake`` object, and the
    input paths are appended to the log file first. If the status file check
    does not pass, the first output is ``touch``ed and nothing else happens.
    Otherwise the read length and median insert size are computed, the known
    TE file is built and the workflow is run. On success ``COMPLETED`` is
    written to the status log. On any exception the traceback is printed to
    stderr and appended to the log file, the failure is logged, ``FAILED`` is
    written to the status log and the first output is ``touch``ed.
    """
    mccutils.log("popoolationte", "running PopoolationTE")
    ref_fasta = snakemake.input.ref_fasta
    taxonomy = snakemake.input.taxonomy
    te_gff = snakemake.input.te_gff
    fq1 = snakemake.input.fq1
    fq2 = snakemake.input.fq2
    sam = snakemake.input.sam
    log = snakemake.params.log
    status_log = snakemake.params.status_log

    with open(log, "a") as log_handle:
        log_handle.write("reference fasta: " + ref_fasta + "\n")
        log_handle.write("Taxonomy TSV: " + taxonomy + "\n")
        log_handle.write("TE GFF: " + te_gff + "\n")
        log_handle.write("fastq1: " + fq1 + '\n')
        log_handle.write("fastq2: " + fq2 + "\n")
        log_handle.write("SAM: " + sam + "\n")

    out_dir = snakemake.params.out_dir
    script_dir = snakemake.params.script_dir

    if not mccutils.check_status_file(status_log):
        mccutils.run_command(["touch", snakemake.output[0]])
        return

    try:
        mccutils.log("popoolationte", "getting read length")
        read_length = get_read_length(fq1, fq2)
        mccutils.log("popoolationte", "calculating median insert size")
        median_insert_size = get_median_insert_size(sam)
        max_dist = int(median_insert_size * 3) + read_length
        mccutils.log("popoolationte",
                     "converting TE gff to PoPoolationTE known TE file")
        known_inserts = make_known_insert_file(te_gff, out_dir)
        mccutils.log("popoolationte",
                     "running the PoPoolationTE workflow scripts")
        run_popoolationte(sam,
                          ref_fasta,
                          taxonomy,
                          read_length,
                          median_insert_size,
                          max_dist,
                          known_inserts,
                          script_dir,
                          out_dir,
                          config.PARAMS,
                          log=log)

        mccutils.check_file_exists(snakemake.output[0])

        with open(status_log, "w") as status_handle:
            status_handle.write("COMPLETED\n")
        mccutils.log("popoolationte", "popoolationte run complete")

    except Exception:
        # Record the failure instead of propagating it, and still produce
        # the expected output path.
        trace_text = traceback.format_exc()
        print(trace_text, file=sys.stderr)
        with open(log, "a") as log_handle:
            print(trace_text, file=log_handle)
        # Announce the failure through the run log as well, not only via the
        # traceback and status file.
        mccutils.log("popoolationte", "popoolationte run failed")
        with open(status_log, "w") as status_handle:
            status_handle.write("FAILED\n")

        mccutils.run_command(["touch", snakemake.output[0]])