def write_sub_bam(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, out_dir,
                  total_modify_reads_file, total_delete_reads_file,
                  total_add_reads_file, process):
    """Split the temporary used/exclude BAMs per chromosome in parallel, then
    merge, sort and index the results.

    Args:
        chrom_list: chromosomes to process (one worker task each).
        used_bam_file_tmp: temporary BAM of candidate reads (indexed).
        exclude_bam_file_tmp: temporary BAM of reads outside the regions.
        out_dir: directory receiving the per-chromosome and merged BAMs.
        total_modify_reads_file/total_delete_reads_file/total_add_reads_file:
            files describing which reads each worker must edit/drop/add.
        process: worker-pool size (coerced with int()).

    Returns:
        (used_sort_bam_file, exclude_bam_file): the sorted+indexed "used" BAM
        and the merged "exclude" BAM.
    """
    write_bam_pool = Pool(int(process))
    exclude_bam_list = [exclude_bam_file_tmp]
    usedBamList = []
    # Keep the AsyncResult handles: apply_async otherwise swallows any
    # exception raised inside a worker, leaving truncated/missing BAMs to
    # fail much later at the merge step.
    async_results = []
    for chrom in chrom_list:
        excludeBam_chr = "%s/exclude_%s.bam" % (out_dir, chrom)
        exclude_bam_list.append(excludeBam_chr)
        usedBam_chr = "%s/used_%s.bam" % (out_dir, chrom)
        usedBamList.append(usedBam_chr)

        async_results.append(write_bam_pool.apply_async(
            write_bam_byChr,
            args=(used_bam_file_tmp, chrom, excludeBam_chr, usedBam_chr,
                  total_modify_reads_file, total_delete_reads_file,
                  total_add_reads_file)))
    write_bam_pool.close()
    write_bam_pool.join()
    for res in async_results:
        # Re-raise the first worker exception, if any, in the parent process.
        res.get()

    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    bamMerge(exclude_bam_list, exclude_bam_file)
    used_bam_file = os.path.join(out_dir, "used.bam")
    if len(usedBamList) != 1:
        bamMerge(usedBamList, used_bam_file)
    else:
        # A single per-chromosome BAM needs no merge step.
        used_bam_file = usedBamList[0]

    bamSort(used_bam_file, os.path.join(out_dir, "used.sort"))
    used_sort_bam_file = os.path.join(out_dir, "used.sort.bam")
    bamIndex(used_sort_bam_file)
    return used_sort_bam_file, exclude_bam_file
def get_reads_by_region(bam_file, sv_list, out_dir):
    """Partition bam_file by the SV regions.

    Writes a BED of the regions touched by sv_list, splits the BAM into a
    temporary "used" BAM (reads overlapping those regions, indexed) and a
    temporary "exclude" BAM (everything else), and returns the affected
    chromosomes together with both temporary BAM paths.
    """
    bed_path = os.path.join(out_dir, "consider_region.bed")
    chroms = write_region_bed(bed_path, sv_list)

    exclude_tmp = os.path.join(out_dir, "exclude_tmp.bam")
    used_tmp = os.path.join(out_dir, "used_tmp.bam")
    getRegionReads(bam_file, bed_path, used_tmp, exclude_tmp)

    # The used BAM is fetched per-chromosome downstream, so it needs an index.
    bamIndex(used_tmp)
    return chroms, used_tmp, exclude_tmp
# Example #3
# 0
def main(run_args):
    start_time = time.asctime(time.localtime(time.time()))
    # print start_time
    temp_out_dir = os.path.join(run_args.outdir, "tempDir")
    os.system("mkdir -p %s" % temp_out_dir)
    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = InvalidLog(invalid_log_file)

    # step1: deal with mutfile and get haplotypes
    print "step1: deal with mutfile and get haplotypes"
    haplotype_list = get_haplotypes(run_args.bamfile, run_args.reffasta,
                                    run_args.mutfile, int(run_args.haplosize),
                                    float(run_args.snpfrac), invalid_log)

    # step2: deal haplotypes and get total_chosen_reads, total_chosen_reads_muts
    print "step2: deal haplotypes and get total_chosen_reads, total_chosen_reads_muts"
    success_list_file = os.path.join(run_args.outdir, 'success_list.txt')
    total_chosen_reads, total_chosen_reads_muts = deal_haplotype_multi(
        run_args.bamfile, haplotype_list, temp_out_dir, run_args.reffasta,
        int(run_args.process), int(run_args.mindepth),
        int(run_args.minmutreads), int(run_args.minmapq),
        float(run_args.diffcover), run_args.single, run_args.multmapfilter,
        run_args.aligner, run_args.alignerIndex, invalid_log,
        success_list_file)
    invalid_log.close()
    if len(total_chosen_reads) == 0:
        print "Warning: No reads to deal with of all these sv, checkout your sv file"
        return

    # step3: modify the reads in total_chosen_reads itself
    print "step3: modify the reads in total_chosen_reads itself"
    reads_modify(total_chosen_reads, total_chosen_reads_muts,
                 run_args.reffasta, int(run_args.process))

    # step4: write edited reads to edited file and exclude reads to exclude file ,than remap edited file to reference
    print "step4: write edited reads to edited file and exclude reads to exclude file ,than remap edited file to reference"
    edit_remap_bam_file, exclude_bam_file = reads_replace(
        run_args.bamfile, total_chosen_reads, run_args.seqer,
        run_args.floworder, run_args.libkey, run_args.barcode, run_args.tag,
        temp_out_dir, run_args.aligner, run_args.alignerIndex, run_args.single)

    # step5: merge remap.edit.bam and exclude exclude.bam and sort
    print "step5: merge remap.edit.bam and exclude exclude.bam and sort"
    # edit_remap_bam_file, exclude_bam_file = os.path.join(temp_out_dir, "edit.remap.sort.bam"), os.path.join(
    #     temp_out_dir, "exclude.bam")
    out_bam_file = os.path.join(run_args.outdir, "edit.sorted.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)
    bamIndex(out_bam_file)
    end_time = time.asctime(time.localtime(time.time()))
    # speed_time = end_time - start_time
    print "Edit Bam is completed! Result see %s and valid mutation see %s. Invalid mutation can't be spike in see %s." % (
        out_bam_file, success_list_file, invalid_log_file)
# Example #4
# 0
def main(run_args):
    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = log(invalid_log_file)
    temp_out_dir = os.path.join(run_args.out_dir, "tempDir")
    os.system("mkdir -p %s" % temp_out_dir)

    # step1: deal with mutfile and get haplotypes
    haplotype_list = get_haplotypes(run_args.bamfile, run_args.reffasta, run_args.mutfile, int(run_args.haplosize),
                                    float(run_args.snpfrac), invalid_log)

    # step2: deal haplotypes and get total_chosen_reads, total_chosen_reads_muts
    total_chosen_reads, total_chosen_reads_muts = deal_haplotype_multi(run_args.bam_file, haplotype_list,
                                                                       temp_out_dir, run_args.reffasta,
                                                                       int(run_args.process), int(run_args.mindepth),
                                                                       int(run_args.minmutreads), int(run_args.minmapq),
                                                                       float(run_args.diffcover), run_args.single,
                                                                       run_args.is_multmapfilter, run_args.aligner,
                                                                       run_args.alignerIndex, invalid_log)

    # step3: modify the reads in total_chosen_reads itself
    reads_modify(total_chosen_reads, total_chosen_reads_muts, run_args.reffasta, int(run_args.prceoss))

    # step4: write edited reads to edited file and exclude reads to exclude file ,than remap edited file to reference
    edit_remap_bam_file, exclude_bam_file = reads_replace(run_args.bam_file, total_chosen_reads, run_args.seqer,
                                                          run_args.floworder, run_args.lib_key, run_args.barcode,
                                                          run_args.tag, temp_out_dir, run_args.aligner,
                                                          run_args.aligner_index, run_args.single)

    # step5: merge remap.edit.bam and exclude exclude.bam and sort
    bamIndex(exclude_bam_file)
    out_bam_file = os.path.join(temp_out_dir, "edit_exclude.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)

    out_sort_bam_file = os.path.join(run_args.outdir, "edit.sort.bam")
    out_sort_bam_file_prefix = os.path.join(run_args.outdir, "edit.sort")
    bamSort(out_bam_file, out_sort_bam_file_prefix)
    bamIndex(out_sort_bam_file)

    print "Edit Bam is completed! Result see %s and invalid mutation can't be spike in see %s." % (
        out_sort_bam_file, invalid_log_file)
# Example #5
# 0
def write_sub_bam(chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, out_dir,
                  total_modify_readname_list, total_delete_readname_list,
                  total_add_readname_list, process):
    """Split the temporary used/exclude BAMs per chromosome in parallel,
    merge/sort/index the "used" reads, and load them into a lookup dict.

    Args:
        chrom_list: chromosomes to process (one worker task each).
        used_bam_file_tmp: temporary BAM of candidate reads (indexed).
        exclude_bam_file_tmp: temporary BAM of reads outside the regions.
        out_dir: directory receiving the per-chromosome and merged BAMs.
        total_modify_readname_list/total_delete_readname_list/
        total_add_readname_list: read names each worker must edit/drop/add.
        process: worker-pool size (coerced with int()).

    Returns:
        (used_reads, used_bam_file, exclude_bam_file) where used_reads maps
        getKeyName(read) -> pysam read for every read in the sorted used BAM.
    """
    write_bam_pool = Pool(int(process))
    exclude_bam_list = [exclude_bam_file_tmp]
    usedBamList = []
    # Keep the AsyncResult handles: apply_async otherwise swallows any
    # exception raised inside a worker, leaving truncated/missing BAMs to
    # fail much later at the merge step.
    async_results = []
    for chrom in chrom_list:
        excludeBam_chr = "%s/exclude_%s.bam" % (out_dir, chrom)
        exclude_bam_list.append(excludeBam_chr)
        usedBam_chr = "%s/used_%s.bam" % (out_dir, chrom)
        usedBamList.append(usedBam_chr)

        async_results.append(write_bam_pool.apply_async(
            write_bam_byChr,
            args=(used_bam_file_tmp, chrom, excludeBam_chr, usedBam_chr,
                  total_modify_readname_list, total_delete_readname_list,
                  total_add_readname_list)))
    write_bam_pool.close()
    write_bam_pool.join()
    for res in async_results:
        # Re-raise the first worker exception, if any, in the parent process.
        res.get()

    exclude_bam_file = os.path.join(out_dir, "exclude.bam")
    bamMerge(exclude_bam_list, exclude_bam_file)
    used_bam_file = os.path.join(out_dir, "used.bam")
    if len(usedBamList) != 1:
        bamMerge(usedBamList, used_bam_file)
    else:
        # A single per-chromosome BAM needs no merge step.
        used_bam_file = usedBamList[0]

    bamSort(used_bam_file, os.path.join(out_dir, "used.sort"))
    used_sort_bam_file = os.path.join(out_dir, "used.sort.bam")
    bamIndex(used_sort_bam_file)

    # Cache every "used" read in memory keyed by getKeyName so callers can
    # look up the original alignment of each edited read.
    used_bam = pysam.AlignmentFile(used_sort_bam_file, 'rb')
    used_reads = {}
    for read in used_bam.fetch():
        keyname = getKeyName(read)
        used_reads[keyname] = read
    used_bam.close()
    return used_reads, used_bam_file, exclude_bam_file
# Example #6
# 0
def main(run_args):
    """Spike the structural variants from run_args.svfile into
    run_args.bamfile and write the edited, sorted, indexed BAM to
    <outdir>/edit.sort.bam.

    Pipeline: validate SVs -> estimate insert size -> compute per-SV read
    edits -> merge edited read-name lists -> split the BAM by region ->
    remap the edited reads -> merge with excluded reads, sort and index.
    run_args.debug enables progress printing of intermediate sizes/paths.
    """
    start_time = time.asctime(time.localtime(time.time()))
    print start_time
    if not os.path.exists(run_args.outdir):
        os.mkdir(run_args.outdir)
    # Mutations that cannot be spiked in are reported here.
    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = InvalidLog(invalid_log_file)

    run_log_file = os.path.join(run_args.outdir, 'run.log')
    run_log = RunLog(run_log_file)

    # All intermediate BAMs/BEDs go under <outdir>/tempDir.
    temp_out_dir = os.path.join(run_args.outdir, "tempDir")
    if not os.path.exists(temp_out_dir):
        os.mkdir(temp_out_dir)

    # step0: prepare sv list (invalid records are written to invalid_log)
    sv_list = check_sv_file(run_args.svfile, run_args.reffasta, invalid_log)
    if run_args.debug:
        print len(sv_list)
    if not sv_list:
        exit("no sv list to deal with")

    # step1: get insert size of paired reads
    insert_size = get_insertSize_range(run_args.bamfile, run_args.readlength,
                                       run_args.single, run_args.debug)
    if run_args.debug:
        print insert_size

    # step2: deal with sv -- compute which reads to modify/delete/add
    total_modify_reads, total_delete_reads, total_add_reads = deal_sv(
        run_args.bamfile, run_args.reffasta, sv_list, run_args.single,
        int(run_args.minmapq), run_args.multmapfilter, int(run_args.mindepth),
        int(run_args.minmutreads), int(run_args.readlength), temp_out_dir,
        insert_size, invalid_log, run_log)
    # exit
    total_deal_reads_num = len(total_modify_reads) + len(
        total_delete_reads) + len(total_add_reads)
    if total_deal_reads_num == 0:
        # Nothing to edit: every SV was rejected or matched no reads.
        # run_log.info("No reads to deal with of all these sv, check out your sv file")
        print "Warning: No reads to deal with of all these sv, checkout your sv file"
        return
    if run_args.debug:
        print len(total_modify_reads), len(total_delete_reads), len(
            total_add_reads)

    # step3: merge edit reads into flat read-name lists for the BAM splitter
    total_modify_readname_list, total_delete_readname_list, total_add_readname_list = merge_edit_reads(
        total_modify_reads, total_add_reads, total_delete_reads)

    if run_args.debug:
        print "list num: ", len(total_modify_readname_list), len(
            total_delete_readname_list), len(total_add_readname_list)

    # step4: get reads by region bed and write bam file
    chrom_list, used_bam_file_tmp, exclude_bam_file_tmp = get_reads_by_region(
        run_args.bamfile, sv_list, temp_out_dir)

    # write reads which may probably used to used.bam and reads should not be used to exclude.bam
    used_reads, used_bam_file, exclude_bam_file = write_sub_bam(
        chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, temp_out_dir,
        total_modify_readname_list, total_delete_readname_list,
        total_add_readname_list, int(run_args.process))
    if run_args.debug:
        print "used & exclude bam:", used_bam_file, exclude_bam_file

    # step5: merge edited reads and remap to new bam, consider about the tag, RG, life reads
    edit_remap_bam_file = merge_edit_bam(
        run_args.bamfile, temp_out_dir, run_args.single, total_modify_reads,
        total_add_reads, used_reads, run_args.seqer, run_args.aligner,
        run_args.aligner_index, run_args.flow_order, run_args.lib_key,
        run_args.barcode, run_args.tag)

    if run_args.debug:
        print "edit remap bam:", edit_remap_bam_file

    # step6: merge remapped edited reads with the excluded reads, then
    # sort and index the final BAM
    out_bam_file = os.path.join(temp_out_dir, "edit_exclude.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)
    bamIndex(out_bam_file)

    out_sort_bam_file = os.path.join(run_args.outdir, "edit.sort.bam")
    out_sort_bam_file_prefix = os.path.join(run_args.outdir, "edit.sort")
    bamSort(out_bam_file, out_sort_bam_file_prefix)
    bamIndex(out_sort_bam_file)

    end_time = time.asctime(time.localtime(time.time()))
    print end_time
# Example #7
# 0
def main(run_args):
    """Spike the structural variants from run_args.svfile into
    run_args.bamfile and write the edited, sorted BAM to
    <outdir>/edit.sorted.bam.

    This variant passes read descriptions between steps via files
    (total_*_reads_file) rather than in-memory dicts.  Invalid mutations go
    to invalid_mutation.txt; successfully spiked ones to success_list.txt.
    """
    start_time = time.asctime(time.localtime(time.time()))
    # print start_time
    if not os.path.exists(run_args.outdir):
        os.mkdir(run_args.outdir)
    invalid_log_file = os.path.join(run_args.outdir, 'invalid_mutation.txt')
    invalid_log = InvalidLog(invalid_log_file)

    run_log_file = os.path.join(run_args.outdir, 'run.log')
    run_log = RunLog(run_log_file)

    # All intermediate BAMs/BEDs go under <outdir>/tempDir.
    temp_out_dir = os.path.join(run_args.outdir, "tempDir")
    if not os.path.exists(temp_out_dir):
        os.mkdir(temp_out_dir)

    # step0: prepare sv list (invalid records are written to invalid_log)
    sv_list = check_sv_file(run_args.svfile, run_args.reffasta, invalid_log)

    if not sv_list:
        exit("no sv list to deal with")

    # step1: get insert size of paired reads
    print "step1: get insert size of paired reads"
    insert_size = get_insertSize_range(run_args.bamfile, run_args.readlength,
                                       run_args.single)

    # step2: deal with sv and get total edited reads
    print "step2: deal with sv and get total edited reads"
    success_file = os.path.join(run_args.outdir, 'success_list.txt')
    total_modify_reads_file, total_delete_reads_file, total_add_reads_file, total_modify_list, total_delete_list, total_add_list = deal_sv(
        run_args.bamfile, run_args.reffasta, sv_list, run_args.single,
        int(run_args.minmapq), run_args.multmapfilter, int(run_args.mindepth),
        int(run_args.minmutreads), int(run_args.readlength), temp_out_dir,
        insert_size, invalid_log, run_log, success_file)
    invalid_log.close()

    # step3: get reads by region bed and write bam file
    print "step3: get reads by region bed and write bam file"
    chrom_list, used_bam_file_tmp, exclude_bam_file_tmp = get_reads_by_region(
        run_args.bamfile, sv_list, temp_out_dir)

    # write reads which may probably used to used.bam and reads should not be used to exclude.bam
    used_bam_file, exclude_bam_file = write_sub_bam(
        chrom_list, used_bam_file_tmp, exclude_bam_file_tmp, temp_out_dir,
        total_modify_reads_file, total_delete_reads_file, total_add_reads_file,
        int(run_args.process))

    # step4: merge edited reads and remap to new bam, consider about the tag, RG, life reads
    print "step4: merge edited reads and remap to new bam, consider about the tag, RG, life reads"
    edit_remap_bam_file = merge_edit_bam(
        run_args.bamfile, temp_out_dir, run_args.single,
        total_modify_reads_file, total_add_reads_file, used_bam_file,
        total_modify_list, total_add_list, run_args.seqer, run_args.aligner,
        run_args.alignerIndex, run_args.floworder, run_args.libkey,
        run_args.barcode, run_args.tag)

    # step5: remapped edit reads and merge
    print "step5: remapped edit reads and merge"
    out_bam_file = os.path.join(run_args.outdir, "edit.sorted.bam")
    bamMerge([edit_remap_bam_file, exclude_bam_file], out_bam_file)
    bamIndex(out_bam_file)

    # start_time/end_time are kept for the (commented-out) timing prints.
    end_time = time.asctime(time.localtime(time.time()))
    # print end_time
    # speed_time = end_time - start_time
    print "Edit Bam is completed! Result see %s and valid mutation see %s. Invalid mutation can't be spike in see %s." % (
        out_bam_file, success_file, invalid_log_file)
# Example #8
# 0
def merge_edit_bam(bam_file, out_dir, is_single, total_modify_reads,
                   total_add_reads, used_reads, seqer, aligner, aligner_index,
                   flow_order, lib_key, barcode, tag):
    """Write all edited reads to <out_dir>/edit.bam, remap them with the
    chosen aligner, and return the path of the remapped BAM (with read
    groups re-attached for paired-end data).

    Each edited read is cloned (deepcopy) from its original alignment looked
    up in ``used_reads`` -- assumed to map getKeyName(read) -> original pysam
    read, TODO confirm against write_sub_bam -- given a fresh name from
    get_new_readname(), and logged to readname_convert.txt so new names can
    be traced back to the originals.
    """
    bam = pysam.AlignmentFile(bam_file, 'rb')
    edit_bam_file = os.path.join(out_dir, "edit.bam")
    # edit.bam reuses the input BAM's header so references/RGs stay valid.
    edit_bam = pysam.AlignmentFile(edit_bam_file, 'wb', template=bam)
    readname_convert_file = os.path.join(out_dir, "readname_convert.txt")
    fout_convert = open(readname_convert_file, 'w')
    # new read name -> {strand: edited read}; consumed by bamAddRG below.
    edit_bam_reads = {}
    if is_single:
        # Single-end: each "pair" carries a single read in slot 0.
        for read_pair in total_modify_reads:
            read1 = read_pair[0]
            keyname_read1 = getKeyName(read1)
            orig_read1 = used_reads[keyname_read1]
            # Clone the original alignment, then graft on the edited bases.
            new_read1 = copy.deepcopy(orig_read1)
            new_read1.query_sequence = read1.query_sequence
            new_read1.query_qualities = read1.query_qualities
            # Keep the first ':'-field of the original name (presumably an
            # instrument/run prefix -- TODO confirm) and append a fresh id.
            new_name = read1.query_name.split(
                ":")[0] + ":" + get_new_readname()
            new_read1.query_name = new_name
            if seqer == "life":
                # Life/Ion reads need flow-order/key/barcode fixups.
                new_read1 = deal_life_reads(new_read1, flow_order, lib_key,
                                            barcode)
            if tag:
                new_read1 = add_tag(new_read1)
            edit_bam.write(new_read1)
            # Record new-name -> original-name mapping for traceability.
            fout_convert.write(
                "%s: %s, %s, %s-%s\n" %
                (new_name, orig_read1.query_name, new_read1.is_read1,
                 new_read1.reference_start, new_read1.reference_end))
            strand = getReadStrand(new_read1)
            if new_name not in edit_bam_reads:
                edit_bam_reads[new_name] = dict()
            edit_bam_reads[new_name][strand] = new_read1

        # Added reads: clone as-is (no sequence edits).
        # NOTE(review): unlike the modify loop above, this branch applies
        # neither the seqer == "life" fixup nor add_tag -- confirm intended.
        for read_pair in total_add_reads:
            read1 = read_pair[0]
            keyname_read1 = getKeyName(read1)
            orig_read1 = used_reads[keyname_read1]
            new_read1 = copy.deepcopy(orig_read1)
            new_name = get_new_readname()
            new_read1.query_name = new_name
            edit_bam.write(new_read1)
            fout_convert.write(
                "%s: %s, %s, %s-%s\n" %
                (new_name, orig_read1.query_name, new_read1.is_read1,
                 new_read1.reference_start, new_read1.reference_end))
            strand = getReadStrand(new_read1)
            if new_name not in edit_bam_reads:
                edit_bam_reads[new_name] = dict()
            edit_bam_reads[new_name][strand] = new_read1

    else:
        # Paired-end: both mates are cloned and renamed with the SAME new
        # name so they stay a pair after remapping.
        for read_pair in total_modify_reads + total_add_reads:
            read1 = read_pair[0]
            read2 = read_pair[1]
            keyname_read1 = getKeyName(read1)
            keyname_read2 = getKeyName(read2)
            orig_read1 = used_reads[keyname_read1]
            orig_read2 = used_reads[keyname_read2]
            orig_read1_name = orig_read1.query_name
            orig_read2_name = orig_read2.query_name
            new_read1 = copy.deepcopy(orig_read1)
            new_read2 = copy.deepcopy(orig_read2)
            new_read1.query_sequence = read1.query_sequence
            new_read1.query_qualities = read1.query_qualities
            new_read2.query_sequence = read2.query_sequence
            new_read2.query_qualities = read2.query_qualities
            new_name = get_new_readname()
            new_read1.query_name = new_name
            new_read2.query_name = new_name
            strand1 = getReadStrand(new_read1)
            strand2 = getReadStrand(new_read2)
            if new_name not in edit_bam_reads:
                edit_bam_reads[new_name] = dict()
            # NOTE(review): the reads are stored here BEFORE add_tag runs; if
            # add_tag returns a new object rather than mutating in place, the
            # stored copies lack the tag -- confirm add_tag's semantics.
            edit_bam_reads[new_name][strand1] = new_read1
            edit_bam_reads[new_name][strand2] = new_read2
            if tag:
                new_read1 = add_tag(new_read1)
                new_read2 = add_tag(new_read2)

            fout_convert.write("%s: %s, %s, %s, %s, %s-%s, %s-%s\n" % (
                new_name,
                orig_read1_name,
                orig_read2_name,
                new_read1.is_read1,
                new_read2.is_read2,
                new_read1.reference_start,
                new_read1.reference_end,
                new_read2.reference_start,
                new_read2.reference_end,
            ))
            edit_bam.write(new_read1)
            edit_bam.write(new_read2)
    fout_convert.close()
    edit_bam.close()

    # Remap the edited reads against the reference with the chosen aligner.
    edit_remap_bam_file = os.path.join(out_dir, "edit.remap.bam")
    remap(aligner_index, edit_bam_file, edit_remap_bam_file, aligner,
          is_single)

    if not is_single:
        # Paired-end remapping drops RG info; re-attach it from the cached
        # edited reads and the original BAM's header.
        editRemap = pysam.AlignmentFile(edit_remap_bam_file, 'rb')
        editRemapBam_addRG_File = os.path.join(out_dir, "edit.remap.addRG.bam")
        bamAddRG(editRemap, edit_bam_reads, bam, editRemapBam_addRG_File)
        editRemap.close()
    else:
        editRemapBam_addRG_File = edit_remap_bam_file
    bamIndex(editRemapBam_addRG_File)
    return editRemapBam_addRG_File