Ejemplo n.º 1
0
def implement_cnv(chromosome_event):
    """Build the final gain or loss BAM for one chromosome/event pair.

    The copy number is read from column 7 of the first record of the
    haplotype BED file returned by ``init_file_names``.  Depending on the
    resulting event the function either

    * gain / amp: re-pairs the reads, mutates them, and subsamples the
      merged BAM into ``<finalbams_path>/<CHR>_GAIN.bam``; or
    * loss: mutates the reads, subsamples the merged BAM, diffs it against
      the coordinate-sorted BAM and moves the ``diff_only1_`` result to
      ``<finalbams_path>/<CHR>_LOSS.bam``.  If the sorted input BAM is
      missing, the coordinate-sorted BAM is symlinked there instead.

    Args:
        chromosome_event: string of the form ``"<chromosome>_<event>"``.
            It is split on ``"_"`` and must yield exactly two parts.

    Returns:
        None.  Any exception raised inside the processing section is logged
        and sets the shared ``terminating`` event instead of propagating.
    """
    chrom, event = chromosome_event.split("_")

    logger.debug("___ Bamgineer main engine started ___")
    success = True
    try:
        if not terminating.is_set():
            bamfn, sortbyname, sortbyCoord, bedfn = init_file_names(
                chrom, tmpbams_path, haplotype_path, event)
            bamsortfn = sub('.bam$', '.sorted.bam', bamfn)

            if os.path.isfile(bedfn):
                # Only the first record is needed; the context manager makes
                # sure the handle is released even if parsing fails.
                # NOTE(review): 'rb' is the Python 2 convention for csv; a
                # Python 3 port would need text mode with newline=''.
                with open(bedfn, 'rb') as bedfile:
                    first_row = next(csv.reader(bedfile, delimiter='\t'))
                copy_number = int(first_row[6])

                if not params.GetXY() or (chrom != 'chrX'
                                          and chrom != 'chrY'):
                    # Thresholds used when the chromosome is not handled as
                    # a single-copy sex chromosome.
                    if copy_number == 2:
                        event = 'loh'
                    elif copy_number == 3:
                        event = 'gain'
                    elif copy_number > 3:
                        event = 'amp'
                else:
                    # Single-copy sex chromosome: thresholds shift down by
                    # one.
                    logger.debug("*** handling single sex chromosome for: " +
                                 ntpath.basename(bamsortfn))
                    if copy_number == 1:
                        event = 'loh'
                    elif copy_number == 2:
                        event = 'gain'
                    elif copy_number > 2:
                        event = 'amp'

                if event.startswith('amp') or event.startswith('gain'):

                    bamrepairedsortfn = sub('.sorted.bam$',
                                            ".re_paired.sorted.bam", bamsortfn)
                    mergedsortfn = sub('.sorted.bam$',
                                       ".mutated_merged.sorted.bam",
                                       bamrepairedsortfn)
                    GAIN_FINAL = "/".join(
                        [finalbams_path,
                         str(chrom).upper() + '_GAIN.bam'])

                    if os.path.isfile(bamsortfn):

                        re_pair_reads(bamsortfn, copy_number)
                        mutate_reads(bamrepairedsortfn, chrom, event)
                        coverageratio = float(
                            countReads(mergedsortfn)) / float(
                                countReads(bamsortfn))
                        logger.debug("+++ coverage ratio for: " +
                                     ntpath.basename(bamsortfn) + ": " +
                                     str(coverageratio))

                        if coverageratio < copy_number - 2:
                            logger.error('not enough reads for ' +
                                         ntpath.basename(bamsortfn))
                            return

                        # Keep only the fraction of merged reads needed for
                        # the (copy_number - 2) extra copies.
                        samplerate = float(copy_number - 2) / coverageratio
                        subsample(mergedsortfn, GAIN_FINAL, str(samplerate))

                elif event == 'loss':

                    loss_final = "/".join(
                        [finalbams_path,
                         str(chrom).upper() + '_LOSS.bam'])
                    if os.path.isfile(bamsortfn):

                        mutate_reads(bamsortfn, chrom, 'loss')
                        mergedsortfn = sub('.sorted.bam$',
                                           ".mutated_merged.sorted.bam",
                                           bamsortfn)
                        mergedsortsampledfn = sub(
                            '.sorted.bam$',
                            ".mutated_merged.sampled.sorted.bam", bamsortfn)

                        ratio_kept = float(countReads(bamsortfn)) / float(
                            countReads(bamfn))
                        samplerate = round(0.5 / ratio_kept, 2)
                        logger.debug("ratios kept for:" +
                                     ntpath.basename(bamsortfn) + ": " +
                                     str(ratio_kept))
                        subsample(mergedsortfn, mergedsortsampledfn,
                                  str(samplerate))
                        bamDiff(sortbyCoord, mergedsortsampledfn, tmpbams_path)
                        os.rename(
                            "/".join(
                                [tmpbams_path,
                                 'diff_only1_' + chrom + '.bam']),
                            loss_final)

                    elif (not os.path.isfile(loss_final)
                          and os.path.isfile(sortbyCoord)):
                        # No sorted input to process: expose the
                        # coordinate-sorted BAM as the LOSS output, unless
                        # one already exists from a previous run.
                        os.symlink(sortbyCoord, loss_final)

            else:
                logger.debug(bedfn + ' does not exist!')
                success = False

    except (KeyboardInterrupt):
        logger.error(
            'Exception Crtl+C pressed in the child process  in implement_cnv for chr '
            + chrom + event)
        terminating.set()
        success = False
        return
    except Exception as e:
        logger.exception("Exception in implement_cnv %s", e)
        terminating.set()
        success = False
        return
    if success:
        logger.debug("implement_cnv complete successfully for " + chrom +
                     event)
    return
Ejemplo n.º 2
0
def _write_haplotype_reads(alignmentfile, bedfile, outbam, allreads):
    """Write the reads fetched at each BED SNP, raw and with the allele set.

    Every read returned by ``alignmentfile.fetch`` for a 7-column BED record
    is written unchanged to ``allreads``.  If the record's start position is
    among the read's reference positions, the base at that position is
    replaced (alt base for ``hap1``, ref base for ``hap2``) and the read is
    written to ``outbam``; reads that cannot be processed are skipped.

    Returns:
        Tuple ``(num_reads_written, num_total_reads)``.
    """
    num_reads_written = 0
    num_total_reads = 0

    for bedline in bedfile:
        c = bedline.strip().split()
        if len(c) != 7:
            continue

        contig = c[0]
        start = int(c[1])
        end = int(c[2])
        refbase = str(c[3])
        altbase = str(c[4])
        haplotype = str(c[5])

        readmappings = alignmentfile.fetch(contig, start, end)

        # Sex chromosome: always use hap1.  The comparison has to use the
        # untouched contig name; the "chr"-stripped form can never equal
        # 'chrX' / 'chrY'.
        if params.GetXY() and contig in ('chrX', 'chrY'):
            haplotype = 'hap1'
            logger.debug("sex chromosome %s", contig)

        for shortread in readmappings:
            allreads.write(shortread)
            num_total_reads += 1

            try:
                index = shortread.get_reference_positions(
                    full_length=True).index(start)
                tmpread = shortread.query_sequence
                # Saved because assigning query_sequence invalidates the
                # stored qualities (pysam behaviour).
                qual = shortread.query_qualities
                prefix = tmpread[:index]
                suffix = tmpread[index + 1:]

                if haplotype == "hap1":
                    shortread.query_sequence = prefix + altbase + suffix
                elif haplotype == "hap2":
                    shortread.query_sequence = prefix + refbase + suffix

                shortread.query_qualities = qual
            except Exception as e:
                # Best effort: e.g. ValueError when `start` is not among the
                # read's reference positions.  Such reads are left out of
                # the het BAM only.
                logger.debug("skipping read at %s:%d: %s", contig, start, e)
                continue

            outbam.write(shortread)
            num_reads_written += 1

    return num_reads_written, num_total_reads


def mutate_reads(bamsortfn, chr, event=''):
    """Set the phased SNP alleles in the reads of a BAM and merge the result.

    Steps, all skipped silently unless both ``bamsortfn`` and the BED file
    from ``init_file_names`` exist:

    1. For each 7-column BED record (contig, start, end, ref base, alt
       base, haplotype, copy number) write the fetched reads to an
       "all reads" BAM and their mutated versions to a "het" BAM.
    2. Sort both BAMs and delete the unsorted ones.
    3. Subsample ``bamsortfn`` at the ratio written/total reads, diff it
       against the sorted "all reads" BAM and merge the ``diff_only1_``
       result with the sorted het BAM into
       ``<bamsortfn stem>.mutated_merged.sorted.bam``.
    4. Remove the intermediate files.

    Args:
        bamsortfn: path of the sorted input BAM (expected to end in
            ``.sorted.bam``; the derived file names are built from it).
        chr: chromosome name, passed to ``init_file_names``.
        event: event name, passed to ``init_file_names``.

    Returns:
        None.  Exceptions are logged and set the shared ``terminating``
        event instead of propagating.
    """
    fn, sortbyname, sortbyCoord, bedfn = init_file_names(
        chr, tmpbams_path, haplotype_path, event)
    # De-duplicate the BED records in place before they are iterated.
    cmd = " ".join(["sort -u", bedfn, "-o", bedfn])
    runCommand(cmd)
    hetbamfn = sub('.sorted.bam$', ".mutated_het.bam", bamsortfn)
    hetbamfnsorted = sub('.sorted.bam$', ".mutated_het.sorted", bamsortfn)
    allreadsfn = sub('.sorted.bam$', ".all.reads.bam", bamsortfn)
    allreadssortfn = sub('.sorted.bam$', ".all.reads.sorted", bamsortfn)
    mergedsortfn = sub('.sorted.bam$', ".mutated_merged.sorted.bam", bamsortfn)
    try:
        if not terminating.is_set():

            if os.path.isfile(bamsortfn) and os.path.isfile(bedfn):

                # A single read handle serves both as fetch source and as
                # header template for the two output BAMs.
                alignmentfile = pysam.AlignmentFile(bamsortfn, "rb")
                outbam = None
                allreads = None
                try:
                    outbam = pysam.Samfile(hetbamfn, 'wb',
                                           template=alignmentfile)
                    allreads = pysam.Samfile(allreadsfn, 'wb',
                                             template=alignmentfile)

                    # Nothing is written to these two files here; they are
                    # only created/truncated, as before, and closed at once.
                    # NOTE(review): presumably leftovers - confirm whether
                    # anything downstream still expects them.
                    for report_name in ("written_coverage_het.txt",
                                        "het_snp_ratio.txt"):
                        open("/".join([haplotype_path, report_name]),
                             'w').close()

                    with open(bedfn, 'r') as bedfile:
                        num_reads_written, num_total_reads = (
                            _write_haplotype_reads(alignmentfile, bedfile,
                                                   outbam, allreads))
                finally:
                    # Close on every path so the BAMs are flushed and no
                    # handle leaks when processing fails midway.
                    for handle in (outbam, allreads, alignmentfile):
                        if handle is not None:
                            handle.close()

                sortBam(hetbamfn, hetbamfnsorted + '.bam', tmpbams_path)
                sortBam(allreadsfn, allreadssortfn + '.bam', tmpbams_path)

                os.remove(hetbamfn)
                os.remove(allreadsfn)

                # ratio of het reads to nonhet reads, we need to adjust the
                # coverage.  If no read was fetched this raises
                # ZeroDivisionError, which the outer handler reports.
                ratio = float(num_reads_written) / float(num_total_reads)
                bamsortfnsampled = sub('.sorted.bam$', ".sampled.nh.bam",
                                       bamsortfn)
                subsample(bamsortfn, bamsortfnsampled, str(ratio))
                bamDiff(bamsortfnsampled, allreadssortfn + '.bam',
                        tmpbams_path)

                diff_only1fn = "/".join([
                    tmpbams_path,
                    'diff_only1_' + os.path.basename(bamsortfnsampled)
                ])
                # Test for the file itself: the bare path string is always
                # truthy.
                if os.path.isfile(diff_only1fn):
                    merge_bams(diff_only1fn, hetbamfnsorted + '.bam',
                               mergedsortfn)
                    os.remove(diff_only1fn)
                else:
                    logger.error("%s not found; %s was not created",
                                 diff_only1fn, mergedsortfn)

                os.remove(bamsortfnsampled)
                os.remove(allreadssortfn + '.bam')
                os.remove(allreadssortfn + '.bam.bai')

                os.remove(hetbamfnsorted + '.bam')
                os.remove(hetbamfnsorted + '.bam.bai')

    except (KeyboardInterrupt):
        logger.error(
            'Exception Crtl+C pressed in the child process  in mutaute_reads')
        terminating.set()
        return
    except Exception as e:
        logger.exception("Exception in mutate_reads %s", e)
        terminating.set()
        return
    return