def init_file_names(chr, event, tmpbams_path, haplotypedir):
    """
    Assemble the file paths used for one chromosome/event pair.

    The split-bam directory is read from ``params.GetSplitBamsPath()``.

    Returns a list of four paths, in this order:
      0. ``<tmpbams_path>/<chr><event>_roi.bam``
      1. ``<splitbams>/<chr>.byname.bam``
      2. ``<splitbams>/<chr>.bam``
      3. ``<haplotypedir>/<event>_het_snp_<chr>.bed``
    """
    split_dir = params.GetSplitBamsPath()

    roi_path = "/".join((tmpbams_path, chr + event + "_roi.bam"))
    byname_path = "/".join((split_dir, chr + '.byname.bam'))
    bycoord_path = "/".join((split_dir, chr + '.bam'))
    hetsnp_path = "/".join((haplotypedir, event + '_het_snp_' + chr + '.bed'))

    return [roi_path, byname_path, bycoord_path, hetsnp_path]
Exemple #2
0
def find_non_roi_bam(chr_list):
    """
    Extract paired reads from original bam using generated non-ROI bed.

    Despite its name, ``chr_list`` is used as a single chromosome label
    (it is concatenated directly into file names).

    Paths involved:
      * input  : ``<splitbams>/<chr>.bam`` (coordinate-sorted split bam)
      * regions: ``<haplotype_path>/<chr>_non_roi.bed``
      * output : ``<finalbams_path>/<chr>_non_roi.sorted.bam`` plus its index;
        the intermediate ``<chr>_non_roi.bam`` is removed afterwards.

    Nothing is done when the shared ``terminating`` event is already set.
    Any exception (including Ctrl+C) is logged and sets ``terminating``;
    the function always returns ``None``.
    """
    chr = chr_list
    splitbams = params.GetSplitBamsPath()

    sortbyCoord = "/".join([splitbams, chr + '.bam'])
    nonroi = "/".join([finalbams_path, chr + "_non_roi.bam"])
    exonsnonroibed = "/".join([haplotype_path, chr + "_non_roi.bed"])

    success = False
    try:
        if not terminating.is_set():
            # Escape the dot so only a literal ".bam" suffix is rewritten.
            nonroisort = sub(r'\.bam$', '.sorted', nonroi)

            # NOTE(review): this tests the prefix *without* ".bam", while the
            # file indexed below is nonroisort + '.bam' -- confirm whether the
            # "already done" shortcut is meant to look at the sorted bam.
            if os.path.isfile(nonroisort):
                success = True

            elif os.path.isfile(exonsnonroibed):
                # De-duplicate the bed file in place before extraction.
                cmd = " ".join(["sort -u", exonsnonroibed, "-o", exonsnonroibed])
                runCommand(cmd)
                print(" ___ extracting non-roi bams  ___")
                extractAllReadsfromROI(sortbyCoord, exonsnonroibed, nonroi)
                removeIfEmpty(finalbams_path, ntpath.basename(nonroi))
                pysam.sort(nonroi, nonroisort)
                pysam.index(nonroisort + '.bam')
                os.remove(nonroi)
                success = True

            else:
                logger.debug(exonsnonroibed + ' does not exist!')
                return

    except KeyboardInterrupt:
        logger.error('Exception Ctrl+C pressed in the child process  in find_non_roi_bam for chr ' + chr)
        terminating.set()
        return

    except Exception as e:
        logger.exception("Exception in find_non_roi_bam %s", e)
        terminating.set()
        return

    if success:
        logger.debug("find_non_roi_bam complete successfully for " + chr)
Exemple #3
0
def init_file_names(chr, tmpbams_path, haplotypedir, event=''):
    """
    Assemble the file paths used for one chromosome (and optional event).

    The split-bam directory comes from ``params.GetSplitBamsPath()``; when
    that is empty, ``<res_path>/splitbams`` is used instead.

    Returns a list of four paths, in this order:
      0. ``<tmpbams_path>/<chr>_roi<event>.bam``
      1. ``<splitbams>/<chr>.byname.bam``
      2. ``<splitbams>/<chr>.bam``
      3. ``<haplotypedir>/<chr>_het_snp<event>.bed``
    """
    roi_path = "/".join((tmpbams_path, chr + "_roi" + event + ".bam"))
    split_dir = params.GetSplitBamsPath()
    hetsnp_path = "/".join((haplotypedir, chr + '_het_snp' + event + '.bed'))

    # Fall back to the default location under the results directory.
    split_dir = split_dir or "/".join((res_path, 'splitbams'))

    return [
        roi_path,
        "/".join((split_dir, chr + '.byname.bam')),
        "/".join((split_dir, chr + '.bam')),
        hetsnp_path,
    ]
Exemple #4
0
def mergeSortBamFiles(mergedBamfn, finalbamdir):
    """
    Combine the ``*.bam`` files found under ``finalbamdir``.

    The working directory is changed to ``finalbamdir``. Symlinked bams are
    resolved to their real path and each distinct path is used once.

    * More than one bam: all of them are merged into ``mergedBamfn`` with
      ``sambamba merge``.
    * Exactly one bam ending in ``GAIN.bam``: it is merged with the bam of the
      same (lower-cased, ``_gain``-stripped) name from the split-bam directory.
    * Exactly one bam ending in ``LOSS.bam``: it is sorted to ``*.sort.bam``
      via ``sortBam`` and the unsorted file is removed; ``mergedBamfn`` is not
      written in this case.
    * Otherwise nothing is done.
    """
    # Only sambamba is used here; the full unpacking is kept so a change in
    # the number of configured tools still fails loudly at this point.
    _, _, _, _, _, sambamba_path = params.GetSoftwarePath()
    os.chdir(finalbamdir)

    matches = []
    seen = set()  # O(1) duplicate check; `matches` keeps discovery order
    for root, dirnames, filenames in os.walk(finalbamdir):
        for filename in fnmatch.filter(filenames, '*.bam'):
            path = os.path.join(root, filename)
            if os.path.islink(path):
                path = os.path.realpath(path)

            if path not in seen:
                seen.add(path)
                matches.append(path)

    if len(matches) > 1:
        # Most recently discovered file first, matching the previous ordering.
        bam_args = " ".join(reversed(matches))
        runCommand(" ".join([
            sambamba_path, "merge", mergedBamfn, bam_args, "--nthreads",
            str(4)
        ]))

    elif len(matches) == 1:
        only_bam = matches[0]

        if only_bam.endswith("GAIN.bam"):
            fname = os.path.basename(only_bam)
            inbam_original = '/'.join(
                [params.GetSplitBamsPath(),
                 sub('_gain', '', fname.lower())])

            runCommand(" ".join([
                sambamba_path, "merge", mergedBamfn, only_bam, inbam_original,
                "--nthreads",
                str(4)
            ]))

        elif only_bam.endswith("LOSS.bam"):
            outbam = sub(r'\.bam$', '.sort.bam', only_bam)
            # Pass the clean path (no trailing blank), consistent with the
            # names used for `outbam` and for the removal below.
            sortBam(only_bam, outbam, finalbamdir)
            # NOTE(review): if the single bam was a symlink, `only_bam` is the
            # link target, so this removes the target file -- confirm intended.
            os.remove(only_bam)
Exemple #5
0
def init_file_names(chr, tmpbams_path, haplotypedir, event):
    """
    Build the four file names needed to process one chromosome/event:

      0. ROI bam                        ``<tmpbams_path>/<chr>_roi<event>.bam``
      1. chromosome sorted by name      ``<splitbams>/<chr>.byname.bam``
      2. chromosome sorted by coordinate ``<splitbams>/<chr>.bam``
      3. heterozygous SNPs bed          ``<haplotypedir>/<chr>_het_snp<event>.bed``

    ``<splitbams>`` is ``params.GetSplitBamsPath()`` or, when that is empty,
    ``<res_path>/splitbams``. The paths are returned as a list in that order.
    """
    roi_bam = "/".join([tmpbams_path, chr + "_roi" + event + ".bam"])
    split_dir = params.GetSplitBamsPath()
    het_snp_bed = "/".join([haplotypedir, chr + '_het_snp' + event + '.bed'])

    if not split_dir:
        # No explicit location configured: use the default under res_path.
        split_dir = "/".join([res_path, 'splitbams'])

    by_name, by_coord = [
        "/".join([split_dir, chr + suffix])
        for suffix in ('.byname.bam', '.bam')
    ]

    return [roi_bam, by_name, by_coord, het_snp_bed]
Exemple #6
0
def CreateFileList(file_type, num_files, path, flag=None):
    """
    Build the list of per-job file lists for every line of the input file.

    Parameters
    ----------
    file_type : str
        Either the literal ``'bam'`` or a ``str.format`` template that is
        filled with the sample id, or with chromosome / event values.
    num_files : int
        Only used when ``flag`` is None: ``1`` yields one file per sample,
        ``>= 2`` yields one file per chromosome in the global ``chr_list``.
    path : str
        Directory prefix for the generated names.
    flag : str or None
        ``None``, ``"extractROI"``, ``"gain"``, ``"loss"`` or ``"FINAL"``;
        any other value produces no entries.

    Returns
    -------
    list
        ``job_list``, a list of file lists.

    Side effect: with ``flag == "FINAL"``, a missing ``CHR<n>_LOSS.bam`` is
    symlinked to the matching split bam when that split bam exists.
    """
    sentinel_path, results_path, haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path = GetProjectNamePathRunID(
    )
    job_list = []
    infile_list = ParseInfile(params.GetInfile())

    for line in infile_list:
        # NOTE(review): in the flag branches below this same list object is
        # appended to job_list once per matching file, so all those entries
        # alias one growing list. Kept as-is; confirm callers rely on it.
        file_list = []

        ## Find the tumour_id for the sample
        sample_id = GetSampleIDs(line)

        if flag is None:
            if file_type == 'bam':
                file_list.append(line[1])
            elif num_files == 1:
                file_list.append(path + file_type.format(sample_id))
            elif num_files >= 2:
                for chr in chr_list:
                    file_list.append(path + file_type.format('chr', str(chr)))
            else:
                file_list.append('')
            job_list.append(file_list)

        elif flag == "extractROI":
            for chr, event in itertools.product(chr_list, event_list):
                exonsinroibed = "/".join([
                    haplotype_path,
                    event + "_exons_in_roi_" + 'chr' + str(chr) + '.bed'
                ])
                if os.path.isfile(exonsinroibed):
                    file_list.append(path + '/' + 'chr' +
                                     file_type.format(chr, event))
                    job_list.append(file_list)

        elif flag in ("gain", "loss"):
            # Both events follow the same rule; only the event name differs.
            for chr in chr_list:
                sorted_roi = (path + '/' + 'chr' + str(chr) + '.' + flag +
                              '.roi.sorted.bam')
                if os.path.isfile(sorted_roi):
                    file_list.append(path + '/' + 'chr' +
                                     file_type.format(chr, flag))
                    job_list.append(file_list)

        elif flag == "FINAL":
            for chr, event in itertools.product(chr_list, event_list):
                chrbam = "/".join([
                    finalbams_path,
                    'CHR' + str(chr) + '_' + event.upper() + '.bam'
                ])
                sortbyCoord = "/".join(
                    [params.GetSplitBamsPath(), 'chr' + str(chr) + '.bam'])
                if os.path.isfile(chrbam):
                    file_list.append(chrbam)
                    job_list.append(file_list)
                elif event == 'loss' and os.path.isfile(sortbyCoord):
                    # Link only to a split bam that exists; the former test on
                    # the path string was always true and could leave a
                    # dangling symlink behind.
                    os.symlink(sortbyCoord, chrbam)

    return job_list
Exemple #7
0
def run_pipeline(results_path):
    """
    Run the whole pipeline for the project rooted at ``results_path``.

    Steps, in order:
      1. publish the project paths and logging handles as module globals;
      2. build the chromosome/event work list from the files in the CNV
         directory;
      3. create ``<results_path>/phasedvcfdir`` and run the initialisation
         helpers;
      4. in a 12-process pool: split the bam by chromosome (only when no
         split-bam path is configured), then ``find_roi_bam`` and
         ``implement_cnv`` for every chromosome/event entry;
      5. merge the final bams into the configured output file, delete the
         temporary bam directory and shut logging down.

    Exceptions raised while the pool is working (including Ctrl+C) are logged
    and terminate the pool; the merge/cleanup steps still run afterwards.
    """
    print(results_path)
    global haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, log_path, logfile, terminating, logger, logQueue, res_path
    res_path = results_path
    haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, log_path, logfile = handle.GetProjectPaths(
        results_path)
    terminating, logger, logQueue = handle.GetLoggings(logfile)

    chr_list = ['chr' + str(x) for x in range(1, 23)]
    chr_list.extend(['chrX', 'chrY'])

    t0 = time.time()
    outbamfn = params.GetOutputFileName()

    cnv_list = glob.glob("/".join([params.GetCNVDir(), '*.*']))
    chromosome_event = create_chr_event_list(cnv_list, chr_list)

    logger.debug('pipeline started!')

    phase_path = '/'.join([results_path, 'phasedvcfdir'])
    if not os.path.exists(phase_path):
        os.makedirs(phase_path)

    initialize0(phase_path, cancer_dir_path)

    for cnv_path in cnv_list:
        initialize_pipeline(phase_path, haplotype_path, cnv_path)

    pool1 = multiprocessing.Pool(
        processes=12,
        initializer=initPool,
        initargs=[logQueue, logger.getEffectiveLevel(), terminating])
    try:
        # map_async(...).get(<timeout>) is used instead of map();
        # presumably so the parent stays interruptible by Ctrl+C.
        if not params.GetSplitBamsPath():
            split_dir = "/".join([res_path, 'splitbams'])
            if not os.path.exists(split_dir):
                os.makedirs(split_dir)
            # Record the location even when the directory already exists
            # (e.g. on a rerun); previously it was only set on creation.
            params.SetSplitBamsPath(split_dir)

            pool1.map_async(split_bam_by_chr, chromosome_event).get(9999999)

        pool1.map_async(find_roi_bam, chromosome_event).get(9999999)
        pool1.map_async(implement_cnv, chromosome_event).get(9999999)
        pool1.close()
    except KeyboardInterrupt:
        logger.debug('You cancelled the program!')
        pool1.terminate()
    except Exception as e:
        logger.exception("Exception in main %s", e)
        pool1.terminate()
    finally:
        pool1.join()

    time.sleep(.1)
    mergeSortBamFiles(outbamfn, finalbams_path)
    t1 = time.time()
    shutil.rmtree(tmpbams_path)
    logger.debug(' ***** pipeline finished in ' +
                 str(round((t1 - t0) / 60.0, 1)) + ' minutes ***** ')
    logging.shutdown()