Example #1
0
def test_merge_bam():
    """Check that merging a BAM file with itself doubles the alignment count."""
    with get_input_files('1.bam', '1.bam') as input_files:
        with get_tmp_path() as outpath:
            Bam.merge(input_files, outpath)
            # The result of ``view("-c", ...)`` is stripped and parsed as the
            # alignment count of the given file.
            merged_total = int(view("-c", outpath).strip())
            expected_total = 2 * int(view("-c", input_files[0]).strip())
            assert expected_total == merged_total
def setup_module():
    """Build and index a BAM file for every path listed in ``bam_files``.

    This function is run once for this module. Each BAM is converted from
    the SAM file that shares its path stem.
    """
    suffix = ".bam"
    for target in bam_files:
        assert target.endswith(suffix)
        source = target[:-len(suffix)] + ".sam"
        pysam.view(source, "-b", "-o", target, catch_stdout=False)
        pysam.index(target, catch_stdout=False)
Example #3
0
def checkSamtoolsViewEqual(filename1, filename2, without_header=False):
    '''return true if the two files are equal in their
    content through samtools view.

    Lines are compared verbatim first. Where the outputs differ, each
    pair of lines is compared by its tab-separated fields irrespective
    of their order, because tags get rearranged between BAM/CRAM.

    :param filename1: first file passed to ``samtools view``.
    :param filename2: second file passed to ``samtools view``.
    :param without_header: if true, ``-h`` is not passed to samtools.
    :return: True if the outputs are equivalent, False otherwise.
    '''

    # strip MD and NM tags, as not preserved in CRAM files
    args = ["-x", "MD", "-x", "NM"]
    if not without_header:
        args.append("-h")

    lines1 = pysam.view(*(args + [filename1]))
    lines2 = pysam.view(*(args + [filename2]))

    # pysam.view may hand back one string instead of a list of lines;
    # split it so the comparison works on lines rather than characters.
    if isinstance(lines1, str):
        lines1 = lines1.splitlines()
    if isinstance(lines2, str):
        lines2 = lines2.splitlines()

    if len(lines1) != len(lines2):
        return False

    if lines1 != lines2:
        # line by line comparison
        # sort each line, as tags get rearranged between
        # BAM/CRAM
        for n, (l1, l2) in enumerate(zip(lines1, lines2)):
            fields1 = sorted(l1.rstrip("\n").split("\t"))
            fields2 = sorted(l2.rstrip("\n").split("\t"))
            if fields1 != fields2:
                print("mismatch in line %i" % n)
                print(fields1)
                print(fields2)
                return False
        # Every line matched once field order was ignored, so fall through
        # to True (a for/else here used to return False unconditionally).

    return True
Example #4
0
def checkSamtoolsViewEqual(filename1, filename2,
                           without_header=False):
    '''return true if the two files are equal in their
    content through samtools view.

    Lines are compared verbatim first. Where the outputs differ, each
    pair of lines is compared by its tab-separated fields irrespective
    of their order, because tags get rearranged between BAM/CRAM.

    :param filename1: first file passed to ``samtools view``.
    :param filename2: second file passed to ``samtools view``.
    :param without_header: if true, ``-h`` is not passed to samtools.
    :return: True if the outputs are equivalent, False otherwise.
    '''

    # strip MD and NM tags, as not preserved in CRAM files
    args = ["-x", "MD", "-x", "NM"]
    if not without_header:
        args.append("-h")

    lines1 = pysam.view(*(args + [filename1]))
    lines2 = pysam.view(*(args + [filename2]))

    # pysam.view may hand back one string instead of a list of lines;
    # split it so the comparison works on lines rather than characters.
    if isinstance(lines1, str):
        lines1 = lines1.splitlines()
    if isinstance(lines2, str):
        lines2 = lines2.splitlines()

    if len(lines1) != len(lines2):
        return False

    if lines1 != lines2:
        # line by line comparison
        # sort each line, as tags get rearranged between
        # BAM/CRAM
        for n, (l1, l2) in enumerate(zip(lines1, lines2)):
            fields1 = sorted(l1.rstrip("\n").split("\t"))
            fields2 = sorted(l2.rstrip("\n").split("\t"))
            if fields1 != fields2:
                # Single-argument print(...) behaves the same on
                # Python 2 and Python 3.
                print("mismatch in line %i" % n)
                print(fields1)
                print(fields2)
                return False
        # Every line matched once field order was ignored, so fall through
        # to True (a for/else here used to return False unconditionally).

    return True
Example #5
0
    def execute(self, inBam, exclude, readList, outBam, picardOptions=None, JVMmemory=None):    # pylint: disable=W0221
        """Filter ``inBam`` into ``outBam`` using the read names in ``readList``.

        ``exclude`` selects FILTER=excludeReadList, otherwise
        FILTER=includeReadList is used. Empty inputs are handled without
        running the filter subtool, see the comments below.
        """
        if not picardOptions:
            picardOptions = []

        # Picard FilterSamReads cannot deal with an empty input BAM file
        if tools.samtools.SamtoolsTool().isEmpty(inBam):
            shutil.copyfile(inBam, outBam)
            return

        if os.path.getsize(readList) != 0:
            filter_mode = 'excludeReadList' if exclude else 'includeReadList'
            opts = [
                'INPUT=' + inBam, 'OUTPUT=' + outBam, 'READ_LIST_FILE=' + readList,
                'FILTER=' + filter_mode, 'WRITE_READS_FILES=false'
            ]
            PicardTools.execute(self, self.subtoolName, opts + picardOptions, JVMmemory)
            return

        # Picard FilterSamReads cannot deal with an empty READ_LIST_FILE
        if exclude:
            # Nothing to exclude: the output is a plain copy of the input.
            shutil.copyfile(inBam, outBam)
            return

        # Nothing to include: write only the header to a temporary SAM.
        tmpf = util.file.mkstempfname('.sam')
        # output format (sam/bam) is inferred by samtools based on file extension
        view_args = ['-o', tmpf, '-H']
        if inBam.endswith('.sam'):
            view_args.append('-S')
        view_args.append(inBam)
        pysam.view(*view_args, catch_stdout=False)
        # pysam.AlignmentFile cannot write an empty file
        # samtools cannot convert SAM -> BAM on an empty file
        # but Picard SamFormatConverter can deal with empty files
        opts = ['INPUT=' + tmpf, 'OUTPUT=' + outBam, 'VERBOSITY=ERROR']
        PicardTools.execute(self, 'SamFormatConverter', opts, JVMmemory='50m')
Example #6
0
def test_sieve():
    """
    Test filtering a BAM file by MAPQ, flag, and blacklist
    """
    outfile = '/tmp/test_sieve.bam'
    outfiltered = '/tmp/test_sieveFiltered.bam'
    outlog = '/tmp/test_sieve.log'

    def digest(bam_path):
        # MD5 of the text that pysam.view returns for the file.
        return hashlib.md5(pysam.view(bam_path).encode('utf-8')).hexdigest()

    template = ('-b {} --smartLabels --minMappingQuality 10 --samFlagExclude 512 '
                '-bl {} -o {} --filterMetrics {} --filteredOutReads {}')
    args = template.format(
        BAMFILE_FILTER, BEDFILE_FILTER, outfile, outlog, outfiltered).split()
    sieve.main(args)

    with open(outlog, 'r') as log_handle:
        resp = log_handle.readlines()

    expected = [
        '#bamFilterReads --filterMetrics\n',
        '#File\tReads Remaining\tTotal Initial Reads\n',
        'test_filtering\t5\t193\n'
    ]
    assert_equal(resp, expected)
    unlink(outlog)

    # Reads that passed the filters.
    h = digest(outfile)
    assert (h == "acbc4443fb0387bfd6c412af9d4fc414")
    unlink(outfile)

    # Reads that were filtered out.
    h1 = digest(outfiltered)
    assert (h1 == "b90befdd5f073f14acb9a38661f301ad")
    unlink(outfiltered)
Example #7
0
    def execute(self, inBam, exclude, readList, outBam, picardOptions=None, JVMmemory=None):
        """Filter ``inBam`` into ``outBam`` using the read names in ``readList``.

        ``exclude`` selects FILTER=excludeReadList, otherwise
        FILTER=includeReadList is used. An empty read list is handled
        without running the filter subtool.
        """
        if not picardOptions:
            picardOptions = []

        if os.path.getsize(readList) != 0:
            filter_mode = 'excludeReadList' if exclude else 'includeReadList'
            opts = ['INPUT=' + inBam, 'OUTPUT=' + outBam, 'READ_LIST_FILE=' + readList,
                    'FILTER=' + filter_mode, 'WRITE_READS_FILES=false']
            PicardTools.execute(self, self.subtoolName, opts + picardOptions, JVMmemory)
            return

        # Picard FilterSamReads cannot deal with an empty READ_LIST_FILE
        if exclude:
            # Nothing to exclude: the output is a plain copy of the input.
            shutil.copyfile(inBam, outBam)
            return

        # Nothing to include: write only the header to a temporary SAM.
        tmpf = util.file.mkstempfname('.sam')
        with open(tmpf, 'wt') as outf:
            if inBam.endswith('.sam'):
                header_args = ['-H', '-S', inBam]
            else:
                header_args = ['-H', inBam]
            outf.writelines(pysam.view(*header_args))
        # pysam.AlignmentFile cannot write an empty file
        # samtools cannot convert SAM -> BAM on an empty file
        # but Picard SamFormatConverter can deal with empty files
        opts = ['INPUT=' + tmpf, 'OUTPUT=' + outBam, 'VERBOSITY=ERROR']
        PicardTools.execute(self, 'SamFormatConverter', opts, JVMmemory='50m')
Example #8
0
def Downsample(k, path_bam, output_path, reads_min_count):
    """Downsample ``path_bam`` towards ``reads_min_count`` reads.

    The result is written to ``<output_path>/<stem>.sorted_DS.bam`` and
    indexed. A file that already has at most ``reads_min_count`` reads is
    copied unchanged.

    Args:
        k: seed, used as the integer part of the ``samtools view -s`` value.
        path_bam: input BAM file.
        output_path: directory receiving the output files.
        reads_min_count: target number of reads.
    """
    # Local import keeps this block self-contained.
    from decimal import Decimal

    stem = os.path.basename(path_bam).split(".bam")[0]
    path_new = output_path + "/" + stem + ".DS.bam"
    path_temp = output_path + "/" + stem + ".temp.sorted"
    path_new_sort = output_path + "/" + stem + ".sorted_DS.bam"
    reads_count = count_reads(path_bam, output_path)

    # Decide first: this also covers reads_count == 0, which used to raise
    # ZeroDivisionError because the fraction was computed up front.
    if reads_count <= reads_min_count:
        shutil.copy2(path_bam, path_new_sort)
        pysam.index(path_new_sort)
        return

    # ``-s`` expects SEED.FRACTION. Render the fraction in plain decimal
    # notation: str(float) may use scientific notation (e.g. '1e-05'), and
    # integer division would yield no fractional part at all.
    fraction = float(reads_min_count) / reads_count
    digits = format(Decimal(repr(fraction)), "f").split(".")[1]

    pysam.view("-b",
               "-s",
               str(k) + '.' + digits,
               "-O",
               "BAM",
               "-o",
               path_new,
               path_bam,
               catch_stdout=False)
    pysam.sort("-O", "BAM", "-T", path_temp, "-o", path_new_sort, path_new)
    pysam.index(path_new_sort)
    # The unsorted intermediate is no longer needed.
    os.remove(path_new)
Example #9
0
def create_bam(filename, threads=0):
    """
    Function that create a BAM file from a SAM file.

    The SAM file is deleted once the conversion call has returned.

    Args :
        filename [STR] = SAM filename
        threads [INT] = thread count; ``threads - 1`` is passed to
            samtools ``-@``, clamped at 0

    Returns:
        bamfile [STR] = BAM filename
    """
    # name of the bam file to create: the last three characters of both the
    # directory and the file name are replaced
    # NOTE(review): presumably the directory name ends in "sam" - confirm.
    bamfile = os.path.dirname(filename)[:-3] + "bam/" + os.path.basename(
        filename)[:-3] + "bam"
    # Never hand a negative value to -@ (the default threads=0 gave "-1").
    extra_threads = max(threads - 1, 0)
    # convert sam to bam using pysam
    pysam.view('-@',
               str(extra_threads),
               '-S',
               '-b',
               '-o',
               bamfile,
               filename,
               catch_stdout=False)
    os.remove(filename)

    return bamfile
Example #10
0
def filter_bam(in_fpath, out_fpath, min_mapq=0, required_flag_tags=None,
               filtering_flag_tags=None, regions=None):
    """Write the alignments of ``in_fpath`` that pass the filters to ``out_fpath``.

    ``min_mapq`` maps to ``-q``, ``required_flag_tags`` to ``-f`` and
    ``filtering_flag_tags`` to ``-F`` (both converted with ``create_flag``).
    ``regions.segments`` entries are appended as ``ref:start-end`` arguments.
    """
    # '-o' and the path are deliberately glued into one argument instead of
    # being passed as ['-o', out_fpath]; it is a workaround, take a look at:
    # https://groups.google.com/forum/#!msg/pysam-user-group/ooHgIiNVe4c/CcY06d45rzQJ
    view_args = ['-bh', '-o' + out_fpath]

    if min_mapq:
        view_args += ['-q', str(min_mapq)]

    for option, tags in (('-f', required_flag_tags),
                         ('-F', filtering_flag_tags)):
        if tags:
            view_args += [option, str(create_flag(tags))]

    view_args.append(in_fpath)

    if regions:
        view_args += ['{0}:{1}-{2}'.format(*segment)
                      for segment in regions.segments]

    pysam.view(*view_args)
Example #11
0
def setup_module():
	"""Build and index a BAM file for every path listed in ``bam_files``.

	This function is run once for this module. Each BAM is converted from
	the SAM file that shares its path stem.
	"""
	suffix = '.bam'
	for target in bam_files:
		assert target.endswith(suffix)
		source = target[:-len(suffix)] + '.sam'
		pysam.view(source, '-b', '-o', target, catch_stdout=False)
		pysam.index(target, catch_stdout=False)
def bam_to_sam(bamfile, odir, bname):
    """Convert ``bamfile`` to SAM (header included) and return the SAM path.

    The output path is ``odir + bname + '.sam'``; no separator is inserted
    between ``odir`` and ``bname``.
    """
    samfile = ''.join((odir, bname, '.sam'))
    view_args = ['-h', bamfile, '-o', samfile]
    pysam.view(*view_args, catch_stdout=False)
    return samfile
Example #13
0
def only_mapped(filename, threads=1):
    """
    Function that keep only mapped reads in the BAM file

    Args:
        filename [STR] = BAM file, containing all alignments
        threads [INT] = thread count; ``threads - 1`` is passed to ``-@``

    Returns:
        mappedfile [STR] = BAM file with only the mapped reads
    """
    folder, base = os.path.split(filename)
    # new bamfile name: the first seven characters of the base name are
    # replaced by "filtered_"
    mappedfile = '{0}/filtered_{1}'.format(folder, base[7:])
    # get only mapped reads (-F 4 drops reads flagged as unmapped)
    view_args = ['-@', str(threads - 1), '-b', '-F', '4',
                 filename, '-o', mappedfile]
    pysam.view(*view_args, catch_stdout=False)

    return mappedfile
Example #14
0
def run_mapping(num_threads, reference_file, fastq_files, output_path, sample_name, remove_large_files):
    '''Run mapping with bwa to create a SAM file, then convert it to BAM, sort and index the file

    Args:
        num_threads: thread count handed to bwa and samtools (int or str).
        reference_file: reference passed to ``bwa mem``.
        fastq_files: one (single-end) or two (paired-end) FASTQ paths.
        output_path: directory receiving the output files.
        sample_name: base name of the output files.
        remove_large_files: if true, delete the FASTQ inputs afterwards.

    Returns:
        Path of the sorted BAM, ``<output_path>/<sample_name>.sorted.bam``.

    Raises:
        ValueError: if ``fastq_files`` does not hold one or two paths.
        subprocess.CalledProcessError: if bwa exits with a non-zero status.
    '''
    # Validate before touching the file system; any other length used to
    # surface later as an unbound ``bwacommand``.
    if len(fastq_files) not in (1, 2):
        raise ValueError(
            "Expected 1 or 2 fastq files, got {}".format(len(fastq_files)))
    # Command-line arguments must be strings.
    threads = str(num_threads)

    logging.info("Starting mapping with BWA")
    output_file = output_path + '/' + sample_name
    sam_file = output_file + '.sam'
    bam_file = output_file + '.bam'
    sorted_bam = output_file + '.sorted.bam'
    logging.info("Creating output file: {}.sorted.bam".format(output_file))

    bwacommand = ['bwa', 'mem', '-t', threads, reference_file] + list(fastq_files)
    # Keep the SAM handle open until bwa has finished writing to it.
    with open(sam_file, 'w') as g:
        returncode = subprocess.call(bwacommand, stdout=g)
    if returncode != 0:
        # Do not convert a truncated SAM file.
        raise subprocess.CalledProcessError(returncode, bwacommand)

    pysam.view('-Sb', '-@', threads, sam_file, '-o', bam_file, catch_stdout=False)
    pysam.sort('-@', threads, bam_file, '-o', sorted_bam, catch_stdout=False)
    pysam.index(sorted_bam, catch_stdout=False)

    # Intermediates are no longer needed once the sorted BAM exists.
    os.remove(sam_file)
    os.remove(bam_file)
    if remove_large_files:
        for fastq in fastq_files:
            os.remove(fastq)
    logging.info("Finished mapping")
    return sorted_bam
Example #15
0
    def execute(self, inBam, exclude, readList, outBam, picardOptions=None, JVMmemory=None):    # pylint: disable=W0221
        """Filter ``inBam`` into ``outBam`` using the read names in ``readList``.

        ``exclude`` selects FILTER=excludeReadList, otherwise
        FILTER=includeReadList is used. Empty inputs are handled without
        running the filter subtool, see the comments below.
        """
        extra_options = picardOptions if picardOptions else []

        input_is_empty = tools.samtools.SamtoolsTool().isEmpty(inBam)
        if input_is_empty:
            # Picard FilterSamReads cannot deal with an empty input BAM file
            shutil.copyfile(inBam, outBam)
            return

        read_list_is_empty = os.path.getsize(readList) == 0
        if not read_list_is_empty:
            filter_mode = 'excludeReadList' if exclude else 'includeReadList'
            opts = [
                'INPUT=' + inBam, 'OUTPUT=' + outBam, 'READ_LIST_FILE=' + readList,
                'FILTER=' + filter_mode, 'WRITE_READS_FILES=false'
            ]
            PicardTools.execute(self, self.subtoolName, opts + extra_options, JVMmemory)
            return

        # Picard FilterSamReads cannot deal with an empty READ_LIST_FILE
        if exclude:
            # Nothing to exclude: the output is a plain copy of the input.
            shutil.copyfile(inBam, outBam)
            return

        # Nothing to include: write only the header to a temporary SAM.
        tmpf = util.file.mkstempfname('.sam')
        # output format (sam/bam) is inferred by samtools based on file extension
        header_args = ['-o', tmpf, '-H']
        if inBam.endswith('.sam'):
            header_args.append('-S')
        header_args.append(inBam)
        pysam.view(*header_args, catch_stdout=False)
        # pysam.AlignmentFile cannot write an empty file
        # samtools cannot convert SAM -> BAM on an empty file
        # but Picard SamFormatConverter can deal with empty files
        opts = ['INPUT=' + tmpf, 'OUTPUT=' + outBam, 'VERBOSITY=ERROR']
        PicardTools.execute(self, 'SamFormatConverter', opts, JVMmemory='50m')
Example #16
0
 def _convert_bam_to_sam(self):
     """Write the BAM alignment result as SAM with its header (``-h``)."""
     target_sam = self.aligning_result_path_sam
     source_bam = self.aligning_result_path_bam
     view_args = ["-h", "-o", target_sam, source_bam]
     pysam.view(*view_args, catch_stdout=False)
Example #17
0
def split_bam(bam):
    """Split ``bam`` by SAM flag 16 into two indexed BAM files.

    Returns the pair ``(fwdbam, revbam)``; the names are derived from
    ``bam`` by replacing a trailing "bam" with "fwd.bam" / "rev.bam".
    """
    fwdbam, revbam = [re.sub(r'bam$', tag + '.bam', bam)
                      for tag in ('fwd', 'rev')]
    # -F 16 keeps reads without flag 16 set, -f 16 keeps reads with it set.
    for flag_option, target in (("-F", fwdbam), ("-f", revbam)):
        pysam.view(flag_option, "16", "-h", "-b", "-o", target, bam,
                   catch_stdout=False)
    for target in (fwdbam, revbam):
        pysam.index(target)
    return fwdbam, revbam
Example #18
0
def generate_bam_file(sam_content, file_prefix):
    """Write ``sam_content`` to ``<file_prefix>.sam`` and build an indexed BAM.

    The BAM is written to ``<file_prefix>.bam``.
    """
    sam_file, bam_file = ["{}.{}".format(file_prefix, ext)
                          for ext in ("sam", "bam")]
    with open(sam_file, "w") as sam_fh:
        sam_fh.write(sam_content)
    output_option = "-o{}".format(bam_file)
    pysam.view("-Sb", output_option, sam_file, catch_stdout=False)
    pysam.index(bam_file)
Example #19
0
 def convert_to_sam(self):
     '''
     Convert the sorted BAM to SAM format.

     The input is ``<bam_output stem>.sorted.bam``, the output
     ``<bam_output stem>.sorted.sam``; the SAM path is reported on stderr.
     '''
     sorted_output = self.bam_output[:-4] + ".sorted.bam"
     sam_output = sorted_output[:-4] + ".sam"
     pysam.view("-h", "-o", sam_output, sorted_output, catch_stdout=False)
     # sam_output already ends in ".sam"; the message used to append the
     # suffix a second time and reported a path that does not exist.
     sys.stderr.write('New sorted sam file: ' + str(sam_output) + '\n')
Example #20
0
 def _generate_bam_file(self, sam_content, file_prefix):
     """Write ``sam_content`` to ``<file_prefix>.sam`` and build an indexed BAM.

     The BAM is written to ``<file_prefix>.bam``.
     """
     sam_file, bam_file = ["%s.%s" % (file_prefix, ext)
                           for ext in ("sam", "bam")]
     with open(sam_file, "w") as sam_fh:
         sam_fh.write(sam_content)
     output_option = "-o%s" % bam_file
     pysam.view("-Sb", output_option, sam_file)
     pysam.index(bam_file)
    def bam_count_reads(bam_file, aligned=False):
        """
        Wrapper to count the number of (aligned) reads in a bam file

        Returns the stripped text printed by ``samtools view -c``; it is
        not converted to an integer here.
        """
        view_args = ["-c"]
        if aligned:
            # 260 = 4 (read unmapped) + 256 (secondary alignment)
            view_args += ["-F", "260"]
        view_args.append(bam_file)
        return pysam.view(*view_args).strip()  # pylint: disable=no-member
Example #22
0
def split_reads_by_chrom(sam_file,
                         tmp_dir="/dev/shm/tmp_label_reads",
                         n_threads=1):
    """ Reads a SAM/BAM file and splits the reads into one file per chromosome.
        Returns a list of the resulting filenames.

        Args:
            sam_file: path ending in ".sam" or ".bam".
            tmp_dir: working directory; "raw" and "raw/chroms" are created
                below it.
            n_threads: value passed to samtools ``-@``.

        Raises:
            ValueError: if ``sam_file`` has neither extension.
    """

    def log(message):
        # Progress line prefixed with the current UTC time.
        ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
        print("[ %s ] %s" % (ts, message))

    def ensure_dir(path):
        # Create the directory and its parents without going through a
        # shell, so paths with spaces or metacharacters work.
        if not os.path.isdir(path):
            os.makedirs(path)

    log("Splitting SAM by chromosome...")

    tmp_dir = tmp_dir + "/raw"
    ensure_dir(tmp_dir)

    if sam_file.endswith(".sam"):
        # Convert to bam
        log("-----Converting to bam....")
        bam_file = tmp_dir + "/all_reads.bam"
        pysam.view("-b",
                   "-S",
                   "-@",
                   str(n_threads),
                   "-o",
                   bam_file,
                   sam_file,
                   catch_stdout=False)
    elif sam_file.endswith(".bam"):
        bam_file = sam_file
    else:
        raise ValueError("Please provide a .sam or .bam file")

    # Index the file if no index exists
    if not os.path.isfile(bam_file + ".bai"):
        log("-----Sorting and indexing...")
        sorted_bam = tmp_dir + "/all_reads.sorted.bam"
        pysam.sort("-@", str(n_threads), "-o", sorted_bam, bam_file)
        bam_file = sorted_bam
        pysam.index(bam_file)

    # Open bam file
    tmp_dir += "/chroms"
    ensure_dir(tmp_dir)
    read_files = []
    log("-----Writing chrom files...")
    with pysam.AlignmentFile(bam_file, "rb") as bam:
        # Iterate over chromosomes and write a reads file for each;
        # contigs without mapped reads are skipped.
        chromosomes = [stats.contig for stats in bam.get_index_statistics()
                       if stats.mapped > 0]
        for chrom in chromosomes:
            fname = tmp_dir + "/" + chrom + ".sam"
            with pysam.AlignmentFile(fname, "w", template=bam) as o:
                for record in bam.fetch(chrom):
                    o.write(record)
            read_files.append(fname)

    return read_files
Example #23
0
def make_bam_view(sam_path):
    """Convert ``sam_path`` to a BAM file next to it and return the BAM path."""
    sam_dir, sam_file = os.path.split(sam_path)
    bam_file = os.path.splitext(sam_file)[0] + '.bam'
    bam_path = os.path.join(sam_dir, bam_file)

    # NOTE(review): save_stdout receives the bare file name, not bam_path -
    # confirm this is intended.
    view_args = ['-o', bam_path, '-bh', sam_path]
    pysam.view(*view_args, save_stdout=bam_file)
    return bam_path
Example #24
0
def writeSplitBam(two):
    """Write the reads of one chromosome to their own indexed BAM file.

    ``two`` is indexed as ``(chrom, new_bam)``; the input BAM is the
    module-level ``bamname``. Returns ``chrom``.
    """
    chrom, new_bam = two[0], two[1]
    # The position itself is unused; the lookup still fails for a
    # chromosome that is missing from ``chrs``.
    chrs.index(chrom)

    # Split into per-chromosome bam files
    view_args = [bamname, chrom, '-b', '-o', new_bam]
    pysam.view(*view_args, catch_stdout=False)
    pysam.index(new_bam)
    return chrom
Example #25
0
def convert_bam_to_sam(in_file):
    """Convert a BAM file to SAM and return the SAM path.

    The output name is ``in_file`` with its suffix replaced by ".sam". An
    existing output file is returned as is, without converting again.

    Raises:
        ValueError: if ``is_bam`` rejects ``in_file``.
    """
    if not is_bam(in_file):
        # The message used to name a different function (convert_sam_to_bam).
        raise ValueError("Non BAM file passed to convert_bam_to_sam: "
                         "%s" % (in_file))
    out_file = replace_suffix(in_file, ".sam")
    if file_exists(out_file):
        return out_file
    # Write through file_transaction and its temporary output path.
    with file_transaction(out_file) as tmp_out_file:
        pysam.view("-h", "-o" + tmp_out_file, in_file)
    return out_file
Example #26
0
def compare_contents(file1, file2, ftype="bed"):
    """Return True if both files have the same content.

    For ``ftype == "bed"`` the files are compared line by line as text;
    for any other type the outputs of ``pysam.view`` are compared.
    """
    if ftype != "bed":
        return pysam.view(file1) == pysam.view(file2)

    loaded = []
    for path in (file1, file2):
        with open(path) as handle:
            loaded.append(handle.readlines())
    first, second = loaded
    return first == second
Example #27
0
def bam2sam(in_file):
    """
    converts a bam file to a sam file
    bam2sam("file.bam") -> "file.sam"

    An existing output file is reused without converting again.
    """
    out_file = replace_suffix(in_file, ".sam")
    if not file_exists(out_file):
        with file_transaction(out_file) as tmp_out_file:
            output_option = "-o" + tmp_out_file
            pysam.view("-h", output_option, in_file)
    return out_file
Example #28
0
def get_region(infile, ref_name, start, end, outfile):
    '''Writes BAM file of the given region

    ``start`` and ``end`` are each shifted by one when the samtools region
    string is built; reads with flag 0x4 (unmapped) are left out.
    '''
    region = ref_name + ':{}-{}'.format(start + 1, end + 1)
    view_args = ['-b', '-F', '0x4', '-o', outfile, infile, region]
    pysam.view(*view_args, catch_stdout=False)
Example #29
0
def get_region(infile, ref_name, start, end, outfile):
    """Writes BAM file of the given region

    ``start`` and ``end`` are each shifted by one when the samtools region
    string is built; reads with flag 0x4 (unmapped) are left out.
    """
    first, last = start + 1, end + 1
    region = ref_name + ":" + "-".join((str(first), str(last)))
    view_args = ("-b", "-F", "0x4", "-o", outfile, infile, region)
    pysam.view(*view_args, catch_stdout=False)
Example #30
0
    def slice(self, region=None):
        """Return the ``samtools view -b -h`` output for this file.

        Args:
            region: optional mapping with 'chr', 'start' and 'end' keys;
                when given, the output is restricted to chr:start-end.

        Returns:
            Whatever ``pysam.view`` returns for the assembled arguments.
        """
        args = ["-b", "-h", self.filename]
        if region:
            range_string = region['chr'] + ":" + str(
                region['start']) + "-" + str(region['end'])
            args.append(range_string)

        # A second, discarded pysam.view call used to sit here; it read
        # range_string even when no region was given (NameError) and
        # otherwise only repeated the work.
        return pysam.view(*args)
Example #31
0
def bam2sam(in_file):
    """
    converts a bam file to a sam file
    bam2sam("file.bam") -> "file.sam"

    An existing output file is reused without converting again.
    """
    sam_path = replace_suffix(in_file, ".sam")
    already_converted = file_exists(sam_path)
    if not already_converted:
        with file_transaction(sam_path) as tmp_out_file:
            pysam.view("-h", "-o" + tmp_out_file, in_file)
    return sam_path
Example #32
0
def ngmlral():
    """Align reads with ngmlr and produce a sorted, indexed BAM file.

    Works on module-level settings (``work_dir``, ``prefix``, ``th``,
    ``ref``, ``fast_Q_file``, ``stats_trigger``, ``out_dir``). The output
    lives in ``<work_dir>/ngmlr_alignments``, which is finally moved to
    ``<work_dir>/<out_dir>``. An existing BAM is not rebuilt.
    """
    ngmlr_ext_dir = os.path.join(work_dir, 'ngmlr_alignments')
    if not os.path.exists(ngmlr_ext_dir):
        os.makedirs(ngmlr_ext_dir)
    bam_ngmlr_file = os.path.join(ngmlr_ext_dir, (prefix + '.bam'))
    if not os.path.exists(bam_ngmlr_file):
        if stats_trigger in ['y', 'yes']:
            # Stream the ngmlr output straight into sam_parser.
            log.debug('[Alignment][ngmlral] - ngmlr -t %s -r %s -q %s -x ont' %
                      (th, ref, fast_Q_file))
            ngmlrline = subprocess.Popen([
                'ngmlr', '-t',
                str(th), '-r',
                str(ref), '-q',
                str(fast_Q_file), '-x ont'
            ],
                                         stdout=subprocess.PIPE)
            PairDict = sam_parser(ngmlrline, ngmlr_ext_dir)
        else:
            # One consistent name for the SAM path: it used to be defined
            # as "sam_nglmr_file" but read as "sam_ngmlr_file", which
            # raised NameError in this branch.
            sam_ngmlr_file = os.path.join(ngmlr_ext_dir, (prefix + '.sam'))
            log.debug(
                '[Alignment][ngmlral] - ngmlr -t %s -r %s -q %s -o %s -x ont' %
                (th, ref, fast_Q_file, sam_ngmlr_file))
            ngmlrline = subprocess.Popen([
                'ngmlr', '-t',
                str(th), '-r',
                str(ref), '-q',
                str(fast_Q_file), '-o',
                str(sam_ngmlr_file), '-x ont'
            ],
                                         stdout=subprocess.PIPE).wait()
            outputfilebam = os.path.join(ngmlr_ext_dir, (prefix + '.tmp.bam'))
            log.debug(
                '[Alignment][ngmlral] - samtools view -Sb -@ %s %s -o %s' %
                (th, sam_ngmlr_file, outputfilebam))
            pysam.view("-Sb",
                       "-@%s" % str(th),
                       sam_ngmlr_file,
                       "-o%s" % outputfilebam,
                       catch_stdout=False)
            # The SAM is only an intermediate.
            os.remove(sam_ngmlr_file)
            pysam.sort(outputfilebam,
                       "-o%s" % bam_ngmlr_file,
                       catch_stdout=False)
            log.debug('[Alignment][ngmlral] - samtools index %s -@%s' %
                      (bam_ngmlr_file, str(th)))
            pysam.index(bam_ngmlr_file, "-@%s" % str(th), catch_stdout=False)
            os.remove(outputfilebam)
    else:
        log.warning('[Alignment][ngmlral] - file %s already exists!' %
                    bam_ngmlr_file)
    try:
        shutil.move(ngmlr_ext_dir, os.path.join(work_dir, out_dir))
    except shutil.Error:
        log.error("Unable to move %s" % ngmlr_ext_dir)
Example #33
0
def filterBam(bam_file, size_min, size_max, bam_filter_file, sample):
    """Keep only the reads whose read group is ``size<N>`` for N in a range.

    Args:
        bam_file: input BAM; it is indexed first if no ".bai" exists.
        size_min: lowest size kept (converted with ``int``).
        size_max: highest size kept, inclusive (converted with ``int``).
        bam_filter_file: output BAM, indexed after writing.
        sample: not used by this function.
    """
    # Local import: only needed to clean up the temporary file.
    import os

    if not Path(bam_file + ".bai").exists():
        pysam.index(bam_file)  # index the BAM if needed
    # Create the file listing the read groups to keep:
    with NamedTemporaryFile(mode='w', delete=False) as fp:
        for i in range(int(size_min), int(size_max) + 1, 1):
            print(f"size{i}")
            fp.write(f"size{i}\n")
    try:
        # Use pysam view to generate a BAM holding only the read groups
        # listed above.
        pysam.view("-b", "-h", "-R", fp.name, "-o", bam_filter_file, bam_file, catch_stdout=False)
    finally:
        # delete=False keeps the list on disk for samtools; remove it
        # afterwards so temporary files do not pile up.
        os.remove(fp.name)
    pysam.index(bam_filter_file)
Example #34
0
def generate_sample_bams(n, filename_prefix, cycles, barcodes, barcode_len=8,
                         gc_pos=0.7, gc_neg=0.3, length=250):
    """Generate the sample SAM files and convert each of them to BAM.

    One ``<filename_prefix><cycle>_<barcode>.bam`` is produced for every
    cycle/barcode combination; all other arguments are forwarded unchanged
    to ``generate_sample_sams``.
    """
    generate_sample_sams(n, filename_prefix, cycles, barcodes, barcode_len, gc_pos, gc_neg, length)

    for cycle in cycles:
        for barcode in barcodes:
            stem = filename_prefix + "{}_{}".format(cycle, barcode)
            bam_name = stem + ".bam"
            sam_name = stem + ".sam"
            # create the file upfront, so pysam can open it
            open(bam_name, 'w').close()
            pysam.view("-bS", "-o", bam_name, sam_name, save_stdout=bam_name)
Example #35
0
 def good_header(self):
   """Check that the input file can be read through samtools view.

   The header and the content are read with pysam and written to
   ``<outputFileRoot>.header``. Returns True on success; on any error the
   error is printed and False is returned.
   """
   try:  # Test file integrity
     header = pysam.view("-H", self.inputFilePath)
     content = pysam.view(self.inputFilePath)
     # The with-block closes the file even if a write fails.
     with open(self.outputFileRoot + ".header", "w+") as outFile:
       outFile.write(''.join(header))
       outFile.write(''.join(content))
     return True
   except Exception as e:
     # Deliberate catch-all: any failure means the file is not usable.
     # print(...) and sys.stderr.write behave the same on Python 2 and 3.
     print(str(e))
     sys.stderr.write("Cannot read binary header, please check BAM file.)\n")
     return False
Example #36
0
def _bam_to_sam(local_name, temp_name):
    """Convert two BAM files to temporary SAM files.

    Returns ``(temp_local, temp_temp)``: an open NamedTemporaryFile object
    holding the SAM for ``local_name`` and the path of the SAM for
    ``temp_name``. ``temp_name`` itself is removed afterwards.
    """
    temp_local = tempfile.NamedTemporaryFile(suffix='.sam', prefix='local_bam_converted_to_sam_')
    handle, temp_temp = tempfile.mkstemp(suffix='.sam', prefix='history_bam_converted_to_sam_')
    # Only the path is needed; samtools writes the file itself.
    os.close(handle)

    conversions = (
        (temp_local.name, local_name,
         "Converting local (test-data) BAM to SAM failed: %s"),
        (temp_temp, temp_name,
         "Converting history BAM to SAM failed: %s"),
    )
    for sam_path, bam_path, failure in conversions:
        try:
            pysam.view('-h', '-o%s' % sam_path, bam_path)
        except Exception as e:
            raise Exception(failure % e)

    os.remove(temp_name)
    return temp_local, temp_temp
Example #37
0
def combine_samfiles(multi=False, clipped=False):
	"""Combine the unclipped (and optionally clipped) SAM file into one name-sorted SAM.

	Works on fixed file names in the current directory:
	"unclipped_multimap.sam"/"clipped_multimap.sam" -> "multi_mapped.sam" when
	``multi`` is true, otherwise
	"unclipped_unique.sam"/"clipped_unique.sam" -> "unique_mapped.sam".

	multi -- select the multi-mapped instead of the unique file set
	clipped -- also convert and merge the clipped SAM file

	NOTE(review): ``out`` is never closed explicitly in this function.
	NOTE(review): with clipped=True and an empty clipped SAM neither merge
	nor sort runs here, yet "tmp2.bam" is read below - confirm.
	"""
	#Seperate out clipped and unclipped!
	#Look at naming!
	if multi:
		sam1 = "unclipped_multimap.sam"
		sam2 = "clipped_multimap.sam"
		bam1 = "unclipped_multimap.bam"
		bam2 = "clipped_multimap.bam"
		out = open("multi_mapped.sam", "w")
	else:
		sam1 = "unclipped_unique.sam"
		sam2 = "clipped_unique.sam"
		bam1 = "unclipped_unique.bam"
		bam2 = "clipped_unique.bam"
		out = open("unique_mapped.sam", "w")
	#Convert unclipped sam to bam

	#Converts sam to bam
	# The BAM data returned by pysam.view is written out piece by piece.
	bam1_o = open(bam1, "w")
	a = pysam.view("-bS", sam1)
	for r in a:                                     
		bam1_o.write(r)
	bam1_o.close()
	#Converts clipped sam to bam
	if clipped == True:
		if os.stat(sam2).st_size > 0: #Checking file is not empty
			try:
				bam2_o = open(bam2, "w")
				b = pysam.view("-bS", sam2)
				for r in b:                                     
					bam2_o.write(r)
				bam2_o.close()
			except:
				# NOTE(review): the bare except hides every error, and the
				# merge below still runs afterwards.
				print "Samtools raised error, will assume Sam file is empty!"
			#Merge clipped and unclipped
			input_filenames = ["-f", bam1, bam2]
			output_filename = "tmp1.bam"
			merge_parameters = [output_filename] + input_filenames
			pysam.merge(*merge_parameters)
			# presumably "tmp2" is an output prefix that yields "tmp2.bam",
			# which is the file read further down - verify against the pysam
			# version in use
			pysam.sort("-n", "tmp1.bam", "tmp2" )
			subprocess.call(["rm", sam2, bam2])
	else:
		#If no clipped bam, just sort 
		pysam.sort("-n", bam1, "tmp2" )
	#Converts file to sam
	d = pysam.view("-h", "tmp2.bam")
	for r in d:                                     
		out.write(r)
	# Clean up intermediates; "tmp1.bam" is listed even when the merge branch
	# did not run.
	subprocess.call(["rm", "tmp2.bam", "tmp1.bam", sam1, bam1])
def bed_from_sam(samIN, name):
    """Convert a SAM file into a three-column text file written to ``name``.

    The SAM is converted to BAM, sorted and indexed; ``samIN`` is deleted.
    For every read whose mapping quality exceeds the module-level ``mapQ``
    one line "<first field of str(read)>\\t<reference_start>\\t<reference_end>"
    is written.

    Args:
        samIN: input SAM file (removed by this function).
        name: output path; it also serves as the scratch BAM path and as
            the prefix of the sorted BAM ("<name>.bam").
    """
    pysam.view("-bS", "-o"+name, samIN)
    pysam.sort(name, name)
    Bamname = name+".bam"
    pysam.index(Bamname)
    #delete Sam file
    os.remove(samIN)
    bamfile = pysam.AlignmentFile(Bamname, "rb")
    try:
        # Open the output only now: `name` was used as the scratch BAM
        # above, so a handle opened earlier (with the invalid mode 'wa')
        # wrote over that file without truncating it.
        with open(name, 'w') as bed:
            for read in bamfile.fetch():
                if read.mapq > mapQ:
                    #this may eliminate reads that aligned more than once, to cout how may use the XS flag
                    line = "%s\t%i\t%i\n" % (str(read).split("\t")[0], read.reference_start, read.reference_end)
                    bed.write(line)
    finally:
        bamfile.close()
Example #39
0
def check_inputs(file):
	"""check that input files exist and identify appropriate naming convention for filtering reads by chromosome

	Returns ``(samfile, head)``. ``samfile`` is the opened pysam.Samfile,
	or None if opening raised IOError. ``head`` is "chr" if the probe
	query for "chr11:1000-1100" succeeded, otherwise "".
	"""
	# Bound up front so the return below works after a handled IOError
	# (it used to fail with an unbound local).
	samfile = None
	try:
		samfile = pysam.Samfile(file, "rb")
	except IOError:
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print('file not found')

	try:
		pysam.view("-X", file, "chr11:1000-1100")
		head = "chr"
	# NOTE(review): only NameError selects the prefix-less convention;
	# confirm which exception pysam raises for an unknown contig.
	except NameError:
		head = ""

	return samfile, head
Example #40
0
def _get_sort_order(in_bam, config):
    """Return the sort order (SO) from the @HD header line of ``in_bam``.

    Args:
        in_bam: alignment file whose header is read with ``samtools view -H``.
        config: not used by this function.

    Returns:
        The value of the SO field, or None if there is no @HD line or it
        has no SO field.
    """
    # splitlines() copes with "\n" and "\r\n"; splitting on "\r\n" alone
    # left "\n"-terminated output as a single chunk.
    for line in pysam.view("-H", in_bam).splitlines():
        if line.startswith("@HD"):
            for keyval in line.split()[1:]:
                # Split on the first colon only, so a value that contains
                # a colon does not break the unpacking.
                key, _, val = keyval.partition(":")
                if key == "SO":
                    return val
    return None
Example #41
0
    def test_bam_extract_01(self):
        """Run ``dr-disco bam-extract`` and compare its output, as SAM, to the reference file."""
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        stem = "test_terg_02"
        input_file = TEST_DIR + stem + ".bam"
        output_file = T_TEST_DIR + stem + ".filtered.bam"
        output_file_s = T_TEST_DIR + stem + ".filtered.sam"
        test_file = TEST_DIR + stem + ".filtered.sam"

        command = ["bin/dr-disco", "bam-extract"]
        command += ["chr21:39000000-40000000", "chr5:1-2"]
        command += [output_file, input_file]

        exit_code = subprocess.call(command)
        self.assertEqual(exit_code, 0)

        # Bam2Sam
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        if not filecmp.cmp(output_file_s, test_file):
            # Print a ready-to-run diff command for the two files.
            print('diff \'' + output_file_s + '\' \'' + test_file + '\'')

        self.assertTrue(filecmp.cmp(output_file_s, test_file))
    def test_01(self):
        """Check the two fragments reported for the multilength test data."""
        basename = 'multilength_fragments_per_position_001'
        bam_path = 'tmp/' + basename + '.bam'

        # Build the BAM from the SAM test data on first use only.
        if not os.path.exists(bam_path):
            with open(bam_path, "wb") as fhq:
                fhq.write(pysam.view('-bS', 'tests/data/' + basename + ".sam"))

        args = CLI([bam_path, '--verbose'])

        args.parameters.left_padding = 0
        args.parameters.right_padding = 0

        # Expected (start, stop) positions by order of appearance.
        expected = {0: (11, 11 + 61), 1: (44, 44 + 28)}

        flaimapper = FlaiMapper(args)
        i = 0
        for region in flaimapper.regions():
            self.assertEqual(region.region[0], 'SNORD78')
            for result in region:
                if i in expected:
                    start, stop = expected[i]
                    self.assertEqual(region.region[1] + result.start, start)
                    self.assertEqual(region.region[1] + result.stop, stop)
                i += 1
        self.assertEqual(i, 2)
Example #43
0
def create_bam(filename):
    """
    Function that create a BAM file from a SAM file.

    Args :
        filename [STR] = SAM filename

    Returns:
        bamfile [STR] = BAM filename
    """
    folder, base = os.path.split(filename)
    # name of the bam file to create: the last three characters of both the
    # directory and the file name are replaced
    bamfile = "{}bam/{}bam".format(folder[:-3], base[:-3])
    # convert sam to bam using pysam
    view_args = ['-Sb', filename, '-o', bamfile]
    pysam.view(*view_args, catch_stdout=False)

    return bamfile
Example #44
0
def convert_sam_to_bam():
    """
    This method should take a newly create .sam file from alignment and
        - convert it to .bam
        - sort .bam
        - index .bam

    It runs for every id returned by ``generate_ids`` and prints the time
    taken per sample.
    """
    for sample_id in generate_ids():
        started = time()
        print('converting: %s' % sample_id)
        base_path = os.path.join(SAMPLE_DIR, sample_id)
        sam_path = os.path.join(base_path, sample_id + '-bwape.sam')
        bam_path = os.path.join(base_path, sample_id + '-bwape.bam')
        sorted_bam = bam_path + '_sorted.bam'

        bam_content = pysam.view('-bS', sam_path)
        with open(bam_path, 'w+') as bam_file:
            bam_file.writelines(bam_content)

        pysam.sort(bam_path, bam_path + '_sorted')
        pysam.index(sorted_bam)

        # indexing creates file.bam.bam. Move it to file.bam
        moves = (
            (sorted_bam, bam_path),
            (sorted_bam + '.bai', bam_path + '.bam.bai'),
        )
        for source, target in moves:
            subprocess.call("mv {0} {1}".format(source, target), shell=True)
        finished = time()
        print('completed: %.3fs' % (finished - started))
Example #45
0
def convert_bam_bed(bam, name, paired, outdir):
    """
    Filter a BAM file with ``samtools view -bq 50`` and convert the result
    to BED.

    The filtered BAM is written to <outdir>/<name>.unique.bam and the BED
    (produced with pybedtools' bam_to_bed(split=True)) to
    <outdir>/<name>.BED.

    Args:
        bam: path of the input BAM file.
        name: base name used for both output files.
        paired: when truthy, the returned count is halved.
        outdir: directory that receives the output files.

    Returns:
        The number of items yielded by the pysam.view result, integer-halved
        when ``paired`` is set.
        NOTE(review): the output is binary BAM, so what one iteration item
        represents depends on the pysam version -- confirm this is really a
        read count.
    """
    count = 0
    print("==> Converting bam to bed...\n")
    unique_bam_path = "{}/{}.unique.bam".format(outdir, name)

    # Filters for uniquely aligned reads (-q 50).
    filtered_bam = pysam.view("-bq 50", bam)
    # The with-block closes (and therefore flushes) the file before
    # pybedtools opens it below; the original left the handle open.
    with open(unique_bam_path, "wb") as outbam:
        for read in filtered_bam:
            count += 1
            outbam.write(read)

    inbam = pybedtools.BedTool(unique_bam_path)
    bed = inbam.bam_to_bed(split=True)
    bed.saveas("{}/{}.BED".format(outdir, name))

    if paired:
        # floor division keeps the count an integer on Python 3 as well
        count //= 2
    return count
Example #46
0
def main(BAM):
    """
    Stream one BAM object from Swift through fline() into HDFS using two FIFOs.

    A `swift download` shell command writes the object named BAM into a
    named pipe, pysam.view reads that pipe, and every returned row is handed
    to fline() together with a write handle on a second named pipe. A
    `hadoop fs -put` shell command reads that second pipe into
    /genome/<BAM>. Afterwards both pipes are removed and an empty local file
    named BAM is created.

    Args:
        BAM: object/file name; its fifth dot-separated field is passed to
            fline() as ``geo``.
    """
#   retrieve the region from the filename (fifth dot-separated field)
    geo=BAM.split('.')[4]
#   create two pipe files, named after BAM with its last extension replaced.
    bampipe =  BAM.rsplit('.',1)[0] + '.pipe'
    bampipetohadoop =  BAM.rsplit('.',1)[0] + '.hadooppipe'
    os.mkfifo(bampipe)
    os.mkfifo(bampipetohadoop)
#   Start 2 subprocesses, one to download the file from swift and one to pipe filtered data to hdfs
#   NOTE(review): BAM is concatenated into shell=True command strings -- confirm it is never untrusted.
    command = 'swift download GenomeData ' + BAM + ' -o -  > '  +  bampipe
    p = subprocess.Popen(command, shell=True)
    command2 = '/usr/local/hadoop/bin/hadoop fs -put -f  - < ' + bampipetohadoop +  '  /genome/' + BAM
    p2 = subprocess.Popen(command2, shell=True)
#   open the hadoop pipe for writing
    f=open(bampipetohadoop,'w')
#   read the swift pipe
    rows = pysam.view('-B', bampipe)
#   call the filter function on every row
    for r in rows:
        fline(r,f,geo)
    f.close()
#   remove pipe files.
#   NOTE(review): p and p2 are never waited on in this function, and the pipes
#   are not removed if an exception occurs above.
    os.remove(bampipe)
    os.remove(bampipetohadoop)
#   create an empty file so that the job can be restarted without processing all files again.
    open( BAM,  'w').close()
Example #47
0
	def sam_to_bam(self,samfile,bamfile):
		''' Convert a SAM file to BAM (samtools view -bS).

		samfile: path of the SAM file to read.
		bamfile: path of the BAM file to create.

		samtools writes the output file itself via -o. The binary BAM
		stream is therefore never captured as text or written through a
		text-mode file handle, which could corrupt it.
		Returns None.
		'''
		pysam.view('-bS', '-o', bamfile, samfile, catch_stdout=False)

		return
Example #48
0
def only_mapped(filename):
    """
    Write a copy of a BAM file that contains only the mapped reads.

    The output is placed in the same directory as the input; its name is
    "filtered_" followed by the input file name without its first seven
    characters.

    Args:
        filename [STR] = BAM file, containing all alignments

    Returns:
        mappedfile [STR] = BAM file with only the mapped reads
    """
    directory, basename = os.path.split(filename)
    # drop the first seven characters of the file name, prefix "filtered_"
    mappedfile = directory + '/filtered_' + basename[7:]
    # -F 4 excludes every record that has the "unmapped" flag bit set
    pysam.view('-b', '-F', '4', filename, '-o', mappedfile, catch_stdout=False)

    return mappedfile
Example #49
0
def convert_bam_bed(name, paired):
    """
    Count the items returned by ``samtools view -bq 50`` on <name>.bam.

    Args:
        name: path of the BAM file without its ".bam" extension.
        paired: when truthy, the count is halved.

    Returns:
        The number of items yielded by the pysam.view result, integer-halved
        when ``paired`` is set.
    """
    # Filters for uniquely aligned reads (-q 50).
    filtered_bam = pysam.view("-bq 50", name + ".bam")
    count = sum(1 for _ in filtered_bam)
    if paired:
        # floor division keeps the result an int on Python 3 too
        # (plain "/" would turn it into a float there)
        count //= 2
    return count
 def testIterate(self):
     '''Compare results from the iterator with those from samtools.

     Every read returned by self.samfile.fetch() must have the same
     query name as the first tab-separated field of the corresponding
     line returned by pysam.view for "ex1.bam".
     '''
     ps = list(self.samfile.fetch())
     sa = list(pysam.view("ex1.bam", raw=True))
     self.assertEqual(len(ps), len(sa),
                      "unequal number of results: %i != %i" % (len(ps), len(sa)))
     # check if the same reads are returned
     for line, (read, sam_line) in enumerate(zip(ps, sa)):
         data = sam_line.split("\t")
         # report the value that is actually compared (qname); the
         # original message printed rname instead
         self.assertEqual(read.qname, data[0],
                          "read id mismatch in line %i: %s != %s" % (line, read.qname, data[0]))
Example #51
0
    def testReturnValueData(self):
        """Check the return type of pysam.view when BAM output is requested.

        The result must be ``bytes`` on every Python version. On Python 3
        it must not be ``str``; on Python 2 it must be a ``basestring``.
        """
        # Build the argument list directly: splitting a formatted string on
        # spaces would break the path apart if DATADIR contained a space.
        args = ["-O", "BAM", os.path.join(DATADIR, "ex1.bam")]
        retval = pysam.view(*args)

        self.assertTrue(isinstance(retval, bytes))
        if IS_PYTHON3:
            self.assertFalse(isinstance(retval, str))
        else:
            self.assertTrue(isinstance(retval, basestring))
Example #52
0
	def testInput(self):
		'''Test the input file format and turn it into a sorted, indexed BAM.

		Steps: read the header (trying the file as BAM first, then as SAM),
		convert SAM input to BAM, then sort the BAM when Utils.is_sorted()
		rejects the header, and index it. self.filepath and self.header
		are updated to describe the file that is finally usable.

		Status: True: file is ready to use. When no header can be read the
		tuple (False, "None") is returned and an error is logged.
		NOTE(review): that tuple is truthy, so the result must not be tested
		with a plain ``if`` -- confirm how callers check it.
		'''
		#test if file has header
		try:
			self.header = pysam.view("-H",self.filepath)
		except Exception:
			try:
				self.header = pysam.view("-SH",self.filepath)
			except Exception:
				logging.error("Input file does not have header, please check your file. Program quit")
				return (False,"None")
		#Header test passed, test if it is BAM
		try:
			probe = gzip.open(self.filepath)
			try:
				probe.readline(10)
			finally:
				# the probe handle is only needed for this one read
				probe.close()
		except Exception:#cannot read line, should be sam
			logging.info("Input is SAM, converting to BAM...")
			bamout = ".".join(self.filepath.split(".")[0:-1])+"."+"bam"
			infile = pysam.Samfile(self.filepath,"r",header=self.header)
			outfile = pysam.Samfile(bamout,"wb",template=infile)
			try:
				for i in infile.fetch():
					outfile.write(i)
			finally:
				# close before sorting/indexing so the BAM is complete on disk
				outfile.close()
				infile.close()
			self.filepath = bamout
		#Now the infile is BAM,check if it is sorted
		if Utils.is_sorted(self.header):
			pysam.index(self.filepath)
			return True
		else:#sort the BAM
			logging.info("Input is not sorted, sorting file...")
			bamsort = ".".join(self.filepath.split(".")[0:-1])+"."+"sort"
			pysam.sort(self.filepath,bamsort)
			pysam.index(bamsort+".bam")
			self.filepath = bamsort+".bam" # change input file path
			self.header = pysam.view("-H",bamsort+".bam")
			logging.info("Input file sorted")
			return True
 def sam_to_bam(self, sam_path, bam_path_prefix):
     """Turn a SAM file into a sorted, indexed BAM file and delete the SAM.

     The result is written to ``bam_path_prefix + ".bam"``. An empty SAM
     file is handled by self._generate_empty_bam_file() instead of pysam.
     """
     if self._sam_file_is_empty(sam_path) is True:
         # pysam raises an error when asked to convert an empty SAM
         # file, so an empty BAM file carrying the same header
         # information is generated from scratch instead
         self._generate_empty_bam_file(sam_path, bam_path_prefix)
     else:
         unsorted_bam = self._temp_unsorted_bam_path(bam_path_prefix)
         sorted_bam = bam_path_prefix + ".bam"
         # SAM -> unsorted BAM
         pysam.view("-Sb", "-o{0}".format(unsorted_bam), sam_path)
         # unsorted BAM -> sorted BAM
         pysam.sort(unsorted_bam, "-o", sorted_bam)
         # index the sorted BAM
         pysam.index(sorted_bam)
         # the unsorted intermediate is no longer needed
         os.remove(unsorted_bam)
     # in both cases the SAM input is removed at the end
     os.remove(sam_path)
Example #54
0
def SAM_to_BAM(samfile_name, bamfile_name):
    '''Converts a SAM file into an ordered and indexed BAM file.

    samfile_name: path of the SAM input; its last four characters are
        replaced by "_unsorted.bam" to name the intermediate file.
    bamfile_name: path of the sorted BAM to create; a trailing ".bam" is
        optional.

    The unsorted intermediate BAM is left on disk.
    '''
    unsortedbamfile_name = samfile_name[:-4] + "_unsorted.bam"

    # Run samtools before opening the output, so a failing conversion does
    # not leave an empty file behind. The with-block closes the handle even
    # if the write itself fails.
    bam_data = pysam.view("-b", "-S", samfile_name)
    with open(unsortedbamfile_name, "wb") as bamfile:
        bamfile.write(bam_data)

    # pysam.sort is given the output name without the ".bam" suffix
    if bamfile_name.endswith(".bam"):
        bamfile_name = bamfile_name[:-4]
    pysam.sort(unsortedbamfile_name, bamfile_name)
    pysam.index(bamfile_name + ".bam")
Example #55
0
    def test_02_e(self):
        """Extracting between these two regions must yield an empty SAM."""
        source_bam = TEST_DIR + "test_terg_02.bam"
        filtered_bam = T_TEST_DIR + "test_terg_02.filtered.bam"
        filtered_sam = T_TEST_DIR + "test_terg_02.filtered.sam"

        extractor = BAMExtract(source_bam, False)
        extractor.extract("chr12:151000000-153000000", "chr5:1-2", filtered_bam)

        # Bam2Sam
        with open(filtered_sam, "w") as sam_out:
            sam_out.write(pysam.view(filtered_bam))

        with open(filtered_sam, "r") as sam_in:
            sam_text = sam_in.read()
        self.assertEqual(sam_text, "")  # empty file check
Example #56
0
    def test_02_b(self):
        """The extracted reads, dumped as SAM, must equal the reference SAM.

        When the files differ, a ready-to-run ``diff`` command line is
        printed before the assertion fails.
        """
        input_file = TEST_DIR + "test_terg_02.bam"
        output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
        output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"
        test_file = TEST_DIR + "test_terg_02.filtered.sam"

        c = BAMExtract(input_file, False)
        c.extract("chr5:1-2", "chr21:39000000-40000000", output_file)

        # Bam2Sam; the with-block closes the handle even if the write fails
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        # compare once and reuse the result for both the hint and the assert
        files_match = filecmp.cmp(output_file_s, test_file)
        if not files_match:
            # call form of print works on Python 2 and Python 3 alike
            print("diff '" + output_file_s + "' '" + test_file + "'")

        self.assertTrue(files_match)
    def test_03(self):
        """The fixed chimeric alignment, dumped as SAM, must match the reference.

        The output of ``diff`` on both files is used as the failure message.
        """
        if not os.path.exists("tmp"):
            os.mkdir("tmp")

        input_file = "tests/fix-chimeric/test_terg_03.filtered.bam"
        test_file = "tests/fix-chimeric/test_terg_03.filtered.fixed.sam"

        output_file = "tmp/test_terg_03.filtered.fixed.bam"
        output_file_s = "tmp/test_terg_03.filtered.fixed.sam"

        alignment_handle = ChimericAlignment(input_file)
        alignment_handle.convert(output_file, "tmp")

        # Bam2Sam
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        files_match = filecmp.cmp(test_file, output_file_s)
        # diff is run unconditionally, as its output is part of the message
        diff_process = subprocess.Popen(['diff', test_file, output_file_s], stdout=subprocess.PIPE)
        diff_output = diff_process.stdout.read()
        failure_msg = "diff '" + test_file + "' '" + output_file_s + "':\n" + diff_output
        self.assertTrue(files_match, msg=failure_msg)