def map_reads_with_gmap(reference_fpath, reads_fpath, out_bam_fpath,
                        parameters):
    '''It maps the reads with gmap and writes the alignments to a bam file.

    reference_fpath -- path to the reference file. The gmap database is named
                       after the file name up to its first dot and it is
                       looked for in the directory of the reference; when its
                       .chromosome file is missing create_gmap_reference is
                       called.
    reads_fpath     -- path to the reads file given to gmap.
    out_bam_fpath   -- path of the bam file to create.
    parameters      -- dict with the mapping options:
        'threads'        required, a false value leaves gmap's -t option out.
        'tmp_dir'        optional, directory for the temporary sam files.
        'unmapped_fhand' optional, when it is not None the sam produced by
                         gmap goes through get_out_unmapped, together with
                         this file, before the conversion to bam.

    The temporary sam files are closed even if any of the steps fails.
    '''
    threads = parameters['threads']
    tmp_dir = parameters.get('tmp_dir')
    unmapped_fhand = parameters.get('unmapped_fhand')

    reference_dir, reference_file_name = os.path.split(reference_fpath)
    reference_name = reference_file_name.split('.')[0]
    if not reference_dir:
        reference_dir = '.'

    if not os.path.exists(os.path.join(reference_dir, reference_name,
                                       reference_name + '.chromosome')):
        create_gmap_reference(reference_dir, reference_fpath, reference_name,
                              parameters)

    cmd = ['gmap', '-d', reference_name, '-D', reference_dir, '-f', 'samse']
    # this gmap option doesn't detect deletions close to introns
    cmd.append('--canonical-mode=0')
    if threads:
        cmd.extend(['-t', str(threads)])
    cmd.append(reads_fpath)

    # every temporary sam is tracked here, otherwise the first one would be
    # left open once the unmapped filter replaces it
    tmp_fhands = []
    try:
        out_sam_fhand = NamedTemporaryFile(suffix='.sam', dir=tmp_dir)
        tmp_fhands.append(out_sam_fhand)
        call(cmd, stdout=out_sam_fhand, raise_on_error=True)

        if unmapped_fhand is not None:
            filtered_sam_fhand = NamedTemporaryFile(dir=tmp_dir,
                                                    suffix='.sam')
            tmp_fhands.append(filtered_sam_fhand)
            get_out_unmapped(out_sam_fhand, unmapped_fhand,
                             filtered_sam_fhand)
            out_sam_fhand = filtered_sam_fhand

        sam2bam(out_sam_fhand.name, out_bam_fpath)
    finally:
        for tmp_fhand in tmp_fhands:
            tmp_fhand.close()
def main():
    '''The script itself.

    It adds the header and tags to the bams found in the working directory,
    merges the resulting sams into one, converts it to bam, sorts it into the
    output path and indexes it with samtools.

    The temporary directory and every temporary or opened file are closed
    even if one of the steps fails.
    '''
    # set parameters
    work_dir, output, reference = set_parameters()

    # make a working temporary directory
    temp_dir = NamedTemporaryDir()
    fhands_to_close = []
    try:
        # add readgroup tag to each alignment in bam
        add_header_and_tags_bams(work_dir, temp_dir.name)

        # Prepare files to merge
        sams = get_opened_sams_from_dir(temp_dir.name)
        fhands_to_close.extend(sams)
        temp_sam = NamedTemporaryFile()
        fhands_to_close.append(temp_sam)

        # merge all the sam in one
        merge_sam(sams, temp_sam, reference)
        # sam2bam reads the file by name, so nothing can be left buffered
        temp_sam.flush()

        # Convert sam into a bam (temporary)
        temp_bam = NamedTemporaryFile(suffix='.bam')
        fhands_to_close.append(temp_bam)
        sam2bam(temp_sam.name, temp_bam.name)

        # finally we need to order the bam
        sort_bam_sam(temp_bam.name, output)

        # and make an index of the bam
        call(['samtools', 'index', output], raise_on_error=True)
    finally:
        for fhand in fhands_to_close:
            fhand.close()
        temp_dir.close()
    def test_general_mapping_stats():
        'It checks the report written by bam_general_stats'
        sam_fhand = NamedTemporaryFile(suffix='.sam')
        sam_fhand.write(SAM)
        sam_fhand.flush()
        bam_fhand = NamedTemporaryFile()
        sam2bam(sam_fhand.name, bam_fhand.name)

        # these strings are expected in both reports
        shared = ('illumina\t3\t100.0',
                  'Secondary alignments: 1',
                  'Reads with one X0 best alignment: 1')

        # first report, only the bam is given
        report = StringIO()
        bam_general_stats(bam_fhand, report)
        text = report.getvalue()
        for expected in shared + ('Total number of reads: 7',):
            assert expected in text

        # second report, a file with three lines is given as unmapped reads
        report = StringIO()
        unmapped_fhand = NamedTemporaryFile()
        unmapped_fhand.write('1\n2\n3\n')
        unmapped_fhand.flush()
        bam_general_stats(bam_fhand, report, unmapped_fhand)
        text = report.getvalue()
        for expected in shared + ('Total number of reads: 10',):
            assert expected in text
Example #4
0
def sam_creator(fhand, out_bam_path, out_ref_path, read_repeats=None):
    """It creates a bam and a reference file using an alignment file.

    The format of the alignment file is:
        ref      aggttttataaaacAAAAaattaagtctacagagcaacta
        sample   aggttttataaaacAAA-aattaagtctacagagcaacta
        read1    aggttttataaaacAA-Aaattaagtctacagagcaacta
        read2    aggttttataaaacA-AAaattaagtctacagagcaacta
        read3    aggttttataaaac-AAAaattaagtctacagagcaacta

    fhand        -- the alignment file, it is read with _reads_in_alignment.
    out_bam_path -- path of the bam created from the temporary sam.
    out_ref_path -- path of the file in which the reference, without "-"
                    and "*", is written under the ">ref" header.
    read_repeats -- how many times every read is written to the sam with a
                    different name (read1, read2, ...). None means once.

    It raises a ValueError when the alignment has no reads, because there
    would be no reference sequence to write.
    """
    mapq = "250"
    ref_name = "ref"
    if read_repeats is None:
        read_repeats = 1

    ref_seq = None
    header_done = False
    count = 0
    # text mode: the sam lines are str and the default binary mode of
    # NamedTemporaryFile does not accept them in Python 3
    out_sam = NamedTemporaryFile(suffix=".sam", mode="w")
    try:
        for ref, read in _reads_in_alignment(fhand):
            ref_seq = ref.replace("-", "").replace("*", "").strip()
            if not header_done:
                out_sam.write("@SQ\tSN:%s\tLN:%d\n" % (ref_name, len(ref_seq)))
                header_done = True
            cigar = _get_cigar(ref, read)
            pos = _get_alignment_start(read)

            # everything but the read name is shared by the repeats of a
            # read, so it is built only once
            seq = read.replace("-", "").replace("*", "").strip()
            shared_fields = "\t".join(
                [
                    "0",  # flag
                    ref_name,
                    "%s" % pos,
                    mapq,
                    "%s" % cigar,
                    "*",  # rnext
                    "0",  # pnext
                    "0",  # tlen
                    seq,
                    "=" * len(seq),  # qual
                ]
            )
            for _ in range(read_repeats):
                count += 1
                out_sam.write("read%d\t%s\n" % (count, shared_fields))

        if ref_seq is None:
            raise ValueError("The alignment file does not have any read")

        # sam2bam reads the sam by name, so it has to be on disk
        out_sam.flush()
        sam2bam(out_sam.name, out_bam_path)
    finally:
        out_sam.close()

    with open(out_ref_path, "w") as ref_fhand:
        ref_fhand.write(">ref\n%s" % ref_seq)
    def testsam2bam():
        '''It tests the sam2bam function.

        A bam from the test data is converted to sam, that sam is converted
        back to bam with sam2bam and to sam again. Both sams should be equal.
        '''
        bampath = os.path.join(TEST_DATA_DIR, 'seq.bam')
        # the temporary file object is kept, otherwise only its name would be
        # reused and the sam written there would never be removed
        sam_fhand = NamedTemporaryFile(suffix='.sam')
        sampath = sam_fhand.name
        bam2sam(bampath, sampath, header=True)
        with open(sampath) as fhand:
            assert 'SN:SGN-U572743' in fhand.readline()

        newbam = NamedTemporaryFile(suffix='.bam')
        sam2bam(sampath, newbam.name)
        newsam = NamedTemporaryFile(suffix='.sam')
        bam2sam(newbam.name, newsam.name, header=True)
        with open(newsam.name) as fhand:
            newsam_content = fhand.read()
        with open(sampath) as fhand:
            oldsam_content = fhand.read()

        assert newsam_content == oldsam_content
    def test_sample_bam():
        'It tests the sample_bam function'
        # the input bam is built from the SAM fixture
        in_sam = NamedTemporaryFile(suffix='.sam')
        in_sam.write(SAM)
        in_sam.flush()
        in_bam = NamedTemporaryFile()
        sam2bam(in_sam.name, in_bam.name)
        in_bam.flush()

        sampled_bam = NamedTemporaryFile(suffix='.bam')
        sample_bam(in_bam, sampled_bam, 2)

        # the result is dumped to sam, header included, to count its lines
        sampled_sam = NamedTemporaryFile(suffix='.sam')
        bam2sam(sampled_bam.name, sampled_sam.name, header=True)
        sam_text = open(sampled_sam.name).read()
        assert len(sam_text.splitlines()) == 6
def map_reads_with_bwa(reference_fpath, reads_fpath, bam_fpath, parameters):
    '''It maps the reads to the reference using bwa and creates a sorted bam.

    reference_fpath -- path to the reference. When its .bwt file does not
                       exist create_bwa_reference is called.
    reads_fpath     -- path to the reads file given to bwa.
    bam_fpath       -- path of the coordinate sorted bam to create.
    parameters      -- dict with the mapping options:
        'colorspace'     required, a true value adds -c to bwa aln and it is
                         passed as color to create_bwa_reference.
        'reads_length'   required, 'short' (bwa aln + samse) or
                         'long' (bwa dbwtsw).
        'threads'        required, it goes through get_num_threads.
        'java_conf'      required, passed to sort_bam_sam.
        'tmp_dir'        optional, directory for the temporary files.
        'unmapped_fhand' optional, when it is not None the alignment goes
                         through get_out_unmapped, together with this file,
                         before the conversion to bam.

    It raises a ValueError, before doing any work, if reads_length is not
    'short' or 'long'. The temporary files are closed even if a step fails.
    '''
    colorspace = parameters['colorspace']
    reads_length = parameters['reads_length']
    threads = parameters['threads']
    java_conf = parameters['java_conf']
    tmp_dir = parameters.get('tmp_dir')
    unmapped_fhand = parameters.get('unmapped_fhand')

    # checked first, there is no point in indexing the reference if the
    # mapping can not be done
    if reads_length not in ('short', 'long'):
        raise ValueError('Reads length: short or long')

    threads = get_num_threads(threads)
    # the reference should have an index
    bwt_fpath = reference_fpath + '.bwt'
    if not os.path.exists(bwt_fpath):
        create_bwa_reference(reference_fpath, color=colorspace)

    output_ali = 'output.ali'
    bam_file_bam = 'bam_file.bam'
    output_sai = 'output.sai'

    tmp_fhands = []
    try:
        if reads_length == 'short':
            cmd = ['bwa', 'aln', reference_fpath, reads_fpath,
                   '-t', str(threads)]
            if colorspace:
                cmd.append('-c')
            sai_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=output_sai,
                                           mode='wb')
            tmp_fhands.append(sai_fhand)
            call(cmd, stdout=sai_fhand, raise_on_error=True)

            cmd = ['bwa', 'samse', reference_fpath, sai_fhand.name,
                   reads_fpath]
            ali_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=output_ali,
                                           mode='w')
        else:
            cmd = ['bwa', 'dbwtsw', reference_fpath, reads_fpath,
                   '-t', str(threads)]
            ali_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=output_ali)
        tmp_fhands.append(ali_fhand)
        call(cmd, stdout=ali_fhand, raise_on_error=True)

        if unmapped_fhand is not None:
            out_ali_fhand = NamedTemporaryFile(dir=tmp_dir,
                                               suffix=output_ali)
            tmp_fhands.append(out_ali_fhand)
            get_out_unmapped(ali_fhand, unmapped_fhand, out_ali_fhand)
            ali_fhand = out_ali_fhand

        # From sam to bam
        unsorted_bam = NamedTemporaryFile(dir=tmp_dir, suffix=bam_file_bam)
        tmp_fhands.append(unsorted_bam)
        sam2bam(ali_fhand.name, unsorted_bam.name)
        # sort bam file
        sort_bam_sam(unsorted_bam.name, bam_fpath, sort_method='coordinate',
                     java_conf=java_conf, strict_validation=False,
                     tmp_dir=tmp_dir)
    finally:
        for tmp_fhand in tmp_fhands:
            tmp_fhand.close()
    def test_get_read_group_info():
        'Tests get_read_group_info'
        sam_sample = '''@SQ\tSN:SGN-U576692\tLN:1714
@SQ\tSN:SGN-U572743\tLN:833
@RG\tID:g1\tLB:g1\tSM:g1\tPL:sanger
@RG\tID:g3\tLB:g3\tSM:g3\tPL:sanger
SGN-E200000\t0\tSGN-U572743\t317\t226\t14M\t*\t0\t0\tGGATGATKTTAGAG\t*\tAS:i:250\tXS:i:0\tXF:i:0\tXE:i:7\tXN:i:0\tRG:Z:g1
SGN-E40000\t0\tSGN-U576692\t1416\t207\t10M\t*\t0\t0\tAGCCTGATAA\t,,09377777\tAS:i:160\tXS:i:0\tXF:i:3\tXE:i:4\tXN:i:0\tRG:Z:g3
SGN-E40000\t20\tSGN-U576692\t1416\t207\t10M\t*\t0\t0\tAGCCTGATAA\t,,09377777\tAS:i:160\tXS:i:0\tXF:i:3\tXE:i:4\tXN:i:0\tRG:Z:g3
'''
        sam_fhand = NamedTemporaryFile(suffix='.sam')
        sam_fhand.write(sam_sample)
        sam_fhand.flush()
        bam_fhand = NamedTemporaryFile(suffix='.bam')
        sam2bam(sam_fhand.name, bam_fhand.name)
        bam_fhand.flush()
        bam = pysam.Samfile(bam_fhand.name, 'rb')
        read_gro_i = get_read_group_info(bam)
        assert read_gro_i == {'g3': {'LB': 'g3', 'SM': 'g3', 'PL': 'sanger'},
                              'g1': {'LB': 'g1', 'SM': 'g1', 'PL': 'sanger'}}
    def test_remove_unmapped_reads():
        'It tests the remove_unmapped_reads function'
        # the input bam is built from the SAM fixture
        in_sam = NamedTemporaryFile(suffix='.sam')
        in_sam.write(SAM)
        in_sam.flush()
        in_bam = NamedTemporaryFile()
        sam2bam(in_sam.name, in_bam.name)

        kept_bam = NamedTemporaryFile()
        removed_fhand = NamedTemporaryFile()
        remove_unmapped_reads(in_bam, kept_bam, removed_fhand)

        # strings expected in the file with the removed reads
        removed = open(removed_fhand.name).read()
        for expected in ('@SGN-E221406', 'FFMMMJJ@@755225889>0.'):
            assert expected in removed

        # strings expected in the output bam, once it is dumped to sam
        kept_sam = NamedTemporaryFile(suffix='.sam')
        bam2sam(kept_bam.name, kept_sam.name, header=True)
        kept = open(kept_sam.name).read()
        for expected in ('SGN-U572743', 'SGN-E221403'):
            assert expected in kept
    def test_bam_distribs():
        'It tests the distributions calculated by bam_distribs'
        sam_fhand = NamedTemporaryFile(suffix='.sam')
        sam_fhand.write(SAM)
        sam_fhand.flush()
        bam_fhand = NamedTemporaryFile()
        sam2bam(sam_fhand.name, bam_fhand.name)

        # all the checks look at the same group of the result
        group = ('platform', '454')

        # coverage, asking also for the summary
        summary = StringIO()
        coverage = bam_distribs(bam_fhand, 'coverage', summary_fhand=summary)
        assert coverage[group]['distrib'] == [2547]
        assert 'average: 0.13' in  summary.getvalue()

        # for the other kinds only the first value is checked
        other_calls = (('mapq', {}),
                       ('mapq', {'sample_size': 100}),
                       ('edit_distance', {}))
        for kind, options in other_calls:
            result = bam_distribs(bam_fhand, kind, **options)
            assert result[group]['distrib'][0] == 1
Example #11
0
    def run(self):
        '''It runs the analysis.

        Every input bam is dumped to sam, gets its header and tags added
        and is standardized. The resulting sams are merged with the
        reference into one sam that is converted to bam, sorted into the
        next version of the merged bam and indexed.

        Settings used: General_settings/tmpdir,
        Sam_processing/add_default_qualities and Other_settings/java_memory,
        picard_path and, only when the default qualities are added,
        default_sanger_quality.

        The temporary directory and the opened files are closed even if a
        step fails. If merge_sam fails an already existing merged bam
        is removed and the error is raised again.
        '''
        self._log({'analysis_started':True})
        settings = self._project_settings
        tmp_dir = settings['General_settings']['tmpdir']

        inputs = self._get_input_fpaths()
        bam_paths = inputs['bams']
        reference_path = inputs['reference']

        output_dir = self._create_output_dirs()['result']
        merged_bam_path = VersionedPath(os.path.join(output_dir,
                                        BACKBONE_BASENAMES['merged_bam']))

        merged_bam_fpath = merged_bam_path.next_version

        #Do we have to add the default qualities to the sam file?
        #do we have characters different from ACTGN?
        add_qualities = settings['Sam_processing']['add_default_qualities']
        #memory for the java programs
        java_mem = settings['Other_settings']['java_memory']
        picard_path = settings['Other_settings']['picard_path']

        if add_qualities:
            default_sanger_quality = int(
                       settings['Other_settings']['default_sanger_quality'])
        else:
            default_sanger_quality = None

        temp_dir = NamedTemporaryDir()
        try:
            for bam_path in bam_paths:
                bam_basename = bam_path.basename
                sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')

                # First we need to create the sam with added tags and headers
                temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                              suffix='.sam')
                try:
                    bam2sam(bam_path.last_version, temp_sam.name)
                    with open(sam_fpath, 'w') as sam_fhand:
                        add_header_and_tags_to_sam(temp_sam, sam_fhand)
                finally:
                    temp_sam.close()

                #the standardization, it is written to a file that is not
                #deleted on close because it replaces the sam afterwards
                temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                               suffix='.sam', delete=False)
                try:
                    with open(sam_fpath) as in_sam_fhand:
                        standardize_sam(in_sam_fhand, temp_sam2,
                                        default_sanger_quality,
                                        add_def_qual=add_qualities,
                                        only_std_char=True)
                    temp_sam2.flush()
                finally:
                    temp_sam2.close()
                shutil.move(temp_sam2.name, sam_fpath)

            sam_fpaths = [os.path.join(temp_dir.name, fname)
                          for fname in os.listdir(temp_dir.name)
                          if fname.endswith('.sam')]

            # everything opened from here on is closed in the finally
            opened_fhands = []
            try:
                # Once the headers are ready we are going to merge
                sams = []
                for sam_fpath in sam_fpaths:
                    sam = open(sam_fpath)
                    opened_fhands.append(sam)
                    sams.append(sam)

                temp_sam = NamedTemporaryFile(suffix='.sam')
                opened_fhands.append(temp_sam)
                reference_fhand = open(reference_path.last_version)
                opened_fhands.append(reference_fhand)
                try:
                    merge_sam(sams, temp_sam, reference_fhand)
                except Exception:
                    if os.path.exists(merged_bam_fpath):
                        os.remove(merged_bam_fpath)
                    raise
                # sam2bam reads the file by name, nothing can stay buffered
                temp_sam.flush()

                # Convert sam into a bam (temporary)
                temp_bam = NamedTemporaryFile(suffix='.bam')
                opened_fhands.append(temp_bam)
                sam2bam(temp_sam.name, temp_bam.name)

                # finally we need to order the bam
                sort_bam_sam(temp_bam.name, merged_bam_fpath,
                             java_conf={'java_memory':java_mem,
                                        'picard_path':picard_path},
                             tmp_dir=tmp_dir)
            finally:
                for fhand in opened_fhands:
                    fhand.close()

            create_bam_index(merged_bam_fpath)
        finally:
            temp_dir.close()

        self._log({'analysis_finished':True})