def test_readgroup_to_sam():
    '''It tests that the read group header and tags are added to a sam.

    The same alignments are written to two temporary sam files that differ
    only in the prefix of their names.  Each file is passed through
    add_header_and_tags_to_sam and the generated content is checked for the
    tags expected for that prefix.
    '''
    sam_value = '''@SQ	SN:SGN-U576692	LN:1714
@SQ	SN:SGN-U572743	LN:833
SGN-E221403	0	SGN-U576692	1416	207	168M	*	0	0	AGCCTGATAAAGGTCTGCCTACGTGTTTTAAGTGGAATCCGTTTCCCCATGTCCAAACCTTCTAAATAGTTTTTTGTGTTAGTTCTTGTATGCCACATACAAAAATTAACAAACTCTTTTGCCACATATGTTCCAGCACGTCAAAGCAACATGTATTTGAGCTACTTT	558<///035EB@;550300094>>FBF>>88>BBB200>@FFMMMJJ@@755225889>0..14444::FMF@@764444448@;;84444<//,4,.,<<QFBB;::/,,,.69FBB>9:2/.409;@@>88.7,//55;BDK@11,,093777777884241<:7	AS:i:160	XS:i:0	XF:i:3	XE:i:4	XN:i:0
SGN-E221664	0	SGN-U572743	317	226	254M24S	*	0	0	GGATGATCTTAGAGCTGCCATTCAAAAGATGTTAGACACTCCTGGGCCATACTTGTTGGATGTGATTGTACCTCATCAGGAGCATGTTCTACCGATGATTCCCAGTGGCGGTGCTTTCAAAAATGTGATTACGGAGGGTGATGGGAGACGTTCCTATTGACTTTGAGAAGCTACATAACTAGTTCAAGGCATTGTATTATCTAAAATAAACTTAATATTTATGTTTACTTAAAAGTTTTTCATTGTGTGAAGGAAAAAAAAAAAAAAAAAAAAAAAAA	999@7<22-2***-,206433>:?9<,,,66:>00066=??EEAAA?B200002<<@@@=DB99777864:..0::@833099???<@488>></...<:B<<.,,8881288@BBDDBD885@@;;9:/9.,,,99B99233885558=?DKKKDDAA??DKBB=440/0<8?DEDFBB??6@152@@FBMFIIDDDDDDKKKOK@@@@DD:N688BBDDDBBBKKDEDDBN977?<9<111:<??==BKMPKKBB==99>QQYYYYYYYYYYYYQQ	AS:i:250	XS:i:0	XF:i:0	XE:i:7	XN:i:0
'''

    # (prefix of the input file name, strings that must be in the output)
    cases = (
        ('lb_group1.', ('RG:Z:group1', 'SM:group1')),
        ('sm_sample1.lb_group1.', ('RG:Z:sample1_group1', 'SM:sample1')),
    )

    for prefix, expected_tags in cases:
        insam = NamedTemporaryFile(prefix=prefix, suffix='.sam')
        outsam = NamedTemporaryFile()
        try:
            insam.write(sam_value)
            # The alignments may still be in the write buffer; put them on
            # disk before another piece of code works with the file.
            insam.flush()

            add_header_and_tags_to_sam(insam, outsam)

            # The result is read back through the file name, not through the
            # handle that was written to, so flush that handle first.
            outsam.flush()
            with open(outsam.name) as result_fhand:
                out_content = result_fhand.read()
        finally:
            # closing the temporary files also removes them
            insam.close()
            outsam.close()

        for tag in expected_tags:
            assert tag in out_content, \
                   '%r not found for prefix %r' % (tag, prefix)
def add_header_and_tags_bams(work_dir, output_dir):
    '''It writes a sam with header and tags for every bam in work_dir.

    For each entry of work_dir whose name ends in '.bam' the bam is converted
    with bam2sam into a temporary sam, and that temporary sam is passed to
    add_header_and_tags_to_sam together with an output file named like the
    bam, but with the '.sam' extension, inside output_dir.  Entries with any
    other extension are ignored.

    Arguments:
        work_dir: directory that is listed looking for the bam files.
        output_dir: directory in which the sam files are created.

    It returns nothing.
    '''
    for bam in os.listdir(work_dir):
        if not bam.endswith('.bam'):
            continue

        # The bam name without its extension.  The temporary sam keeps it as
        # the prefix of its own name.
        # NOTE(review): presumably add_header_and_tags_to_sam derives the
        # read group from that file name -- confirm against its definition.
        prefix = bam.rsplit('.', 1)[0]

        sam = open(os.path.join(output_dir, prefix + '.sam'), 'w')
        try:
            temp_sam = NamedTemporaryFile(prefix='%s.' % prefix,
                                          suffix='.sam')
            try:
                bam2sam(os.path.join(work_dir, bam), temp_sam.name)
                add_header_and_tags_to_sam(temp_sam, sam)
            finally:
                # closing the temporary file also removes it, even when one
                # of the steps above fails
                temp_sam.close()
        finally:
            sam.close()
Example #3
0
    def run(self):
        '''It runs the analysis.

        Every input bam is converted to sam and passed through
        add_header_and_tags_to_sam and standardize_sam.  The resulting sams
        are merged with merge_sam using the reference, the merged sam is
        converted to bam, sorted into the next version of the merged bam path
        and finally indexed.  The start and the end of the analysis are
        logged.

        If merge_sam fails, a file found in the destination path of the merged
        bam is removed and the error is raised again.  All the file handles
        opened here are closed even when one of the steps fails.
        '''
        self._log({'analysis_started':True})
        settings = self._project_settings
        # directory handed over to the sorting step
        tmp_dir = settings['General_settings']['tmpdir']

        inputs = self._get_input_fpaths()
        bam_paths = inputs['bams']
        reference_path = inputs['reference']

        output_dir = self._create_output_dirs()['result']
        merged_bam_path = VersionedPath(os.path.join(output_dir,
                                        BACKBONE_BASENAMES['merged_bam']))
        merged_bam_fpath = merged_bam_path.next_version

        #Do we have to add the default qualities to the sam file?
        #do we have characters different from ACTGN?
        add_qualities = settings['Sam_processing']['add_default_qualities']
        #memory for the java programs
        java_mem = settings['Other_settings']['java_memory']
        picard_path = settings['Other_settings']['picard_path']

        if add_qualities:
            default_sanger_quality = int(
                         settings['Other_settings']['default_sanger_quality'])
        else:
            default_sanger_quality = None

        # The processed sams are gathered in this directory before the merge
        sam_dir = NamedTemporaryDir()
        for bam_path in bam_paths:
            bam_basename = bam_path.basename
            sam_fpath = os.path.join(sam_dir.name, bam_basename + '.sam')

            # First we need to create the sam with added tags and headers
            temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                          suffix='.sam')
            try:
                bam2sam(bam_path.last_version, temp_sam.name)
                sam_fhand = open(sam_fpath, 'w')
                try:
                    add_header_and_tags_to_sam(temp_sam, sam_fhand)
                finally:
                    sam_fhand.close()
            finally:
                temp_sam.close()

            #the standardization
            # It is written to a file that is not removed on close because
            # that file replaces the tagged sam once it is complete.
            temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                           suffix='.sam', delete=False)
            try:
                tagged_fhand = open(sam_fpath)
                try:
                    standardize_sam(tagged_fhand, temp_sam2,
                                    default_sanger_quality,
                                    add_def_qual=add_qualities,
                                    only_std_char=True)
                finally:
                    tagged_fhand.close()
            finally:
                # closing writes whatever is left in the buffer
                temp_sam2.close()
            shutil.move(temp_sam2.name, sam_fpath)

        # Once the headers are ready we are going to merge
        sam_fpaths = [os.path.join(sam_dir.name, fname)
                      for fname in os.listdir(sam_dir.name)
                      if fname.endswith('.sam')]

        temp_sam = NamedTemporaryFile(suffix='.sam')
        try:
            sams = []
            try:
                for sam_fpath in sam_fpaths:
                    sams.append(open(sam_fpath))
                reference_fhand = open(reference_path.last_version)
                try:
                    merge_sam(sams, temp_sam, reference_fhand)
                except Exception:
                    if os.path.exists(merged_bam_fpath):
                        os.remove(merged_bam_fpath)
                    raise
                finally:
                    reference_fhand.close()
            finally:
                # close files
                for sam in sams:
                    sam.close()

            # The merged sam is read through its name from now on, so the
            # buffered content has to be on disk.
            temp_sam.flush()

            # Convert sam into a bam,(Temporary)
            temp_bam = NamedTemporaryFile(suffix='.bam')
            try:
                sam2bam(temp_sam.name, temp_bam.name)

                # finally we need to order the bam
                sort_bam_sam(temp_bam.name, merged_bam_fpath,
                             java_conf={'java_memory':java_mem,
                                        'picard_path':picard_path},
                             tmp_dir=tmp_dir)
            finally:
                temp_bam.close()
        finally:
            temp_sam.close()

        create_bam_index(merged_bam_fpath)

        self._log({'analysis_finished':True})