Esempio n. 1
0
    def test_standarize_sam():
        'It test that we can add default qualities to the sanger reads'
        sam_fhand = StringIO('''@SQ\tSN:SGN-U576692\tLN:1714
@SQ\tSN:SGN-U572743\tLN:833
@RG\tID:g1\tLB:g1\tSM:g1\tPL:sanger
@RG\tID:g3\tLB:g3\tSM:g3\tPL:sanger
SGN-E200000\t64\tSGN-U572743\t317\t226\t254M24S\t*\t0\t0\tGGATGATKTTAGAG\t*\tAS:i:250\tXS:i:0\tXF:i:0\tXE:i:7\tXN:i:0\tRG:Z:g1
SGN-E40000\t0\tSGN-U576692\t1416\t207\t168M\t*\t0\t0\tAGCCTGATAA\t,,09377777\tAS:i:160\tXS:i:0\tXF:i:3\tXE:i:4\tXN:i:0\tRG:Z:g3
SGN-E40000\t20\tSGN-U576692\t1416\t207\t168M\t*\t0\t0\tAGCCTGATAA\t,,09377777\tAS:i:160\tXS:i:0\tXF:i:3\tXE:i:4\tXN:i:0\tRG:Z:g3
''')
        out_fhand = StringIO()
        standardize_sam(sam_fhand, out_fhand, 20, add_def_qual=True)
        lines = out_fhand.getvalue().splitlines()
        assert 'GGATGATNTTAGAG\t55555555555555\t' in lines[4]
        assert lines[6].startswith('SGN-E40000\t20\t*\t0\t0\t*\t*\t0\t0\t')
        assert lines[4].startswith('SGN-E200000\t0')
Esempio n. 2
0
    def run(self):
        '''It runs the analysis.'''
        self._log({'analysis_started':True})
        settings = self._project_settings
        project_path = settings['General_settings']['project_path']
        tmp_dir      = settings['General_settings']['tmpdir']

        inputs = self._get_input_fpaths()
        bam_paths = inputs['bams']
        reference_path = inputs['reference']

        output_dir = self._create_output_dirs()['result']
        merged_bam_path = VersionedPath(os.path.join(output_dir,
                                        BACKBONE_BASENAMES['merged_bam']))

        merged_bam_fpath = merged_bam_path.next_version

        #Do we have to add the default qualities to the sam file?
        #do we have characters different from ACTGN?
        add_qualities = settings['Sam_processing']['add_default_qualities']
        #memory for the java programs
        java_mem = settings['Other_settings']['java_memory']
        picard_path = settings['Other_settings']['picard_path']

        if add_qualities:
            default_sanger_quality = settings['Other_settings']['default_sanger_quality']
            default_sanger_quality = int(default_sanger_quality)
        else:
            default_sanger_quality = None

        temp_dir = NamedTemporaryDir()
        for bam_path in bam_paths:
            bam_basename = bam_path.basename
            temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                          suffix='.sam')
            sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')
            bam2sam(bam_path.last_version, temp_sam.name)
            sam_fhand = open(sam_fpath, 'w')
            # First we need to create the sam with added tags and headers
            add_header_and_tags_to_sam(temp_sam, sam_fhand)
            temp_sam.close()
            sam_fhand.close()
            #the standardization
            temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                           suffix='.sam', delete=False)
            standardize_sam(open(sam_fhand.name), temp_sam2,
                            default_sanger_quality,
                            add_def_qual=add_qualities,
                            only_std_char=True)
            temp_sam2.flush()
            shutil.move(temp_sam2.name, sam_fhand.name)

            temp_sam2.close()

        get_sam_fpaths = lambda dir_: [os.path.join(dir_, fname) for fname in os.listdir(dir_) if fname.endswith('.sam')]

        # Once the headers are ready we are going to merge
        sams = get_sam_fpaths(temp_dir.name)
        sams = [open(sam) for sam in sams]

        temp_sam = NamedTemporaryFile(suffix='.sam')
        reference_fhand = open(reference_path.last_version)
        try:
            merge_sam(sams, temp_sam, reference_fhand)
        except Exception:
            if os.path.exists(merged_bam_fpath):
                os.remove(merged_bam_fpath)
            raise
        reference_fhand.close()

        # close files
        for sam in sams:
            sam.close()
        # Convert sam into a bam,(Temporary)
        temp_bam = NamedTemporaryFile(suffix='.bam')
        sam2bam(temp_sam.name, temp_bam.name)

        # finally we need to order the bam
        #print 'unsorted.bam', temp_bam.name
        #raw_input()
        sort_bam_sam(temp_bam.name, merged_bam_fpath,
                     java_conf={'java_memory':java_mem,
                                'picard_path':picard_path}, tmp_dir=tmp_dir )
        temp_bam.close()
        temp_sam.close()
        create_bam_index(merged_bam_fpath)

        self._log({'analysis_finished':True})