def test_standarize_sam():
    '''It tests that standardize_sam adds default qualities to sanger reads.

    The fixture is a small SAM file with four header lines followed by
    three alignment records, so the records are expected at output line
    indexes 4, 5 and 6. The checks done on the output are:
        - the first read, which has no qualities ('*') and a 'K' in its
          sequence, comes out with the sequence 'GGATGATNTTAGAG' and a
          quality string made of fourteen '5' characters,
        - the first read flag changes from 64 to 0,
        - the third read (flag 20) is written with its reference,
          position, mapping quality and cigar fields blanked.
    '''
    # Every header line and every alignment is written as its own item
    # and joined with explicit newlines: the assertions below index the
    # output by line number, so the one-record-per-line layout of the
    # input must not depend on how the literal is wrapped in this file.
    sam_lines = [
        '@SQ\tSN:SGN-U576692\tLN:1714',
        '@SQ\tSN:SGN-U572743\tLN:833',
        '@RG\tID:g1\tLB:g1\tSM:g1\tPL:sanger',
        '@RG\tID:g3\tLB:g3\tSM:g3\tPL:sanger',
        'SGN-E200000\t64\tSGN-U572743\t317\t226\t254M24S\t*\t0\t0\t'
        'GGATGATKTTAGAG\t*\t'
        'AS:i:250\tXS:i:0\tXF:i:0\tXE:i:7\tXN:i:0\tRG:Z:g1',
        'SGN-E40000\t0\tSGN-U576692\t1416\t207\t168M\t*\t0\t0\t'
        'AGCCTGATAA\t,,09377777\t'
        'AS:i:160\tXS:i:0\tXF:i:3\tXE:i:4\tXN:i:0\tRG:Z:g3',
        'SGN-E40000\t20\tSGN-U576692\t1416\t207\t168M\t*\t0\t0\t'
        'AGCCTGATAA\t,,09377777\t'
        'AS:i:160\tXS:i:0\tXF:i:3\tXE:i:4\tXN:i:0\tRG:Z:g3',
    ]
    sam_fhand = StringIO('\n'.join(sam_lines) + '\n')
    out_fhand = StringIO()

    # 20 is the default quality to assign to the reads without qualities
    standardize_sam(sam_fhand, out_fhand, 20, add_def_qual=True)

    lines = out_fhand.getvalue().splitlines()
    assert 'GGATGATNTTAGAG\t55555555555555\t' in lines[4]
    assert lines[6].startswith('SGN-E40000\t20\t*\t0\t0\t*\t*\t0\t0\t')
    assert lines[4].startswith('SGN-E200000\t0')
def run(self):
    '''It runs the analysis.

    For every input bam it:
        - converts the bam into a temporary sam (bam2sam),
        - writes a copy with the header and tags added
          (add_header_and_tags_to_sam) into a temporary directory,
        - standardizes that sam (standardize_sam), adding the default
          qualities when the project settings ask for it.
    Then all the resulting sams are merged (merge_sam) using the
    reference, the merged sam is converted into a bam (sam2bam), the bam
    is sorted into the next version of the merged bam (sort_bam_sam) and
    finally the merged bam is indexed (create_bam_index).

    The start and the end of the analysis are logged with self._log.
    Any exception raised by the steps is propagated to the caller once
    the files opened here have been closed.
    '''
    self._log({'analysis_started': True})
    settings = self._project_settings
    tmp_dir = settings['General_settings']['tmpdir']

    inputs = self._get_input_fpaths()
    bam_paths = inputs['bams']
    reference_path = inputs['reference']

    output_dir = self._create_output_dirs()['result']
    merged_bam_path = VersionedPath(os.path.join(output_dir,
                                            BACKBONE_BASENAMES['merged_bam']))
    merged_bam_fpath = merged_bam_path.next_version

    # Do we have to add the default qualities to the sam file?
    add_qualities = settings['Sam_processing']['add_default_qualities']
    # memory and path for the java programs
    java_mem = settings['Other_settings']['java_memory']
    picard_path = settings['Other_settings']['picard_path']

    if add_qualities:
        default_sanger_quality = int(
                       settings['Other_settings']['default_sanger_quality'])
    else:
        default_sanger_quality = None

    temp_dir = NamedTemporaryDir()
    for bam_path in bam_paths:
        bam_basename = bam_path.basename
        sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')

        # First we need to create the sam with added tags and headers
        temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                      suffix='.sam')
        try:
            bam2sam(bam_path.last_version, temp_sam.name)
            with open(sam_fpath, 'w') as sam_fhand:
                add_header_and_tags_to_sam(temp_sam, sam_fhand)
        finally:
            temp_sam.close()

        # The standardization. It is written into a second temporary file
        # that replaces the sam in the temporary directory once it is
        # complete. delete=False is needed because the file is moved, so
        # in case of error it has to be removed by hand.
        temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                       suffix='.sam', delete=False)
        try:
            try:
                with open(sam_fpath) as tagged_sam_fhand:
                    standardize_sam(tagged_sam_fhand, temp_sam2,
                                    default_sanger_quality,
                                    add_def_qual=add_qualities,
                                    only_std_char=True)
            finally:
                # closing flushes the content, it should be done before
                # moving the file
                temp_sam2.close()
        except Exception:
            if os.path.exists(temp_sam2.name):
                os.remove(temp_sam2.name)
            raise
        shutil.move(temp_sam2.name, sam_fpath)

    # Once the headers are ready we are going to merge
    sam_fpaths = [os.path.join(temp_dir.name, fname)
                  for fname in os.listdir(temp_dir.name)
                  if fname.endswith('.sam')]

    temp_sam = NamedTemporaryFile(suffix='.sam')
    try:
        sams = []
        reference_fhand = open(reference_path.last_version)
        try:
            # the sams are opened one by one to be able to close the
            # already opened ones if one of them fails
            for sam_fpath in sam_fpaths:
                sams.append(open(sam_fpath))
            try:
                merge_sam(sams, temp_sam, reference_fhand)
            except Exception:
                if os.path.exists(merged_bam_fpath):
                    os.remove(merged_bam_fpath)
                raise
        finally:
            # close files
            reference_fhand.close()
            for sam in sams:
                sam.close()

        # sam2bam reads the merged sam using its path, so everything
        # should be on disk by now
        temp_sam.flush()

        # Convert sam into a bam,(Temporary)
        temp_bam = NamedTemporaryFile(suffix='.bam')
        try:
            sam2bam(temp_sam.name, temp_bam.name)

            # finally we need to order the bam
            sort_bam_sam(temp_bam.name, merged_bam_fpath,
                         java_conf={'java_memory': java_mem,
                                    'picard_path': picard_path},
                         tmp_dir=tmp_dir)
        finally:
            temp_bam.close()
    finally:
        temp_sam.close()

    create_bam_index(merged_bam_fpath)

    self._log({'analysis_finished': True})