def test_readgroup_to_sam():
    '''It tests that the read group tag and header are added to a sam.

    Two scenarios are checked, each defined by the prefix of the temporary
    input file name and the strings that must appear in the output:

    - ``lb_group1.``            -> ``RG:Z:group1`` and ``SM:group1``
    - ``sm_sample1.lb_group1.`` -> ``RG:Z:sample1_group1`` and ``SM:sample1``
    '''
    # NOTE(review): SAM is a tab-delimited, one-record-per-line format; the
    # whitespace of this fixture looks collapsed to single spaces -- confirm
    # against the original fixture before relying on it.
    sam_value = '''@SQ SN:SGN-U576692 LN:1714 @SQ SN:SGN-U572743 LN:833 SGN-E221403 0 SGN-U576692 1416 207 168M * 0 0 AGCCTGATAAAGGTCTGCCTACGTGTTTTAAGTGGAATCCGTTTCCCCATGTCCAAACCTTCTAAATAGTTTTTTGTGTTAGTTCTTGTATGCCACATACAAAAATTAACAAACTCTTTTGCCACATATGTTCCAGCACGTCAAAGCAACATGTATTTGAGCTACTTT 558<///035EB@;550300094>>FBF>>88>BBB200>@FFMMMJJ@@755225889>0..14444::FMF@@764444448@;;84444<//,4,.,<<QFBB;::/,,,.69FBB>9:2/.409;@@>88.7,//55;BDK@11,,093777777884241<:7 AS:i:160 XS:i:0 XF:i:3 XE:i:4 XN:i:0 SGN-E221664 0 SGN-U572743 317 226 254M24S * 0 0 GGATGATCTTAGAGCTGCCATTCAAAAGATGTTAGACACTCCTGGGCCATACTTGTTGGATGTGATTGTACCTCATCAGGAGCATGTTCTACCGATGATTCCCAGTGGCGGTGCTTTCAAAAATGTGATTACGGAGGGTGATGGGAGACGTTCCTATTGACTTTGAGAAGCTACATAACTAGTTCAAGGCATTGTATTATCTAAAATAAACTTAATATTTATGTTTACTTAAAAGTTTTTCATTGTGTGAAGGAAAAAAAAAAAAAAAAAAAAAAAAA 999@7<22-2***-,206433>:?9<,,,66:>00066=??EEAAA?B200002<<@@@=DB99777864:..0::@833099???<@488>></...<:B<<.,,8881288@BBDDBD885@@;;9:/9.,,,99B99233885558=?DKKKDDAA??DKBB=440/0<8?DEDFBB??6@152@@FBMFIIDDDDDDKKKOK@@@@DD:N688BBDDDBBBKKDEDDBN977?<9<111:<??==BKMPKKBB==99>QQYYYYYYYYYYYYQQ AS:i:250 XS:i:0 XF:i:0 XE:i:7 XN:i:0 '''

    # (temporary file name prefix, strings expected in the tagged output)
    cases = (
        ('lb_group1.', ('RG:Z:group1', 'SM:group1')),
        ('sm_sample1.lb_group1.', ('RG:Z:sample1_group1', 'SM:sample1')),
    )

    for prefix, expected_tags in cases:
        insam = NamedTemporaryFile(prefix=prefix, suffix='.sam')
        outsam = NamedTemporaryFile()
        try:
            insam.write(sam_value)
            # Push the fixture to disk so it is visible however the function
            # under test chooses to read it (by handle or by name).
            insam.flush()

            add_header_and_tags_to_sam(insam, outsam)

            # The result is re-read through its path, so make sure nothing
            # is still sitting in the write buffer.
            outsam.flush()
            with open(outsam.name) as result_fhand:
                out_content = result_fhand.read()
        finally:
            # Closing a NamedTemporaryFile also removes it from disk.
            insam.close()
            outsam.close()

        for tag in expected_tags:
            assert tag in out_content, \
                   '%r not found for prefix %r' % (tag, prefix)
def add_header_and_tags_bams(work_dir, output_dir):
    '''It writes a tagged sam into output_dir for every bam in work_dir.

    Each file in ``work_dir`` whose name ends in ``.bam`` is converted to a
    temporary sam with ``bam2sam``; that sam is then passed through
    ``add_header_and_tags_to_sam`` and the result is written to
    ``<output_dir>/<bam name without .bam>.sam``.

    The temporary sam is named after the bam (``<name>.<random>.sam``), as
    the read group is taken from the file name.

    Nothing is returned.  File handles are released even when one of the
    steps raises, and the output file is only created once the bam has been
    successfully converted, so a failed conversion leaves no empty sam
    behind.
    '''
    bam_suffix = '.bam'
    for bam in os.listdir(work_dir):
        if not bam.endswith(bam_suffix):
            continue

        # get the readgroup from the name: everything before the extension
        prefix = bam[:-len(bam_suffix)]
        out_sam_fpath = os.path.join(output_dir, prefix + '.sam')

        temp_sam = NamedTemporaryFile(prefix='%s.' % prefix, suffix='.sam')
        try:
            bam2sam(os.path.join(work_dir, bam), temp_sam.name)
            sam = open(out_sam_fpath, 'w')
            try:
                add_header_and_tags_to_sam(temp_sam, sam)
            finally:
                sam.close()
        finally:
            # closing the temporary file also removes it
            temp_sam.close()
def run(self):
    '''It runs the analysis.

    Steps:

    1. Every input bam is converted to sam, gets its header and tags added
       (``add_header_and_tags_to_sam``) and is standardized
       (``standardize_sam``) into a sam inside a temporary directory.
    2. The resulting sams are merged with the reference (``merge_sam``).
    3. The merged sam is converted to bam, sorted into the next version of
       the merged bam path and indexed.

    The start and the end of the analysis are logged with ``self._log``.

    If merging, converting or sorting fails, a partially written merged bam
    is removed and the exception is re-raised.  All file handles opened
    here are closed whether or not a step fails.
    '''
    self._log({'analysis_started':True})
    settings = self._project_settings
    tmp_dir = settings['General_settings']['tmpdir']

    inputs = self._get_input_fpaths()
    bam_paths = inputs['bams']
    reference_path = inputs['reference']

    output_dir = self._create_output_dirs()['result']
    merged_bam_path = VersionedPath(os.path.join(output_dir,
                                        BACKBONE_BASENAMES['merged_bam']))
    merged_bam_fpath = merged_bam_path.next_version

    # Do we have to add the default qualities to the sam file?
    add_qualities = settings['Sam_processing']['add_default_qualities']
    # memory for the java programs
    java_mem = settings['Other_settings']['java_memory']
    picard_path = settings['Other_settings']['picard_path']

    if add_qualities:
        default_sanger_quality = int(
                     settings['Other_settings']['default_sanger_quality'])
    else:
        default_sanger_quality = None

    # NOTE(review): temp_dir is never released explicitly here; presumably
    # NamedTemporaryDir cleans up after itself -- confirm.
    temp_dir = NamedTemporaryDir()
    for bam_path in bam_paths:
        bam_basename = bam_path.basename
        sam_fpath = os.path.join(temp_dir.name, bam_basename + '.sam')

        # First we need to create the sam with added tags and headers.
        # The temporary sam carries the bam basename in its own name.
        temp_sam = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                      suffix='.sam')
        try:
            bam2sam(bam_path.last_version, temp_sam.name)
            with open(sam_fpath, 'w') as sam_fhand:
                add_header_and_tags_to_sam(temp_sam, sam_fhand)
        finally:
            temp_sam.close()

        # the standardization: it is written to a second temporary file
        # that afterwards replaces the tagged sam.  delete=False because
        # the file is moved, not discarded.
        temp_sam2 = NamedTemporaryFile(prefix='%s.' % bam_basename,
                                       suffix='.sam', delete=False)
        try:
            with open(sam_fpath) as tagged_sam:
                standardize_sam(tagged_sam, temp_sam2,
                                default_sanger_quality,
                                add_def_qual=add_qualities,
                                only_std_char=True)
            # close (and so flush) before moving the file
            temp_sam2.close()
            shutil.move(temp_sam2.name, sam_fpath)
        except Exception:
            # delete=False means nobody else would remove the leftover
            temp_sam2.close()
            if os.path.exists(temp_sam2.name):
                os.remove(temp_sam2.name)
            raise

    # Once the headers are ready we are going to merge
    sam_fpaths = [os.path.join(temp_dir.name, fname)
                  for fname in os.listdir(temp_dir.name)
                  if fname.endswith('.sam')]

    temp_sam = NamedTemporaryFile(suffix='.sam')
    temp_bam = NamedTemporaryFile(suffix='.bam')
    try:
        sams = []
        reference_fhand = None
        try:
            for sam_fpath in sam_fpaths:
                sams.append(open(sam_fpath))
            reference_fhand = open(reference_path.last_version)
            merge_sam(sams, temp_sam, reference_fhand)
        finally:
            if reference_fhand is not None:
                reference_fhand.close()
            for sam in sams:
                sam.close()

        # sam2bam reads the merged sam through its path, so make sure the
        # content is on disk (harmless if merge_sam already flushed).
        temp_sam.flush()

        # Convert sam into a bam,(Temporary)
        sam2bam(temp_sam.name, temp_bam.name)

        # finally we need to order the bam
        sort_bam_sam(temp_bam.name, merged_bam_fpath,
                     java_conf={'java_memory':java_mem,
                                'picard_path':picard_path},
                     tmp_dir=tmp_dir)
    except Exception:
        # do not leave a partial merged bam behind
        if os.path.exists(merged_bam_fpath):
            os.remove(merged_bam_fpath)
        raise
    finally:
        temp_bam.close()
        temp_sam.close()

    create_bam_index(merged_bam_fpath)
    self._log({'analysis_finished':True})