def test_tag_sort_bam_dash_t_specified_multiple_times():
    """Sort a bam with ``-t`` repeated once per tag, then verify the order.

    Every tag key is passed through its own ``-t`` flag. The sorted output is
    re-read and checked with ``bam.verify_sort`` against the same tag keys.
    Output files are removed whether or not the checks pass.
    """
    tag_keys = [
        consts.CELL_BARCODE_TAG_KEY,
        consts.GENE_NAME_TAG_KEY,
        consts.MOLECULE_BARCODE_TAG_KEY,
    ]
    args = [
        "-i",
        data_dir + "unsorted.bam",
        "-o",
        "test_sorted.bam",
        "-t",
        consts.CELL_BARCODE_TAG_KEY,
        "-t",
        consts.GENE_NAME_TAG_KEY,
        "-t",
        consts.MOLECULE_BARCODE_TAG_KEY,
    ]
    try:
        return_call = platform.GenericPlatform.tag_sort_bam(args)
        assert return_call == 0

        with pysam.AlignmentFile("test_sorted.bam", "rb") as sorted_bam:
            segments = sorted_bam.fetch(until_eof=True)
            # The generator is evaluated lazily, so it is handed to
            # verify_sort while the alignment file is still open.
            sortable_records = (
                bam.TagSortableRecord.from_aligned_segment(segment, tag_keys)
                for segment in segments
            )
            bam.verify_sort(sortable_records, tag_keys)
    finally:
        # Clean up in ``finally`` so a failed assertion does not leave
        # test_sorted* files behind in the working directory.
        for path in glob.glob("test_sorted*"):
            os.remove(path)
def test_tag_sort_bam_no_tags():
    """Sort a bam without any ``-t`` flag and verify the resulting order.

    The sorted output is re-read and checked with ``bam.verify_sort`` using an
    empty tag list (presumably leaving query name as the only sort key --
    confirm against ``bam.verify_sort``). Output files are removed whether or
    not the checks pass.
    """
    tag_keys = []
    args = ["-i", data_dir + "unsorted.bam", "-o", "test_sorted.bam"]
    try:
        return_call = platform.GenericPlatform.tag_sort_bam(args)
        assert return_call == 0

        with pysam.AlignmentFile("test_sorted.bam", "rb") as sorted_bam:
            segments = sorted_bam.fetch(until_eof=True)
            # The generator is evaluated lazily, so it is handed to
            # verify_sort while the alignment file is still open.
            sortable_records = (
                bam.TagSortableRecord.from_aligned_segment(segment, tag_keys)
                for segment in segments
            )
            bam.verify_sort(sortable_records, tag_keys)
    finally:
        # Clean up in ``finally`` so a failed assertion does not leave
        # test_sorted* files behind in the working directory.
        for path in glob.glob("test_sorted*"):
            os.remove(path)
def verify_bam_sort(cls, args: Iterable = None) -> int:
    """Command line entrypoint that checks a bam is sorted by tag(s), then queryname.

    Parameters
    ----------
    args : Iterable[str], optional
        Argument list, mainly useful for testing. When left as None it is
        forwarded unchanged to `parser.parse_args`, which then reads
        `sys.argv`.

    Returns
    -------
    return_call : 0
        Return call if the program completes successfully.

    Notes
    -----
    Invalid or missing command line arguments make argparse exit the process.
    Sort violations are reported by `bam.verify_sort` (presumably by raising;
    confirm against that function).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Verifies whether bam is sorted by the list of zero or more tags, "
            "followed by query name"
        )
    )
    parser.add_argument("-i", "--input_bam", required=True, help="input bamfile")
    parser.add_argument(
        "-t",
        "--tags",
        nargs="+",
        action="append",
        help="tag(s) to use to verify sorting, separated by space, e.g. -t CB GE UB",
    )

    # parse_args(None) falls back to sys.argv, so no explicit branch is needed.
    options = parser.parse_args(args)
    sort_tags = cls.get_tags(options.tags)

    with pysam.AlignmentFile(options.input_bam, "rb") as bam_file:
        # Records are built lazily and consumed while the file is still open.
        records = (
            bam.TagSortableRecord.from_aligned_segment(segment, sort_tags)
            for segment in bam_file.fetch(until_eof=True)
        )
        bam.verify_sort(records, sort_tags)

    message = "{0} is correctly sorted by {1} and query name".format(
        options.input_bam, sort_tags
    )
    print(message)
    return 0
def test_tag_sort_bam():
    """Sort a bam with all tags given to a single ``-t`` flag and verify the order.

    The three tag keys follow one ``-t`` flag. The sorted output is re-read
    and checked with ``bam.verify_sort`` against the same tag keys. Output
    files are removed whether or not the checks pass.
    """
    tag_keys = [
        consts.CELL_BARCODE_TAG_KEY,
        consts.GENE_NAME_TAG_KEY,
        consts.MOLECULE_BARCODE_TAG_KEY,
    ]
    args = [
        '-i',
        data_dir + 'unsorted.bam',
        '-o',
        'test_sorted.bam',
        '-t',
        consts.CELL_BARCODE_TAG_KEY,
        consts.GENE_NAME_TAG_KEY,
        consts.MOLECULE_BARCODE_TAG_KEY,
    ]
    try:
        return_call = platform.GenericPlatform.tag_sort_bam(args)
        assert return_call == 0

        with pysam.AlignmentFile('test_sorted.bam', 'rb') as sorted_bam:
            segments = sorted_bam.fetch(until_eof=True)
            # The generator is evaluated lazily, so it is handed to
            # verify_sort while the alignment file is still open.
            sortable_records = (
                bam.TagSortableRecord.from_aligned_segment(segment, tag_keys)
                for segment in segments
            )
            bam.verify_sort(sortable_records, tag_keys)
    finally:
        # Clean up in ``finally`` so a failed assertion does not leave
        # test_sorted* files behind in the working directory.
        for path in glob.glob('test_sorted*'):
            os.remove(path)