def test_2(self):
    # now some errors
    # Validate a BAM with the index check enabled and compare the distinct
    # error class names, in sorted order, against the expected list.
    bam_path = op.join(DATA_DIR, "tst_2_subreads.bam")
    found, _ = bam.validate_bam(bam_path, validate_index=True)
    errors = sorted({type(err).__name__ for err in found})
    expected = [
        'AlignmentCigarError',
        'AlignmentCigarMatchError',
        'AlignmentNotUniqueError',
        'AlignmentUnmappedError',
        'BasecallerVersionError',
        'MissingCodecError',
        'MissingIndexError',
        'MissingPlatformError',
        'QnameFormatError',
        'QnameRangeError',
        'ReadGroupChemistryError',
        'ReadGroupIdMismatchError',
        "ReadLengthError",
        'TagValueError',
        'UninitializedSNRError',
        'UnsortedError',
    ]
    self.assertEqual(errors, expected)

    # Second pass with permissive_headers=True: exactly two fewer distinct
    # error types are expected, and the two names below must be absent.
    found, _ = bam.validate_bam(bam_path,
                                validate_index=True,
                                permissive_headers=True)
    errors2 = {type(err).__name__ for err in found}
    self.assertEqual(len(errors2), len(errors) - 2)
    for absent in ("ReadGroupChemistryError", "BasecallerVersionError"):
        self.assertFalse(absent in errors2)
def test_2(self):
    # now some errors
    # First pass: strict validation including the index check; only the
    # unique error class names (sorted) are compared.
    subreads_bam = op.join(DATA_DIR, "tst_2_subreads.bam")
    strict_errs, _ = bam.validate_bam(subreads_bam, validate_index=True)
    errors = sorted({type(err).__name__ for err in strict_errs})
    self.assertEqual(errors, [
        'AlignmentCigarError',
        'AlignmentCigarMatchError',
        'AlignmentNotUniqueError',
        'AlignmentUnmappedError',
        'BasecallerVersionError',
        'MissingCodecError',
        'MissingIndexError',
        'MissingPlatformError',
        'QnameFormatError',
        'QnameRangeError',
        'ReadGroupChemistryError',
        'ReadGroupIdMismatchError',
        "ReadLengthError",
        'TagValueError',
        'UninitializedSNRError',
        'UnsortedError',
    ])

    # Second pass: same file with permissive_headers=True. The test expects
    # two fewer distinct error types and neither of the names checked below.
    permissive_errs, _ = bam.validate_bam(subreads_bam,
                                          validate_index=True,
                                          permissive_headers=True)
    errors2 = {type(err).__name__ for err in permissive_errs}
    self.assertEqual(len(errors2), len(errors) - 2)
    self.assertFalse("ReadGroupChemistryError" in errors2)
    self.assertFalse("BasecallerVersionError" in errors2)
def test_4_unmapped(self):
    # Validate the same file three ways (default, aligned=True,
    # aligned=False) and compare the sorted error class names of each run.
    # Duplicates are kept on purpose: no set() is applied.
    bam_path = op.join(DATA_DIR, "tst_4_subreads.bam")

    found, _ = bam.validate_bam(bam_path)
    errors1 = sorted(type(err).__name__ for err in found)
    expected_default = [
        'BasecallerVersionError',
        'MissingCodecError',
        'QnameHoleNumberError',
        'QnameMovieError',
        'ReadGroupChemistryError',
        'UninitializedSNRError',
        'UnmappedPropertiesError',
        'UnsortedError',
        'WrongPlatformError',
    ]
    assert errors1 == expected_default

    found, _ = bam.validate_bam(bam_path, aligned=True)
    errors2 = sorted(type(err).__name__ for err in found)
    expected_aligned = [
        'BasecallerVersionError',
        'FileNotAlignedError',
        'MissingCodecError',
        'QnameHoleNumberError',
        'QnameMovieError',
        'ReadGroupChemistryError',
        'UninitializedSNRError',
        'UnsortedError',
        'WrongPlatformError',
    ]
    assert errors2 == expected_aligned

    # this should yield the same result as the first run
    found, _ = bam.validate_bam(bam_path, aligned=False)
    errors3 = sorted(type(err).__name__ for err in found)
    assert errors3 == errors1
def test_4_unmapped(self):
    # Three validation runs over one file: default, aligned=True and
    # aligned=False. Each run's error class names are sorted (duplicates
    # retained) and compared with the expected list.
    subreads_bam = op.join(DATA_DIR, "tst_4_subreads.bam")

    default_errs, _ = bam.validate_bam(subreads_bam)
    errors1 = sorted(type(err).__name__ for err in default_errs)
    self.assertEqual(errors1, [
        'BasecallerVersionError',
        'MissingCodecError',
        'PulseFeatureError',
        'QnameHoleNumberError',
        'QnameMovieError',
        'ReadGroupChemistryError',
        'UninitializedSNRError',
        'UnmappedPropertiesError',
        'UnsortedError',
        'WrongPlatformError',
    ])

    aligned_errs, _ = bam.validate_bam(subreads_bam, aligned=True)
    errors2 = sorted(type(err).__name__ for err in aligned_errs)
    self.assertEqual(errors2, [
        'BasecallerVersionError',
        'FileNotAlignedError',
        'MissingCodecError',
        'PulseFeatureError',
        'QnameHoleNumberError',
        'QnameMovieError',
        'ReadGroupChemistryError',
        'UninitializedSNRError',
        'UnsortedError',
        'WrongPlatformError',
    ])

    # this should yield the same result as the first run
    unaligned_errs, _ = bam.validate_bam(subreads_bam, aligned=False)
    errors3 = sorted(type(err).__name__ for err in unaligned_errs)
    self.assertEqual(errors3, errors1)
def test_1c_reference_fasta(self):
    # Validating against the companion FASTA reference must report no errors.
    bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
    reference_path = op.join(DATA_DIR, "tst1.fasta")
    found, _ = bam.validate_bam(bam_path, reference=reference_path)
    self.assertEqual(len(found), 0)
    # The aligned=False run is only exercised; its result is not asserted.
    found, _ = bam.validate_bam(bam_path, aligned=False)
def test_1b(self):
    # Request aligned=False and contents="CCS" for this file and expect
    # exactly the two error classes listed below (sorted, duplicates kept).
    bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
    found, _ = bam.validate_bam(bam_path, aligned=False, contents="CCS")
    errors = sorted(type(err).__name__ for err in found)
    expected = ['FileAlignedError', 'FileContentMismatchError']
    self.assertEqual(errors, expected)
def test_zero_length_scrap(self):
    # Validate a scraps BAM while recording warnings: the test expects no
    # validation errors and exactly seven recorded warnings.
    scraps_bam = "/pbi/dept/secondary/siv/testdata/SA3-Sequel/ecoli/EmptyRecords/m54043_180414_094215.scraps.bam"
    with warnings.catch_warnings(record=True) as recorded:
        found, _ = bam.validate_bam(scraps_bam, aligned=False)
        assert len(found) == 0
        assert len(recorded) == 7
def test_overlapping_alignments(self):
    # Validate an aligned BAM and expect AlignmentNotUniqueError to be the
    # only distinct error class reported.
    BAM = "/pbi/dept/secondary/siv/testdata/pbreports-unittest/data/mapping_stats/pbmm2/aligned.bam"
    e, c = bam.validate_bam(BAM, aligned=True)
    # De-duplicate first and sort last. The previous list(set(sorted(...)))
    # sorted the names and then threw the order away by going through a set,
    # so the list compared below had an undefined order whenever more than
    # one distinct error type was present.
    errors2 = sorted({type(err).__name__ for err in e})
    assert errors2 == ["AlignmentNotUniqueError"]
def test_transcript_bam(self):
    # Validate the transcript BAM with max_records=10; no errors are expected.
    transcript_bam = "/pbi/dept/secondary/siv/testdata/isoseqs/TranscriptSet/unpolished.bam"
    found, _ = bam.validate_bam(transcript_bam, max_records=10)
    assert len(found) == 0
def test_valid_ccs_strand_suffix(self):
    # This CCS fixture must validate with an empty error list.
    ccs_bam = op.join(DATA_DIR, "tst_6.ccs.bam")
    found, _ = bam.validate_bam(ccs_bam)
    assert len(found) == 0
def test_bad_encoding(self):
    # The validator must report exactly one error, of class BadEncodingError
    # (duplicates are not collapsed, so a repeated error would fail the test).
    bam_path = op.join(DATA_DIR, "tst_5_subreads.bam")
    found, _ = bam.validate_bam(bam_path)
    errors1 = sorted(type(err).__name__ for err in found)
    assert errors1 == ["BadEncodingError"]
def test_3_unmapped(self):
    # Default validation of this fixture must produce no errors.
    bam_path = op.join(DATA_DIR, "tst_3_subreads.bam")
    found, _ = bam.validate_bam(bam_path)
    assert len(found) == 0
def test_1c_reference_fasta(self):
    # Supplying the FASTA reference must yield an empty error list.
    subreads_bam = op.join(DATA_DIR, "tst_1_subreads.bam")
    reference_fasta = op.join(DATA_DIR, "tst1.fasta")
    found, _ = bam.validate_bam(subreads_bam, reference=reference_fasta)
    assert len(found) == 0
    # Second call (aligned=False) is run without checking what it returns.
    found, _ = bam.validate_bam(subreads_bam, aligned=False)
def test_1b(self):
    # With aligned=False and contents="CCS" the sorted error class names
    # must be exactly the two listed below.
    subreads_bam = op.join(DATA_DIR, "tst_1_subreads.bam")
    found, _ = bam.validate_bam(subreads_bam, aligned=False, contents="CCS")
    errors = sorted(type(err).__name__ for err in found)
    assert errors == ['FileAlignedError', 'FileContentMismatchError']
def test_3_unmapped(self):
    # Default validation of this fixture is expected to report zero errors.
    subreads_bam = op.join(DATA_DIR, "tst_3_subreads.bam")
    found, _ = bam.validate_bam(subreads_bam)
    self.assertEqual(len(found), 0)
def test_invalid_ccs_strand_suffix(self):
    # Only one distinct error class, QnameFormatError, may be reported for
    # this CCS fixture; repeated occurrences are collapsed before comparing.
    ccs_bam = op.join(DATA_DIR, "tst_7.ccs.bam")
    found, _ = bam.validate_bam(ccs_bam)
    errors1 = sorted({type(err).__name__ for err in found})
    assert errors1 == ['QnameFormatError']
def test_1(self):
    # Baseline case: default validation of this fixture reports no errors.
    bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
    found, _ = bam.validate_bam(bam_path)
    self.assertEqual(len(found), 0)