Ejemplo n.º 1
0
 def test_2(self):
     """Validate a BAM file that is expected to produce many errors.

     The file is validated twice: first with ``validate_index=True``
     only, then additionally with ``permissive_headers=True``.  The
     second run must report exactly two fewer distinct error types,
     and neither the chemistry nor the basecaller-version error may
     be among them.
     """
     file_name = op.join(DATA_DIR, "tst_2_subreads.bam")
     e, c = bam.validate_bam(file_name, validate_index=True)
     # Only the distinct error class names matter; sorting makes the
     # comparison independent of the order the errors were reported in.
     errors = sorted({type(err).__name__ for err in e})
     self.assertEqual(errors, [
         'AlignmentCigarError', 'AlignmentCigarMatchError',
         'AlignmentNotUniqueError', 'AlignmentUnmappedError',
         'BasecallerVersionError', 'MissingCodecError', 'MissingIndexError',
         'MissingPlatformError', 'QnameFormatError', 'QnameRangeError',
         'ReadGroupChemistryError', 'ReadGroupIdMismatchError',
         "ReadLengthError", 'TagValueError', 'UninitializedSNRError',
         'UnsortedError'
     ])
     e, c = bam.validate_bam(file_name,
                             validate_index=True,
                             permissive_headers=True)
     errors2 = {type(err).__name__ for err in e}
     self.assertEqual(len(errors2), len(errors) - 2)
     # assertNotIn prints the container on failure, which
     # assertFalse(x in y) cannot do.
     self.assertNotIn("ReadGroupChemistryError", errors2)
     self.assertNotIn("BasecallerVersionError", errors2)
Ejemplo n.º 2
0
 def test_2(self):
     """Validate a BAM file that is expected to produce many errors.

     A first run with ``validate_index=True`` must yield the full list
     of distinct error class names below.  A second run that also sets
     ``permissive_headers=True`` must yield two fewer distinct names,
     with the chemistry and basecaller-version errors absent.
     """
     file_name = op.join(DATA_DIR, "tst_2_subreads.bam")
     e, c = bam.validate_bam(file_name, validate_index=True)
     # Deduplicate by class name, then sort so the list comparison does
     # not depend on the order in which errors were emitted.
     errors = sorted({type(err).__name__ for err in e})
     self.assertEqual(errors,
                      ['AlignmentCigarError', 'AlignmentCigarMatchError',
                       'AlignmentNotUniqueError',
                       'AlignmentUnmappedError',
                       'BasecallerVersionError',
                       'MissingCodecError', 'MissingIndexError',
                       'MissingPlatformError', 'QnameFormatError',
                       'QnameRangeError', 'ReadGroupChemistryError',
                       'ReadGroupIdMismatchError', "ReadLengthError",
                       'TagValueError',
                       'UninitializedSNRError', 'UnsortedError'])
     e, c = bam.validate_bam(file_name, validate_index=True,
                             permissive_headers=True)
     errors2 = {type(err).__name__ for err in e}
     self.assertEqual(len(errors2), len(errors) - 2)
     # Membership assertions give a readable failure message that shows
     # the contents of the set.
     self.assertNotIn("ReadGroupChemistryError", errors2)
     self.assertNotIn("BasecallerVersionError", errors2)
Ejemplo n.º 3
0
 def test_4_unmapped(self):
     """Check the error types reported for ``tst_4_subreads.bam``.

     The validator is run three times: with default arguments, with
     ``aligned=True`` and with ``aligned=False``.  Sorted error class
     names are compared against expected lists, and the ``aligned=False``
     run must match the default run.
     """
     bam_path = op.join(DATA_DIR, "tst_4_subreads.bam")

     expected_default = [
         'BasecallerVersionError', 'MissingCodecError',
         'QnameHoleNumberError', 'QnameMovieError',
         'ReadGroupChemistryError', 'UninitializedSNRError',
         'UnmappedPropertiesError', 'UnsortedError', 'WrongPlatformError'
     ]
     found, _ = bam.validate_bam(bam_path)
     errors1 = sorted(type(err).__name__ for err in found)
     assert errors1 == expected_default

     expected_aligned = [
         'BasecallerVersionError', 'FileNotAlignedError',
         'MissingCodecError', 'QnameHoleNumberError', 'QnameMovieError',
         'ReadGroupChemistryError', 'UninitializedSNRError',
         'UnsortedError', 'WrongPlatformError'
     ]
     found, _ = bam.validate_bam(bam_path, aligned=True)
     errors2 = sorted(type(err).__name__ for err in found)
     assert errors2 == expected_aligned

     # Explicitly requesting unaligned validation should give the same
     # result as the first (default) run.
     found, _ = bam.validate_bam(bam_path, aligned=False)
     errors3 = sorted(type(err).__name__ for err in found)
     assert errors3 == errors1
Ejemplo n.º 4
0
 def test_4_unmapped(self):
     """Check the error types reported for ``tst_4_subreads.bam``.

     Three validation runs are made (default, ``aligned=True``,
     ``aligned=False``).  Each run's sorted error class names are
     compared with an expected list; the last run must equal the first.
     """
     bam_path = op.join(DATA_DIR, "tst_4_subreads.bam")

     found, _ = bam.validate_bam(bam_path)
     errors1 = sorted(type(err).__name__ for err in found)
     expected_default = [
         'BasecallerVersionError',
         'MissingCodecError',
         'PulseFeatureError',
         'QnameHoleNumberError',
         'QnameMovieError',
         'ReadGroupChemistryError',
         'UninitializedSNRError',
         'UnmappedPropertiesError',
         'UnsortedError',
         'WrongPlatformError',
     ]
     self.assertEqual(errors1, expected_default)

     found, _ = bam.validate_bam(bam_path, aligned=True)
     errors2 = sorted(type(err).__name__ for err in found)
     expected_aligned = [
         'BasecallerVersionError',
         'FileNotAlignedError',
         'MissingCodecError',
         'PulseFeatureError',
         'QnameHoleNumberError',
         'QnameMovieError',
         'ReadGroupChemistryError',
         'UninitializedSNRError',
         'UnsortedError',
         'WrongPlatformError',
     ]
     self.assertEqual(errors2, expected_aligned)

     # Passing aligned=False explicitly should reproduce the first run.
     found, _ = bam.validate_bam(bam_path, aligned=False)
     errors3 = sorted(type(err).__name__ for err in found)
     self.assertEqual(errors3, errors1)
Ejemplo n.º 5
0
 def test_1c_reference_fasta(self):
     """Validate ``tst_1_subreads.bam`` against a reference FASTA.

     With ``reference`` set to ``tst1.fasta`` the validator must report
     no errors.  A second run with ``aligned=False`` is then executed.
     """
     bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
     reference_path = op.join(DATA_DIR, "tst1.fasta")

     found, _ = bam.validate_bam(bam_path, reference=reference_path)
     self.assertEqual(len(found), 0)

     # NOTE(review): the outcome of this second run is not asserted in the
     # visible code, so it only checks that the call completes and returns
     # a pair.  Confirm whether an assertion is missing here.
     found, _ = bam.validate_bam(bam_path, aligned=False)
Ejemplo n.º 6
0
 def test_1b(self):
     """Validate ``tst_1_subreads.bam`` with mismatched expectations.

     Running with ``aligned=False`` and ``contents="CCS"`` must produce
     exactly one ``FileAlignedError`` and one
     ``FileContentMismatchError``.
     """
     bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
     expected = ['FileAlignedError', 'FileContentMismatchError']

     found, _ = bam.validate_bam(bam_path, aligned=False, contents="CCS")
     observed = sorted(type(err).__name__ for err in found)
     self.assertEqual(observed, expected)
Ejemplo n.º 7
0
 def test_zero_length_scrap(self):
     """Validate a scraps BAM and check both errors and warnings.

     The validation runs with ``aligned=False`` while warnings are being
     recorded; it must report no errors and exactly seven warnings must
     have been captured.
     """
     scraps_path = "/pbi/dept/secondary/siv/testdata/SA3-Sequel/ecoli/EmptyRecords/m54043_180414_094215.scraps.bam"
     with warnings.catch_warnings(record=True) as caught:
         found, _ = bam.validate_bam(scraps_path, aligned=False)
         n_errors = len(found)
         assert n_errors == 0
         n_warnings = len(caught)
         assert n_warnings == 7
Ejemplo n.º 8
0
 def test_overlapping_alignments(self):
     """Validate an aligned BAM and expect only non-unique-alignment errors.

     The validator runs with ``aligned=True``; the distinct error class
     names it reports must be exactly ``["AlignmentNotUniqueError"]``.
     """
     BAM = "/pbi/dept/secondary/siv/testdata/pbreports-unittest/data/mapping_stats/pbmm2/aligned.bam"
     e, c = bam.validate_bam(BAM, aligned=True)
     # Deduplicate first and sort last.  Sorting before building the set
     # (as the code previously did) leaves the final list in arbitrary
     # set order, so a failure involving several names would be reported
     # non-deterministically.
     errors2 = sorted({type(err).__name__ for err in e})
     assert errors2 == ["AlignmentNotUniqueError"]
Ejemplo n.º 9
0
 def test_transcript_bam(self):
     """Validate a transcript BAM with ``max_records=10``; no errors expected."""
     transcript_path = "/pbi/dept/secondary/siv/testdata/isoseqs/TranscriptSet/unpolished.bam"
     found, _ = bam.validate_bam(transcript_path, max_records=10)
     n_errors = len(found)
     assert n_errors == 0
Ejemplo n.º 10
0
 def test_valid_ccs_strand_suffix(self):
     """Validate ``tst_6.ccs.bam`` with default options; no errors expected."""
     ccs_path = op.join(DATA_DIR, "tst_6.ccs.bam")
     found, _ = bam.validate_bam(ccs_path)
     n_errors = len(found)
     assert n_errors == 0
Ejemplo n.º 11
0
 def test_bad_encoding(self):
     """Validate ``tst_5_subreads.bam``; exactly one ``BadEncodingError`` is expected."""
     bam_path = op.join(DATA_DIR, "tst_5_subreads.bam")
     found, _ = bam.validate_bam(bam_path)
     # Compare by class name so the test does not need the error types.
     observed = sorted(type(err).__name__ for err in found)
     expected = ["BadEncodingError"]
     assert observed == expected
Ejemplo n.º 12
0
 def test_3_unmapped(self):
     """Validate ``tst_3_subreads.bam`` with default options; no errors expected."""
     bam_path = op.join(DATA_DIR, "tst_3_subreads.bam")
     found, _ = bam.validate_bam(bam_path)
     n_errors = len(found)
     assert n_errors == 0
Ejemplo n.º 13
0
 def test_1c_reference_fasta(self):
     """Validate ``tst_1_subreads.bam`` against a reference FASTA.

     With ``reference`` pointing at ``tst1.fasta`` no errors may be
     reported.  A second run with ``aligned=False`` follows.
     """
     bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
     reference_path = op.join(DATA_DIR, "tst1.fasta")

     found, _ = bam.validate_bam(bam_path, reference=reference_path)
     n_errors = len(found)
     assert n_errors == 0

     # NOTE(review): nothing is asserted about this second run in the
     # visible code, so it only checks that the call completes and returns
     # a pair.  Confirm whether an assertion was intended.
     found, _ = bam.validate_bam(bam_path, aligned=False)
Ejemplo n.º 14
0
 def test_1b(self):
     """Validate ``tst_1_subreads.bam`` with mismatched expectations.

     With ``aligned=False`` and ``contents="CCS"`` the validator must
     report exactly one ``FileAlignedError`` and one
     ``FileContentMismatchError``.
     """
     bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
     found, _ = bam.validate_bam(bam_path, aligned=False, contents="CCS")
     observed = sorted(type(err).__name__ for err in found)
     expected = ['FileAlignedError', 'FileContentMismatchError']
     assert observed == expected
Ejemplo n.º 15
0
 def test_3_unmapped(self):
     """Validate ``tst_3_subreads.bam`` with default options; no errors expected."""
     bam_path = op.join(DATA_DIR, "tst_3_subreads.bam")
     found, _ = bam.validate_bam(bam_path)
     n_errors = len(found)
     self.assertEqual(n_errors, 0)
Ejemplo n.º 16
0
 def test_invalid_ccs_strand_suffix(self):
     """Validate ``tst_7.ccs.bam``; the only distinct error type expected
     is ``QnameFormatError``.
     """
     ccs_path = op.join(DATA_DIR, "tst_7.ccs.bam")
     found, _ = bam.validate_bam(ccs_path)
     # Collapse repeated errors of the same class before comparing.
     distinct_names = {type(err).__name__ for err in found}
     errors1 = sorted(distinct_names)
     assert errors1 == ['QnameFormatError']
Ejemplo n.º 17
0
 def test_1(self):
     """Validate ``tst_1_subreads.bam`` with default options; no errors expected."""
     bam_path = op.join(DATA_DIR, "tst_1_subreads.bam")
     found, _ = bam.validate_bam(bam_path)
     n_errors = len(found)
     self.assertEqual(n_errors, 0)