def testCorrectNumberOfAlignmentsTwoMatchesMissingEnd(self):
    """
    When the final two reads have no matches, a JSONRecordsReader must
    still yield the expected number of alignments. (The reader never
    gets as far as examining those trailing reads.)
    """
    # (read id, sequence) pairs used to build the input reads, in order.
    idsAndSequences = (
        ('id1 1',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('id2 2',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('id3 3',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('id4 4',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        AARead(readId, sequence) for readId, sequence in idsAndSequences])

    # Any call to open() inside the block is served from JSON_TWO_END.
    fakeOpen = mockOpen(read_data=JSON_TWO_END)
    with patch.object(builtins, 'open', fakeOpen):
        recordsReader = JSONRecordsReader('file.json')
        found = list(recordsReader.readAlignments(reads))
        self.assertEqual(2, len(found))
def testSpacesMustBePreserved(self):
    """
    The read attached to the first alignment returned by a
    JSONRecordsReader must keep its full id, including the space it
    contains.
    """
    # (read id, sequence) pairs used to build the input reads, in order.
    idsAndSequences = (
        ('id1 1',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('id2 2',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('id3 3',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('id4 4',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        AARead(readId, sequence) for readId, sequence in idsAndSequences])

    # Any call to open() inside the block is served from JSON_TWO_END.
    fakeOpen = mockOpen(read_data=JSON_TWO_END)
    with patch.object(builtins, 'open', fakeOpen):
        recordsReader = JSONRecordsReader('file.json')
        # Consume every alignment before picking out the first one.
        found = list(recordsReader.readAlignments(reads))
        firstAlignment = found[0]
        self.assertEqual('id1 1', firstAlignment.read.id)
def testSpaceInReadIdNotInJSONRecord(self):
    """
    The read attached to the first alignment must carry its full id
    (with the embedded space) even when the titles in the JSON have been
    cut off at the first space, as happens in the SAM format output of
    the BWA 'mem' command.
    """
    # (read id, sequence) pairs used to build the input reads, in order.
    idsAndSequences = (
        ('id1 1',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('id2 2',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('id3 3',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('id4 4',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        AARead(readId, sequence) for readId, sequence in idsAndSequences])

    # Any call to open() inside the block is served from JSON.
    fakeOpen = mockOpen(read_data=JSON)
    with patch.object(builtins, 'open', fakeOpen):
        recordsReader = JSONRecordsReader('file.json')
        # Consume every alignment before picking out the first one.
        found = list(recordsReader.readAlignments(reads))
        firstAlignment = found[0]
        self.assertEqual('id1 1', firstAlignment.read.id)
def testCorrectNumberOfAlignmentsMatchMissingStart(self):
    """
    When the first read has no matches, a JSONRecordsReader must still
    yield the expected number of alignments.
    """
    # (read id, sequence) pairs used to build the input reads, in order.
    idsAndSequences = (
        ('id1',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('id2',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('id3',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('id4',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        AARead(readId, sequence) for readId, sequence in idsAndSequences])

    # Any call to open() inside the block is served from JSON_ONE_START.
    fakeOpen = mockOpen(read_data=JSON_ONE_START)
    with patch.object(builtins, 'open', fakeOpen):
        recordsReader = JSONRecordsReader('file.json')
        found = list(recordsReader.readAlignments(reads))
        self.assertEqual(4, len(found))
def testCorrectNumberOfAlignmentsMatchMissingEnd(self):
    """
    When the last read has no matches, a JSONRecordsReader must still
    yield the expected number of alignments. (The reader never gets as
    far as examining that final read.)
    """
    # (read id, sequence) pairs used to build the input reads, in order.
    idsAndSequences = (
        ('id1',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('id2',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('id3',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('id4',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        AARead(readId, sequence) for readId, sequence in idsAndSequences])

    # Any call to open() inside the block is served from JSON_ONE_END.
    fakeOpen = mockOpen(read_data=JSON_ONE_END)
    with patch.object(builtins, 'open', fakeOpen):
        recordsReader = JSONRecordsReader('file.json')
        found = list(recordsReader.readAlignments(reads))
        self.assertEqual(3, len(found))
def testCorrectNumberOfAlignmentsMatchMissingMiddle(self):
    """
    When a match is absent from the middle of the JSON file, a
    JSONRecordsReader must still yield the expected number of
    alignments.
    """
    # (read id, sequence) pairs used to build the input reads, in order.
    idsAndSequences = (
        ('id1',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('id2',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('id3',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('id4',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        AARead(readId, sequence) for readId, sequence in idsAndSequences])

    # Any call to open() inside the block is served from JSON_ONE_MIDDLE.
    fakeOpen = mockOpen(read_data=JSON_ONE_MIDDLE)
    with patch.object(builtins, 'open', fakeOpen):
        recordsReader = JSONRecordsReader('file.json')
        found = list(recordsReader.readAlignments(reads))
        self.assertEqual(4, len(found))
def _getReader(self, filename, scoreClass):
    """
    Build the reader used to parse DIAMOND records held as JSON.

    @param filename: The C{str} file name holding the JSON. It must end
        in '.json' or '.json.bz2'.
    @param scoreClass: A class to hold and compare scores (see scores.py).
    @raise ValueError: If C{filename} has neither of the accepted
        suffixes.
    @return: A C{JSONRecordsReader} constructed from C{filename} and
        C{scoreClass}.
    """
    # Reject unsupported suffixes up front so the happy path is unindented.
    if not filename.endswith(('.json', '.json.bz2')):
        raise ValueError(
            'Unknown DIAMOND record file suffix for file %r.' % filename)
    return JSONRecordsReader(filename, scoreClass)