def readAlignments(self, reads):
    """
    Read lines of JSON from self._filename, convert them to read alignments
    and yield them.

    Each JSON record is paired with an input read by comparing the record's
    'query' value with the read's id. Reads are consumed in order; any read
    passed over before the matching one is found is yielded with an empty
    list of alignments.

    @param reads: An iterable of L{Read} instances, corresponding to the
        reads that were given to DIAMOND.
    @raise ValueError: If any of the lines in the file cannot be converted
        to JSON, or if C{reads} is exhausted before a read whose id matches
        a record's query is found.
    @return: A generator that yields C{dark.alignments.ReadAlignments}
        instances.
    """
    if self._fp is None:
        self._open(self._filename)

    reads = iter(reads)

    try:
        # Numbering starts at 2, presumably because self._open has already
        # consumed the first line of the file (TODO confirm). Record
        # numbers reported below are therefore lineNumber - 1.
        for lineNumber, line in enumerate(self._fp, start=2):
            # Remove the line terminator (and any other trailing
            # whitespace, which carries no meaning after a JSON value).
            # Unconditionally dropping the last character would corrupt a
            # final line that has no trailing newline.
            line = line.rstrip()
            try:
                record = loads(line)
            except ValueError as e:
                raise ValueError(
                    'Could not convert line %d of %r to JSON (%s). '
                    'Line is %r.' %
                    (lineNumber, self._filename, e, line))

            query = record['query']

            # Advance through the input reads until we reach the one that
            # matches this DIAMOND record.
            for read in reads:
                if read.id == query:
                    alignments = self._dictToAlignments(record, read)
                    yield ReadAlignments(read, alignments)
                    break
                # This is an input read that received no matches from
                # DIAMOND, so it does not appear in the DIAMOND output.
                # Emit an empty ReadAlignments for it.
                yield ReadAlignments(read, [])
            else:
                # The reads ran out before the record's query was seen.
                raise ValueError(
                    'Read generator failed to yield a read '
                    'with id \'%s\' as found in record number %d '
                    'during parsing of DIAMOND output file %r.' %
                    (query, lineNumber - 1, self._filename))
    finally:
        self._fp.close()
        self._fp = None
def testNoAlignments(self):
    """
    A ReadAlignments instance created without any alignments must have
    length zero.
    """
    emptyReadAlignments = ReadAlignments(Read('id', 'ACGT'))
    self.assertEqual(0, len(emptyReadAlignments))
def testRead(self):
    """
    A ReadAlignments instance must store the read it was created with.
    """
    originalRead = Read('id', 'ACGT')
    self.assertEqual(originalRead, ReadAlignments(originalRead).read)
def readAlignments(self):
    """
    Read lines of JSON from self._filename, convert them to read alignments
    and yield them.

    For each JSON record, a read is built from the record's 'queryId' and
    'querySequence' values and its alignments are obtained by passing the
    record and self._database to jsonDictToAlignments.

    @raise ValueError: If any of the lines in the file cannot be converted
        to JSON.
    @return: A generator that yields C{dark.alignments.ReadAlignments}
        instances.
    """
    if self._fp is None:
        self._open(self._filename)

    try:
        # Numbering starts at 2, presumably because self._open has already
        # consumed the first line of the file (TODO confirm).
        for lineNumber, line in enumerate(self._fp, start=2):
            # Remove the line terminator (and any other trailing
            # whitespace, which carries no meaning after a JSON value).
            # Unconditionally dropping the last character would corrupt a
            # final line that has no trailing newline.
            line = line.rstrip()
            try:
                record = loads(line)
            except ValueError as e:
                raise ValueError(
                    'Could not convert line %d of %r to JSON (%s). '
                    'Line is %r.' %
                    (lineNumber, self._filename, e, line))

            read = AARead(record['queryId'], record['querySequence'])
            alignments = jsonDictToAlignments(record, self._database)
            yield ReadAlignments(read, alignments)
    finally:
        self._fp.close()
        self._fp = None
def iter(self):
    """
    Extract DIAMOND records from each input file in turn and yield
    C{ReadAlignments} instances.

    @return: A generator that yields C{ReadAlignments} instances.
    """
    queryReads = iter(self.reads)

    for index, filename in enumerate(self.filenames):
        # The reader for the first input file was already created in
        # __init__ (as self._reader). That is less clean than it could be,
        # but it makes testing easier because open() is then only called
        # once per input file. Later files get a fresh reader here.
        reader = (self._reader if index == 0
                  else self._getReader(filename, self.scoreClass))

        for readAlignments in reader.readAlignments(queryReads):
            yield readAlignments

    # Whatever query reads are left over must have had no subject matches.
    for unmatchedRead in queryReads:
        yield ReadAlignments(unmatchedRead, [])
def testAlignments(self):
    """
    A ReadAlignments instance must store the alignments it was given.
    """
    read = Read('id', 'ACGT')
    first = Alignment(45, 'title1')
    second = Alignment(55, 'title2')
    expected = [first, second]
    # Hand over a separate list so the comparison is against contents,
    # not against the very same list object.
    stored = ReadAlignments(read, list(expected))
    self.assertEqual(expected, stored)
def testOneAlignment(self):
    """
    If only a single alignment is present, bestAlignment must return
    that alignment.
    """
    onlyAlignment = Alignment(44, 'Seq 1')
    for score in (10, 9):
        onlyAlignment.addHsp(HSP(score))

    hit = ReadAlignments(Read('id1', 'aaa'), [onlyAlignment])
    best = bestAlignment(hit)

    self.assertEqual('Seq 1', best.subjectTitle)
    self.assertEqual(44, best.subjectLength)
def readAlignments(self, reads):
    """
    Read lines of JSON from self._filename, convert them to read alignments
    and yield them.

    Records and reads are paired positionally: the n-th JSON record is
    combined with the n-th read from C{reads}.

    @param reads: An iterable of L{Read} instances, corresponding to the
        reads that were given to BLAST.
    @raise ValueError: If any of the lines in the file cannot be converted
        to JSON, or if C{reads} yields fewer reads than there are records
        in the file.
    @return: A generator that yields C{dark.alignments.ReadAlignments}
        instances.
    """
    if self._fp is None:
        self._open(self._filename)

    reads = iter(reads)

    try:
        # Numbering starts at 2, presumably because self._open has already
        # consumed the first line of the file (TODO confirm). Read numbers
        # reported below are therefore lineNumber - 1.
        for lineNumber, line in enumerate(self._fp, start=2):
            # Remove the line terminator (and any other trailing
            # whitespace, which carries no meaning after a JSON value).
            # Unconditionally dropping the last character would corrupt a
            # final line that has no trailing newline.
            line = line.rstrip()
            try:
                record = loads(line)
            except ValueError as e:
                raise ValueError(
                    'Could not convert line %d of %r to JSON (%s). '
                    'Line is %r.' %
                    (lineNumber, self._filename, e, line))

            try:
                read = next(reads)
            except StopIteration:
                raise ValueError(
                    'Read generator failed to yield read number %d '
                    'during parsing of BLAST file %r.' %
                    (lineNumber - 1, self._filename))

            alignments = self._dictToAlignments(record, read)
            yield ReadAlignments(read, alignments)
    finally:
        self._fp.close()
        self._fp = None
def testThreeAlignments(self):
    """
    With three alignments present, bestAlignment must return the one
    whose first HSP is the highest.
    """
    # (subject length, subject title, HSP scores in the order added).
    specs = (
        (33, 'Seq 1', (10, 9)),
        (44, 'Seq 2', (30, 29)),
        (55, 'Seq 3', (20, 19)),
    )

    alignments = []
    for subjectLength, subjectTitle, scores in specs:
        alignment = Alignment(subjectLength, subjectTitle)
        for score in scores:
            alignment.addHsp(HSP(score))
        alignments.append(alignment)

    hit = ReadAlignments(Read('id1', 'aaa'), alignments)
    best = bestAlignment(hit)

    self.assertEqual('Seq 2', best.subjectTitle)
    self.assertEqual(44, best.subjectLength)
def readAlignments(self, reads):
    """
    Read lines of JSON from self._filename, convert them to read alignments
    and yield them.

    Each JSON record is paired with an input read by comparing the record's
    'query' value with the read's id (or with the first
    whitespace-delimited field of the read's id). Reads are consumed in
    order; any read passed over before the matching one is found is yielded
    with an empty list of alignments.

    @param reads: An iterable of L{Read} instances, corresponding to the
        reads that were given to DIAMOND.
    @raise ValueError: If any of the lines in the file cannot be converted
        to JSON, or if C{reads} is exhausted before a read matching a
        record's query is found.
    @return: A generator that yields C{dark.alignments.ReadAlignments}
        instances.
    """
    if self._fp is None:
        self._open(self._filename)

    reads = iter(reads)

    try:
        # Numbering starts at 2, presumably because self._open has already
        # consumed the first line of the file (TODO confirm). Record
        # numbers reported below are therefore lineNumber - 1.
        for lineNumber, line in enumerate(self._fp, start=2):
            # Remove the line terminator (and any other trailing
            # whitespace, which carries no meaning after a JSON value).
            # Unconditionally dropping the last character would corrupt a
            # final line that has no trailing newline.
            line = line.rstrip()
            try:
                record = loads(line)
            except ValueError as e:
                raise ValueError(
                    'Could not convert line %d of %r to JSON (%s). '
                    'Line is %r.' %
                    (lineNumber, self._filename, e, line))

            recordTitle = record['query']

            # Advance through the input reads until we reach the one that
            # matches this DIAMOND record.
            for read in reads:
                # Look for an exact read id / subject title match. If
                # that doesn't work, allow for the case where the JSON
                # record has a truncated query (i.e., read) id. This
                # covers the situation where a tool we use (e.g., bwa
                # mem) unconditionally does this truncation in the output
                # it writes.
                #
                # The [:1] slice (rather than [0]) keeps an empty or
                # all-whitespace read id from raising IndexError; such an
                # id simply fails the truncated comparison.
                if (read.id == recordTitle or
                        read.id.split(None, 1)[:1] == [recordTitle]):
                    alignments = self._dictToAlignments(record, read)
                    yield ReadAlignments(read, alignments)
                    break
                # This is an input read that had no DIAMOND matches, so
                # it does not appear in DIAMOND's output. Yield an empty
                # ReadAlignments for it.
                yield ReadAlignments(read, [])
            else:
                # The reads ran out before the record's query was seen.
                raise ValueError(
                    'Read generator failed to yield a read '
                    'with id \'%s\' as found in record number %d '
                    'during parsing of DIAMOND output file %r.' %
                    (recordTitle, lineNumber - 1, self._filename))
    finally:
        self._fp.close()
        self._fp = None