def testAddHsp(self):
    """
    An HSP passed to addHsp must afterwards be found as the first
    element of the alignment's hsps.
    """
    subject = Alignment(45, 'title')
    subject.addHsp(HSP(3))
    expected = HSP(3)
    self.assertEqual(expected, subject.hsps[0])
def _dictToAlignments(self, blastDict, read):
    """
    Take a dict (made by XMLRecordsReader._convertBlastRecordToDict) and
    convert it to a list of alignments.

    @param blastDict: A C{dict}, from convertBlastRecordToDict.
    @param read: A C{Read} instance, containing the read that BLAST used
        to create this record.
    @raise ValueError: If the query id in the BLAST dictionary does not
        match the id of the read. An empty or whitespace-only query id
        that differs from the read id counts as a mismatch.
    @return: A C{list} of L{dark.alignment.Alignment} instances.
    """
    query = blastDict['query']
    if query != read.id:
        # A query whose first whitespace-delimited token equals the read
        # id is also accepted (presumably an id followed by a
        # description). str.split() returns an empty list for an empty
        # or all-whitespace query, so check for that explicitly: it must
        # be reported as a mismatch, not escape as an IndexError.
        tokens = query.split(None, 1)
        if not tokens or tokens[0] != read.id:
            raise ValueError(
                'The reads you have provided do not match the BLAST output: '
                'BLAST record query id (%s) does not match the id of the '
                'supposedly corresponding read (%s).' %
                (query, read.id))

    alignments = []
    # Plain HSP instances are scored by bit score, any other HSP class by
    # expect value.
    getScore = itemgetter('bits' if self._hspClass is HSP else 'expect')

    for blastAlignment in blastDict['alignments']:
        alignment = Alignment(blastAlignment['length'],
                              blastAlignment['title'])
        alignments.append(alignment)
        for blastHsp in blastAlignment['hsps']:
            score = getScore(blastHsp)
            normalized = normalizeHSP(blastHsp, len(read),
                                      self.application)
            hsp = self._hspClass(
                score,
                readStart=normalized['readStart'],
                readEnd=normalized['readEnd'],
                readStartInSubject=normalized['readStartInSubject'],
                readEndInSubject=normalized['readEndInSubject'],
                # blastHsp['frame'] holds the read frame first and the
                # subject frame second.
                readFrame=blastHsp['frame'][0],
                subjectStart=normalized['subjectStart'],
                subjectEnd=normalized['subjectEnd'],
                subjectFrame=blastHsp['frame'][1],
                readMatchedSequence=blastHsp['query'],
                subjectMatchedSequence=blastHsp['sbjct'],
                # Use blastHsp.get on identicalCount and positiveCount
                # because they were added in version 2.0.3 and will not
                # be present in any of our JSON output generated before
                # that. Those values will be None for those JSON files,
                # but that's much better than no longer being able to
                # read all that data.
                identicalCount=blastHsp.get('identicalCount'),
                positiveCount=blastHsp.get('positiveCount'))

            alignment.addHsp(hsp)

    return alignments
def testOneAlignment(self):
    """
    If a read has exactly one alignment, bestAlignment must return an
    alignment with that alignment's subject title and subject length.
    """
    only = Alignment(44, 'Seq 1')
    for score in (10, 9):
        only.addHsp(HSP(score))
    readAlignments = ReadAlignments(Read('id1', 'aaa'), [only])
    winner = bestAlignment(readAlignments)
    self.assertEqual('Seq 1', winner.subjectTitle)
    self.assertEqual(44, winner.subjectLength)
def _dictToAlignments(self, diamondDict, read):
    """
    Build alignment objects from a dict made by
    DiamondTabularFormatReader.records.

    @param diamondDict: A C{dict}, from records().
    @param read: A C{Read} instance, containing the read that DIAMOND used
        to create this record.
    @return: A C{list} of L{dark.alignment.Alignment} instances, one for
        each entry of C{diamondDict['alignments']}, in the same order.
    """
    # Plain HSP instances are scored by bit score, any other HSP class by
    # expect value.
    scoreKey = 'bits' if self._hspClass is HSP else 'expect'
    scoreOf = itemgetter(scoreKey)

    result = []
    for record in diamondDict['alignments']:
        alignment = Alignment(record['length'], record['title'])
        result.append(alignment)

        for hspDict in record['hsps']:
            score = scoreOf(hspDict)
            norm = normalizeHSP(hspDict, len(read), self.diamondTask)
            details = {
                'readStart': norm['readStart'],
                'readEnd': norm['readEnd'],
                'readStartInSubject': norm['readStartInSubject'],
                'readEndInSubject': norm['readEndInSubject'],
                'readFrame': hspDict['frame'],
                'subjectStart': norm['subjectStart'],
                'subjectEnd': norm['subjectEnd'],
                'readMatchedSequence': hspDict['query'],
                'subjectMatchedSequence': hspDict['sbjct'],
                # The four values below are fetched with .get because
                # they were either added in version 2.0.3 or only came
                # into use much later, so JSON output generated earlier
                # does not contain them. They will be None for such
                # files, which is much better than being unable to read
                # that earlier data at all.
                'identicalCount': hspDict.get('identicalCount'),
                'positiveCount': hspDict.get('positiveCount'),
                'percentIdentical': hspDict.get('percentIdentical'),
                'percentPositive': hspDict.get('percentPositive'),
            }
            hsp = self._hspClass(score, **details)
            alignment.addHsp(hsp)

    return result
def _dictToAlignments(self, blastDict, read):
    """
    Take a dict (made by XMLRecordsReader._convertBlastRecordToDict) and
    convert it to a list of alignments.

    @param blastDict: A C{dict}, from convertBlastRecordToDict.
    @param read: A C{Read} instance, containing the read that BLAST used
        to create this record.
    @raise ValueError: If the query id in the BLAST dictionary does not
        match the id of the read. An empty or whitespace-only query id
        that differs from the read id counts as a mismatch.
    @return: A C{list} of L{dark.alignment.Alignment} instances.
    """
    query = blastDict['query']
    if query != read.id:
        # Also accept a query whose first whitespace-delimited token is
        # the read id. An empty or all-whitespace query has no tokens at
        # all (str.split() gives []); treat that as a mismatch instead
        # of letting the [0] lookup raise an IndexError.
        tokens = query.split(None, 1)
        if not tokens or tokens[0] != read.id:
            raise ValueError(
                'The reads you have provided do not match the BLAST output: '
                'BLAST record query id (%s) does not match the id of the '
                'supposedly corresponding read (%s).' %
                (query, read.id))

    alignments = []
    # Plain HSP instances are scored by bit score, any other HSP class by
    # expect value.
    getScore = itemgetter('bits' if self._hspClass is HSP else 'expect')

    for blastAlignment in blastDict['alignments']:
        alignment = Alignment(blastAlignment['length'],
                              blastAlignment['title'])
        alignments.append(alignment)
        for blastHsp in blastAlignment['hsps']:
            score = getScore(blastHsp)
            normalized = normalizeHSP(blastHsp, len(read),
                                      self.application)
            hsp = self._hspClass(
                score,
                readStart=normalized['readStart'],
                readEnd=normalized['readEnd'],
                readStartInSubject=normalized['readStartInSubject'],
                readEndInSubject=normalized['readEndInSubject'],
                # blastHsp['frame'] holds the read frame first and the
                # subject frame second.
                readFrame=blastHsp['frame'][0],
                subjectStart=normalized['subjectStart'],
                subjectEnd=normalized['subjectEnd'],
                subjectFrame=blastHsp['frame'][1],
                readMatchedSequence=blastHsp['query'],
                subjectMatchedSequence=blastHsp['sbjct'])

            alignment.addHsp(hsp)

    return alignments
def _dictToAlignments(self, diamondDict, read):
    """
    Take a dict (made by DiamondTabularFormatReader.records) and convert
    it to a list of alignments.

    @param diamondDict: A C{dict}, from records().
    @param read: A C{Read} instance, containing the read that DIAMOND used
        to create this record.
    @raise ValueError: If the query id in the DIAMOND dictionary does not
        match the id of the read. An empty or whitespace-only query id
        that differs from the read id counts as a mismatch.
    @return: A C{list} of L{dark.alignment.Alignment} instances.
    """
    query = diamondDict['query']
    if query != read.id:
        # Also accept a query whose first whitespace-delimited token is
        # the read id. An empty or all-whitespace query has no tokens at
        # all (str.split() gives []); treat that as a mismatch instead
        # of letting the [0] lookup raise an IndexError.
        tokens = query.split(None, 1)
        if not tokens or tokens[0] != read.id:
            raise ValueError(
                'The reads you have provided do not match the DIAMOND output: '
                'DIAMOND record query id (%s) does not match the id of the '
                'supposedly corresponding read (%s).' %
                (query, read.id))

    alignments = []
    # Plain HSP instances are scored by bit score, any other HSP class by
    # expect value.
    getScore = itemgetter('bits' if self._hspClass is HSP else 'expect')

    for diamondAlignment in diamondDict['alignments']:
        alignment = Alignment(diamondAlignment['length'],
                              diamondAlignment['title'])
        alignments.append(alignment)
        for diamondHsp in diamondAlignment['hsps']:
            score = getScore(diamondHsp)
            normalized = normalizeHSP(diamondHsp, len(read),
                                      self.diamondTask)
            hsp = self._hspClass(
                score,
                readStart=normalized['readStart'],
                readEnd=normalized['readEnd'],
                readStartInSubject=normalized['readStartInSubject'],
                readEndInSubject=normalized['readEndInSubject'],
                readFrame=diamondHsp['frame'],
                subjectStart=normalized['subjectStart'],
                subjectEnd=normalized['subjectEnd'],
                readMatchedSequence=diamondHsp['query'],
                subjectMatchedSequence=diamondHsp['sbjct'])

            alignment.addHsp(hsp)

    return alignments
def _dictToAlignments(self, diamondDict, read):
    """
    Build alignment objects from a dict made by
    DiamondTabularFormatReader.records.

    @param diamondDict: A C{dict}, from records().
    @param read: A C{Read} instance, containing the read that DIAMOND used
        to create this record.
    @return: A C{list} of L{dark.alignment.Alignment} instances, one for
        each entry of C{diamondDict['alignments']}, in the same order.
    """
    # Plain HSP instances are scored by bit score, any other HSP class by
    # expect value.
    scoreKey = 'bits' if self._hspClass is HSP else 'expect'
    scoreOf = itemgetter(scoreKey)

    result = []
    for record in diamondDict['alignments']:
        alignment = Alignment(record['length'], record['title'])
        result.append(alignment)

        for hspDict in record['hsps']:
            score = scoreOf(hspDict)
            norm = normalizeHSP(hspDict, len(read), self.diamondTask)
            details = {
                'readStart': norm['readStart'],
                'readEnd': norm['readEnd'],
                'readStartInSubject': norm['readStartInSubject'],
                'readEndInSubject': norm['readEndInSubject'],
                'readFrame': hspDict['frame'],
                'subjectStart': norm['subjectStart'],
                'subjectEnd': norm['subjectEnd'],
                'readMatchedSequence': hspDict['query'],
                'subjectMatchedSequence': hspDict['sbjct'],
                # identicalCount and positiveCount are fetched with
                # diamondHsp-dict .get because they were added in
                # version 2.0.3 and are absent from JSON output
                # generated before that. They will be None for those
                # files, which is much better than no longer being able
                # to read that data.
                'identicalCount': hspDict.get('identicalCount'),
                'positiveCount': hspDict.get('positiveCount'),
            }
            hsp = self._hspClass(score, **details)
            alignment.addHsp(hsp)

    return result
def testThreeAlignments(self):
    """
    Given three alignments, bestAlignment must return the one whose
    first HSP has the highest score.
    """
    specs = (
        (33, 'Seq 1', (10, 9)),
        (44, 'Seq 2', (30, 29)),
        (55, 'Seq 3', (20, 19)),
    )
    candidates = []
    for length, title, scores in specs:
        candidate = Alignment(length, title)
        for score in scores:
            candidate.addHsp(HSP(score))
        candidates.append(candidate)

    readAlignments = ReadAlignments(Read('id1', 'aaa'), candidates)
    winner = bestAlignment(readAlignments)
    self.assertEqual('Seq 2', winner.subjectTitle)
    self.assertEqual(44, winner.subjectLength)