def testAddHsp(self):
     """
     An HSP added to an alignment must be retrievable from its hsps.
     """
     subject = Alignment(45, 'title')
     addedHsp = HSP(3)
     subject.addHsp(addedHsp)
     self.assertEqual(HSP(3), subject.hsps[0])
Beispiel #2
0
 def testAddHsp(self):
     """
     After addHsp is called, the alignment's first HSP must equal the one
     that was added.
     """
     target = Alignment(45, 'title')
     target.addHsp(HSP(3))
     expected = HSP(3)
     self.assertEqual(expected, target.hsps[0])
Beispiel #3
0
 def testAlignments(self):
     """
     A ReadAlignments instance must store the alignments it is given.
     """
     read = Read('id', 'ACGT')
     first = Alignment(45, 'title1')
     second = Alignment(55, 'title2')
     stored = ReadAlignments(read, [first, second])
     expected = [first, second]
     self.assertEqual(expected, stored)
Beispiel #4
0
    def _dictToAlignments(self, blastDict, read):
        """
        Build a list of alignments from a BLAST record dictionary (as made
        by XMLRecordsReader._convertBlastRecordToDict).

        @param blastDict: A C{dict}, from convertBlastRecordToDict.
        @param read: A C{Read} instance, holding the read that BLAST used
            to produce this record.
        @raise ValueError: If neither the query id in the BLAST dictionary
            nor its first whitespace-separated field equals the id of the
            read.
        @return: A C{list} of L{dark.alignment.Alignment} instances.
        """
        queryId = blastDict['query']
        if queryId != read.id and queryId.split()[0] != read.id:
            raise ValueError(
                'The reads you have provided do not match the BLAST output: '
                'BLAST record query id (%s) does not match the id of the '
                'supposedly corresponding read (%s).' %
                (queryId, read.id))

        result = []
        # Scores come from the 'bits' key when the HSP class is HSP, and
        # from the 'expect' key for any other HSP class.
        scoreKey = 'bits' if self._hspClass is HSP else 'expect'
        scoreOf = itemgetter(scoreKey)

        for record in blastDict['alignments']:
            subject = Alignment(record['length'], record['title'])
            result.append(subject)
            for hspDict in record['hsps']:
                hspScore = scoreOf(hspDict)
                coords = normalizeHSP(hspDict, len(read), self.application)
                newHsp = self._hspClass(
                    hspScore,
                    readStart=coords['readStart'],
                    readEnd=coords['readEnd'],
                    readStartInSubject=coords['readStartInSubject'],
                    readEndInSubject=coords['readEndInSubject'],
                    readFrame=hspDict['frame'][0],
                    subjectStart=coords['subjectStart'],
                    subjectEnd=coords['subjectEnd'],
                    subjectFrame=hspDict['frame'][1],
                    readMatchedSequence=hspDict['query'],
                    subjectMatchedSequence=hspDict['sbjct'],
                    # identicalCount and positiveCount are fetched with
                    # .get because they were only added in version 2.0.3.
                    # JSON output generated before then lacks them, so they
                    # come back as None, which is far better than being
                    # unable to read that older data at all.
                    identicalCount=hspDict.get('identicalCount'),
                    positiveCount=hspDict.get('positiveCount'))
                subject.addHsp(newHsp)

        return result
    def testOneAlignment(self):
        """
        bestAlignment must return the sole alignment when only one is
        present.
        """
        only = Alignment(44, 'Seq 1')
        for score in (10, 9):
            only.addHsp(HSP(score))

        readAlignments = ReadAlignments(Read('id1', 'aaa'), [only])
        best = bestAlignment(readAlignments)
        self.assertEqual('Seq 1', best.subjectTitle)
        self.assertEqual(44, best.subjectLength)
Beispiel #6
0
    def _dictToAlignments(self, diamondDict, read):
        """
        Build a list of alignments from a DIAMOND record dictionary (as made
        by DiamondTabularFormatReader.records).

        @param diamondDict: A C{dict}, from records().
        @param read: A C{Read} instance, holding the read that DIAMOND used
            to produce this record.
        @return: A C{list} of L{dark.alignment.Alignment} instances.
        """
        result = []
        # Scores come from the 'bits' key when the HSP class is HSP, and
        # from the 'expect' key for any other HSP class.
        scoreKey = 'bits' if self._hspClass is HSP else 'expect'
        scoreOf = itemgetter(scoreKey)

        for record in diamondDict['alignments']:
            subject = Alignment(record['length'], record['title'])
            result.append(subject)
            for hspDict in record['hsps']:
                hspScore = scoreOf(hspDict)
                coords = normalizeHSP(hspDict, len(read), self.diamondTask)
                newHsp = self._hspClass(
                    hspScore,
                    readStart=coords['readStart'],
                    readEnd=coords['readEnd'],
                    readStartInSubject=coords['readStartInSubject'],
                    readEndInSubject=coords['readEndInSubject'],
                    readFrame=hspDict['frame'],
                    subjectStart=coords['subjectStart'],
                    subjectEnd=coords['subjectEnd'],
                    readMatchedSequence=hspDict['query'],
                    subjectMatchedSequence=hspDict['sbjct'],
                    # identicalCount, positiveCount, percentIdentical and
                    # percentPositive are fetched with .get because they
                    # were either added in version 2.0.3 or only put to use
                    # much later. JSON output generated before then lacks
                    # them, so they come back as None, which is far better
                    # than being unable to read that earlier data at all.
                    identicalCount=hspDict.get('identicalCount'),
                    positiveCount=hspDict.get('positiveCount'),
                    percentIdentical=hspDict.get('percentIdentical'),
                    percentPositive=hspDict.get('percentPositive'))
                subject.addHsp(newHsp)

        return result
Beispiel #7
0
 def testExpectedAttrs(self):
     """
     An alignment must expose the subject title and subject length it was
     created with.
     """
     length, title = 45, 'title'
     subject = Alignment(length, title)
     self.assertEqual(title, subject.subjectTitle)
     self.assertEqual(length, subject.subjectLength)
Beispiel #8
0
    def _dictToAlignments(self, blastDict, read):
        """
        Build a list of alignments from a BLAST record dictionary (as made
        by XMLRecordsReader._convertBlastRecordToDict).

        @param blastDict: A C{dict}, from convertBlastRecordToDict.
        @param read: A C{Read} instance, holding the read that BLAST used
            to produce this record.
        @raise ValueError: If neither the query id in the BLAST dictionary
            nor its first whitespace-separated field equals the id of the
            read.
        @return: A C{list} of L{dark.alignment.Alignment} instances.
        """
        queryId = blastDict['query']
        if queryId != read.id and queryId.split()[0] != read.id:
            raise ValueError(
                'The reads you have provided do not match the BLAST output: '
                'BLAST record query id (%s) does not match the id of the '
                'supposedly corresponding read (%s).' %
                (queryId, read.id))

        result = []
        # Scores come from the 'bits' key when the HSP class is HSP, and
        # from the 'expect' key for any other HSP class.
        scoreKey = 'bits' if self._hspClass is HSP else 'expect'
        scoreOf = itemgetter(scoreKey)

        for record in blastDict['alignments']:
            subject = Alignment(record['length'], record['title'])
            result.append(subject)
            for hspDict in record['hsps']:
                hspScore = scoreOf(hspDict)
                coords = normalizeHSP(hspDict, len(read), self.application)
                newHsp = self._hspClass(
                    hspScore,
                    readStart=coords['readStart'],
                    readEnd=coords['readEnd'],
                    readStartInSubject=coords['readStartInSubject'],
                    readEndInSubject=coords['readEndInSubject'],
                    readFrame=hspDict['frame'][0],
                    subjectStart=coords['subjectStart'],
                    subjectEnd=coords['subjectEnd'],
                    subjectFrame=hspDict['frame'][1],
                    readMatchedSequence=hspDict['query'],
                    subjectMatchedSequence=hspDict['sbjct'])
                subject.addHsp(newHsp)

        return result
Beispiel #9
0
    def _dictToAlignments(self, blastDict, read):
        """
        Turn a BLAST record dictionary (as made by
        XMLRecordsReader._convertBlastRecordToDict) into alignments, one per
        entry in its 'alignments' list, each holding its HSPs.

        @param blastDict: A C{dict}, from convertBlastRecordToDict.
        @param read: A C{Read} instance, holding the read that BLAST used
            to produce this record.
        @raise ValueError: If neither the query id in the BLAST dictionary
            nor its first whitespace-separated field equals the id of the
            read.
        @return: A C{list} of L{dark.alignment.Alignment} instances.
        """
        recordQuery = blastDict['query']
        if recordQuery != read.id and recordQuery.split()[0] != read.id:
            raise ValueError(
                'The reads you have provided do not match the BLAST output: '
                'BLAST record query id (%s) does not match the id of the '
                'supposedly corresponding read (%s).' %
                (recordQuery, read.id))

        converted = []
        # The 'bits' key supplies the score when the HSP class is HSP; any
        # other HSP class is scored from the 'expect' key.
        extractScore = itemgetter(
            'bits' if self._hspClass is HSP else 'expect')

        for entry in blastDict['alignments']:
            current = Alignment(entry['length'], entry['title'])
            converted.append(current)
            for rawHsp in entry['hsps']:
                value = extractScore(rawHsp)
                offsets = normalizeHSP(rawHsp, len(read), self.application)
                built = self._hspClass(
                    value,
                    readStart=offsets['readStart'],
                    readEnd=offsets['readEnd'],
                    readStartInSubject=offsets['readStartInSubject'],
                    readEndInSubject=offsets['readEndInSubject'],
                    readFrame=rawHsp['frame'][0],
                    subjectStart=offsets['subjectStart'],
                    subjectEnd=offsets['subjectEnd'],
                    subjectFrame=rawHsp['frame'][1],
                    readMatchedSequence=rawHsp['query'],
                    subjectMatchedSequence=rawHsp['sbjct'])
                current.addHsp(built)

        return converted
Beispiel #10
0
    def _dictToAlignments(self, diamondDict, read):
        """
        Build a list of alignments from a DIAMOND record dictionary (as made
        by DiamondTabularFormatReader.records).

        @param diamondDict: A C{dict}, from records().
        @param read: A C{Read} instance, holding the read that DIAMOND used
            to produce this record.
        @raise ValueError: If neither the query id in the DIAMOND dictionary
            nor its first whitespace-separated field equals the id of the
            read.
        @return: A C{list} of L{dark.alignment.Alignment} instances.
        """
        queryId = diamondDict['query']
        if queryId != read.id and queryId.split()[0] != read.id:
            raise ValueError(
                'The reads you have provided do not match the DIAMOND output: '
                'DIAMOND record query id (%s) does not match the id of the '
                'supposedly corresponding read (%s).' %
                (queryId, read.id))

        result = []
        # Scores come from the 'bits' key when the HSP class is HSP, and
        # from the 'expect' key for any other HSP class.
        scoreKey = 'bits' if self._hspClass is HSP else 'expect'
        scoreOf = itemgetter(scoreKey)

        for record in diamondDict['alignments']:
            subject = Alignment(record['length'], record['title'])
            result.append(subject)
            for hspDict in record['hsps']:
                hspScore = scoreOf(hspDict)
                coords = normalizeHSP(hspDict, len(read), self.diamondTask)
                newHsp = self._hspClass(
                    hspScore,
                    readStart=coords['readStart'],
                    readEnd=coords['readEnd'],
                    readStartInSubject=coords['readStartInSubject'],
                    readEndInSubject=coords['readEndInSubject'],
                    readFrame=hspDict['frame'],
                    subjectStart=coords['subjectStart'],
                    subjectEnd=coords['subjectEnd'],
                    readMatchedSequence=hspDict['query'],
                    subjectMatchedSequence=hspDict['sbjct'])
                subject.addHsp(newHsp)

        return result
Beispiel #11
0
    def _dictToAlignments(self, diamondDict, read):
        """
        Turn a DIAMOND record dictionary (as made by
        DiamondTabularFormatReader.records) into alignments, one per entry
        in its 'alignments' list, each holding its HSPs.

        @param diamondDict: A C{dict}, from records().
        @param read: A C{Read} instance, holding the read that DIAMOND used
            to produce this record.
        @return: A C{list} of L{dark.alignment.Alignment} instances.
        """
        converted = []
        # The 'bits' key supplies the score when the HSP class is HSP; any
        # other HSP class is scored from the 'expect' key.
        extractScore = itemgetter(
            'bits' if self._hspClass is HSP else 'expect')

        for entry in diamondDict['alignments']:
            current = Alignment(entry['length'], entry['title'])
            converted.append(current)
            for rawHsp in entry['hsps']:
                value = extractScore(rawHsp)
                offsets = normalizeHSP(rawHsp, len(read), self.diamondTask)
                built = self._hspClass(
                    value,
                    readStart=offsets['readStart'],
                    readEnd=offsets['readEnd'],
                    readStartInSubject=offsets['readStartInSubject'],
                    readEndInSubject=offsets['readEndInSubject'],
                    readFrame=rawHsp['frame'],
                    subjectStart=offsets['subjectStart'],
                    subjectEnd=offsets['subjectEnd'],
                    readMatchedSequence=rawHsp['query'],
                    subjectMatchedSequence=rawHsp['sbjct'],
                    # Use diamondHsp-style .get lookups for identicalCount
                    # and positiveCount because they were added in version
                    # 2.0.3 and are absent from JSON output generated
                    # before that. They will be None for those files, which
                    # is far better than being unable to read that data.
                    identicalCount=rawHsp.get('identicalCount'),
                    positiveCount=rawHsp.get('positiveCount'))
                current.addHsp(built)

        return converted
Beispiel #12
0
    def testOneAlignment(self):
        """
        With a single alignment present, bestAlignment must return that
        alignment.
        """
        single = Alignment(44, 'Seq 1')
        single.addHsp(HSP(10))
        single.addHsp(HSP(9))

        read = Read('id1', 'aaa')
        winner = bestAlignment(ReadAlignments(read, [single]))
        self.assertEqual('Seq 1', winner.subjectTitle)
        self.assertEqual(44, winner.subjectLength)
Beispiel #13
0
    def testThreeAlignments(self):
        """
        Given three alignments, bestAlignment must return the one whose
        first HSP is the highest.
        """
        # (subject length, subject title, HSP scores in insertion order).
        specs = (
            (33, 'Seq 1', (10, 9)),
            (44, 'Seq 2', (30, 29)),
            (55, 'Seq 3', (20, 19)),
        )

        alignments = []
        for length, title, scores in specs:
            alignment = Alignment(length, title)
            for score in scores:
                alignment.addHsp(HSP(score))
            alignments.append(alignment)

        hit = ReadAlignments(Read('id1', 'aaa'), alignments)
        best = bestAlignment(hit)
        self.assertEqual('Seq 2', best.subjectTitle)
        self.assertEqual(44, best.subjectLength)
Beispiel #14
0
 def testNoHspsWhenCreated(self):
     """
     A newly created alignment must not contain any HSPs.
     """
     fresh = Alignment(45, 'title')
     hspCount = len(fresh.hsps)
     self.assertEqual(0, hspCount)
    def testThreeAlignments(self):
        """
        bestAlignment must pick, out of three alignments, the one with the
        highest first HSP.
        """
        low = Alignment(33, 'Seq 1')
        for score in (10, 9):
            low.addHsp(HSP(score))

        high = Alignment(44, 'Seq 2')
        for score in (30, 29):
            high.addHsp(HSP(score))

        middle = Alignment(55, 'Seq 3')
        for score in (20, 19):
            middle.addHsp(HSP(score))

        readAlignments = ReadAlignments(Read('id1', 'aaa'),
                                        [low, high, middle])
        winner = bestAlignment(readAlignments)
        self.assertEqual('Seq 2', winner.subjectTitle)
        self.assertEqual(44, winner.subjectLength)