Esempio n. 1
0
    def readAlignments(self, reads):
        """
        Parse the JSON lines of self._filename and yield the read alignments
        they describe.

        Input reads are consumed in order. Any read that is passed over
        while looking for the read named by a JSON record is yielded with an
        empty list of alignments.

        @param reads: An iterable of L{Read} instances, corresponding to the
            reads that were given to DIAMOND.
        @raise ValueError: If any of the lines in the file cannot be converted
            to JSON, or if C{reads} is exhausted before a read with the id
            found in a record is encountered.
        @return: A generator that yields C{dark.alignments.ReadAlignments}
            instances.
        """
        if self._fp is None:
            self._open(self._filename)

        readIterator = iter(reads)

        try:
            # Line numbers reported in errors start at 2 (presumably the
            # first line of the file is consumed when it is opened; confirm
            # against self._open).
            for lineNumber, line in enumerate(self._fp, start=2):
                # Drop the final character of the line (expected to be the
                # newline).
                text = line[:-1]
                try:
                    record = loads(text)
                except ValueError as e:
                    raise ValueError(
                        'Could not convert line %d of %r to JSON (%s). '
                        'Line is %r.' %
                        (lineNumber, self._filename, e, text))

                # Advance through the input reads until reaching the one
                # this DIAMOND record belongs to.
                matched = False
                while not matched:
                    try:
                        read = next(readIterator)
                    except StopIteration:
                        raise ValueError(
                            'Read generator failed to yield a read '
                            'with id \'%s\' as found in record number %d '
                            'during parsing of DIAMOND output file %r.' %
                            (record['query'], lineNumber - 1,
                             self._filename))

                    matched = read.id == record['query']
                    if matched:
                        alignments = self._dictToAlignments(record, read)
                    else:
                        # An input read with no matches does not appear in
                        # the DIAMOND output, so it gets no alignments.
                        alignments = []
                    yield ReadAlignments(read, alignments)

        finally:
            # Always release the file, and forget it, so that a later call
            # re-opens it.
            self._fp.close()
            self._fp = None
Esempio n. 2
0
 def testNoAlignments(self):
     """
     A read alignments instance created without any alignments must have
     a length of zero.
     """
     emptyAlignments = ReadAlignments(Read('id', 'ACGT'))
     self.assertEqual(0, len(emptyAlignments))
Esempio n. 3
0
 def testRead(self):
     """
     A read alignments instance must store the read it was created with.
     """
     expected = Read('id', 'ACGT')
     stored = ReadAlignments(expected)
     self.assertEqual(expected, stored.read)
Esempio n. 4
0
    def readAlignments(self):
        """
        Parse the JSON lines of self._filename and yield one read alignments
        instance per line.

        The read for each record is built from the record's 'queryId' and
        'querySequence' values.

        @raise ValueError: If any of the lines in the file cannot be converted
            to JSON.
        @return: A generator that yields C{dark.alignments.ReadAlignments}
            instances.
        """
        if self._fp is None:
            self._open(self._filename)

        try:
            # Line numbers reported in errors start at 2 (presumably the
            # first line of the file is consumed when it is opened; confirm
            # against self._open).
            for lineNumber, line in enumerate(self._fp, start=2):
                # Drop the final character of the line (expected to be the
                # newline).
                text = line[:-1]
                try:
                    record = loads(text)
                except ValueError as e:
                    raise ValueError(
                        'Could not convert line %d of %r to JSON (%s). '
                        'Line is %r.' %
                        (lineNumber, self._filename, e, text))

                query = AARead(record['queryId'], record['querySequence'])
                matches = jsonDictToAlignments(record, self._database)
                yield ReadAlignments(query, matches)
        finally:
            # Always release the file, and forget it, so that a later call
            # re-opens it.
            self._fp.close()
            self._fp = None
Esempio n. 5
0
    def iter(self):
        """
        Read the DIAMOND records in all input files and yield a
        C{ReadAlignments} instance for each read.

        @return: A generator that yields C{ReadAlignments} instances.
        """
        # A single iterator over the reads is shared by the readers of all
        # files, so each reader continues from where the previous one
        # stopped.
        pendingReads = iter(self.reads)

        for index, filename in enumerate(self.filenames):
            if index == 0:
                # The reader for the first input file already exists (it is
                # set up in __init__). This is less clean than it could be,
                # but it makes testing easier, since open() is then only
                # called once for each input file.
                reader = self._reader
            else:
                reader = self._getReader(filename, self.scoreClass)

            for readAlignments in reader.readAlignments(pendingReads):
                yield readAlignments

        # Reads left over once all files are processed had no subject
        # matches, so they get an empty list of alignments.
        for unmatched in pendingReads:
            yield ReadAlignments(unmatched, [])
Esempio n. 6
0
 def testAlignments(self):
     """
     A read alignments instance must store the alignments it was given.
     """
     read = Read('id', 'ACGT')
     first, second = Alignment(45, 'title1'), Alignment(55, 'title2')
     stored = ReadAlignments(read, [first, second])
     self.assertEqual([first, second], stored)
Esempio n. 7
0
    def testOneAlignment(self):
        """
        bestAlignment must return the only alignment when just one is
        present.
        """
        only = Alignment(44, 'Seq 1')
        for value in (10, 9):
            only.addHsp(HSP(value))

        best = bestAlignment(ReadAlignments(Read('id1', 'aaa'), [only]))
        self.assertEqual('Seq 1', best.subjectTitle)
        self.assertEqual(44, best.subjectLength)
Esempio n. 8
0
    def readAlignments(self, reads):
        """
        Parse the JSON lines of self._filename, pair each record with the
        next read from C{reads}, and yield the resulting read alignments.

        @param reads: An iterable of L{Read} instances, corresponding to the
            reads that were given to BLAST.
        @raise ValueError: If any of the lines in the file cannot be converted
            to JSON, or if C{reads} is exhausted before the records in the
            file are.
        @return: A generator that yields C{dark.alignments.ReadAlignments}
            instances.
        """
        if self._fp is None:
            self._open(self._filename)

        readIterator = iter(reads)

        try:
            # Line numbers reported in errors start at 2 (presumably the
            # first line of the file is consumed when it is opened; confirm
            # against self._open).
            for lineNumber, line in enumerate(self._fp, start=2):
                # Drop the final character of the line (expected to be the
                # newline).
                text = line[:-1]
                try:
                    record = loads(text)
                except ValueError as e:
                    raise ValueError(
                        'Could not convert line %d of %r to JSON (%s). '
                        'Line is %r.' %
                        (lineNumber, self._filename, e, text))

                # Records and reads are matched purely by position: the
                # n-th record is paired with the n-th read.
                try:
                    read = next(readIterator)
                except StopIteration:
                    raise ValueError(
                        'Read generator failed to yield read number %d '
                        'during parsing of BLAST file %r.' %
                        (lineNumber - 1, self._filename))

                alignments = self._dictToAlignments(record, read)
                yield ReadAlignments(read, alignments)
        finally:
            # Always release the file, and forget it, so that a later call
            # re-opens it.
            self._fp.close()
            self._fp = None
Esempio n. 9
0
    def testThreeAlignments(self):
        """
        bestAlignment must return, out of three alignments, the one whose
        first HSP is the highest.
        """
        # Each entry gives the two Alignment constructor arguments followed
        # by the values of the HSPs to add, in order.
        specifications = (
            (33, 'Seq 1', (10, 9)),
            (44, 'Seq 2', (30, 29)),
            (55, 'Seq 3', (20, 19)),
        )

        alignments = []
        for length, title, hspValues in specifications:
            alignment = Alignment(length, title)
            for value in hspValues:
                alignment.addHsp(HSP(value))
            alignments.append(alignment)

        best = bestAlignment(ReadAlignments(Read('id1', 'aaa'), alignments))
        self.assertEqual('Seq 2', best.subjectTitle)
        self.assertEqual(44, best.subjectLength)
Esempio n. 10
0
    def readAlignments(self, reads):
        """
        Read lines of JSON from self._filename, convert them to read alignments
        and yield them.

        Input reads are consumed in order. Any read that is passed over
        while looking for the read named by a JSON record is yielded with an
        empty list of alignments.

        @param reads: An iterable of L{Read} instances, corresponding to the
            reads that were given to DIAMOND.
        @raise ValueError: If any of the lines in the file cannot be converted
            to JSON, or if C{reads} is exhausted before a read matching the
            query id of a record is found.
        @return: A generator that yields C{dark.alignments.ReadAlignments}
            instances.
        """
        if self._fp is None:
            self._open(self._filename)

        reads = iter(reads)

        try:
            # Line numbers reported in errors start at 2 (presumably the
            # first line of the file is consumed when it is opened; confirm
            # against self._open).
            for lineNumber, line in enumerate(self._fp, start=2):
                try:
                    record = loads(line[:-1])
                except ValueError as e:
                    raise ValueError(
                        'Could not convert line %d of %r to JSON (%s). '
                        'Line is %r.' %
                        (lineNumber, self._filename, e, line[:-1]))
                else:
                    recordTitle = record['query']
                    while True:
                        # Iterate through the input reads until we find the
                        # one that matches this DIAMOND record.
                        try:
                            read = next(reads)
                        except StopIteration:
                            raise ValueError(
                                'Read generator failed to yield a read '
                                'with id \'%s\' as found in record number %d '
                                'during parsing of DIAMOND output file %r.' %
                                (recordTitle, lineNumber - 1, self._filename))
                        else:
                            # Look for an exact read id / subject title match.
                            # If that doesn't work, allow for the case where
                            # the JSON record has a truncated query (i.e.,
                            # read) id. This covers the situation where a tool
                            # we use (e.g., bwa mem) unconditionally does this
                            # truncation in the output it writes.
                            #
                            # The first whitespace-separated field of the
                            # read id is taken with a slice, not by indexing,
                            # so that an empty (or all-whitespace) read id is
                            # treated as a non-match instead of causing an
                            # IndexError.
                            readId = read.id
                            if (readId == recordTitle or
                                    readId.split(None, 1)[:1] ==
                                    [recordTitle]):
                                alignments = self._dictToAlignments(
                                    record, read)
                                yield ReadAlignments(read, alignments)
                                break
                            else:
                                # This is an input read that had no DIAMOND
                                # matches, so it does not appear in DIAMOND's
                                # output. Yield an empty ReadAlignments for
                                # it.
                                yield ReadAlignments(read, [])

        finally:
            # Always release the file, and forget it, so that a later call
            # re-opens it.
            self._fp.close()
            self._fp = None