Example #1
0
    def testDictLookupGzipDataWithBGZsuffix(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected read
        when the file is BGZF-compressed and its name ends in .bgz.
        """
        opened = []  # Names passed to the patched bgzf.open, in call order.

        def fakeOpen(filename, *args, **kwargs):
            # Two opens are tolerated; a third one fails the test.
            if len(opened) > 1:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))
            else:
                self.assertEqual('filename.fasta.bgz', filename)
                opened.append(filename)
                # Compress a one-sequence FASTA file in memory and hand back
                # a fresh BGZF reader positioned at the start of those bytes.
                compressed = BytesIO()
                bgzfWriter = bgzf.BgzfWriter(fileobj=compressed)
                bgzfWriter.write(b'>id0\nAC\n')
                bgzfWriter.flush()
                stream = BytesIO(compressed.getvalue())
                stream.mode = 'rb'
                return bgzf.BgzfReader(fileobj=stream)

        with patch.object(bgzf, 'open', side_effect=fakeOpen):
            bgzIndex = SqliteIndex(':memory:')
            bgzIndex.addFile('filename.fasta.bgz')
            self.assertEqual(DNARead('id0', 'AC'), bgzIndex['id0'])
            bgzIndex.close()
Example #2
0
    def testDictLookupWithTwoFiles(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected reads
        after sequences have been added from two different files.
        """
        first = ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n')
        second = ('filename2.fasta', '>seq3\nAAACCC\n')
        # The (name, contents) pair expected for each successive open call.
        schedule = [first, second, first, first, second]
        opened = []

        def fakeOpen(filename, *args, **kwargs):
            if len(opened) < len(schedule):
                expectedName, contents = schedule[len(opened)]
                self.assertEqual(expectedName, filename)
                opened.append(filename)
                return StringIO(contents)
            else:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        expectedReads = (
            ('id1', 'ACTG'),
            ('id2', 'AACCTTGG'),
            ('seq3', 'AAACCC'),
        )

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            twoFileIndex = SqliteIndex(':memory:')
            twoFileIndex.addFile('filename1.fasta')
            twoFileIndex.addFile('filename2.fasta')
            for readId, sequence in expectedReads:
                self.assertEqual(DNARead(readId, sequence),
                                 twoFileIndex[readId])
            twoFileIndex.close()
Example #3
0
    def testDictLookupSpecificReadClass(self):
        """
        When the index is created with a readClass, dictionary-style lookup
        (__getitem__) must return an instance of that class.
        """
        opened = []  # Names passed to the patched open, in call order.

        def fakeOpen(filename, *args, **kwargs):
            # The fake file may be opened twice; a third open is a failure.
            if len(opened) < 2:
                self.assertEqual('filename.fasta', filename)
                opened.append(filename)
                return StringIO('>id1\nMM\n>id2\n')
            else:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            aaIndex = SqliteIndex(':memory:', readClass=AARead)
            aaIndex.addFile('filename.fasta')
            read = aaIndex['id1']
            self.assertTrue(isinstance(read, AARead))
            self.assertEqual(AARead('id1', 'MM'), read)
            aaIndex.close()
Example #4
0
    def testDictLookupWithFastaDirectory(self):
        """
        With a fastaDirectory given, dictionary-style lookup (__getitem__)
        must return the expected read. The first open must use the name
        passed to addFile and the second must use the file's base name
        joined onto the FASTA directory.
        """
        contents = '>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n'
        # File name expected on each successive call to open.
        expectedNames = [
            '/tmp/f.fasta',
            os.path.join('/usr/local/fasta', 'f.fasta'),
        ]
        opened = []

        def fakeOpen(filename, *args, **kwargs):
            if len(opened) < len(expectedNames):
                self.assertEqual(expectedNames[len(opened)], filename)
                opened.append(filename)
                return StringIO(contents)
            else:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            dirIndex = SqliteIndex(':memory:',
                                   fastaDirectory='/usr/local/fasta')
            dirIndex.addFile('/tmp/f.fasta')
            self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'), dirIndex['id1'])
            dirIndex.close()
Example #5
0
    def testFindWithTwoFiles(self):
        """
        After sequences are added from two files, _find must return the
        expected (file name, offset) pair for each sequence id.
        """
        # The (name, contents) pair expected for each successive open call.
        schedule = [
            ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n'),
            ('filename2.fasta', '>sequence3\nAAACCC\n'),
        ]
        opened = []

        def fakeOpen(filename, *args, **kwargs):
            if len(opened) < len(schedule):
                expectedName, contents = schedule[len(opened)]
                self.assertEqual(expectedName, filename)
                opened.append(filename)
                return StringIO(contents)
            else:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        # Each expected offset equals the number of characters that precede
        # the sequence data in the corresponding fake file.
        expectedLocations = (
            ('id1', ('filename1.fasta', 5)),
            ('id2', ('filename1.fasta', 15)),
            ('sequence3', ('filename2.fasta', 11)),
        )

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            twoFileIndex = SqliteIndex(':memory:')
            twoFileIndex.addFile('filename1.fasta')
            twoFileIndex.addFile('filename2.fasta')
            for readId, location in expectedLocations:
                self.assertEqual(location, twoFileIndex._find(readId))
            twoFileIndex.close()
Example #6
0
    def testAddDuplicateFile(self):
        """
        Once a file has been added with addFile, passing the same file name
        to _addFilename must raise a ValueError.
        """
        opened = []  # Names passed to the patched open, in call order.

        def fakeOpen(filename, *args, **kwargs):
            # Only the very first open is legitimate.
            if opened:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))
            else:
                self.assertEqual('filename.fasta', filename)
                opened.append(filename)
                return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            fastaIndex = SqliteIndex(':memory:')
            # The fake file holds two sequences.
            self.assertEqual(2, fastaIndex.addFile('filename.fasta'))
            pattern = "^Duplicate file name: 'filename\\.fasta'$"
            assertRaisesRegex(self, ValueError, pattern,
                              fastaIndex._addFilename, 'filename.fasta')
            fastaIndex.close()
Example #7
0
    def testDictLookupSequenceMiddleOfThree(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected read
        for a multi-line sequence that is the second of three sequences in
        the input file.
        """
        contents = '>id1\nACTG\nCCCC\n>id2\nAACCTG\nAAA\n>id3\nAAA\n'
        opened = []  # Names passed to the patched open, in call order.

        def fakeOpen(filename, *args, **kwargs):
            # The fake file may be opened twice; a third open is a failure.
            if len(opened) < 2:
                self.assertEqual('filename.fasta', filename)
                opened.append(filename)
                return StringIO(contents)
            else:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            fastaIndex = SqliteIndex(':memory:')
            fastaIndex.addFile('filename.fasta')
            self.assertEqual(DNARead('id2', 'AACCTGAAA'), fastaIndex['id2'])
            fastaIndex.close()
Example #8
0
    def testDictLookupWithTwoFiles(self):
        """
        Reads looked up with index[...] must match what was added when the
        sequences come from two separate files.
        """
        class FakeOpen(object):
            """
            Replacement for open that serves one of two fake FASTA files,
            chosen by how many calls have been made so far.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def serve(self, filename, *args, **kwargs):
                if self.calls in (0, 2, 3):
                    self.testCase.assertEqual('filename1.fasta', filename)
                    self.calls += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
                elif self.calls in (1, 4):
                    self.testCase.assertEqual('filename2.fasta', filename)
                    self.calls += 1
                    return StringIO('>seq3\nAAACCC\n')
                else:
                    self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = opener.serve
            db = SqliteIndex(':memory:')
            db.addFile('filename1.fasta')
            db.addFile('filename2.fasta')
            self.assertEqual(DNARead('id1', 'ACTG'), db['id1'])
            self.assertEqual(DNARead('id2', 'AACCTTGG'), db['id2'])
            self.assertEqual(DNARead('seq3', 'AAACCC'), db['seq3'])
            db.close()
Example #9
0
    def testDictLookupWithFastaDirectory(self):
        """
        When a FASTA base directory is configured, index[...] must return
        the expected read, and the second open must be for the file's base
        name joined onto that directory.
        """
        fastaText = '>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n'

        class FakeOpen(object):
            """
            Replacement for open that checks the file name used on each of
            the two permitted calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def serve(self, filename, *args, **kwargs):
                if self.calls == 0:
                    # First open: the name exactly as given to addFile.
                    self.testCase.assertEqual('/tmp/f.fasta', filename)
                    self.calls += 1
                    return StringIO(fastaText)
                elif self.calls == 1:
                    # Second open: base name relocated into fastaDirectory.
                    relocated = os.path.join('/usr/local/fasta', 'f.fasta')
                    self.testCase.assertEqual(relocated, filename)
                    self.calls += 1
                    return StringIO(fastaText)
                else:
                    self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = opener.serve
            db = SqliteIndex(':memory:', fastaDirectory='/usr/local/fasta')
            db.addFile('/tmp/f.fasta')
            self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'), db['id1'])
            db.close()
Example #10
0
    def testAddDuplicateFile(self):
        """
        Once a file has been added with addFile, passing the same file name
        to _addFilename must raise a ValueError.
        """
        class Open(object):
            # Stand-in for the built-in open: serves the fake FASTA file on
            # the first call and fails the test on any later call.
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count == 0:
                    self.test.assertEqual('filename.fasta', filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        sideEffect = Open(self).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            # The fake file holds two sequences.
            self.assertEqual(2, index.addFile('filename.fasta'))
            # The dot is escaped so that the pattern matches only the literal
            # file name rather than any character in that position.
            error = "^Duplicate file name: 'filename\\.fasta'$"
            # assertRaisesRegexp is a deprecated alias that Python 3.12
            # removed. Prefer the current name and only fall back to the old
            # one on interpreters that lack it.
            raisesRegex = (getattr(self, 'assertRaisesRegex', None) or
                           self.assertRaisesRegexp)
            raisesRegex(ValueError, error, index._addFilename,
                        'filename.fasta')
            index.close()
Example #11
0
    def testAddOneFile(self):
        """
        Adding a single file holding two sequences to a new index must make
        addFile return 2.
        """
        opened = []  # Names passed to the patched open, in call order.

        def fakeOpen(filename, *args, **kwargs):
            # Only the very first open is legitimate.
            if opened:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))
            else:
                self.assertEqual('filename.fasta', filename)
                opened.append(filename)
                return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            fastaIndex = SqliteIndex(':memory:')
            added = fastaIndex.addFile('filename.fasta')
            self.assertEqual(2, added)
            fastaIndex.close()
Example #12
0
    def testAddFilesWithDuplicateSequence(self):
        """
        If a sequence id occurs in more than one FASTA file, a ValueError must
        be raised.
        """
        class Open(object):
            # Stand-in for the built-in open: serves the first fake file,
            # then the second (which repeats id2), and fails the test on any
            # further call.
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count == 0:
                    self.test.assertEqual('filename1.fasta', filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
                elif self.count == 1:
                    self.test.assertEqual('filename2.fasta', filename)
                    self.count += 1
                    return StringIO('>id2\nAAACCC\n')
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        sideEffect = Open(self).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename1.fasta')
            # Backslashes are doubled because '\.' is not a valid escape
            # sequence in a normal string literal. The resulting pattern is
            # unchanged.
            error = ("^FASTA sequence id 'id2', found in file "
                     "'filename2\\.fasta', was previously added from file "
                     "'filename1\\.fasta'\\.$")
            # assertRaisesRegexp is a deprecated alias that Python 3.12
            # removed. Prefer the current name and only fall back to the old
            # one on interpreters that lack it.
            raisesRegex = (getattr(self, 'assertRaisesRegex', None) or
                           self.assertRaisesRegexp)
            raisesRegex(ValueError, error, index.addFile, 'filename2.fasta')
            index.close()
Example #13
0
    def testDictLookupSequenceCrossesNewlines(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected read
        when its sequence is spread over several input lines, some ending
        in LF and some in CRLF.
        """
        contents = '>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n'
        opened = []  # Names passed to the patched open, in call order.

        def fakeOpen(filename, *args, **kwargs):
            # The fake file may be opened twice; a third open is a failure.
            if len(opened) < 2:
                self.assertEqual('filename.fasta', filename)
                opened.append(filename)
                return StringIO(contents)
            else:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            fastaIndex = SqliteIndex(':memory:')
            fastaIndex.addFile('filename.fasta')
            self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'),
                             fastaIndex['id1'])
            fastaIndex.close()
Example #14
0
    def testDictLookupSpecificReadClass(self):
        """
        A read obtained with index[...] must be an instance of the readClass
        the index was created with.
        """
        class FakeOpen(object):
            """
            Replacement for open that serves the fake FASTA file on its
            first two calls and fails the test afterwards.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def serve(self, filename, *args, **kwargs):
                if self.calls < 2:
                    self.testCase.assertEqual('filename.fasta', filename)
                    self.calls += 1
                    return StringIO('>id1\nMM\n>id2\n')
                else:
                    self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = opener.serve
            db = SqliteIndex(':memory:', readClass=AARead)
            db.addFile('filename.fasta')
            found = db['id1']
            self.assertTrue(isinstance(found, AARead))
            self.assertEqual(AARead('id1', 'MM'), found)
            db.close()
Example #15
0
    def testDictLookupGzipDataWithBGZsuffix(self):
        """
        A read looked up with index[...] must be the expected one when the
        file holds BGZF data and is named with a .bgz suffix.
        """
        class FakeBgzfOpen(object):
            """
            Replacement for bgzf.open that serves a freshly compressed
            one-sequence file on its first two calls and fails the test
            afterwards.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def serve(self, filename, *args, **kwargs):
                if self.calls > 1:
                    self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                else:
                    self.testCase.assertEqual('filename.fasta.bgz', filename)
                    self.calls += 1
                    # Write the BGZF bytes to memory, then wrap a copy of
                    # them in a reader.
                    sink = BytesIO()
                    bgzfWriter = bgzf.BgzfWriter(fileobj=sink)
                    bgzfWriter.write(b'>id0\nAC\n')
                    bgzfWriter.flush()
                    source = BytesIO(sink.getvalue())
                    source.mode = 'rb'
                    return bgzf.BgzfReader(fileobj=source)

        opener = FakeBgzfOpen(self)
        with patch.object(bgzf, 'open') as patchedOpen:
            patchedOpen.side_effect = opener.serve
            db = SqliteIndex(':memory:')
            db.addFile('filename.fasta.bgz')
            self.assertEqual(DNARead('id0', 'AC'), db['id0'])
            db.close()
Example #16
0
    def testAddFilesWithDuplicateSequence(self):
        """
        Adding a second FASTA file that repeats a sequence id already added
        from an earlier file must raise a ValueError.
        """
        # The (name, contents) pair expected for each successive open call.
        # The second file repeats id2 from the first.
        schedule = [
            ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n'),
            ('filename2.fasta', '>id2\nAAACCC\n'),
        ]
        opened = []

        def fakeOpen(filename, *args, **kwargs):
            if len(opened) < len(schedule):
                expectedName, contents = schedule[len(opened)]
                self.assertEqual(expectedName, filename)
                opened.append(filename)
                return StringIO(contents)
            else:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        with patch.object(builtins, 'open', side_effect=fakeOpen):
            fastaIndex = SqliteIndex(':memory:')
            fastaIndex.addFile('filename1.fasta')
            pattern = ("^FASTA sequence id 'id2', found in file "
                       "'filename2\\.fasta', was previously added from file "
                       "'filename1\\.fasta'\\.$")
            assertRaisesRegex(self, ValueError, pattern, fastaIndex.addFile,
                              'filename2.fasta')
            fastaIndex.close()
Example #17
0
    def testFindWithTwoFiles(self):
        """
        With sequences added from two files, _find must report the expected
        (file name, offset) pair for every sequence id.
        """
        class FakeOpen(object):
            """
            Replacement for open that serves the first fake file, then the
            second, and fails the test on any further call.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def serve(self, filename, *args, **kwargs):
                if self.calls > 1:
                    self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                elif self.calls == 0:
                    self.testCase.assertEqual('filename1.fasta', filename)
                    self.calls += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
                else:
                    self.testCase.assertEqual('filename2.fasta', filename)
                    self.calls += 1
                    return StringIO('>sequence3\nAAACCC\n')

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = opener.serve
            db = SqliteIndex(':memory:')
            db.addFile('filename1.fasta')
            db.addFile('filename2.fasta')
            self.assertEqual(('filename1.fasta', 5), db._find('id1'))
            self.assertEqual(('filename1.fasta', 15), db._find('id2'))
            self.assertEqual(('filename2.fasta', 11), db._find('sequence3'))
            db.close()
Example #18
0
    def testAddOneFile(self):
        """
        addFile must return 2 when a single file containing two sequences
        is added to a fresh index.
        """
        class FakeOpen(object):
            """
            Replacement for open that serves the fake FASTA file once and
            fails the test on any later call.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def serve(self, filename, *args, **kwargs):
                if self.calls != 0:
                    self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                else:
                    self.testCase.assertEqual('filename.fasta', filename)
                    self.calls += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = opener.serve
            db = SqliteIndex(':memory:')
            self.assertEqual(2, db.addFile('filename.fasta'))
            db.close()
Example #19
0
 def testGetFileNumber(self):
     """
     After a file name is registered with _addFilename, _getFileNumber must
     return the same number for that name.
     """
     name = 'filename.fasta'
     db = SqliteIndex(':memory:')
     self.assertEqual(1, db._addFilename(name))
     self.assertEqual(1, db._getFileNumber(name))
     db.close()
Example #20
0
 def testGetNonexistentFileNumber(self):
     """
     _getFileNumber must return None for a file name that has never been
     added to the index.
     """
     db = SqliteIndex(':memory:')
     fileNumber = db._getFileNumber('filename.fasta')
     self.assertEqual(None, fileNumber)
     db.close()
Example #21
0
 def testAddFilename(self):
     """
     _addFilename must return 1 for the first file name added and 2 for the
     second.
     """
     db = SqliteIndex(':memory:')
     names = ('filename1.fasta', 'filename2.fasta')
     for expected, name in enumerate(names, 1):
         self.assertEqual(expected, db._addFilename(name))
     db.close()
Example #22
0
 def testGetFileNumber(self):
     """
     _getFileNumber must return the number that _addFilename gave to the
     file name when it was added.
     """
     fastaIndex = SqliteIndex(':memory:')
     added = fastaIndex._addFilename('filename.fasta')
     self.assertEqual(1, added)
     fetched = fastaIndex._getFileNumber('filename.fasta')
     self.assertEqual(1, fetched)
     fastaIndex.close()
Example #23
0
 def testGetNonexistentFileNumber(self):
     """
     Asking _getFileNumber about a file name that was never added must give
     None.
     """
     unknownName = 'filename.fasta'
     fastaIndex = SqliteIndex(':memory:')
     self.assertEqual(None, fastaIndex._getFileNumber(unknownName))
     fastaIndex.close()
Example #24
0
 def testAddFilename(self):
     """
     Two successive calls to _addFilename with different names must return
     1 and then 2.
     """
     fastaIndex = SqliteIndex(':memory:')
     firstNumber = fastaIndex._addFilename('filename1.fasta')
     self.assertEqual(1, firstNumber)
     secondNumber = fastaIndex._addFilename('filename2.fasta')
     self.assertEqual(2, secondNumber)
     fastaIndex.close()
Example #25
0
    def testDictLookupGzipData(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected reads
        from BGZF-compressed input in which sequences span several lines,
        lines end in either LF or CRLF, and some sequences start more than
        64K bytes into the uncompressed data.
        """
        # id1 is 70000 bytes long, which puts id2 and id3 beyond the 64K
        # mark of the uncompressed data.
        fastaBytes = b''.join([
            b'>id0\nAC\n',
            b'>id1\n', b'A' * 70000, b'\n',
            b'>id2\r\nACTG\r\nCCCC\r\nGGG\r\n',
            b'>id3\nAACCTG\n',
        ])
        opened = []  # Names passed to the patched bgzf.open, in call order.

        def fakeOpen(filename, *args, **kwargs):
            # Five opens are tolerated; a sixth one fails the test.
            if len(opened) > 4:
                self.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))
            else:
                self.assertEqual('filename.fasta.gz', filename)
                opened.append(filename)
                # Compress the data anew for every open and return a fresh
                # reader positioned at the start.
                compressed = BytesIO()
                bgzfWriter = bgzf.BgzfWriter(fileobj=compressed)
                bgzfWriter.write(fastaBytes)
                bgzfWriter.flush()
                stream = BytesIO(compressed.getvalue())
                stream.mode = 'rb'
                return bgzf.BgzfReader(fileobj=stream)

        expectedReads = (
            ('id0', 'AC'),
            ('id1', 'A' * 70000),
            ('id2', 'ACTGCCCCGGG'),
            ('id3', 'AACCTG'),
        )

        with patch.object(bgzf, 'open', side_effect=fakeOpen):
            gzIndex = SqliteIndex(':memory:')
            gzIndex.addFile('filename.fasta.gz')
            for readId, sequence in expectedReads:
                self.assertEqual(DNARead(readId, sequence), gzIndex[readId])
            gzIndex.close()
Example #26
0
    def testDictLookupGzipData(self):
        """
        Reads looked up with index[...] must be the expected ones when the
        input is BGZF-compressed, sequences are split over several lines
        with LF or CRLF endings, and some sequences begin more than 64K
        bytes into the uncompressed data.
        """
        class FakeBgzfOpen(object):
            """
            Replacement for bgzf.open that serves a freshly compressed
            four-sequence file on its first five calls and fails the test
            afterwards.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def serve(self, filename, *args, **kwargs):
                if self.calls > 4:
                    self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                else:
                    self.testCase.assertEqual('filename.fasta.gz', filename)
                    self.calls += 1
                    sink = BytesIO()
                    bgzfWriter = bgzf.BgzfWriter(fileobj=sink)
                    # The 70000-byte id1 pushes id2 and id3 past the 64K
                    # mark of the uncompressed data.
                    payload = (b'>id0\nAC\n'
                               b'>id1\n' + (b'A' * 70000) + b'\n'
                               b'>id2\r\nACTG\r\nCCCC\r\nGGG\r\n'
                               b'>id3\nAACCTG\n')
                    bgzfWriter.write(payload)
                    bgzfWriter.flush()
                    source = BytesIO(sink.getvalue())
                    source.mode = 'rb'
                    return bgzf.BgzfReader(fileobj=source)

        opener = FakeBgzfOpen(self)
        with patch.object(bgzf, 'open') as patchedOpen:
            patchedOpen.side_effect = opener.serve
            db = SqliteIndex(':memory:')
            db.addFile('filename.fasta.gz')
            self.assertEqual(DNARead('id0', 'AC'), db['id0'])
            self.assertEqual(DNARead('id1', 'A' * 70000), db['id1'])
            self.assertEqual(DNARead('id2', 'ACTGCCCCGGG'), db['id2'])
            self.assertEqual(DNARead('id3', 'AACCTG'), db['id3'])
            db.close()