Example #1
0
class ProgressTests(TestCase):
    """Run xphyle operations with a ``MockProgress`` wrapper configured and
    check the count it reports afterwards."""

    def setUp(self):
        """Create a temporary directory and start with progress disabled."""
        self.root = TempDir()
        xphyle.configure(progress=False)

    def tearDown(self):
        """Delete the temporary directory and reset the progress globals."""
        self.root.close()
        ITERABLE_PROGRESS.enabled = False
        ITERABLE_PROGRESS.wrapper = None
        PROCESS_PROGRESS.enabled = False
        PROCESS_PROGRESS.wrapper = None

    def test_progress(self):
        """Compress a file in Python (``use_system=False``) with progress
        enabled and expect the mock wrapper to report a count of 100."""
        progress = MockProgress()
        xphyle.configure(progress=True, progress_wrapper=progress)
        path = self.root.make_file()
        with open(path, 'wt') as o:
            for _ in range(100):
                o.write(random_text())
        compress_file(path, compression='gz', use_system=False)
        # Use unittest assertions rather than bare ``assert`` so the check is
        # not stripped under ``python -O`` and failures show both values.
        self.assertEqual(100, progress.count)

    def test_progress_delmited(self):
        """Read a 100-row tab-delimited file with progress enabled and expect
        100 rows and a wrapper count of 100."""
        progress = MockProgress()
        xphyle.configure(progress=True, progress_wrapper=progress)
        path = self.root.make_file()
        with open(path, 'wt') as o:
            for i in range(100):
                o.write('row\t{}\n'.format(i))
        rows = list(read_delimited(path))
        self.assertEqual(100, len(rows))
        self.assertEqual(100, progress.count)

    def test_iter_stream(self):
        """Iterate over an intercepted stdin with progress enabled and expect
        three lines and a wrapper count of 3."""
        progress = MockProgress()
        xphyle.configure(progress=True, progress_wrapper=progress)
        with intercept_stdin('foo\nbar\nbaz'):
            with xopen(STDIN, 'rt', context_wrapper=True,
                       compression=False) as o:
                lines = list(o)
                self.assertListEqual(['foo\n', 'bar\n', 'baz\n'], lines)
        self.assertEqual(3, progress.count)
Example #2
0
class XphyleTests(TestCase):
    """Tests for ``configure``, ``xopen``/``open_``, ``guess_file_format``
    and the ``Process``/``popen`` wrappers."""

    def setUp(self):
        """Create a temporary directory for files used by a test."""
        self.root = TempDir()

    def tearDown(self):
        """Delete the temporary directory and reset module-level state."""
        self.root.close()
        ITERABLE_PROGRESS.enabled = False
        ITERABLE_PROGRESS.wrapper = None
        PROCESS_PROGRESS.enabled = False
        PROCESS_PROGRESS.wrapper = None
        THREADS.update(1)
        EXECUTABLE_CACHE.reset_search_path()
        EXECUTABLE_CACHE.cache = {}

    def test_configure(self):
        """``configure`` updates the progress, thread and executable-path
        globals."""
        def wrapper(a, b, c):
            pass

        configure(progress=True,
                  progress_wrapper=wrapper,
                  system_progress=True,
                  system_progress_wrapper='foo',
                  threads=2,
                  executable_path=['foo'])
        self.assertEqual(wrapper, ITERABLE_PROGRESS.wrapper)
        self.assertEqual(('foo', ), PROCESS_PROGRESS.wrapper)
        self.assertEqual(2, THREADS.threads)
        self.assertIn('foo', EXECUTABLE_CACHE.search_path)

        configure(threads=False)
        self.assertEqual(1, THREADS.threads)

        import multiprocessing
        configure(threads=True)
        self.assertEqual(multiprocessing.cpu_count(), THREADS.threads)

    def test_guess_format(self):
        """``guess_file_format`` rejects stdout/stderr and reports 'gzip' for
        gzip files with and without a ``.gz`` suffix."""
        with self.assertRaises(ValueError):
            guess_file_format(STDOUT)
        with self.assertRaises(ValueError):
            guess_file_format(STDERR)
        path = self.root.make_file(suffix='.gz')
        with gzip.open(path, 'wt') as o:
            o.write('foo')
        self.assertEqual(guess_file_format(path), 'gzip')
        path = self.root.make_file()
        with gzip.open(path, 'wt') as o:
            o.write('foo')
        self.assertEqual(guess_file_format(path), 'gzip')

    def test_open_(self):
        """``open_`` on paths and on already-open file objects, with and
        without wrapping."""
        path = self.root.make_file(contents='foo')
        with self.assertRaises(ValueError):
            with open_(path, wrap_fileobj=False):
                pass
        with open_(path, compression=False) as fh:
            self.assertEqual(fh.read(), 'foo')
        with open_(path, compression=False) as fh:
            self.assertEqual(next(fh), 'foo')
        with open(path) as fh:
            with open_(fh, compression=False, context_wrapper=True) as fh2:
                self.assertIsInstance(fh2, FileLikeWrapper)
                self.assertEqual(fh2.read(), 'foo')
        with open(path) as fh3:
            # Pass the freshly opened handle (the original passed ``fh``,
            # which is already closed here) and check the object that
            # ``open_`` actually yields rather than the input handle.
            with open_(fh3, wrap_fileobj=False, context_wrapper=True) as fh4:
                self.assertNotIsInstance(fh4, FileLikeWrapper)

    def test_open_safe(self):
        """With ``errors=True`` bad targets raise; with ``errors=False``
        ``open_`` yields ``None`` instead."""
        with self.assertRaises(IOError):
            with open_('foobar', mode='r', errors=True) as fh:
                pass
        with self.assertRaises(ValueError):
            with open_(None, mode='r', errors=True) as fh:
                pass
        with open_('foobar', mode='r', errors=False) as fh:
            self.assertIsNone(fh)
        with open_(None, mode='r', errors=False) as fh:
            self.assertIsNone(fh)

    def test_xopen_invalid(self):
        """Argument combinations that ``xopen`` is expected to reject."""
        # invalid mode
        with self.assertRaises(ValueError):
            xopen('foo', 'z')
        with self.assertRaises(ValueError):
            xopen('foo', 'rz')
        with self.assertRaises(ValueError):
            xopen('foo', 'rU', newline='\n')
        with self.assertRaises(ValueError):
            xopen(STDOUT, 'w', compression=True)
        with self.assertRaises(ValueError):
            xopen('foo.bar', 'w', compression=True)
        with self.assertRaises(ValueError):
            xopen('foo', file_type=FileType.STDIO)
        with self.assertRaises(ValueError):
            xopen(STDOUT, file_type=FileType.LOCAL)
        with self.assertRaises(ValueError):
            xopen('foo', file_type=FileType.URL)
        with self.assertRaises(IOError):
            xopen('http://foo.com', file_type=FileType.LOCAL)
        with self.assertRaises(ValueError):
            xopen('xyz', file_type=FileType.FILELIKE)
        path = self.root.make_file(contents='foo')
        with open(path, 'r') as fh:
            with self.assertRaises(ValueError):
                xopen(fh, 'w')
            f = xopen(fh, context_wrapper=True)
            self.assertEqual('r', f.mode)
        f = xopen(path, context_wrapper=True)
        f.close()
        # re-entering a wrapper that has already been closed
        with self.assertRaises(IOError):
            with f:
                pass
        with self.assertRaises(ValueError):
            with open(path, 'rt') as fh:
                xopen(fh, 'rt', compression=True)
        # can't guess compression without a name
        with self.assertRaises(ValueError):
            b = BytesIO()
            b.mode = 'wb'
            xopen(b, 'wt')
        # can't read from stderr
        with self.assertRaises(ValueError):
            xopen(STDERR, 'rt')

    def test_xopen_std(self):
        """``xopen`` on stdin/stdout/stderr in text, binary and gzip modes."""
        # Try stdin
        with intercept_stdin('foo\n'):
            with xopen(STDIN, 'r', context_wrapper=True,
                       compression=False) as i:
                content = i.read()
                self.assertEqual(content, 'foo\n')
        # Try stdout
        with intercept_stdout() as i:
            with xopen(STDOUT, 'w', context_wrapper=True,
                       compression=False) as o:
                o.write('foo')
            self.assertEqual(i.getvalue(), 'foo')
        # Try stderr
        with intercept_stderr() as i:
            with xopen(STDERR, 'w', context_wrapper=True,
                       compression=False) as o:
                o.write('foo')
            self.assertEqual(i.getvalue(), 'foo')

        # Try binary
        with intercept_stdout(True) as i:
            with xopen(STDOUT, 'wb', context_wrapper=True,
                       compression=False) as o:
                o.write(b'foo')
            self.assertEqual(i.getvalue(), b'foo')

        # Try compressed
        with intercept_stdout(True) as i:
            with xopen(STDOUT, 'wt', context_wrapper=True,
                       compression='gz') as o:
                self.assertEqual(o.compression, 'gzip')
                o.write('foo')
            self.assertEqual(gzip.decompress(i.getvalue()), b'foo')

    def test_xopen_compressed_stream(self):
        """``compression=True`` on a gzip-compressed stdin is detected as
        'gzip' and decoded."""
        # Try autodetect compressed
        with intercept_stdin(gzip.compress(b'foo\n'), is_bytes=True):
            with xopen(STDIN, 'rt', compression=True,
                       context_wrapper=True) as i:
                self.assertEqual(i.compression, 'gzip')
                self.assertEqual(i.read(), 'foo\n')

    def test_xopen_file(self):
        """``xopen`` on local paths: missing file, mode normalisation, gzip
        writing and format validation."""
        with self.assertRaises(IOError):
            xopen('foobar', 'r')
        path = self.root.make_file(suffix='.gz')
        with xopen(path, 'rU') as i:
            self.assertEqual('rt', i.mode)
        with xopen(path, 'w', compression=True, context_wrapper=True) as o:
            self.assertEqual(o.compression, 'gzip')
            o.write('foo')
        with gzip.open(path, 'rt') as i:
            self.assertEqual(i.read(), 'foo')
        with self.assertRaises(ValueError):
            with xopen(path, 'rt', compression='bz2', validate=True):
                pass

    def test_xopen_fileobj(self):
        """Writing text through ``open_`` around a binary handle on a ``.gz``
        path produces gzip data."""
        path = self.root.make_file(suffix='.gz')
        with open(path, 'wb') as out1:
            with open_(out1, 'wt') as out2:
                out2.write('foo')
        with gzip.open(path, 'rt') as i:
            self.assertEqual('foo', i.read())

    def test_xopen_buffer(self):
        """``xopen``/``open_`` on in-memory buffers, with and without
        compression."""
        buf = BytesIO(b'foo')
        f = xopen(buf, 'rb')
        self.assertEqual(b'foo', f.read(3))
        with self.assertRaises(ValueError):
            xopen(buf, 'wb')

        with open_(str) as buf:
            buf.write('foo')
        self.assertEqual('foo', buf.getvalue())

        with open_(bytes) as buf:
            buf.write(b'foo')
        self.assertEqual(b'foo', buf.getvalue())

        # with compression
        with self.assertRaises(ValueError):
            with open_(bytes, compression=True) as buf:
                pass
        with self.assertRaises(ValueError):
            with open_(str, compression='gzip') as buf:
                pass

        with open_(bytes, mode='wt', compression='gzip') as buf:
            buf.write('foo')
        self.assertEqual(b'foo', gzip.decompress(buf.getvalue()))

        # from string/bytes
        with self.assertRaises(ValueError):
            xopen('foo', 'wt', file_type=FileType.BUFFER)
        with self.assertRaises(ValueError):
            xopen('foo', 'rb', file_type=FileType.BUFFER)
        with open_('foo', file_type=FileType.BUFFER,
                   context_wrapper=True) as buf:
            self.assertEqual('foo', buf.read())

        with self.assertRaises(ValueError):
            xopen(b'foo', 'rt', file_type=FileType.BUFFER)
        with open_(b'foo', file_type=FileType.BUFFER,
                   context_wrapper=True) as buf:
            self.assertEqual(b'foo', buf.read())

    @skipIf(no_internet(), "No internet connection")
    def test_xopen_url(self):
        """``xopen``/``open_`` on URLs (skipped without internet access)."""
        badurl = 'http://google.com/__badurl__'
        with self.assertRaises(ValueError):
            xopen(badurl)
        url = 'https://github.com/jdidion/xphyle/blob/master/tests/foo.gz?raw=True'
        with self.assertRaises(ValueError):
            xopen(url, 'w')
        with open_(url, 'rt') as i:
            self.assertEqual('gzip', i.compression)
            self.assertEqual('foo\n', i.read())

    def test_open_process(self):
        """Text written to a ``'|cat'`` target is available as bytes on the
        process's ``stdout`` after the context exits."""
        with open_('|cat', 'wt') as p:
            p.write('foo\n')
        self.assertEqual(b'foo\n', p.stdout)

    def test_peek(self):
        """``peek`` fails on a file opened for writing and returns the first
        character without consuming it when reading."""
        path = self.root.make_file()
        with self.assertRaises(IOError):
            with open_(path, 'w') as o:
                o.peek()
        path = self.root.make_file(contents='foo')
        with open_(path, 'rb') as i:
            self.assertEqual(b'f', i.peek(1))
            self.assertEqual(b'foo', next(i))
        with open_(path, 'rt') as i:
            self.assertEqual('f', i.peek(1))
            self.assertEqual('foo', next(i))
        with intercept_stdin('foo'):
            with open_(STDIN, validate=False, compression=False) as i:
                self.assertEqual('f', i.peek(1))
                self.assertEqual('foo\n', next(i))

    def test_event_listeners(self):
        """A listener registered for ``EventType.CLOSE`` is executed when a
        stdin wrapper or a file wrapper is closed."""
        class MockEventListener(EventListener):
            # Default so that a listener that never ran fails the assertion
            # below instead of raising AttributeError.
            executed = False

            def execute(self, file_wrapper: FileLikeWrapper, **kwargs):
                self.executed = True

        std_listener = MockEventListener()
        with intercept_stdin('foo'):
            f = xopen(STDIN, context_wrapper=True)
            try:
                f.register_listener(EventType.CLOSE, std_listener)
            finally:
                f.close()
            self.assertTrue(std_listener.executed)

        file_listener = MockEventListener()
        path = self.root.make_file()
        f = xopen(path, 'w', context_wrapper=True)
        try:
            f.register_listener(EventType.CLOSE, file_listener)
        finally:
            f.close()
        self.assertTrue(file_listener.executed)

    def test_process(self):
        """``Process`` accessors, writing through ``cat``, and wrapping a
        pipe with gzip compression."""
        with Process('cat', stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
            self.assertIsNotNone(p.get_writer())
            self.assertIsNotNone(p.get_reader('stdout'))
            self.assertIsNotNone(p.get_reader('stderr'))
            self.assertFalse(p.seekable())
            self.assertEqual((p.stdout, p.stderr), p.get_readers())
            p.write(b'foo\n')
            p.flush()
        self.assertEqual(b'foo\n', p.stdout)
        self.assertFalse(p.stderr)

        # wrap pipes
        with Process(('zcat', '-cd'), stdin=PIPE, stdout=PIPE) as p:
            self.assertTrue(p.readable())
            self.assertTrue(p.writable())
            with self.assertRaises(ValueError):
                p.is_wrapped('foo')
            with self.assertRaises(ValueError):
                p.wrap_pipes(foo=dict(mode='wt'))
            p.wrap_pipes(stdin=dict(mode='wt', compression='gzip'))
            self.assertTrue(p.is_wrapped('stdin'))
            p.write('foo')
        self.assertEqual(b'foo', p.stdout)

    def test_process_with_files(self):
        """``Process`` with real files as stdin/stdout, and iterating over
        ``popen`` output in binary and text mode."""
        inp = self.root.make_file(suffix='.gz')
        with gzip.open(inp, 'wt') as o:
            o.write('foo')
        out = self.root.make_file(suffix='.gz')
        with self.assertRaises(OSError):
            with gzip.open(inp, 'rt') as o, open(out, 'wt') as i:
                with Process('cat', stdin=o, stdout=i) as p:
                    p.wrap_pipes(stdin=dict(mode='wt'))
        with gzip.open(out, 'rt') as i:
            self.assertEqual('foo', i.read())
        with popen(('echo', 'abc\n123'), stdout=PIPE) as p:
            self.assertListEqual([b'abc\n', b'123\n'],
                                 list(line for line in p))
        with popen(('echo', 'abc\n123'), stdout=PIPE) as p:
            self.assertEqual(b'abc\n', next(p))
            self.assertEqual(b'123\n', next(p))
        with popen(('echo', 'abc\n123'), stdout=(PIPE, 'rt')) as p:
            self.assertEqual('abc\n', next(p))
            self.assertEqual('123\n', next(p))

    def test_process_invalid(self):
        """``compression=True`` is rejected for a process target."""
        with self.assertRaises(ValueError):
            xopen('|cat', 'wt', compression=True)

    def test_process_read(self):
        """Reading the output of ``echo`` via ``Process`` and via ``open_``."""
        with Process(('echo', 'foo'), stdout=PIPE) as p:
            self.assertEqual(b'foo\n', p.read())
        with open_('|echo foo', 'rt') as p:
            self.assertEqual('foo\n', p.read())

    def test_process_communicate(self):
        """``communicate`` returns the ``(stdout, stderr)`` bytes of ``cat``."""
        with Process('cat', stdin=PIPE, stdout=PIPE, stderr=PIPE) as p:
            self.assertTupleEqual((b'foo\n', b''), p.communicate(b'foo\n'))

    def test_process_del(self):
        """Deleting the only reference to a ``Process`` triggers its CLOSE
        listener."""
        class MockProcessListener(EventListener):
            # Default so that a listener that never ran fails the assertion
            # below instead of raising AttributeError.
            executed = False

            def execute(self, process: Process, **kwargs) -> None:
                self.executed = True

        listener = MockProcessListener()
        p = Process('cat', stdin=PIPE, stdout=PIPE)
        p.register_listener(EventType.CLOSE, listener)
        del p
        self.assertTrue(listener.executed)

    def test_process_close(self):
        """After ``close``, ``close1`` returns ``None`` unless
        ``raise_on_error`` is set, in which case it raises ``IOError``."""
        p = Process('cat', stdin=PIPE, stdout=PIPE)
        self.assertFalse(p.closed)
        p.close()
        self.assertTrue(p.closed)
        self.assertIsNone(p.close1(raise_on_error=False))
        with self.assertRaises(IOError):
            p.close1(raise_on_error=True)

    def test_process_close_hung(self):
        """Closing a still-running ``sleep`` with a 1s timeout raises unless
        ``terminate=True``."""
        p = Process(('sleep', '5'))
        with self.assertRaises(Exception):
            p.close(timeout=1, terminate=False)
        p = Process(('sleep', '5'))
        p.close1(timeout=1, terminate=True)
        self.assertTrue(p.closed)

    def test_process_error(self):
        """A shell command exiting with status 2 makes ``close1`` raise and
        leaves a non-zero ``returncode``."""
        p = popen(('exit', '2'), shell=True)
        with self.assertRaises(IOError):
            p.close1(raise_on_error=True)
        self.assertFalse(p.returncode == 0)
Example #3
0
class UtilsTests(TestCase):
    def setUp(self):
        """Remember the current ``sys.argv`` object and create a temporary
        directory for the test's files."""
        self.system_args = sys.argv
        self.root = TempDir()

    def tearDown(self):
        """Undo the global state a test may have changed.

        Rebinds ``sys.argv`` to the object saved in ``setUp`` (tests in this
        class assign a new list to ``sys.argv``), deletes the temporary
        directory, and resets the progress, thread and executable-cache
        globals.
        """
        # Restore first so argv is reset even if a later cleanup step raises.
        sys.argv = self.system_args
        self.root.close()
        ITERABLE_PROGRESS.enabled = False
        ITERABLE_PROGRESS.wrapper = None
        PROCESS_PROGRESS.enabled = False
        PROCESS_PROGRESS.wrapper = None
        THREADS.update(1)
        EXECUTABLE_CACHE.reset_search_path()
        EXECUTABLE_CACHE.cache = {}

    def test_read_lines(self):
        """``read_lines`` yields nothing for a missing path when
        ``errors=False`` and yields each line (optionally converted)
        for an existing file."""
        nothing = list(read_lines('foobar', errors=False))
        self.assertListEqual(nothing, [])

        path = self.root.make_file()
        with open(path, 'wt') as out:
            out.write("1\n2\n3")
        as_text = list(read_lines(path))
        self.assertListEqual(as_text, ['1', '2', '3'])
        as_ints = list(read_lines(path, convert=int))
        self.assertListEqual(as_ints, [1, 2, 3])

    def test_read_chunked(self):
        """``read_bytes`` yields nothing for a missing path when
        ``errors=False`` and splits a 10-byte file into chunks of 3."""
        nothing = list(read_bytes('foobar', errors=False))
        self.assertListEqual([], nothing)
        path = self.root.make_file()
        with open(path, 'wt') as out:
            out.write("1234567890")
        expected = [b'123', b'456', b'789', b'0']
        self.assertListEqual(expected, list(read_bytes(path, 3)))

    def test_write_lines(self):
        """Check the value returned by ``write_lines`` and the content read
        back, for the default and a custom line separator, and that writing
        to a read-only file with ``errors=False`` returns -1.

        Uses ``assertEqual``; the ``assertEquals`` alias was removed in
        Python 3.12.
        """
        linesep_len = len(os.linesep)
        path = self.root.make_file()
        self.assertEqual(3, write_lines(['foo'], path, linesep=None))
        self.assertEqual(list(read_lines(path)), ['foo'])
        path = self.root.make_file()
        # three 3-character lines joined by two separators
        self.assertEqual(
            9 + (2 * linesep_len),
            write_lines(('foo', 'bar', 'baz'), path, linesep=None))
        self.assertEqual(list(read_lines(path)), ['foo', 'bar', 'baz'])
        path = self.root.make_file()
        self.assertEqual(
            11, write_lines(('foo', 'bar', 'baz'), path, linesep='|'))
        self.assertEqual(list(read_lines(path)), ['foo|bar|baz'])
        path = self.root.make_file(permissions='r')
        self.assertEqual(-1, write_lines(['foo'], path, errors=False))

    def test_write_bytes(self):
        """Check the value returned by ``write_bytes`` and the content read
        back, and that writing to a read-only file with ``errors=False``
        returns -1.

        Uses ``assertEqual``; the ``assertEquals`` alias was removed in
        Python 3.12.
        """
        path = self.root.make_file()
        linesep_len = len(os.linesep)
        self.assertEqual(3, write_bytes([b'foo'], path))
        self.assertEqual(list(read_bytes(path)), [b'foo'])
        path = self.root.make_file()
        # three 3-character items joined by two separators
        self.assertEqual(9 + (2 * linesep_len),
                         write_bytes(('foo', 'bar', 'baz'), path, sep=None))
        self.assertEqual(os.linesep.encode().join((b'foo', b'bar', b'baz')),
                         b''.join(read_bytes(path)))
        path = self.root.make_file(permissions='r')
        self.assertEqual(-1, write_bytes([b'foo'], path, errors=False))

    def test_read_dict(self):
        """``read_dict`` on a file with one ``#`` line and two ``key=value``
        lines returns the two entries, converted to int, in file order."""
        path = self.root.make_file()
        with open(path, 'wt') as out:
            out.writelines((
                "# This is a comment\n",
                "foo=1\n",
                "bar=2\n",
            ))
        parsed = read_dict(path, convert=int, ordered=True)
        self.assertEqual(len(parsed), 2)
        self.assertEqual(parsed['foo'], 1)
        self.assertEqual(parsed['bar'], 2)
        self.assertEqual(list(parsed.items()), [('foo', 1), ('bar', 2)])

    def test_write_dict(self):
        """``write_dict`` writes an ordered mapping as ``key=value`` lines."""
        path = self.root.make_file()
        pairs = OrderedDict([('foo', 1), ('bar', 2)])
        write_dict(pairs, path, linesep=None)
        written = list(read_lines(path))
        self.assertEqual(written, ['foo=1', 'bar=2'])

    def test_tsv(self):
        """``read_delimited`` with list, tuple and dict row types, plus the
        option combinations that raise ``ValueError``."""
        self.assertListEqual([], list(read_delimited('foobar', errors=False)))

        path = self.root.make_file()
        with open(path, 'wt') as out:
            for line in ('a\tb\tc\n', '1\t2\t3\n', '4\t5\t6\n'):
                out.write(line)

        # a string is not accepted as a converter
        with self.assertRaises(ValueError):
            list(read_delimited(path, header=False, converters='int'))
        # dict rows requested without a header
        with self.assertRaises(ValueError):
            headerless_dicts = dict(header=False,
                                    converters=int,
                                    row_type='dict',
                                    yield_header=False)
            list(read_delimited(path, **headerless_dicts))

        rows_as_lists = list(read_delimited(path, header=True, converters=int))
        self.assertListEqual([['a', 'b', 'c'], [1, 2, 3], [4, 5, 6]],
                             rows_as_lists)

        # the tuple row type may be given by name or as the type itself
        for tuple_spec in ('tuple', tuple):
            rows_as_tuples = list(
                read_delimited(path,
                               header=True,
                               converters=int,
                               row_type=tuple_spec))
            self.assertListEqual([['a', 'b', 'c'], (1, 2, 3), (4, 5, 6)],
                                 rows_as_tuples)

        rows_as_dicts = list(
            read_delimited(path,
                           header=True,
                           converters=int,
                           row_type='dict',
                           yield_header=False))
        self.assertListEqual(
            [dict(a=1, b=2, c=3), dict(a=4, b=5, c=6)], rows_as_dicts)

    def test_tsv_dict(self):
        """``read_delimited_as_dict`` keyed by column index, column name and
        a callable; ``header=False`` with a name or ``None`` key raises."""
        path = self.root.make_file()
        with open(path, 'wt') as out:
            out.write('id\ta\tb\tc\n')
            out.write('row1\t1\t2\t3\n')
            out.write('row2\t4\t5\t6\n')

        for bad_key in ('id', None):
            with self.assertRaises(ValueError):
                read_delimited_as_dict(path, key=bad_key, header=False)

        column_types = (str, int, int, int)
        for key in (0, 'id'):
            expected = dict(row1=['row1', 1, 2, 3], row2=['row2', 4, 5, 6])
            actual = read_delimited_as_dict(path,
                                            key=key,
                                            header=True,
                                            converters=column_types)
            self.assertDictEqual(expected, actual)

        # rewrite the file without an id column and derive the key instead
        with open(path, 'wt') as out:
            out.write('a\tb\tc\n')
            out.write('1\t2\t3\n')
            out.write('4\t5\t6\n')

        def key_from_first_column(row):
            return 'row{}'.format(row[0])

        expected = dict(row1=[1, 2, 3], row4=[4, 5, 6])
        actual = read_delimited_as_dict(path,
                                        key=key_from_first_column,
                                        header=True,
                                        converters=int)
        self.assertDictEqual(expected, actual)

    def test_tsv_dict_dups(self):
        """``read_delimited_as_dict`` raises when two rows share a key."""
        path = self.root.make_file()
        duplicated_rows = (
            'id\ta\tb\tc\n',
            'row1\t1\t2\t3\n',
            'row1\t4\t5\t6\n',
        )
        with open(path, 'wt') as out:
            for line in duplicated_rows:
                out.write(line)

        column_types = (str, int, int, int)
        with self.assertRaises(Exception):
            read_delimited_as_dict(
                path, key='id', header=True, converters=column_types)

    def test_compress_file_no_dest(self):
        """``compress_file`` without a destination: ``compression=True``
        raises, while ``'gz'`` with ``keep=False`` replaces the file with
        ``<path>.gz``."""
        source = self.root.make_file()

        with self.assertRaises(ValueError):
            compress_file(source, compression=True, keep=True)

        with open(source, 'wt') as out:
            out.write('foo')
        compressed = compress_file(source, compression='gz', keep=False)
        self.assertEqual(compressed, source + '.gz')
        self.assertFalse(os.path.exists(source))
        self.assertTrue(os.path.exists(compressed))
        with gzip.open(compressed, 'rt') as inp:
            self.assertEqual(inp.read(), 'foo')

    def test_compress_fileobj(self):
        """``compress_file`` with an open source file object, and with an
        open gzip file object as the destination.

        Uses ``assertEqual`` (the ``assertEquals`` alias was removed in
        Python 3.12) and ``with`` blocks so the handles are closed on every
        path.
        """
        path = self.root.make_file()
        with open(path, 'wt') as o:
            o.write('foo')

        # source given as an open binary file object
        with open(path, 'rb') as src:
            gzpath = compress_file(src, compression='gz')
            self.assertEqual(gzpath, path + '.gz')
            self.assertTrue(os.path.exists(path))
            self.assertTrue(os.path.exists(gzpath))
            with gzip.open(gzpath, 'rt') as i:
                self.assertEqual(i.read(), 'foo')

        # destination given as an open gzip file object
        gzpath = path + '.gz'
        with gzip.open(gzpath, 'w') as dest:
            self.assertEqual(gzpath,
                             compress_file(path, dest, compression=True))
        self.assertTrue(os.path.exists(path))
        self.assertTrue(os.path.exists(gzpath))
        with gzip.open(gzpath, 'rt') as i:
            self.assertEqual(i.read(), 'foo')

    def test_compress_file_no_compression(self):
        """``compress_file`` with only a ``.gz`` destination path writes gzip
        data there and, with ``keep=True``, leaves the source in place."""
        source = self.root.make_file()
        with open(source, 'wt') as out:
            out.write('foo')
        target = source + '.gz'
        returned = compress_file(source, target, keep=True)
        self.assertEqual(target, returned)
        for existing in (source, target):
            self.assertTrue(os.path.exists(existing))
        with gzip.open(target, 'rt') as inp:
            self.assertEqual(inp.read(), 'foo')

    def test_decompress_file(self):
        """``decompress_file`` on a ``.gz`` path and on an open binary handle
        writes the suffix-less path and, with ``keep=True``, keeps the
        archive."""
        path = self.root.make_file()
        gzfile = path + '.gz'
        with gzip.open(gzfile, 'wt') as out:
            out.write('foo')

        def check(result):
            # the same expectations apply to both ways of passing the source
            self.assertEqual(path, result)
            self.assertTrue(os.path.exists(gzfile))
            self.assertTrue(os.path.exists(path))
            with open(path, 'rt') as text:
                self.assertEqual(text.read(), 'foo')

        check(decompress_file(gzfile, keep=True))

        with open(gzfile, 'rb') as raw:
            check(decompress_file(raw, keep=True))

    def test_decompress_file_compression(self):
        """``decompress_file`` raises for a ``.foo`` suffix unless the
        compression is given; with ``keep=False`` the archive is removed."""
        plain = self.root.make_file()
        archive = plain + '.foo'
        with gzip.open(archive, 'wt') as out:
            out.write('foo')
        with self.assertRaises(ValueError):
            decompress_file(archive)
        result = decompress_file(archive, compression='gz', keep=False)
        self.assertEqual(plain, result)
        self.assertFalse(os.path.exists(archive))
        self.assertTrue(os.path.exists(plain))
        with open(plain, 'rt') as inp:
            self.assertEqual(inp.read(), 'foo')

    def test_transcode(self):
        """``transcode_file`` converts a gzip file into a readable bz2 file."""
        base = self.root.make_file()
        gz_source = base + '.gz'
        with gzip.open(gz_source, 'wt') as out:
            out.write('foo')
        bz_target = base + '.bz2'
        transcode_file(gz_source, bz_target)
        with bz2.open(bz_target, 'rt') as inp:
            content = inp.read()
            self.assertEqual('foo', content)

    def test_exec_process(self):
        """``exec_process('cat', ...)`` between two ``.gz`` paths leaves the
        same text readable from the output file.

        Uses ``assertEqual``; the ``assertEquals`` alias was removed in
        Python 3.12.
        """
        inp = self.root.make_file(suffix='.gz')
        with gzip.open(inp, 'wt') as o:
            o.write('foo')
        out = self.root.make_file(suffix='.gz')
        exec_process('cat', stdin=inp, stdout=out)
        with gzip.open(out, 'rt') as o:
            self.assertEqual('foo', o.read())

    def test_linecount(self):
        """``linecount`` returns -1 for a missing file with ``errors=False``,
        rejects a negative buffer size and a write mode, and counts 100
        lines in a file whose last line has no trailing newline."""
        self.assertEqual(-1, linecount('foobar', errors=False))
        path = self.root.make_file()
        with open(path, 'wt') as out:
            # 100 random lines joined by newlines, none after the last
            out.write('\n'.join(random_text() for _ in range(100)))
        invalid_options = (dict(buffer_size=-1), dict(mode='wb'))
        for options in invalid_options:
            with self.assertRaises(ValueError):
                linecount(path, **options)
        self.assertEqual(100, linecount(path))

    def test_linecount_empty(self):
        """``linecount`` of a newly made file is 0."""
        empty_file = self.root.make_file()
        counted = linecount(empty_file)
        self.assertEqual(0, counted)

    def test_file_manager(self):
        """``FileManager``: adding files by dict, path, file object and item
        assignment; lookups by key and index; everything is closed when the
        context exits."""
        first = self.root.make_empty_files(1)[0]
        second = self.root.make_empty_files(1)[0]
        paths12 = dict(path1=first, path2=second)
        with FileManager(paths12, mode='wt') as manager:
            paths34 = self.root.make_empty_files(2)
            for extra in paths34:
                manager.add(extra, mode='wt')
                self.assertTrue(extra in manager)
                self.assertFalse(manager[extra].closed)
            # add an already-open file object
            path5 = self.root.make_file()
            path5_fh = open(path5, 'wt')
            manager.add(path5_fh)
            # add by item assignment under an explicit key
            path6 = self.root.make_file()
            manager['path6'] = path6
            self.assertEqual(path6, manager.get_path('path6'))
            expected_paths = list(paths12.values()) + paths34 + [path5, path6]
            self.assertListEqual(expected_paths, manager.paths)
            self.assertEqual(len(manager), 6)
            for _, handle in manager.iter_files():
                self.assertFalse(handle.closed)
            self.assertIsNotNone(manager['path2'])
            self.assertIsNotNone(manager.get('path2'))
            self.assertEqual(manager['path6'], manager.get(5))
            with self.assertRaises(KeyError):
                manager['foo']
            self.assertIsNone(manager.get('foo'))
        self.assertEqual(len(manager), 6)
        for _, handle in manager.iter_files():
            self.assertTrue(handle.closed)

    def test_file_manager_dup_files(self):
        """Adding the same path to a ``FileManager`` twice raises
        ``ValueError``."""
        manager = FileManager()
        duplicate = self.root.make_file()
        manager.add(duplicate)
        with self.assertRaises(ValueError):
            manager.add(duplicate)

    def test_compress_on_close(self):
        """A ``CompressOnClose`` listener produces ``<path>.gz`` containing
        what was written once the wrapper is closed."""
        path = self.root.make_file()
        listener = CompressOnClose(compression='gz')
        with FileWrapper(path, 'wt') as wrapped:
            wrapped.register_listener('close', listener)
            wrapped.write('foo')
        expected_gz = path + '.gz'
        self.assertEqual(expected_gz, listener.compressed_path)
        self.assertTrue(os.path.exists(expected_gz))
        with gzip.open(expected_gz, 'rt') as inp:
            self.assertEqual(inp.read(), 'foo')

    def test_move_on_close(self):
        """A ``MoveOnClose`` listener moves the written file to ``dest`` once
        the wrapper is closed."""
        source = self.root.make_file()
        dest = self.root.make_file()
        mover = MoveOnClose(dest=dest)
        with FileWrapper(source, 'wt') as wrapped:
            wrapped.register_listener('close', mover)
            wrapped.write('foo')
        self.assertFalse(os.path.exists(source))
        self.assertTrue(os.path.exists(dest))
        with open(dest, 'rt') as inp:
            self.assertEqual(inp.read(), 'foo')

    def test_remove_on_close(self):
        """A ``RemoveOnClose`` listener deletes the file once the wrapper is
        closed, whether the wrapper was built from a path or from an open
        file object."""
        # wrapper created from a path
        by_path = self.root.make_file()
        with FileWrapper(by_path, 'wt') as wrapped:
            wrapped.register_listener('close', RemoveOnClose())
            wrapped.write('foo')
        self.assertFalse(os.path.exists(by_path))

        # wrapper created from an already-open file object
        by_handle = self.root.make_file()
        with FileWrapper(open(by_handle, 'wt')) as wrapped:
            wrapped.register_listener('close', RemoveOnClose())
            wrapped.write('foo')
        self.assertFalse(os.path.exists(by_handle))

    def test_fileinput(self):
        """``textinput`` over one file, two files, and keyed files, checking
        the key, name and line counters after each line."""
        file1 = self.root.make_file(suffix='.gz')
        with gzip.open(file1, 'wt') as out:
            out.write('foo\nbar\n')
        with textinput(file1) as source:
            lines = list(source)
            self.assertListEqual(['foo\n', 'bar\n'], lines)
        file2 = self.root.make_file(suffix='.gz')
        with gzip.open(file2, 'wt') as out:
            out.write('baz\n')
        with textinput((file1, file2)) as source:
            lines = list(source)
            self.assertListEqual(['foo\n', 'bar\n', 'baz\n'], lines)
        with textinput([('key1', file1), ('key2', file2)]) as source:
            # before anything has been read
            self.assertEqual(source.filekey, None)
            self.assertEqual(source.filename, None)
            self.assertEqual(source.lineno, 0)
            self.assertEqual(source.filelineno, 0)

            # (line, filekey, filename, lineno, filelineno) after each read
            expected_states = (
                ('foo\n', 'key1', file1, 1, 1),
                ('bar\n', 'key1', file1, 2, 2),
                ('baz\n', 'key2', file2, 3, 1),
            )
            for line, key, name, total, within_file in expected_states:
                self.assertEqual(next(source), line)
                self.assertEqual(source.filekey, key)
                self.assertEqual(source.filename, name)
                self.assertEqual(source.lineno, total)
                self.assertEqual(source.filelineno, within_file)

    def test_pending(self):
        """FileInput updates _pending/finished as files are added and read."""
        first = self.root.make_file(suffix='.gz')
        with gzip.open(first, 'wt') as out:
            out.write('foo\nbar\n')

        reader = FileInput(char_mode=TextMode)
        self.assertTrue(reader._pending)
        reader.add(first)
        # Consume everything; only the resulting state matters here.
        list(reader)
        self.assertTrue(reader.finished)
        self.assertFalse(reader._pending)

        # Adding a file after exhaustion flips the state back.
        second = self.root.make_file(suffix='.gz')
        with gzip.open(second, 'wt') as out:
            out.write('baz\n')
        reader.add(second)
        self.assertTrue(reader._pending)
        self.assertFalse(reader.finished)

        self.assertEqual('baz\n', reader.readline())
        # readline() signals exhaustion with '', while next() raises.
        self.assertEqual('', reader.readline())
        with self.assertRaises(StopIteration):
            next(reader)
        self.assertTrue(reader.finished)
        self.assertFalse(reader._pending)

    def test_fileinput_defaults(self):
        """textinput()/byteinput() with no files use sys.argv, then stdin.

        ``sys.argv`` is process-global, so the value seen on entry is restored
        on exit; otherwise later tests would run with an empty ``sys.argv``.
        """
        path = self.root.make_file()
        with open(path, 'wt') as o:
            o.write('foo\nbar\n')
        saved_argv = sys.argv
        try:
            # No explicit files: the path comes from the command line.
            sys.argv = [self.system_args[0], path]
            self.assertEqual(['foo\n', 'bar\n'], list(textinput()))
            # Empty the command line for the stdin-based checks below.
            sys.argv = []
            with intercept_stdin('foo\n'):
                lines = list(textinput([STDIN]))
                self.assertEqual(1, len(lines))
                self.assertEqual('foo\n', lines[0])
            with intercept_stdin(b'foo\nbar\n', is_bytes=True):
                self.assertEqual([b'foo\n', b'bar\n'], list(byteinput()))
        finally:
            sys.argv = saved_argv

    def test_single_fileoutput(self):
        """textoutput to one .gz path writes each line newline-terminated."""
        target = self.root.make_file(suffix='.gz')
        lines = ('foo', 'bar', 'baz')
        with textoutput(target) as writer:
            writer.writelines(lines)
        with gzip.open(target, 'rt') as written:
            self.assertEqual('foo\nbar\nbaz\n', written.read())

    def test_tee_fileoutput(self):
        """textoutput given two paths writes every line to both files."""
        gz_target = self.root.make_file(suffix='.gz')
        plain_target = self.root.make_file()
        targets = (gz_target, plain_target)

        # access='z' must be rejected with ValueError.
        with self.assertRaises(ValueError):
            textoutput(targets, access='z')

        with textoutput(targets) as writer:
            writer.writelines(('foo', 'bar', 'baz'))

        expected = 'foo\nbar\nbaz\n'
        with gzip.open(gz_target, 'rt') as written:
            self.assertEqual(expected, written.read())
        with open(plain_target, 'rt') as written:
            self.assertEqual(expected, written.read())

    def test_tee_fileoutput_binary(self):
        """TeeFileOutput handles bytes and str lines in byte and text modes.

        The same pair of files is rewritten by each case in turn.
        """
        gz_target = self.root.make_file(suffix='.gz')
        plain_target = self.root.make_file()
        targets = (gz_target, plain_target)

        # (output factory, lines written, mode used to read back, expected)
        cases = (
            (byteoutput, (b'foo', b'bar', b'baz'), 'rb', b'foo\nbar\nbaz\n'),
            (textoutput, (b'foo', b'bar', b'baz'), 'rt', 'foo\nbar\nbaz\n'),
            (byteoutput, ('foo', b'bar', b'baz'), 'rb', b'foo\nbar\nbaz\n'),
        )
        for make_output, lines, read_mode, expected in cases:
            with make_output(targets, file_output_type=TeeFileOutput) as writer:
                writer.writelines(lines)
            with gzip.open(gz_target, read_mode) as written:
                self.assertEqual(expected, written.read())
            with open(plain_target, read_mode) as written:
                self.assertEqual(expected, written.read())

    def test_tee_fileoutput_no_newline(self):
        """writeline() terminates each line and num_lines counts the calls."""
        gz_target = self.root.make_file(suffix='.gz')
        plain_target = self.root.make_file()
        with textoutput((gz_target, plain_target)) as writer:
            for line in ('foo', 'bar'):
                writer.writeline(line)
            self.assertEqual(2, writer.num_lines)

        # Read back as bytes so the exact line terminators are compared.
        expected = b'foo\nbar\n'
        with gzip.open(gz_target, 'rb') as written:
            self.assertEqual(expected, written.read())
        with open(plain_target, 'rb') as written:
            self.assertEqual(expected, written.read())

    def test_fileoutput_stdout(self):
        """textoutput()/byteoutput() with no files use sys.argv, then stdout.

        ``sys.argv`` is process-global, so the value seen on entry is restored
        on exit instead of leaking an empty argument list into later tests.
        """
        path = self.root.make_file()
        saved_argv = sys.argv
        try:
            # argv[0] is a single program name, as in test_fileinput_defaults;
            # the output path is taken from the remaining arguments.
            sys.argv = [self.system_args[0], path]
            with textoutput() as o:
                o.writelines(('foo', 'bar', 'baz'))
            with open(path, 'rt') as i:
                self.assertEqual('foo\nbar\nbaz\n', i.read())
            # With no command-line arguments the output goes to stdout.
            sys.argv = []
            with intercept_stdout(True) as outbuf:
                with byteoutput() as o:
                    o.writelines((b'foo', b'bar', b'baz'))
                self.assertEqual(b'foo\nbar\nbaz\n', outbuf.getvalue())
        finally:
            sys.argv = saved_argv

    def test_cycle_fileoutput(self):
        """CycleFileOutput alternates successive lines between the files."""
        gz_target = self.root.make_file(suffix='.gz')
        plain_target = self.root.make_file()
        with textoutput((gz_target, plain_target),
                        file_output_type=CycleFileOutput) as writer:
            writer.writelines(('foo', 'bar', 'baz'))
        # Lines 1 and 3 land in the first file, line 2 in the second.
        with gzip.open(gz_target, 'rt') as written:
            self.assertEqual('foo\nbaz\n', written.read())
        with open(plain_target, 'rt') as written:
            self.assertEqual('bar\n', written.read())

    def test_ncycle_fileoutput(self):
        """NCycleFileOutput switches files after every lines_per_file lines."""
        gz_target = self.root.make_file(suffix='.gz')
        plain_target = self.root.make_file()
        lines = ('foo', 'bar', 'baz', 'blorf', 'bing')
        with textoutput((gz_target, plain_target),
                        lines_per_file=2,
                        file_output_type=NCycleFileOutput) as writer:
            writer.writelines(lines)
        # Two lines per file in turn; the fifth line wraps to the first file.
        with gzip.open(gz_target, 'rt') as written:
            self.assertEqual('foo\nbar\nbing\n', written.read())
        with open(plain_target, 'rt') as written:
            self.assertEqual('baz\nblorf\n', written.read())

    def test_rolling_fileoutput(self):
        """RollingFileOutput starts a new indexed file every lines_per_file."""
        prefix = self.root.make_file()
        with RollingFileOutput(prefix + '{index}.txt',
                               char_mode=TextMode,
                               linesep=os.linesep,
                               lines_per_file=3) as out:
            for number in range(6):
                out.write(str(number))

        # Six lines at three per file yield exactly files 0 and 1.
        expected_files = (
            ('0.txt', '0\n1\n2\n'),
            ('1.txt', '3\n4\n5\n'),
        )
        for suffix, expected in expected_files:
            with open(prefix + suffix, 'rt') as infile:
                self.assertEqual(expected, infile.read())

    def test_fileoutput_with_header(self):
        """The header is written at the top of every rolled output file."""
        prefix = self.root.make_file()
        with textoutput(prefix + '{index}.txt',
                        file_output_type=RollingFileOutput,
                        header="number\n",
                        lines_per_file=3) as out:
            for number in range(6):
                out.write(str(number))

        # Going by the expected content, the header does not count toward
        # lines_per_file.
        expected_files = (
            ('0.txt', 'number\n0\n1\n2\n'),
            ('1.txt', 'number\n3\n4\n5\n'),
        )
        for suffix, expected in expected_files:
            with open(prefix + suffix, 'rt') as infile:
                self.assertEqual(expected, infile.read())

    def test_rolling_fileoutput_write(self):
        """write() rolls files for single values and for a multi-line string.

        Single values are written with ``False`` as the second positional
        argument (its meaning is defined by the output class -- TODO confirm);
        the final call passes one string containing three lines.
        """
        prefix = self.root.make_file()
        single_values = list(range(6)) + ['a', 'b', 'c']
        with textoutput(prefix + '{index}.txt',
                        file_output_type=RollingFileOutput,
                        lines_per_file=3) as out:
            for value in single_values:
                out.write(value, False)
            out.write("d\ne\nf")

        expected_files = (
            ('0.txt', '0\n1\n2\n'),
            ('1.txt', '3\n4\n5\n'),
            ('2.txt', 'a\nb\nc\n'),
            ('3.txt', 'd\ne\nf\n'),
        )
        for suffix, expected in expected_files:
            with open(prefix + suffix, 'rt') as infile:
                self.assertEqual(expected, infile.read())

    def test_pattern_file_output(self):
        """PatternFileOutput routes each line to a file named from its tokens."""
        prefix = self.root.make_file()
        pairs = [(a, b) for a in range(2) for b in range(2)]

        def tokens_for(line):
            # Map the two space-separated fields onto the pattern's {a}, {b}.
            fields = line.split(' ')
            return dict(zip(('a', 'b'), fields))

        with textoutput(prefix + '{a}.{b}.txt',
                        file_output_type=PatternFileOutput,
                        token_func=tokens_for) as out:
            for a, b in pairs:
                out.writeline('{} {}'.format(a, b))

        # Every (a, b) combination ends up alone in its own file.
        for a, b in pairs:
            with open(prefix + '{}.{}.txt'.format(a, b), 'rt') as infile:
                self.assertEqual('{} {}\n'.format(a, b), infile.read())
Example #4
0
class FileTests(TestCase):
    def setUp(self):
        """Create the TempDir that each test uses to make its files."""
        self.root = TempDir()

    def tearDown(self):
        """Close the TempDir created in setUp."""
        # Presumably this also removes the files made during the test --
        # TODO confirm against TempDir.close().
        self.root.close()

    def test_invalid(self):
        """Opening a file with mode 'n' raises ValueError."""
        with self.assertRaises(ValueError):
            gz_format = get_format('gz')
            gz_format.open_file('foo', 'n')

    def write_read_file(self, ext, use_system, mode='t', content=None):
        """Write content to a compressed file, read it back, and compare.

        Args:
            ext: File extension; passed to ``get_format`` and used as the
                suffix of the temporary file.
            use_system: Forwarded to ``write_file`` and ``read_file``.
            mode: 't' or 'b'; appended to 'w' and 'r' to form the open modes.
            content: Data to write. When None, random text is generated (and
                encoded to bytes when ``mode`` is 'b'). Caller-supplied
                content is used exactly as given.
        """
        if content is None:
            content = random_text()  # generate 1 kb of random text
            if mode == 'b':
                # Encode the string in one call instead of joining
                # per-character encodings; the resulting bytes are the same.
                content = content.encode()
        path = self.root.make_file(suffix=ext)
        fmt = get_format(ext)
        write_file(fmt, path, use_system, content, 'w' + mode)
        in_text = read_file(fmt, path, use_system, 'r' + mode)
        self.assertEqual(content, in_text)

    def test_write_read_bytes_python(self):
        """Round-trip bytes through each format with use_system=False."""
        extensions = ('.gz', '.bz2', '.xz')
        for ext in extensions:
            with self.subTest(fmt=ext):
                self.write_read_file(ext, False, mode='b')

    def test_write_read_text_python(self):
        """Round-trip text through each format with use_system=False."""
        extensions = ('.gz', '.bz2', '.xz')
        for ext in extensions:
            with self.subTest(fmt=ext):
                self.write_read_file(ext, False, mode='t')

    # These tests will be skipped if the required system-level executables
    # are not available

    @skipIf(gz_path is None, "'gzip' not available")
    def test_system_gzip(self):
        """Round-trip random text through '.gz' with use_system=True."""
        self.write_read_file(ext='.gz', use_system=True)

    @skipIf(gz_path is None, "'gzip' not available")
    def test_iter_system(self):
        """Iterating a use_system reader opened in bytes mode yields lines."""
        target = self.root.make_file(suffix='.gz')
        payload = 'line1\nline2\nline3'
        gz_format = get_format('.gz')
        # Bytes mode is required here: in text mode the stream would be
        # wrapped in a TextBuffer, which does not use the underlying __iter__.
        with gz_format.open_file(
                target, mode='wb', ext='.gz', use_system=True) as writer:
            writer.write(payload.encode())
        with gz_format.open_file(
                target, mode='rb', ext='.gz', use_system=True) as reader:
            lines = [raw.rstrip().decode() for raw in iter(reader)]
        self.assertListEqual(lines, ['line1', 'line2', 'line3'])

    @skipIf(bz_path is None, "'bzip2' not available")
    def test_system_bzip(self):
        """Round-trip random text through '.bz2' with use_system=True."""
        self.write_read_file(ext='.bz2', use_system=True)

    @skipIf(xz_path is None, "'xz' not available")
    def test_system_lzma(self):
        """Round-trip random text through '.xz' with use_system=True."""
        self.write_read_file(ext='.xz', use_system=True)

    def test_compress_path(self):
        """compress_file on a path, with default and explicit destinations.

        Runs with use_system=False, and also with use_system=True when
        ``gz_path`` is set.
        """
        system_options = (True, False) if gz_path else (False, )
        for use_system in system_options:
            with self.subTest(use_system=use_system):
                # Default destination: <source>.gz, and the source is kept.
                source = self.root.make_file()
                with open(source, 'wt') as out:
                    out.write('foo')
                gz_format = get_format('.gz')
                result = gz_format.compress_file(
                    source, use_system=use_system)
                compressed = source + '.gz'
                self.assertEqual(result, compressed)
                self.assertTrue(os.path.exists(source))
                self.assertTrue(os.path.exists(compressed))
                with gzip.open(compressed, 'rt') as check:
                    self.assertEqual(check.read(), 'foo')

                # Explicit destination with keep=False: the source is removed.
                source = self.root.make_file()
                with open(source, 'wt') as out:
                    out.write('foo')
                compressed = source + '.bar'
                gz_format = get_format('.gz')
                result = gz_format.compress_file(
                    source, compressed, keep=False, use_system=use_system)
                self.assertEqual(result, compressed)
                self.assertFalse(os.path.exists(source))
                self.assertTrue(os.path.exists(compressed))
                with gzip.open(compressed, 'rt') as check:
                    self.assertEqual(check.read(), 'foo')

    def test_compress_file(self):
        """compress_file on an open binary file object instead of a path.

        Runs with use_system=False, and also with use_system=True when
        ``gz_path`` is set.
        """
        system_options = (True, False) if gz_path else (False, )
        for use_system in system_options:
            with self.subTest(use_system=use_system):
                # Default destination, expected to be <source path>.gz.
                source = self.root.make_file()
                with open(source, 'wt') as out:
                    out.write('foo')
                with open(source, 'rb') as handle:
                    gz_format = get_format('.gz')
                    result = gz_format.compress_file(
                        handle, use_system=use_system)
                compressed = source + '.gz'
                self.assertEqual(result, compressed)
                self.assertTrue(os.path.exists(compressed))
                with gzip.open(compressed, 'rt') as check:
                    self.assertEqual(check.read(), 'foo')

                # Explicit destination with keep=False: the source is removed.
                source = self.root.make_file()
                with open(source, 'wt') as out:
                    out.write('foo')
                compressed = source + '.bar'
                with open(source, 'rb') as handle:
                    gz_format = get_format('.gz')
                    result = gz_format.compress_file(
                        handle, compressed, keep=False, use_system=use_system)
                self.assertEqual(result, compressed)
                self.assertFalse(os.path.exists(source))
                self.assertTrue(os.path.exists(compressed))
                with gzip.open(compressed, 'rt') as check:
                    self.assertEqual(check.read(), 'foo')

    def test_decompress_path_error(self):
        """decompress_file raises when given only this source path.

        The gzip data is written to a file made without a suffix and no
        destination is passed; presumably the error arises because no
        destination name can be derived from such a path -- TODO confirm.
        """
        path = self.root.make_file()
        with gzip.open(path, 'wt') as o:
            o.write('foo')
        # Look the format up outside the assertRaises block so that a failure
        # in get_format() cannot be mistaken for the expected error.
        fmt = get_format('.gz')
        with self.assertRaises(Exception):
            fmt.decompress_file(path)

    def test_decompress_path(self):
        """decompress_file on a path, with default and explicit destinations.

        Runs with use_system=False, and also with use_system=True when
        ``gz_path`` is set.
        """
        system_options = (True, False) if gz_path else (False, )
        for use_system in system_options:
            with self.subTest(use_system=use_system):
                # Default destination: the source name minus '.gz'; the
                # compressed file is kept.
                plain = self.root.make_file()
                compressed = plain + '.gz'
                with gzip.open(compressed, 'wt') as out:
                    out.write('foo')
                gz_format = get_format('.gz')
                result = gz_format.decompress_file(
                    compressed, use_system=use_system)
                self.assertEqual(result, plain)
                self.assertTrue(os.path.exists(plain))
                self.assertTrue(os.path.exists(compressed))
                with open(plain, 'rt') as check:
                    self.assertEqual(check.read(), 'foo')

                # Explicit destination with keep=False: the compressed
                # source is removed.
                plain = self.root.make_file()
                compressed = plain + '.gz'
                with gzip.open(compressed, 'wt') as out:
                    out.write('foo')
                gz_format = get_format('.gz')
                result = gz_format.decompress_file(
                    compressed, plain, keep=False, use_system=use_system)
                self.assertEqual(result, plain)
                self.assertTrue(os.path.exists(plain))
                self.assertFalse(os.path.exists(compressed))
                with open(plain, 'rt') as check:
                    self.assertEqual(check.read(), 'foo')

    def test_decompress_file(self):
        """decompress_file on open file objects instead of paths.

        Runs with use_system=False, and also with use_system=True when
        ``gz_path`` is set. The visible scenarios are: a source file object
        with the default destination, source and destination both given as
        file objects, and a source file object with an explicit destination
        path and keep=False.
        """
        b = (True, False) if gz_path else (False, )
        for use_system in b:
            with self.subTest(use_system=use_system):
                # Scenario 1: source file object, default destination.
                path = self.root.make_file()
                gzfile = path + '.gz'
                with gzip.open(gzfile, 'wt') as o:
                    o.write('foo')
                with open(gzfile, 'rb') as i:
                    fmt = get_format('.gz')
                    dest = fmt.decompress_file(i, use_system=use_system)
                # The destination is the source name without '.gz', and the
                # compressed source is kept.
                self.assertEqual(dest, path)
                self.assertTrue(os.path.exists(path))
                self.assertTrue(os.path.exists(gzfile))
                with open(path, 'rt') as i:
                    self.assertEqual(i.read(), 'foo')

                # Scenario 2: both source and destination are file objects.
                # The same gzfile is rewritten first.
                with gzip.open(gzfile, 'wt') as o:
                    o.write('foo')
                dest = self.root.make_file()
                with open(gzfile, 'rb') as i, open(dest, 'wb') as o:
                    fmt = get_format('.gz')
                    fmt.decompress_file(source=i,
                                        dest=o,
                                        use_system=use_system)
                self.assertTrue(os.path.exists(dest))
                self.assertTrue(os.path.exists(gzfile))
                with open(dest, 'rt') as i:
                    self.assertEqual(i.read(), 'foo')

                # Scenario 3: explicit destination path with keep=False; the
                # source here uses a '.bar' suffix rather than '.gz'.
                path = self.root.make_file()
                gzfile = path + '.bar'
                with gzip.open(gzfile, 'wt') as o:
                    o.write('foo')
                with open(gzfile, 'rb') as i:
                    fmt = get_format('.gz')
                    dest = fmt.decompress_file(i,
                                               path,
                                               keep=False,
                                               use_system=use_system)
                # keep=False removes the compressed source.
                self.assertEqual(dest, path)
                self.assertFalse(os.path.exists(gzfile))
                self.assertTrue(os.path.exists(path))
                with open(path, 'rt') as i:
                    self.assertEqual(i.read(), 'foo')