Python Stream Examples

Programming Language: Python

Namespace/Package Name: tabulator

Class/Type: Stream

Examples at hotexamples.com: 39

The Python tabulator.Stream class is part of the Tabulator library and provides functionality for working with tabular data streams. It offers methods to read, write, and manipulate tabular data in formats such as CSV, JSON, Excel, and HTML. This class enables developers to easily process and analyze data from different sources and perform operations like filtering, sorting, and aggregating on tabular datasets. The tabulator.Stream class is a versatile tool for working with structured data in Python.

Python Stream - 39 examples found. These are the top rated real world Python examples of tabulator.Stream extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Stream(30)

open(30)

close(15)

read(7)

iter(6)

save(2)

append(1)

pop(1)

reset(1)

Example #1

Show file

File: table.py Project: akariv/jsontableschema-py

    def __init__(self, source, schema=None, strict=False,
                 post_cast=[], storage=None, **options):
        """Prepare the schema and the row stream for ``source``.

        Without ``storage`` the rows are read through a tabulator ``Stream``
        built from ``source`` and ``options``; with ``storage`` they are read
        through the storage backend's ``iter``.

        https://github.com/frictionlessdata/tableschema-py#schema
        """

        # Initial state
        self.__source = source
        self.__stream = None
        self.__schema = None
        self.__headers = None
        self.__storage = None
        self.__post_cast = copy(post_cast)

        # Wrap the descriptor only when one was supplied
        if schema is not None:
            self.__schema = Schema(schema)

        if storage is not None:
            # Storage-backed stream: connect when given something other than
            # a ready Storage instance
            backend = storage
            if not isinstance(backend, Storage):
                backend = Storage.connect(backend, **options)
            if self.__schema:
                backend.describe(source, self.__schema.descriptor)
            field_names = Schema(backend.describe(source)).field_names
            self.__stream = Stream(partial(backend.iter, source), headers=field_names)
            self.__storage = backend
        else:
            # Tabulator-backed stream: first row is the header row by default
            options.setdefault('headers', 1)
            self.__stream = Stream(source, **options)

Example #2

Show file

def test_stream_skip_rows_with_headers_example_from_readme():
    """With headers=1 and skip_rows=['#'], headers and data rows are as expected."""
    rows = [['#comment'], ['name', 'order'], ['John', 1], ['Alex', 2]]
    expected_headers = ['name', 'order']
    expected_rows = [['John', 1], ['Alex', 2]]
    with Stream(rows, headers=1, skip_rows=['#']) as stream:
        assert stream.headers == expected_headers
        assert stream.read() == expected_rows

Example #3

Show file

def test_stream_skip_rows_excel_empty_column():
    """Reading skip-rows.xlsx with headers=1 and skip_rows=[''] yields two rows."""
    path = 'data/special/skip-rows.xlsx'
    expected_rows = [['A', 'B'], [8, 9]]
    with Stream(path, headers=1, skip_rows=['']) as stream:
        rows = stream.read()
        assert rows == expected_rows

Example #4

Show file

def test_stream_bytes_sample_size():
    """Reported encoding differs between default options and a 10-byte sample."""
    path = 'data/special/latin1.csv'

    # Default options
    with Stream(path) as default_stream:
        assert default_stream.encoding == 'cp1252'

    # No row sample and only 10 bytes available for detection
    sampling = {'sample_size': 0, 'bytes_sample_size': 10}
    with Stream(path, **sampling) as limited_stream:
        assert limited_stream.encoding == 'utf-8'

Example #5

Show file

def test_stream_html_content_with_allow_html():
    """A stream opened with allow_html=True on an HTML page is truthy."""
    # Link to html file containing information about csv file
    url = 'https://github.com/frictionlessdata/tabulator-py/blob/master/data/table.csv'
    with Stream(url, allow_html=True) as opened:
        assert opened

Example #6

Show file

def test_stream_encoding_utf_16():
    """UTF-16 bytes are reported as 'utf-16' and decode to the original text."""
    # Bytes encoded as UTF-16 with BOM in platform order is detected
    payload = u'en,English\nja,日本語'.encode('utf-16')
    buffer = io.BytesIO(payload)
    expected_rows = [[u'en', u'English'], [u'ja', u'日本語']]
    with Stream(buffer, format='csv') as stream:
        assert stream.encoding == 'utf-16'
        assert stream.read() == expected_rows

Example #7

Show file

File: test_stream.py Project: frictionlessdata/tabulator-py

def test_stream_http_error():
    """Opening a stream on a bad HTTP path raises exceptions.HTTPError."""
    bad_url = 'http://github.com/bad_path.csv'
    stream = Stream(bad_url)
    with pytest.raises(exceptions.HTTPError):
        stream.open()

Example #8

Show file

def test_stream_bad_options_warning():
    """Opening with an unknown option emits a UserWarning naming that option."""
    options = dict(scheme='text', format='csv', bad_option=True)

    # First open happens outside the warning recorder, as in the original
    Stream('', **options).open()

    with pytest.warns(UserWarning) as record:
        Stream('', **options).open()

    first_message = record[0].message.args[0]
    assert 'bad_option' in str(first_message)

Example #9

Show file

File: test_stream.py Project: frictionlessdata/tabulator-py

def test_stream_format_error():
    """An unknown format raises FormatError whose text names the format."""
    bad_format = Stream('', format='bad_format')
    with pytest.raises(exceptions.FormatError) as caught:
        bad_format.open()
    error_text = str(caught.value)
    assert 'bad_format' in error_text

Example #10

Show file

File: test_stream.py Project: frictionlessdata/tabulator-py

def test_stream_io_error():
    """A missing local path raises exceptions.IOError mentioning the path."""
    missing = Stream('bad_path.csv')
    with pytest.raises(exceptions.IOError) as caught:
        missing.open()
    error_text = str(caught.value)
    assert 'bad_path.csv' in error_text

Example #11

Show file

File: test_stream.py Project: frictionlessdata/tabulator-py

def test_stream_scheme_error():
    """An unknown scheme raises SchemeError whose text names the scheme."""
    bad_scheme = Stream('', scheme='bad_scheme')
    with pytest.raises(exceptions.SchemeError) as caught:
        bad_scheme.open()
    error_text = str(caught.value)
    assert 'bad_scheme' in error_text

Example #12

Show file

File: test_stream.py Project: frictionlessdata/tabulator-py

def test_stream_format_error_html():
    """Opening table.csv.html with format='csv' raises FormatError."""
    html_path = 'data/special/table.csv.html'
    stream = Stream(html_path, format='csv')
    with pytest.raises(exceptions.FormatError):
        stream.open()

Example #13

Show file

File: test_stream.py Project: frictionlessdata/tabulator-py

def test_stream_source_error_data():
    """Opening and reading the JSON text '[1,2]' raises SourceError."""
    text = '[1,2]'
    stream = Stream(text, scheme='text', format='json')
    with pytest.raises(exceptions.SourceError):
        # Either call may raise; both stay inside the expectation block
        stream.open()
        stream.read()

Example #14

Show file

def test_stream_source_error_data():
    """SourceError is expected when '[1,2]' is opened and read as JSON text."""
    json_text = '[1,2]'
    json_stream = Stream(json_text, scheme='text', format='json')
    with pytest.raises(exceptions.SourceError):
        json_stream.open()
        json_stream.read()

Example #15

Show file

File: test_stream.py Project: frictionlessdata/tabulator-py

def test_stream_gsheet_bad_url():
    """Opening a bad Google Sheets URL raises exceptions.HTTPError."""
    bad_sheet_url = 'https://docs.google.com/spreadsheets/d/bad'
    stream = Stream(bad_sheet_url)
    with pytest.raises(exceptions.HTTPError):
        stream.open()

Example #16

Show file

def test_stream_scheme_error():
    """SchemeError raised for scheme='bad_scheme' mentions 'bad_scheme'."""
    unsupported = Stream('', scheme='bad_scheme')
    with pytest.raises(exceptions.SchemeError) as raised:
        unsupported.open()
    message = str(raised.value)
    assert 'bad_scheme' in message

Example #17

Show file

def test_stream_compression_error_gz():
    """Open inline CSV text whose last cell ends in '.tar.gz'.

    The test makes no assertion; it passes as long as ``open()`` does not
    raise.
    """
    # The data row previously started with '\1', which Python reads as an
    # octal escape (U+0001) rather than the digit 1; the sibling zip test
    # uses a plain '1', so the same is used here.
    source = 'id,filename\n1,dump.tar.gz'
    stream = Stream(source, scheme='text', format='csv')
    stream.open()

Example #18

Show file

def test_stream_http_error():
    """exceptions.HTTPError is expected for 'http://github.com/bad_path.csv'."""
    remote = Stream('http://github.com/bad_path.csv')
    with pytest.raises(exceptions.HTTPError):
        remote.open()

Example #19

Show file

def test_stream_compression_error_zip():
    """Open inline CSV text whose last cell ends in '.zip'.

    No assertion is made; the test passes if ``open()`` does not raise.
    """
    csv_text = 'id,filename\n1,archive.zip'
    inline = Stream(csv_text, scheme='text', format='csv')
    inline.open()

Example #20

Show file

def test_stream_encoding_explicit_latin1():
    """encoding='latin1' is reported as 'iso8859-1' and rows decode correctly."""
    expected_rows = [['id', 'name'], ['1', 'english'], ['2', '©']]
    with Stream('data/special/latin1.csv', encoding='latin1') as stream:
        assert stream.encoding == 'iso8859-1'
        rows = stream.read()
        assert rows == expected_rows

Example #21

Show file

def test_stream_scheme_file():
    """A local path source reports scheme 'file'."""
    local_path = 'data/table.csv'
    with Stream(local_path) as stream:
        detected = stream.scheme
        assert detected == 'file'

Example #22

Show file

def test_stream_html_content():
    """Opening an HTML page without allow_html raises FormatError mentioning HTML."""
    # Link to html file containing information about csv file
    url = 'https://github.com/frictionlessdata/tabulator-py/blob/master/data/table.csv'
    with pytest.raises(exceptions.FormatError) as caught:
        Stream(url).open()
    error_text = str(caught.value)
    assert 'HTML' in error_text

Example #23

Show file

def test_stream_scheme_https():
    """A source built from BASE_URL reports scheme 'https'."""
    url = BASE_URL % 'data/table.csv'
    with Stream(url) as stream:
        detected = stream.scheme
        assert detected == 'https'

Example #24

Show file

def test_stream_sample():
    """With headers=1 the sample holds the data rows and headers the first row."""
    rows = [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    expected_headers = ['id', 'name']
    expected_sample = [['1', 'english'], ['2', '中国人']]
    with Stream(rows, headers=1) as stream:
        assert stream.headers == expected_headers
        assert stream.sample == expected_sample

Example #25

Show file

def test_stream_scheme_stream():
    """A binary file object source reports scheme 'stream'."""
    handle = io.open('data/table.csv', mode='rb')
    with Stream(handle, format='csv') as stream:
        detected = stream.scheme
        assert detected == 'stream'

Example #26

Show file

def test_stream_skip_rows():
    """skip_rows=['#', 5] on skip-rows.csv leaves the two expected rows."""
    path = 'data/special/skip-rows.csv'
    expected_rows = [['id', 'name'], ['1', 'english']]
    with Stream(path, skip_rows=['#', 5]) as stream:
        rows = stream.read()
        assert rows == expected_rows

Example #27

Show file

def test_stream_scheme_text():
    """A 'text://' source reports scheme 'text'."""
    inline_source = 'text://a\nb'
    with Stream(inline_source, format='csv') as stream:
        detected = stream.scheme
        assert detected == 'text'

Example #28

Show file

def test_stream_skip_rows_with_headers():
    """headers=1 with skip_rows=['#'] on skip-rows.csv gives headers and two rows."""
    path = 'data/special/skip-rows.csv'
    expected_headers = ['id', 'name']
    expected_rows = [['1', 'english'], ['2', '中国人']]
    with Stream(path, headers=1, skip_rows=['#']) as stream:
        assert stream.headers == expected_headers
        assert stream.read() == expected_rows

Example #29

Show file

def test_stream_format_ndjson():
    """A '.ndjson' path reports format 'ndjson'."""
    path = 'data/table.ndjson'
    with Stream(path) as stream:
        detected = stream.format
        assert detected == 'ndjson'

Example #30

Show file

def test_stream_json_property():
    """property='root' reads the rows stored under the 'root' key of the JSON text."""
    json_text = '{"root": [["value1", "value2"], ["value3", "value4"]]}'
    expected_rows = [['value1', 'value2'], ['value3', 'value4']]
    stream_options = dict(scheme='text', format='json', property='root')
    with Stream(json_text, **stream_options) as stream:
        assert stream.read() == expected_rows

Example #31

Show file

def test_stream_format_ods():
    """A '.ods' path reports format 'ods'."""
    path = 'data/table.ods'
    with Stream(path) as stream:
        detected = stream.format
        assert detected == 'ods'

Example #32

Show file

def test_stream_format_error_html():
    """FormatError is expected when table.csv.html is opened as CSV."""
    as_csv = Stream('data/special/table.csv.html', format='csv')
    with pytest.raises(exceptions.FormatError):
        as_csv.open()

Example #33

Show file

def test_stream_format_tsv():
    """A '.tsv' path reports format 'tsv'."""
    path = 'data/table.tsv'
    with Stream(path) as stream:
        detected = stream.format
        assert detected == 'tsv'

Example #34

Show file

def test_stream_format_error():
    """FormatError raised for format='bad_format' mentions 'bad_format'."""
    unsupported = Stream('', format='bad_format')
    with pytest.raises(exceptions.FormatError) as raised:
        unsupported.open()
    message = str(raised.value)
    assert 'bad_format' in message

Example #35

Show file

def test_stream_format_xlsx():
    """A '.xlsx' path reports format 'xlsx'."""
    path = 'data/table.xlsx'
    with Stream(path) as stream:
        detected = stream.format
        assert detected == 'xlsx'

Example #36

Show file

def test_stream_io_error():
    """exceptions.IOError raised for 'bad_path.csv' mentions that path."""
    path = 'bad_path.csv'
    stream = Stream(path)
    with pytest.raises(exceptions.IOError) as raised:
        stream.open()
    message = str(raised.value)
    assert 'bad_path.csv' in message

Example #37

Show file

def test_stream_encoding_explicit_utf8():
    """encoding='utf-8' is reported unchanged and rows decode correctly."""
    expected_rows = [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    with Stream('data/table.csv', encoding='utf-8') as stream:
        assert stream.encoding == 'utf-8'
        rows = stream.read()
        assert rows == expected_rows

Example #38

Show file

def test_stream_read_closed():
    """Reading before open() raises TabulatorException mentioning 'stream.open()'."""
    unopened = Stream('data/table.csv')
    with pytest.raises(exceptions.TabulatorException) as caught:
        unopened.read()
    error_text = str(caught.value)
    assert 'stream.open()' in error_text

Example #39

Show file

File: table.py Project: akariv/jsontableschema-py

class Table(object):
    """Rows read from a tabulator stream or a storage backend, with a schema.

    https://github.com/frictionlessdata/tableschema-py#schema
    """

    # Public

    def __init__(self, source, schema=None, strict=False,
                 post_cast=[], storage=None, **options):
        """https://github.com/frictionlessdata/tableschema-py#schema

        Args:
            source: passed to ``Stream`` when ``storage`` is ``None``;
                otherwise passed to the storage backend's ``describe`` and
                ``iter``.
            schema: optional descriptor, wrapped in ``Schema`` when given.
            strict: not used by this method.
            post_cast: processors applied to the extended-row iterator after
                the built-in cast step. The list is copied, so the shared
                default is never mutated.
            storage: a ``Storage`` instance, or a value handed to
                ``Storage.connect``; ``None`` selects the tabulator path.
            **options: forwarded to ``Stream`` (tabulator path) or to
                ``Storage.connect`` (storage path).
        """

        # Set attributes
        self.__source = source
        self.__stream = None
        self.__schema = None
        self.__headers = None
        self.__storage = None
        self.__post_cast = copy(post_cast)

        # Schema
        if schema is not None:
            self.__schema = Schema(schema)

        # Stream (tabulator)
        if storage is None:
            options.setdefault('headers', 1)
            self.__stream = Stream(source,  **options)

        # Stream (storage)
        else:
            if not isinstance(storage, Storage):
                storage = Storage.connect(storage, **options)
            if self.__schema:
                storage.describe(source, self.__schema.descriptor)
            headers = Schema(storage.describe(source)).field_names
            self.__stream = Stream(partial(storage.iter, source), headers=headers)
            self.__storage = storage

    @property
    def headers(self):
        """https://github.com/frictionlessdata/tableschema-py#schema

        Headers recorded by ``iter`` or ``infer``; ``None`` until then.
        """
        return self.__headers

    @property
    def schema(self):
        """https://github.com/frictionlessdata/tableschema-py#schema

        The ``Schema`` given at construction or created by ``infer``,
        otherwise ``None``.
        """
        return self.__schema

    def iter(self, keyed=False, extended=False, cast=True, relations=False):
        """https://github.com/frictionlessdata/tableschema-py#schema

        Iterate over the table rows.

        The stream is entered as a context manager, so it is closed when the
        rows are exhausted, when an error is raised, and also when the
        consumer stops early and the generator is closed.

        Args:
            keyed: yield ``dict(zip(headers, row))`` (ignored if ``extended``).
            extended: yield ``(row_number, headers, row)`` tuples.
            cast: cast rows with the schema and run header/unique checks.
            relations: when truthy, passed to ``_resolve_relations`` for each
                of the schema's foreign keys.

        Raises:
            exceptions.CastError: headers differ from the schema field names,
                or a unique constraint is violated.
            exceptions.RelationError: a foreign key cannot be resolved.
        """

        # Prepare unique checks
        unique_fields_cache = {}
        if cast and self.schema:
            unique_fields_cache = _create_unique_fields_cache(self.schema)

        # Open/iterate stream; leaving the ``with`` block closes it on every
        # exit path, including an early ``break`` in the consumer
        with self.__stream as stream:
            iterator = stream.iter(extended=True)
            iterator = self.__apply_processors(iterator, cast=cast)
            for row_number, headers, row in iterator:

                # Get headers
                if not self.__headers:
                    self.__headers = headers

                # Check headers
                if cast:
                    if self.schema and self.headers:
                        if self.headers != self.schema.field_names:
                            message = 'Table headers don\'t match schema field names'
                            raise exceptions.CastError(message)

                # Check unique
                if cast:
                    for indexes, cache in unique_fields_cache.items():
                        values = tuple(value for i, value in enumerate(row) if i in indexes)
                        # Rows whose constrained values are all None are exempt
                        if not all(map(lambda value: value is None, values)):
                            if values in cache['data']:
                                message = 'Field(s) "%s" duplicates in row "%s"'
                                message = message % (cache['name'], row_number)
                                raise exceptions.CastError(message)
                            cache['data'].add(values)

                # Resolve relations
                if relations:
                    if self.schema:
                        for foreign_key in self.schema.foreign_keys:
                            row = _resolve_relations(row, headers, relations, foreign_key)
                            if row is None:
                                message = 'Foreign key "%s" violation in row "%s"'
                                message = message % (foreign_key['fields'], row_number)
                                raise exceptions.RelationError(message)

                # Form row
                if extended:
                    yield (row_number, headers, row)
                elif keyed:
                    yield dict(zip(headers, row))
                else:
                    yield row

    def read(self, keyed=False, extended=False, cast=True, relations=False, limit=None):
        """https://github.com/frictionlessdata/tableschema-py#schema

        Collect rows from ``iter`` into a list.

        Args:
            keyed, extended, cast, relations: forwarded to ``iter``.
            limit: stop after this many rows; ``None`` reads every row.

        Returns:
            list: the collected rows.
        """
        result = []
        rows = self.iter(keyed=keyed, extended=extended, cast=cast, relations=relations)
        try:
            for count, row in enumerate(rows, start=1):
                result.append(row)
                if count == limit:
                    break
        finally:
            # Finalise the generator now so the stream is closed even when
            # ``limit`` stopped the loop before the rows ran out
            rows.close()
        return result

    def infer(self, limit=100):
        """https://github.com/frictionlessdata/tableschema-py#schema

        Fill in the schema and/or headers when either is missing.

        Args:
            limit: number of sample rows used for inference on the
                tabulator path.

        Returns:
            The schema's ``descriptor``.
        """
        if self.__schema is None or self.__headers is None:

            # Infer (tabulator)
            if not self.__storage:
                with self.__stream as stream:
                    if self.__schema is None:
                        self.__schema = Schema()
                        self.__schema.infer(stream.sample[:limit], headers=stream.headers)
                    if self.__headers is None:
                        self.__headers = stream.headers

            # Infer (storage)
            else:
                descriptor = self.__storage.describe(self.__source)
                if self.__schema is None:
                    self.__schema = Schema(descriptor)
                if self.__headers is None:
                    self.__headers = self.__schema.field_names

        return self.__schema.descriptor

    def save(self, target, storage=None, **options):
        """https://github.com/frictionlessdata/tableschema-py#schema

        Write the table to ``target``.

        Args:
            target: destination handed to ``Stream.save`` or to the storage
                backend's ``create``/``write``.
            storage: ``None`` to save through tabulator; otherwise a
                ``Storage`` instance or a value for ``Storage.connect``.
            **options: forwarded to ``Stream.save`` or ``Storage.connect``.

        Returns:
            ``True`` on the tabulator path, the storage object otherwise.
        """

        # Save (tabulator)
        if storage is None:
            with Stream(self.iter, headers=self.__schema.headers) as stream:
                stream.save(target, **options)
            return True

        # Save (storage)
        else:
            if not isinstance(storage, Storage):
                storage = Storage.connect(storage, **options)
            storage.create(target, self.__schema.descriptor, force=True)
            storage.write(target, self.iter(cast=False))
            return storage

    # Private

    def __apply_processors(self, iterator, cast=True):
        """Chain the built-in cast step and the ``post_cast`` processors.

        Each processor takes an iterator of ``(row_number, headers, row)``
        tuples and returns another such iterator.
        """

        # Apply processors to iterator
        def builtin_processor(extended_rows):
            for row_number, headers, row in extended_rows:
                if self.__schema and cast:
                    row = self.__schema.cast_row(row)
                yield (row_number, headers, row)
        processors = [builtin_processor] + self.__post_cast
        for processor in processors:
            iterator = processor(iterator)

        return iterator