Example #1
def tocsv(table, source=None, encoding=None, errors='strict', write_header=True,
          **csvargs):
    """
    Write the table to a CSV file. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 2]]
        >>> etl.tocsv(table1, 'example.csv')
        >>> # look what it did
        ... print(open('example.csv').read())
        foo,bar
        a,1
        b,2
        c,2

    The `source` argument is the path of the delimited file, and the optional
    `write_header` argument specifies whether to include the field names in the
    delimited file.  All other keyword arguments are passed to
    :func:`csv.writer`. So, e.g., to override the delimiter from the default
    CSV dialect, provide the `delimiter` keyword argument.

    Note that if a file already exists at the given location, it will be
    overwritten.

    """

    source = write_source_from_arg(source)
    csvargs.setdefault('dialect', 'excel')
    tocsv_impl(table, source=source, encoding=encoding, errors=errors,
               write_header=write_header, **csvargs)
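The delimiter override mentioned in the docstring might look like this; a minimal sketch, assuming the petl 1.x API shown above ('example.tsv' is a hypothetical output path):

import petl as etl

table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2]]
# Keyword arguments beyond source/encoding/errors/write_header go straight
# to csv.writer, so delimiter overrides the default excel dialect.
etl.tocsv(table1, 'example.tsv', delimiter='\t')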
Example #2
 def __iter__(self):
     protocol = self.protocol
     source = write_source_from_arg(self.source)
     with source.open_('wb') as f:
         for row in self.table:
             pickle.dump(row, f, protocol)
             yield row
Example #3
def teehtml(
    table,
    source=None,
    encoding=None,
    errors="strict",
    caption=None,
    vrepr=text_type,
    lineterminator="\n",
    index_header=False,
    tr_style=None,
    td_styles=None,
    truncate=None,
):
    """
    Return a table that writes rows to a Unicode HTML file as they are
    iterated over.

    """

    source = write_source_from_arg(source)
    return TeeHTMLView(
        table,
        source=source,
        encoding=encoding,
        errors=errors,
        caption=caption,
        vrepr=vrepr,
        lineterminator=lineterminator,
        index_header=index_header,
        tr_style=tr_style,
        td_styles=td_styles,
        truncate=truncate,
    )
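A minimal usage sketch, assuming petl exposes this function as etl.teehtml ('example.html' and 'example.csv' are hypothetical paths):

import petl as etl

table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2]]
# The tee view is lazy: the HTML file is written as a side effect when the
# rows are iterated, here by the downstream tocsv() sink.
table2 = etl.teehtml(table1, 'example.html')
etl.tocsv(table2, 'example.csv')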
Example #4
def totext(table, source=None, template=None, prologue=None, epilogue=None):
    """
    Write the table to a text file. E.g.::

        >>> from petl import totext, look
        >>> look(table)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+

        >>> prologue = \"\"\"{| class="wikitable"
        ... |-
        ... ! foo
        ... ! bar
        ... \"\"\"
        >>> template = \"\"\"|-
        ... | {foo}
        ... | {bar}
        ... \"\"\"
        >>> epilogue = "|}"
        >>> totext(table, 'test.txt', template, prologue, epilogue)
        >>>
        >>> # see what we did
        ... with open('test.txt') as f:
        ...     print f.read()
        ...
        {| class="wikitable"
        |-
        ! foo
        ! bar
        |-
        | a
        | 1
        |-
        | b
        | 2
        |-
        | c
        | 2
        |}

    The `template` will be used to format each row via
    `str.format <http://docs.python.org/library/stdtypes.html#str.format>`_.

    Supports transparent writing to ``.gz`` and ``.bz2`` files.

    """

    assert template is not None, 'template is required'
    source = write_source_from_arg(source)
    with source.open_('w') as f:
        _writetext(table, f, prologue, template, epilogue)
Example #5
def _writepickle(table, source, mode, protocol, write_header):
    source = write_source_from_arg(source)
    with source.open(mode) as f:
        it = iter(table)
        hdr = next(it)
        if write_header:
            pickle.dump(hdr, f, protocol)
        for row in it:
            pickle.dump(row, f, protocol)
Beispiel #6
0
def to_lineoriented_json(table, source):
    """
    Function enabling petl support for exporting line-oriented JSON.
    """
    source = write_source_from_arg(source)
    encoder = DateEncoder()
    with source.open("wb") as f:
        for d in _dicts(table):
            for chunk in encoder.iterencode(d):
                f.write(chunk)
            f.write("\n")
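A hedged usage sketch: this helper is not part of petl itself, and DateEncoder (presumably a json.JSONEncoder subclass) and _dicts (presumably yielding one dict per data row) come from the surrounding module. Since text chunks are written to a binary stream, this is Python 2 era code. petl's MemorySource can stand in for a file to inspect the output:

from petl.io.sources import MemorySource  # in-memory source shipped by petl

table = [['foo', 'bar'],
         ['a', 1],
         ['b', 2]]
buf = MemorySource()
to_lineoriented_json(table, buf)
# One JSON object per line, one line per data row.
print(buf.getvalue())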
Example #7
def appendtext(table, source=None, template=None, prologue=None, epilogue=None):
    """
    Append the table to a text file.

    .. versionadded:: 0.19
    """

    assert template is not None, 'template is required'
    source = write_source_from_arg(source)
    with source.open_('a') as f:
        _writetext(table, f, prologue, template, epilogue)
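A minimal usage sketch, continuing the totext example from Example #4 ('test.txt' is assumed to exist already; the template references fields by name, as in totext):

more = [['foo', 'bar'],
        ['d', 7]]
# Appends one formatted chunk per data row; the header row is consumed for
# field names only and is never written.
appendtext(more, 'test.txt', template='|-\n| {foo}\n| {bar}\n')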
Example #8
def tocsv(table, source=None, dialect=csv.excel, write_header=True, **kwargs):
    """
    Write the table to a CSV file. E.g.::

        >>> from petl import tocsv, look
        >>> look(table)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+

        >>> tocsv(table, 'test.csv')
        >>> # look what it did
        ... from petl import fromcsv
        >>> look(fromcsv('test.csv'))
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | '1'   |
        +-------+-------+
        | 'b'   | '2'   |
        +-------+-------+
        | 'c'   | '2'   |
        +-------+-------+

    The `source` argument is the path of the delimited file, and the optional
    `write_header` argument specifies whether to include the field names in the
    delimited file.  All other keyword arguments are passed to
    :func:`csv.writer`. So, e.g., to override the delimiter from the default
    CSV dialect, provide the `delimiter` keyword argument.

    Note that if a file already exists at the given location, it will be
    overwritten.

    Supports transparent writing to ``.gz`` and ``.bz2`` files.

    """

    source = write_source_from_arg(source)
    with source.open_('wb') as f:
        writer = csv.writer(f, dialect=dialect, **kwargs)
        # User specified no header
        if not write_header:
            for row in data(table):
                writer.writerow(row)
        # Default behavior, write the header
        else:
            for row in table:
                writer.writerow(row)
Example #9
 def __iter__(self):
     protocol = self.protocol
     source = write_source_from_arg(self.source)
     with source.open('wb') as f:
         it = iter(self.table)
         hdr = next(it)
         if self.write_header:
             pickle.dump(hdr, f, protocol)
         yield tuple(hdr)
         for row in it:
             pickle.dump(row, f, protocol)
             yield tuple(row)
Example #10
def teecsv(table, source=None, encoding=None, errors='strict', write_header=True,
           **csvargs):
    """
    Returns a table that writes rows to a CSV file as they are iterated over.

    """

    source = write_source_from_arg(source)
    csvargs.setdefault('dialect', 'excel')
    return teecsv_impl(table, source=source, encoding=encoding,
                       errors=errors, write_header=write_header,
                       **csvargs)
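A minimal usage sketch, assuming petl exposes this function as etl.teecsv ('copy.csv' and 'out.csv' are hypothetical paths):

import petl as etl

table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2]]
table2 = etl.teecsv(table1, 'copy.csv')  # lazy; nothing written yet
etl.tocsv(table2, 'out.csv')             # iterating writes both files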
Example #11
 def __iter__(self):
     source = write_source_from_arg(self.source)
     with source.open_('wb') as f:
         writer = csv.writer(f, dialect=self.dialect, **self.kwargs)
         # User specified no header
         if not self.write_header:
             for row in data(self.table):
                 writer.writerow(row)
                 yield row
         # Default behavior, write the header
         else:
             for row in self.table:
                 writer.writerow(row)
                 yield row
Example #12
def appenducsv(table, source=None, dialect=csv.excel, encoding='utf-8',
               **kwargs):
    """
    Append the table to a CSV file via the given encoding. Like
    :func:`appendcsv` but accepts an additional ``encoding`` argument which
    should be one of the Python supported encodings. See also :mod:`codecs`.

    .. versionadded:: 0.19
    """
    source = write_source_from_arg(source)
    with source.open_('ab') as f:
        writer = UnicodeWriter(f, dialect=dialect, encoding=encoding, **kwargs)
        for row in data(table):
            writer.writerow(row)
Example #13
def _writejson(source, obj, prefix, suffix, *args, **kwargs):
    encoder = JSONEncoder(*args, **kwargs)
    source = write_source_from_arg(source)
    with source.open("wb") as f:
        if PY2:
            # write directly to buffer
            _writeobj(encoder, obj, f, prefix, suffix)
        else:
            # wrap buffer for text IO
            f = io.TextIOWrapper(f, encoding="utf-8", newline="", write_through=True)
            try:
                _writeobj(encoder, obj, f, prefix, suffix)
            finally:
                f.detach()
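The wrap-then-detach pattern above matters because closing the TextIOWrapper would also close the underlying binary buffer, which the source context manager owns. A self-contained sketch of the same pattern:

import io

buf = io.BytesIO()
f = io.TextIOWrapper(buf, encoding='utf-8', newline='', write_through=True)
try:
    f.write('{"k": 1}')
finally:
    f.detach()           # flushes and releases buf without closing it
print(buf.getvalue())    # b'{"k": 1}' -- buf remains open and usable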
Example #14
 def __iter__(self):
     source = write_source_from_arg(self.source)
     prologue = self.prologue
     if prologue is not None:
         prologue = unicode(prologue)
     template = unicode(self.template)
     epilogue = self.epilogue
     if epilogue is not None:
         epilogue = unicode(epilogue)
     with source.open_('w') as f:
         f = codecs.getwriter(self.encoding)(f)
         for row in _teetext(self.table, f, prologue, template,
                             epilogue):
             yield row
Example #15
 def __iter__(self):
     source = write_source_from_arg(self.source)
     lineterminator = self.lineterminator
     caption = self.caption
     representation = self.representation
     with source.open_('w') as f:
         it = iter(self.table)
         flds = it.next()
         _write_begin(f, flds, lineterminator, caption)
         yield flds
         for row in it:
             _write_row(f, row, lineterminator, representation)
             yield row
         _write_end(f, lineterminator)
Example #16
def appendcsv(table, source=None, encoding=None, errors='strict',
              write_header=False, **csvargs):
    """
    Append data rows to an existing CSV file. As :func:`petl.io.csv.tocsv`
    but the file is opened in append mode and the table header is not written by
    default.

    Note that no attempt is made to check that the fields or row lengths are
    consistent with the existing data; the data rows from the table are simply
    appended to the file.

    """

    source = write_source_from_arg(source)
    csvargs.setdefault('dialect', 'excel')
    appendcsv_impl(table, source=source, encoding=encoding, errors=errors,
                   write_header=write_header, **csvargs)
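A minimal usage sketch, assuming 'example.csv' was created earlier (e.g. by the tocsv example above) and petl exposes this function as etl.appendcsv:

import petl as etl

more = [['foo', 'bar'],
        ['d', 7]]
# write_header defaults to False here, so only the data rows are appended.
etl.appendcsv(more, 'example.csv')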
Example #17
def touhtml(table, source=None, caption=None, encoding='utf-8',
            representation=unicode, lineterminator=u'\r\n'):
    """
    Write the table as Unicode HTML to a file.

    .. versionadded:: 0.19
    """

    source = write_source_from_arg(source)
    with source.open_('w') as f:
        f = codecs.getwriter(encoding)(f)
        it = iter(table)
        flds = it.next()
        _write_begin_unicode(f, flds, lineterminator, caption)
        for row in it:
            _write_row_unicode(f, row, lineterminator, representation)
        _write_end_unicode(f, lineterminator)
Example #18
def tojsonarrays(table, source=None, prefix=None, suffix=None,
                 output_header=False, *args, **kwargs):
    """
    Write a table in JSON format, with rows output as JSON arrays. E.g.::

        >>> from petl import tojsonarrays, look
        >>> look(table)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+

        >>> tojsonarrays(table, 'example.json')
        >>> # check what it did
        ... with open('example.json') as f:
        ...     print f.read()
        ...
        [["a", 1], ["b", 2], ["c", 2]]

    Note that this is currently not streaming; all data is loaded into memory
    before being written to the file.

    Supports transparent writing to ``.gz`` and ``.bz2`` files.

    .. versionadded:: 0.11

    """

    encoder = JSONEncoder(*args, **kwargs)
    source = write_source_from_arg(source)
    if output_header:
        obj = list(table)
    else:
        obj = list(data(table))
    with source.open_('wb') as f:
        if prefix is not None:
            f.write(prefix)
        for chunk in encoder.iterencode(obj):
            f.write(chunk)
        if suffix is not None:
            f.write(suffix)
Example #19
def topickle(table, source=None, protocol=-1):
    """
    Write the table to a pickle file. E.g.::

        >>> from petl import topickle, look
        >>> look(table)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+

        >>> topickle(table, 'test.dat')
        >>> # look what it did
        ... from petl import frompickle
        >>> look(frompickle('test.dat'))
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+

    Note that if a file already exists at the given location, it will be
    overwritten.

    The pickle file format preserves type information, i.e., reading and writing
    is round-trippable.

    Supports transparent writing to ``.gz`` and ``.bz2`` files.

    """

    source = write_source_from_arg(source)
    with source.open_('wb') as f:
        for row in table:
            pickle.dump(row, f, protocol)
Example #20
def appendutext(table, source=None, encoding='utf-8', template=None,
                prologue=None, epilogue=None):
    """
    Append the table to a text file via the given encoding. Like
    :func:`appendtext` but accepts an additional ``encoding`` argument which
    should be one of the Python supported encodings. See also :mod:`codecs`.

    .. versionadded:: 0.19
    """

    assert template is not None, 'template is required'
    if prologue is not None:
        prologue = unicode(prologue)
    template = unicode(template)
    if epilogue is not None:
        epilogue = unicode(epilogue)
    source = write_source_from_arg(source)
    with source.open_('a') as f:
        f = codecs.getwriter(encoding)(f)
        _writetext(table, f, prologue, template, epilogue)
Example #21
def toucsv(table, source=None, dialect=csv.excel, encoding='utf-8',
           write_header=True, **kwargs):
    """
    Write the table to a CSV file via the given encoding. Like :func:`tocsv` but
    accepts an additional ``encoding`` argument which should be one of the
    Python supported encodings. See also :mod:`codecs`.

    .. versionadded:: 0.19
    """
    source = write_source_from_arg(source)
    with source.open_('wb') as f:
        writer = UnicodeWriter(f, dialect=dialect, encoding=encoding, **kwargs)
        # User specified no header
        if not write_header:
            for row in data(table):
                writer.writerow(row)
        # Default behavior, write the header
        else:
            for row in table:
                writer.writerow(row)
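A minimal usage sketch for the Python 2 era API above, calling the function defined here ('test-utf16.csv' is a hypothetical path):

table = [['foo', 'bar'],
         ['a', 1]]
# Same keyword pass-through as tocsv, plus an explicit target encoding.
toucsv(table, 'test-utf16.csv', encoding='utf-16')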
Example #22
def _iterteetext(table, source, encoding, errors, template, prologue, epilogue):

    # guard conditions
    assert template is not None, 'template is required'

    # prepare source
    source = write_source_from_arg(source)

    with source.open('wb') as buf:

        # deal with text encoding
        if PY2:
            codec = getcodec(encoding)
            f = codec.streamwriter(buf, errors=errors)
        else:
            f = io.TextIOWrapper(buf,
                                 encoding=encoding,
                                 errors=errors)

        # write the data
        try:
            if prologue is not None:
                f.write(prologue)
            it = iter(table)
            hdr = next(it)
            yield tuple(hdr)
            flds = list(map(text_type, hdr))
            for row in it:
                rec = asdict(flds, row)
                s = template.format(**rec)
                f.write(s)
                yield row
            if epilogue is not None:
                f.write(epilogue)
            f.flush()

        finally:
            if not PY2:
                f.detach()
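A sketch of the per-row formatting step above, assuming asdict(flds, row) behaves like dict(zip(flds, row)) for a fully populated row:

hdr = ('foo', 'bar')
row = ('a', 1)
rec = dict(zip(hdr, row))          # stand-in for asdict(flds, row)
template = '|-\n| {foo}\n| {bar}\n'
print(template.format(**rec))      # fields are referenced by name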
Example #23
def _write_toavro(table, target, mode, schema, sample,
                  codec='deflate', compression_level=None, **avro_args):
    if table is None:
        return
    # build a schema when not defined by user
    if not schema:
        schema, table2 = _build_schema_from_values(table, sample)
    else:
        table2 = _fix_missing_headers(table, schema)
    # fastavro expects an iterator of dicts
    rows = dicts(table2) if PY3 else _ordered_dict_iterator(table2)

    target2 = write_source_from_arg(target, mode=mode)
    with target2.open(mode) as target_file:
        # delay the import of fastavro so nothing breaks when it is unused
        from fastavro import parse_schema
        from fastavro.write import Writer

        parsed_schema = parse_schema(schema)
        writer = Writer(fo=target_file,
                        schema=parsed_schema,
                        codec=codec,
                        compression_level=compression_level,
                        **avro_args)
        num = 1
        for record in rows:
            try:
                writer.write(record)
                num = num + 1
            except ValueError as verr:
                vmsg = _get_error_details(target, num, verr, record, schema)
                _raise_error(ValueError, vmsg)
            except TypeError as terr:
                tmsg = _get_error_details(target, num, terr, record, schema)
                _raise_error(TypeError, tmsg)
        # finish writing
        writer.flush()
Example #24
def tohtml(table, source=None, caption=None, representation=str,
           lineterminator='\r\n'):
    """
    Write the table as HTML to a file. E.g.::

        >>> from petl import tohtml, look
        >>> look(table)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+

        >>> tohtml(table, 'test.html')

    .. versionadded:: 0.12

    .. versionchanged:: 0.17.1

    Added support for ``caption`` keyword argument to provide table caption
    in output.

    """

    source = write_source_from_arg(source)
    with source.open_('w') as f:
        it = iter(table)
        flds = it.next()
        _write_begin(f, flds, lineterminator, caption)
        for row in it:
            _write_row(f, row, lineterminator, representation)
        _write_end(f, lineterminator)
Example #25
def appendxlsx(tbl, filename, sheet=None, write_header=False):
    """
    Appends rows to an existing Excel .xlsx file.
    """

    import openpyxl
    source = read_source_from_arg(filename)
    with source.open('rb') as source2:
        wb = openpyxl.load_workbook(filename=source2, read_only=False)
        if sheet is None:
            ws = wb[wb.sheetnames[0]]
        elif isinstance(sheet, int):
            ws = wb[wb.sheetnames[sheet]]
        else:
            ws = wb[str(sheet)]
        if write_header:
            rows = tbl
        else:
            rows = data(tbl)
        for row in rows:
            ws.append(row)
        target = write_source_from_arg(filename)
        with target.open('wb') as target2:
            wb.save(target2)
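A minimal usage sketch, calling the function defined above ('example.xlsx' is a hypothetical existing workbook; requires the openpyxl package):

more = [['foo', 'bar'],
        ['d', 7],
        ['e', 42]]
# Appends only the data rows to the first sheet; pass write_header=True to
# write the header row as well.
appendxlsx(more, 'example.xlsx')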
Example #26
 def __iter__(self):
     source = write_source_from_arg(self.source)
     with source.open_('w') as f:
         for row in _teetext(self.table, f, self.prologue, self.template,
                             self.epilogue):
             yield row
Example #27
def tohtml(table, source=None, encoding=None, errors=None, caption=None,
           vrepr=text_type, lineterminator='\n', index_header=False,
           tr_style=None, td_styles=None, truncate=None):
    """
    Write the table as HTML to a file. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 2]]
        >>> etl.tohtml(table1, 'example.html', caption='example table')
        >>> print(open('example.html').read())
        <table class='petl'>
        <caption>example table</caption>
        <thead>
        <tr>
        <th>foo</th>
        <th>bar</th>
        </tr>
        </thead>
        <tbody>
        <tr>
        <td>a</td>
        <td style='text-align: right'>1</td>
        </tr>
        <tr>
        <td>b</td>
        <td style='text-align: right'>2</td>
        </tr>
        <tr>
        <td>c</td>
        <td style='text-align: right'>2</td>
        </tr>
        </tbody>
        </table>

    The `caption` keyword argument is used to provide a table caption
    in the output HTML.

    """

    source = write_source_from_arg(source)
    with source.open('wb') as buf:

        # deal with text encoding
        if PY2:
            codec = getcodec(encoding)
            f = codec.streamwriter(buf, errors=errors)
        else:
            f = io.TextIOWrapper(buf,
                                 encoding=encoding,
                                 errors=errors,
                                 newline='')

        # write the table
        try:
            it = iter(table)

            # write header
            hdr = next(it)
            _write_begin(f, hdr, lineterminator, caption, index_header,
                         truncate)

            # write body
            if tr_style and callable(tr_style):
                # wrap as records
                it = (Record(row, hdr) for row in it)
            for row in it:
                _write_row(f, hdr, row, lineterminator, vrepr,
                           tr_style, td_styles, truncate)

            # finish up
            _write_end(f, lineterminator)
            f.flush()

        finally:
            if not PY2:
                f.detach()
Example #28
def appendcsv(table, source=None, dialect=csv.excel, **kwargs):
    """
    Append data rows to an existing CSV file. E.g.::

        >>> # look at an existing CSV file
        ... from petl import look, fromcsv
        >>> testcsv = fromcsv('test.csv')
        >>> look(testcsv)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | '1'   |
        +-------+-------+
        | 'b'   | '2'   |
        +-------+-------+
        | 'c'   | '2'   |
        +-------+-------+

        >>> # append some data
        ... look(table)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'd'   | 7     |
        +-------+-------+
        | 'e'   | 42    |
        +-------+-------+
        | 'f'   | 12    |
        +-------+-------+

        >>> from petl import appendcsv
        >>> appendcsv(table, 'test.csv')
        >>> # look what it did
        ... look(testcsv)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | '1'   |
        +-------+-------+
        | 'b'   | '2'   |
        +-------+-------+
        | 'c'   | '2'   |
        +-------+-------+
        | 'd'   | '7'   |
        +-------+-------+
        | 'e'   | '42'  |
        +-------+-------+
        | 'f'   | '12'  |
        +-------+-------+

    The `source` argument is the path of the delimited file; all other keyword
    arguments are passed to :func:`csv.writer`. So, e.g., to override the
    delimiter from the default CSV dialect, provide the `delimiter` keyword
    argument.

    Note that no attempt is made to check that the fields or row lengths are
    consistent with the existing data; the data rows from the table are simply
    appended to the file. See also the :func:`cat` function.

    Supports transparent writing to ``.gz`` and ``.bz2`` files.

    """

    source = write_source_from_arg(source)
    with source.open_('ab') as f:
        writer = csv.writer(f, dialect=dialect, **kwargs)
        for row in data(table):
            writer.writerow(row)
Example #29
def toavro(table,
           target,
           schema=None,
           sample=9,
           codec='deflate',
           compression_level=None,
           **avro_args):
    """
    Write the table into a new Avro file according to the schema passed.

    This method assumes that each column has values of the same type for all
    rows of the source `table`.

    `Apache Avro`_ is a data serialization framework. It is widely used for
    data serialization (especially in the Hadoop ecosystem), for data exchange
    with databases (e.g. Redshift) and in RPC protocols (as in Kafka). It has
    libraries for many languages and is generally faster and safer than text
    formats like JSON, XML or CSV.

    The `target` argument is the file path for creating the avro file.
    Note that if a file already exists at the given location, it will be
    overwritten.

    The `schema` argument (dict) defines the field structure of the file's rows.
    Check fastavro `documentation`_ and Avro schema `reference`_ for details.

    The `sample` argument (int, optional) defines how many rows are inspected
    for discovering the field types and building a schema for the Avro file
    when the `schema` argument is not passed.

    The `codec` argument (string, optional) sets the compression codec used to
    shrink data in the file. It can be 'null', 'deflate' (default) or 'bzip2',
    and also 'snappy', 'zstandard', 'lz4' or 'xz' if the corresponding codec
    library is installed.

    The `compression_level` argument (int, optional) sets the level of
    compression to use with the specified codec (if the codec supports it).

    Additionally, extra options can be passed via the `**avro_args` argument;
    these are forwarded directly to fastavro. Check the fastavro
    `documentation`_ for reference.

    The Avro file format preserves type information, i.e., reading and writing
    is round-trippable for tables with non-string data values. However, the
    conversion from Python value types to Avro fields is not perfect. Use the
    `schema` argument to define the proper types for the conversion.

    The following Avro types are supported by the schema: null, boolean,
    string, int, long, float, double, bytes, fixed, enum,
    :ref:`array <array_schema>`, map, union, record, and recursive types
    defined in :ref:`complex schemas <complex_schema>`.

    Also :ref:`logical types <logical_schema>` are supported and translated to
    corresponding Python types: long timestamp-millis, long timestamp-micros,
    int date, bytes decimal, fixed decimal, string uuid, int time-millis,
    long time-micros.

    Example usage for writing files::

        >>> # set up a table and schema to demonstrate with
        >>> table2 = [['name', 'friends', 'age'],
        ...           ['Bob', 42, 33],
        ...           ['Jim', 13, 69],
        ...           ['Joe', 86, 17],
        ...           ['Ted', 23, 51]]
        ...
        >>> schema2 = {
        ...     'doc': 'Some people records.',
        ...     'name': 'People',
        ...     'namespace': 'test',
        ...     'type': 'record',
        ...     'fields': [
        ...         {'name': 'name', 'type': 'string'},
        ...         {'name': 'friends', 'type': 'int'},
        ...         {'name': 'age', 'type': 'int'},
        ...     ]
        ... }
        ...
        >>> # now demonstrate writing with toavro()
        >>> import petl as etl
        >>> etl.toavro(table2, 'example-file-to-write.avro', schema=schema2)
        ...
        >>> # this is what was saved above
        >>> tbl2 = etl.fromavro('example-file-to-write.avro')
        >>> tbl2
        +-------+---------+-----+
        | name  | friends | age |
        +=======+=========+=====+
        | 'Bob' |      42 |  33 |
        +-------+---------+-----+
        | 'Jim' |      13 |  69 |
        +-------+---------+-----+
        | 'Joe' |      86 |  17 |
        +-------+---------+-----+
        | 'Ted' |      23 |  51 |
        +-------+---------+-----+

    .. versionadded:: 1.4.0

    .. _Apache Avro: https://avro.apache.org/docs/current/spec.html
    .. _reference: https://avro.apache.org/docs/current/spec.html#schemas
    .. _documentation: https://fastavro.readthedocs.io/en/latest/writer.html

    """
    target2 = write_source_from_arg(target)
    _write_toavro(table,
                  target=target2,
                  mode='wb',
                  schema=schema,
                  sample=sample,
                  codec=codec,
                  compression_level=compression_level,
                  **avro_args)
Example #30
def appendpickle(table, source=None, protocol=-1):
    """
    Append data to an existing pickle file. E.g.::

        >>> from petl import look, frompickle
        >>> # inspect an existing pickle file
        ... testdat = frompickle('test.dat')
        >>> look(testdat)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+

        >>> # append some data
        ... from petl import appendpickle
        >>> look(table)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'd'   | 7     |
        +-------+-------+
        | 'e'   | 42    |
        +-------+-------+
        | 'f'   | 12    |
        +-------+-------+

        >>> appendpickle(table, 'test.dat')
        >>> # look what it did
        ... look(testdat)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'a'   | 1     |
        +-------+-------+
        | 'b'   | 2     |
        +-------+-------+
        | 'c'   | 2     |
        +-------+-------+
        | 'd'   | 7     |
        +-------+-------+
        | 'e'   | 42    |
        +-------+-------+
        | 'f'   | 12    |
        +-------+-------+

    Note that no attempt is made to check that the fields or row lengths are
    consistent with the existing data; the data rows from the table are simply
    appended to the file. See also the :func:`cat` function.

    Supports transparent writing to ``.gz`` and ``.bz2`` files.

    """

    source = write_source_from_arg(source)
    with source.open_('ab') as f:
        for row in data(table):
            pickle.dump(row, f, protocol)