def synthesis(prepare_res, params):
    """Finish slice 0 of the passed dataset and create three more datasets."""
    dw_passed, _ = prepare_res
    # set_slice on a dataset that was (partially) written in analysis is
    # not officially supported, but it happens to work as long as this
    # particular slice wasn't written there - so exercise that here.
    dw_passed.set_slice(0)
    dw_passed.write(**{k: v[0] for k, v in test_data.data.items()})
    # A hashed dataset written through the three split_write variants.
    split_writer = DatasetWriter(name="synthesis_split", hashlabel="a")
    split_writer.add("a", "int32")
    split_writer.add("b", "unicode")
    split_writer.get_split_write()(1, "a")
    split_writer.get_split_write_list()([2, "b"])
    split_writer.get_split_write_dict()({"a": 3, "b": "c"})
    manual_writer = DatasetWriter(name="synthesis_manual",
                                  columns={"sliceno": "int32"})
    none_writer = DatasetWriter(name="nonetest",
                                columns={t: t for t in test_data.data})
    # One row per slice; None where the column can't hold a real value.
    none_row = {
        k: v[0] if k in test_data.not_none_capable else None
        for k, v in test_data.data.items()
    }
    for sliceno in range(params.slices):
        manual_writer.set_slice(sliceno)
        manual_writer.write(sliceno)
        none_writer.set_slice(sliceno)
        none_writer.write(**none_row)
def analysis(sliceno, prepare_res):
    """Write one slice of the default, named and passed datasets."""
    dw_passed, num = prepare_res
    default_writer = DatasetWriter()
    named_writer = DatasetWriter(name="named")
    default_writer.write(a=sliceno, b="a")
    default_writer.write_list([num, str(sliceno)])
    # Clamp so the day of month stays valid for any slice count.
    day = min(sliceno + 1, 31)
    named_writer.write(True, date(1536, 12, day))
    named_writer.write_dict({"c": False, "d": date(2236, 5, day)})
    # slice 0 is written in synthesis
    if 0 < sliceno < test_data.value_cnt:
        dw_passed.write_dict({k: v[sliceno] for k, v in test_data.data.items()})
def _verify(name, types, data, coltype, want, default, want_fail, kw):
    """Write *data* as *coltype*, retype it as each of *types* through a
    dataset_type subjob, and verify the result.

    want may be a callable (used directly as the check function), a list
    (same expectation for every type) or a mapping keyed by type name.
    If *default* is not the no_default sentinel it is passed as the
    typing default for the 'data' column.  If *want_fail* is true, every
    typing job is expected to fail with JobError.
    """
    if callable(want):
        check = want
    else:

        def check(got, fromstr, filtered=False):
            # NOTE: typ is late-bound from the "for typ in types" loop
            # below, so this picks the expectation for the current type.
            want1 = want if isinstance(want, list) else want[typ]
            if filtered:
                # filter_bad drops every other line (the b'skip' ones),
                # so only the even-indexed expectations remain.
                want1 = want1[::2]
            assert got == want1, 'Expected %r, got %r from %s.' % (
                want1,
                got,
                fromstr,
            )

    # All data goes into slice 0; 'extra' alternates b'1'/b'skip' so it
    # can later be typed as int32_10 to exercise filter_bad.
    dw = DatasetWriter(name=name, columns={'data': coltype, 'extra': 'bytes'})
    dw.set_slice(0)
    for ix, v in enumerate(data):
        dw.write(v, b'1' if ix % 2 == 0 else b'skip')
    # The remaining slices are set (and left empty) so finish() works.
    for sliceno in range(1, g.SLICES):
        dw.set_slice(sliceno)
    bytes_ds = dw.finish()
    for typ in types:
        opts = dict(column2type=dict(data=typ))
        opts.update(kw)
        if default is not no_default:
            opts['defaults'] = {'data': default}
        try:
            jid = subjobs.build('dataset_type',
                                datasets=dict(source=bytes_ds),
                                options=opts)
        except JobError:
            if want_fail:
                continue
            raise Exception('Typing %r as %s failed.' % (
                bytes_ds,
                typ,
            ))
        assert not want_fail, "Typing %r as %s should have failed, but didn't (%s)." % (
            bytes_ds, typ, jid)
        typed_ds = Dataset(jid)
        got = list(typed_ds.iterate(0, 'data'))
        check(got, '%s (typed as %s from %r)' % (
            typed_ds,
            typ,
            bytes_ds,
        ))
        # Re-run with filter_bad=True: 'extra' typed as int32_10 makes
        # the b'skip' lines bad, so every other line is dropped.
        # (Skipped when the caller supplied filter_bad or a custom check.)
        if 'filter_bad' not in opts and not callable(want):
            opts['filter_bad'] = True
            opts['column2type']['extra'] = 'int32_10'
            jid = subjobs.build('dataset_type',
                                datasets=dict(source=bytes_ds),
                                options=opts)
            typed_ds = Dataset(jid)
            got = list(typed_ds.iterate(0, 'data'))
            check(
                got,
                '%s (typed as %s from %r with every other line skipped from filter_bad)'
                % (
                    typed_ds,
                    typ,
                    bytes_ds,
                ), True)
        used_type(typ)
def test_filter_bad_across_types():
    """Verify that filter_bad works when several typed columns can each
    mark lines as bad, and that defaults can rescue some of those lines.

    Runs dataset_type twice over the same source: first with only
    filter_bad=True, then with defaults for number:int, float64 and json
    so three previously-bad lines become good.
    """
    columns = {
        'bytes': 'bytes',
        'float64': 'bytes',
        'int32_10': 'ascii',
        'json': 'unicode',
        'number:int': 'unicode',
        'unicode:utf-8': 'bytes',
    }
    # Make sure all those types (except bytes) can filter other lines,
    # and be filtered by other lines. And that several filtering values
    # is not a problem (line 11).
    # Each row is (all_good, bytes, float64, int32_10, json, number:int,
    # unicode:utf-8).
    data = [
        (True, b'first', b'1.1', '1', '"a"', '001', b'ett'),
        (True, b'second', b'2.2', '2', '"b"', '02', b'tv\xc3\xa5'),
        (True, b'third', b'3.3', '3', '["c"]', '3.0', b'tre'),
        (False, b'fourth', b'4.4', '4', '"d"', '4.4', b'fyra'),  # number:int bad
        (False, b'fifth', b'5.5', '-', '"e"', '5', b'fem'),  # int32_10 bad
        (False, b'sixth', b'6.b', '6', '"f"', '6', b'sex'),  # float64 bad
        # A list, not a tuple: element [4] is replaced for the second lap.
        [False, b'seventh', b'7.7', '7', '{"g"}', '7', b'sju'],  # json bad
        (False, b'eigth', b'8.8', '8', '"h"', '8', b'\xa5\xc3tta'),  # unicode:utf-8 bad
        (True, b'ninth', b'9.9', '9', '"i"', '9', b'nio'),
        (True, b'tenth', b'10', '10', '"j"', '10', b'tio'),
        (False, b'eleventh', b'11a', '1-', '"k",', '1,', b'elva'),  # float64, int32_10 and number:int bad
        (True, b'twelfth', b'12', '12', '"l"', '12', b'tolv'),
    ]
    dw = DatasetWriter(name="filter bad across types", columns=columns)
    dw.set_slice(0)
    want = []

    def add_want(v):
        # Expected (typed) values for the columns iterated over below.
        want.append((
            int(v[3]),
            v[1],
            json.loads(v[4]),
            v[6].decode('utf-8'),
        ))

    for v in data:
        if v[0]:
            add_want(v)
        dw.write(*v[1:])
    # The remaining slices are set (and left empty) so finish() works.
    for sliceno in range(1, g.SLICES):
        dw.set_slice(sliceno)
    source_ds = dw.finish()
    # Once with just filter_bad, once with some defaults too.
    defaults = {}
    for _ in range(2):
        jid = subjobs.build(
            'dataset_type',
            datasets=dict(source=source_ds),
            options=dict(column2type={t: t
                                      for t in columns},
                         filter_bad=True,
                         defaults=defaults),
        )
        typed_ds = Dataset(jid)
        got = list(
            typed_ds.iterate(0,
                             ['int32_10', 'bytes', 'json', 'unicode:utf-8']))
        assert got == want, "Expected %r, got %r from %s (from %r%s)" % (
            want, got, typed_ds, source_ds,
            ' with defaults' if defaults else '')
        # make more lines "ok" for the second lap
        defaults = {'number:int': '0', 'float64': '0', 'json': '"replacement"'}
        add_want(data[3])
        add_want(data[5])
        data[6][4] = '"replacement"'
        add_want(data[6])
        want.sort()  # adding them out of order, int32_10 sorts correctly.