def prepare(params):
	"""Set up one DatasetWriter per slice, chained together.

	Reads datasets.source and builds a writer for each slice; each writer's
	previous points at the one before it so the result forms a chain ending
	in the last writer. Returns (dws, names, prev_source, caption, filename).
	"""
	d = datasets.source
	# caption template can reference the source caption and the hashlabel.
	caption = options.caption % dict(caption=d.caption, hashlabel=options.hashlabel)
	prev_p = job_params(datasets.previous, default_empty=True)
	prev_source = prev_p.datasets.source
	# Only propagate the source filename if exactly one dataset is consumed.
	if len(d.chain(stop_jobid=prev_source, length=options.length)) == 1:
		filename = d.filename
	else:
		filename = None
	dws = []
	previous = datasets.previous
	for sliceno in range(params.slices):
		# In as_chain mode the last slice gets the "default" name so the
		# job's default dataset is the end of the chain.
		if options.as_chain and sliceno == params.slices - 1:
			name = "default"
		else:
			name = str(sliceno)
		dw = DatasetWriter(
			caption="%s (slice %d)" % (caption, sliceno),
			hashlabel=options.hashlabel,
			filename=filename,
			previous=previous,
			name=name,
			for_single_slice=sliceno,
		)
		# Chain the next writer onto the one we just created.
		previous = (params.jobid, name)
		dws.append(dw)
	names = []
	for n, c in d.columns.items():
		# names has to be in the same order as the add calls
		# so the iterator returns the same order the writer expects.
		names.append(n)
		for dw in dws:
			dw.add(n, c.type)
	return dws, names, prev_source, caption, filename
def synthesis(params):
	"""Test column-name mangling into valid python identifiers.

	Writes a parent dataset whose column names need cleaning (invalid
	identifiers, keywords, collisions), then a child dataset that both adds
	new colliding names and re-uses a mangled name from the parent, and
	finally verifies that iterating by original column name returns the
	right data.
	"""
	dw = DatasetWriter(name="parent")
	in_parent = [
		# list because order matters
		"-",       # becomes _ because everything must be a valid python identifier.
		"a b",     # becomes a_b because everything must be a valid python identifier.
		"42",      # becomes _42 because everything must be a valid python identifier.
		"print",   # becomes print_ because print is a keyword (in py2).
		"print@",  # becomes print__ because print_ is taken.
		"None",    # becomes None_ because None is a keyword (in py3).
	]
	for colname in in_parent:
		dw.add(colname, "unicode")
	w = dw.get_split_write()
	# print__ is written here but the child dataset overrides that column.
	w(_="- 1", a_b="a b 1", _42="42 1", print_="print 1", None_="None 1", print__="Will be overwritten 1")
	w(_="- 2", a_b="a b 2", _42="42 2", print_="print 2", None_="None 2", print__="Will be overwritten 2")
	parent = dw.finish()
	dw = DatasetWriter(name="child", parent=parent)
	in_child = [
		# order still matters
		"print_*",  # becomes print___ because print__ is taken.
		"print_",   # becomes print____ because all shorter are taken.
		"normal",   # no collision.
		"Normal",   # no collision.
		"print@",   # re-uses print__ from the parent dataset.
	]
	for colname in in_child:
		dw.add(colname, "unicode")
	w = dw.get_split_write()
	w(print__="print@ 1", print___="print_* 1", print____="print_ 1", normal="normal 1", Normal="Normal 1")
	w(print__="print@ 2", print___="print_* 2", print____="print_ 2", normal="normal 2", Normal="Normal 2")
	child = dw.finish()
	# Every column (parent and child) must be reachable by its original
	# (unmangled) name and contain exactly the two rows written for it.
	for colname in in_parent + in_child:
		data = set(child.iterate(None, colname))
		assert data == {colname + " 1", colname + " 2"}, "Bad data for %s: %r" % (colname, data)
def synthesis(prepare_res, params): dw_passed, _ = prepare_res # Using set_slice on a dataset that was written in analysis is not # actually supported, but since it currently works (as long as that # particular slice wasn't written in analysis) let's test it. dw_passed.set_slice(0) dw_passed.write(**{k: v[0] for k, v in test_data.data.items()}) dw_synthesis_split = DatasetWriter(name="synthesis_split", hashlabel="a") dw_synthesis_split.add("a", "int32") dw_synthesis_split.add("b", "unicode") dw_synthesis_split.get_split_write()(1, "a") dw_synthesis_split.get_split_write_list()([2, "b"]) dw_synthesis_split.get_split_write_dict()({"a": 3, "b": "c"}) dw_synthesis_manual = DatasetWriter(name="synthesis_manual", columns={"sliceno": "int32"}) dw_nonetest = DatasetWriter(name="nonetest", columns={t: t for t in test_data.data}) for sliceno in range(params.slices): dw_synthesis_manual.set_slice(sliceno) dw_synthesis_manual.write(sliceno) dw_nonetest.set_slice(sliceno) dw_nonetest.write( **{ k: v[0] if k in test_data.not_none_capable else None for k, v in test_data.data.items() })
def prepare(params):
	"""Create the writers used by the rest of this test method.

	Builds the default (unnamed) dataset incrementally, a "named" dataset
	with columns given up front, and a "passed" dataset with one column per
	test data type. Returns the passed writer plus a marker value so the
	prepare_res unpacking is exercised too.
	"""
	# Need at least one slice per test value.
	assert params.slices >= test_data.value_cnt
	default_writer = DatasetWriter()
	default_writer.add("a", "number")
	default_writer.add("b", "ascii")
	# Created for its side effect only; nothing here writes to it.
	DatasetWriter(name="named", columns={"c": "bool", "d": "date"})
	passed_writer = DatasetWriter(name="passed", columns={t: t for t in test_data.data})
	return passed_writer, 42
def synthesis(jobid):
	"""Test dataset chaining, link_to_here overrides and chain caching.

	First re-links an a/b/f subset of the selfchain datasets locally and
	verifies it iterates the same as the manual list, then builds a chain
	long enough to get a chain cache and verifies that link_to_here with
	override_previous keeps/drops the cache and data correctly.
	"""
	manual_chain = [Dataset(jobids.selfchain, name) for name in "abcdefgh"]
	manual_abf = [manual_chain[0], manual_chain[1], manual_chain[5]]
	# build a local abf chain
	prev = None
	for ix, ds in enumerate(manual_abf):
		name = "abf%d" % (ix,)
		ds.link_to_here(name, override_previous=prev)
		prev = (jobid, name,)
	manual_abf_data = list(Dataset.iterate_list(None, None, manual_abf))
	local_abf_data = list(Dataset(jobid, "abf2").iterate_chain(None, None))
	assert manual_abf_data == local_abf_data
	# disconnect h, verify there is no chain
	manual_chain[-1].link_to_here("alone", override_previous=None)
	assert len(Dataset(jobid, "alone").chain()) == 1
	# check that the original chain is unhurt
	assert manual_chain == manual_chain[-1].chain()

	# So far so good, now make a chain long enough to have a cache.
	prev = None
	ix = 0
	going = True
	while going:
		# Stop one iteration after the first cached dataset appears,
		# so the final chain goes one past the first cache point.
		if prev and "cache" in Dataset(prev)._data:
			going = False
		name = "longchain%d" % (ix,)
		dw = DatasetWriter(name=name, previous=prev)
		dw.add("ix", "number")
		dw.get_split_write()(ix)
		dw.finish()
		prev = (jobid, name,)
		ix += 1
	# we now have a chain that goes one past the first cache point
	full_chain = Dataset(prev).chain()
	assert "cache" in full_chain[-2]._data  # just to check the above logic is correct
	assert "cache" not in full_chain[-1]._data  # just to be sure..
	# Re-linking without a previous must drop the cache; re-linking with a
	# (cached) previous must keep one.
	full_chain[-2].link_to_here("nocache", override_previous=None)
	full_chain[-1].link_to_here("withcache", override_previous=full_chain[-3])
	assert "cache" not in Dataset(jobid, "nocache")._data
	assert "cache" in Dataset(jobid, "withcache")._data
	# And make sure they both get the right data too.
	assert list(Dataset(prev).iterate_chain(None, "ix")) == list(range(ix))
	assert list(Dataset(jobid, "nocache").iterate_chain(None, "ix")) == [ix - 2]
	assert list(Dataset(jobid, "withcache").iterate_chain(None, "ix")) == list(range(ix - 2)) + [ix - 1]
def prepare(params):
	"""Build eight chained writers named "a" through "h".

	Each writer has an ascii "ds" column and a number "num" column, and
	each one's previous points at the dataset before it in the chain.
	Returns a dict mapping name -> writer.
	"""
	writers = {}
	previous = None
	for ds_name in "abcdefgh":
		writer = DatasetWriter(name=ds_name, previous=previous)
		writer.add("ds", "ascii")
		writer.add("num", "number")
		writers[ds_name] = writer
		# The next writer chains onto this dataset ("jobid/name").
		previous = "%s/%s" % (params.jobid, ds_name,)
	return writers
def prepare():
	"""Create the example dataset writer and declare its columns."""
	from dataset import DatasetWriter
	# previous allows chaining this method, should you wish to do so
	dw = DatasetWriter(previous=datasets.previous)
	# Column declarations, in write order.
	for colname, coltype in (
		('a string', 'ascii'),      # ascii is not "any string", use 'unicode' for that
		('large number', 'number'), # number is any (real) number, a float or int of any size
		('small number', 'number'),
		('small integer', 'int32'), # int32 is a signed 32 bit number
		('gauss number', 'number'),
		('gauss float', 'float64'), # float64 is what many other languages call double
	):
		dw.add(colname, coltype)
	return dw
def prepare(params):
	"""Create one writer per hashing scenario under test.

	Each entry pairs a dataset name with the hashlabel it uses (or None
	for the unhashed variants); all writers get identical up/down int32
	columns. Returns a DotDict of name -> writer.
	"""
	assert params.slices >= 2, "Hashing won't do anything with just one slice"
	writers = DotDict()
	scenarios = (
		("unhashed_manual", None),           # manually interleaved
		("unhashed_split", None),            # split_write interleaved
		("up_checked", "up"),                # hashed on up using dw.hashcheck
		("up_split", "up"),                  # hashed on up using split_write
		("down_checked", "down"),            # hashed on down using dw.hashcheck
		("down_discarded", "down"),          # hashed on down using discarding writes
		("down_discarded_list", "down"),     # hashed on down using discarding list writes
		("down_discarded_dict", "down"),     # hashed on down using discarding dict writes
	)
	for ds_name, hash_col in scenarios:
		writer = DatasetWriter(name=ds_name, hashlabel=hash_col)
		writer.add("up", "int32")
		writer.add("down", "int32")
		writers[ds_name] = writer
	return writers
def prepare():
	"""Create the default dataset writer with str/num columns."""
	writer = DatasetWriter()
	# Declare columns in write order.
	for colname, coltype in (("str", "ascii"), ("num", "number")):
		writer.add(colname, coltype)
	return writer
def prepare():
	"""Create a writer that extends datasets.source with a prod column."""
	writer = DatasetWriter(parent=datasets.source)
	# works for float as well as int
	writer.add('prod', 'number')
	return writer
def prepare():
	"""Create a chained writer with random float/int columns."""
	writer = DatasetWriter(previous=datasets.previous)
	# Declare columns in write order.
	for colname, coltype in (('rflt', 'float64'), ('rint', 'int64')):
		writer.add(colname, coltype)
	return writer