def synthesis(params):
    """Verify DatasetWriter's column name sanitising, in both a parent
    dataset and a child dataset that inherits (and overrides) columns.

    Each raw column name must be mangled into a unique valid python
    identifier; the expected mangled names are used as keyword arguments
    to the split writer and then checked by iterating the child.
    """
    dw = DatasetWriter(name="parent")
    in_parent = [
        # list because order matters
        "-",       # becomes _ because everything must be a valid python identifier.
        "a b",     # becomes a_b because everything must be a valid python identifier.
        "42",      # becomes _42 because everything must be a valid python identifier.
        "print",   # becomes print_ because print is a keyword (in py2).
        "print@",  # becomes print__ because print_ is taken.
        "None",    # becomes None_ because None is a keyword (in py3).
    ]
    for colname in in_parent:
        dw.add(colname, "unicode")
    w = dw.get_split_write()
    w(_="- 1", a_b="a b 1", _42="42 1", print_="print 1", None_="None 1", print__="Will be overwritten 1")
    w(_="- 2", a_b="a b 2", _42="42 2", print_="print 2", None_="None 2", print__="Will be overwritten 2")
    parent = dw.finish()
    dw = DatasetWriter(name="child", parent=parent)
    in_child = [
        # order still matters
        "print_*",  # becomes print___ because print__ is taken.
        "print_",   # becomes print____ because all shorter are taken.
        "normal",   # no collision.
        "Normal",   # no collision.
        "print@",   # re-uses print__ from the parent dataset.
    ]
    for colname in in_child:
        dw.add(colname, "unicode")
    w = dw.get_split_write()
    w(print__="print@ 1", print___="print_* 1", print____="print_ 1", normal="normal 1", Normal="Normal 1")
    w(print__="print@ 2", print___="print_* 2", print____="print_ 2", normal="normal 2", Normal="Normal 2")
    child = dw.finish()
    # Every raw column name (parent and child) must yield exactly the two
    # values written for it; the child's print@ overrides the parent's.
    for colname in in_parent + in_child:
        data = set(child.iterate(None, colname))
        assert data == {colname + " 1", colname + " 2"}, "Bad data for %s: %r" % (colname, data)
def synthesis(prepare_res, params):
    """Exercise the various DatasetWriter write styles from synthesis:

    - writing a slice of a dataset passed from prepare,
    - the three get_split_write* variants on a hashed dataset,
    - manual per-slice writing with set_slice, including None values.
    """
    dw_passed, _ = prepare_res
    # Using set_slice on a dataset that was written in analysis is not
    # actually supported, but since it currently works (as long as that
    # particular slice wasn't written in analysis) let's test it.
    dw_passed.set_slice(0)
    dw_passed.write(**{k: v[0] for k, v in test_data.data.items()})
    dw_synthesis_split = DatasetWriter(name="synthesis_split", hashlabel="a")
    dw_synthesis_split.add("a", "int32")
    dw_synthesis_split.add("b", "unicode")
    # One row through each of the three split-write call conventions.
    dw_synthesis_split.get_split_write()(1, "a")
    dw_synthesis_split.get_split_write_list()([2, "b"])
    dw_synthesis_split.get_split_write_dict()({"a": 3, "b": "c"})
    dw_synthesis_manual = DatasetWriter(name="synthesis_manual", columns={"sliceno": "int32"})
    dw_nonetest = DatasetWriter(name="nonetest", columns={t: t for t in test_data.data})
    for sliceno in range(params.slices):
        dw_synthesis_manual.set_slice(sliceno)
        dw_synthesis_manual.write(sliceno)
        dw_nonetest.set_slice(sliceno)
        # Types that can not hold None get a real value, the rest get None.
        dw_nonetest.write(**{
            k: v[0] if k in test_data.not_none_capable else None
            for k, v in test_data.data.items()
        })
def synthesis(jobid):
    """Test dataset chaining via link_to_here / override_previous, and
    that chain caches survive (or are dropped) correctly when relinking.
    """
    manual_chain = [Dataset(jobids.selfchain, name) for name in "abcdefgh"]
    manual_abf = [manual_chain[0], manual_chain[1], manual_chain[5]]
    # build a local abf chain
    prev = None
    for ix, ds in enumerate(manual_abf):
        name = "abf%d" % (ix,)
        ds.link_to_here(name, override_previous=prev)
        prev = (jobid, name)
    manual_abf_data = list(Dataset.iterate_list(None, None, manual_abf))
    local_abf_data = list(Dataset(jobid, "abf2").iterate_chain(None, None))
    assert manual_abf_data == local_abf_data
    # disconnect h, verify there is no chain
    manual_chain[-1].link_to_here("alone", override_previous=None)
    assert len(Dataset(jobid, "alone").chain()) == 1
    # check that the original chain is unhurt
    assert manual_chain == manual_chain[-1].chain()

    # So far so good, now make a chain long enough to have a cache.
    prev = None
    ix = 0
    going = True
    while going:
        # Stop one dataset after the first one that has a cache.
        if prev and "cache" in Dataset(prev)._data:
            going = False
        name = "longchain%d" % (ix,)
        dw = DatasetWriter(name=name, previous=prev)
        dw.add("ix", "number")
        dw.get_split_write()(ix)
        dw.finish()
        prev = (jobid, name)
        ix += 1
    # we now have a chain that goes one past the first cache point
    full_chain = Dataset(prev).chain()
    assert "cache" in full_chain[-2]._data  # just to check the above logic is correct
    assert "cache" not in full_chain[-1]._data  # just to be sure..
    # Relinking the cached dataset with no previous must drop the cache;
    # relinking the uncached one onto a cached chain must produce one.
    full_chain[-2].link_to_here("nocache", override_previous=None)
    full_chain[-1].link_to_here("withcache", override_previous=full_chain[-3])
    assert "cache" not in Dataset(jobid, "nocache")._data
    assert "cache" in Dataset(jobid, "withcache")._data
    # And make sure they both get the right data too.
    assert list(Dataset(prev).iterate_chain(None, "ix")) == list(range(ix))
    assert list(Dataset(jobid, "nocache").iterate_chain(None, "ix")) == [ix - 2]
    assert list(Dataset(jobid, "withcache").iterate_chain(None, "ix")) == list(range(ix - 2)) + [ix - 1]