Ejemplo n.º 1
0
def synthesis(params):
    """Run the hashing, chaining and as_chain checks through ``verify``.

    Builds datasets from the module-level ``data`` and ``bonus_data`` (via
    ``write`` and ``DatasetWriter``), hands them to ``verify`` with various
    options, and asserts on the chains of the datasets ``verify`` returns.

    Args:
        params: Job parameters; only ``params.slices`` is read here.

    Raises:
        AssertionError: If a returned dataset's chain does not have the
            expected contents or length.
    """
    slice_count = params.slices

    # Try every key yielded by iterating the first row as the hashlabel.
    base_ds = write(data)
    for label in data[0]:
        verify(slice_count, data, base_ds, hashlabel=label)

    # ok, all the hashing stuff works out, let's test the chaining options.
    bonus_ds = write(bonus_data, name="bonus", previous=base_ds)

    # no chaining options - full chain
    verify(slice_count, data + bonus_data, bonus_ds, hashlabel="date")

    # just the bonus ds
    verify(slice_count, bonus_data, bonus_ds, hashlabel="date", length=1)

    # built as a chain
    verify(
        slice_count,
        data + bonus_data,
        bonus_ds,
        hashlabel="date",
        as_chain=True,
    )

    # normal chaining
    chain_head = verify(slice_count, data, base_ds, hashlabel="date")
    chain_tail = verify(
        slice_count,
        data + bonus_data,
        bonus_ds,
        hashlabel="date",
        previous=chain_head,
    )
    expected_chain = [chain_head, chain_tail]
    assert chain_tail.chain() == expected_chain, (
        "chain of %s is not [%s, %s] as expected"
        % (chain_tail, chain_head, chain_tail)
    )

    # as_chain sparseness
    empty_dw = DatasetWriter(columns=columns, name="empty")
    empty_dw.get_split_write()
    empty_ds = verify(
        slice_count,
        [],
        empty_dw.finish(),
        hashlabel="date",
        as_chain=True,
    )
    assert len(empty_ds.chain()) == 1, (
        empty_ds + ": dataset_hashpart on empty dataset with as_chain=True did not produce a single dataset"
    )

    # two populated slices with the same data, should end up in two datasets.
    first_row = data[0]
    sparse_dw = DatasetWriter(columns=columns, name="0 and 2")
    sparse_dw.set_slice(0)
    sparse_dw.write_dict(first_row)
    sparse_dw.set_slice(1)  # selected but deliberately left without rows
    sparse_dw.set_slice(2)
    sparse_dw.write_dict(first_row)
    # The remaining slices are selected in turn but nothing is written to them.
    for unwritten in range(3, slice_count):
        sparse_dw.set_slice(unwritten)
    sparse_ds = verify(
        slice_count,
        [first_row],
        sparse_dw.finish(),
        hashlabel="date",
        as_chain=True,
    )
    chain_len = len(sparse_ds.chain())
    assert chain_len == 2, (
        "%s (built with as_chain=True) has %d datasets in chain, expected 2."
        % (sparse_ds, chain_len)
    )
def analysis(sliceno, prepare_res, job):
    """Write this slice's rows to three dataset writers.

    Two rows go to the job's default writer, two to a writer named
    ``"named"``, and (for some slices) one row taken from ``test_data`` goes
    to the writer received through ``prepare_res``.

    Args:
        sliceno: Number of the slice being processed.
        prepare_res: Pair of ``(writer, number)``; presumably the value
            returned by the prepare step (not visible here).
        job: Current job object; supplies the default dataset writer.
    """
    default_writer = job.datasetwriter()
    named_writer = DatasetWriter(name="named")
    passed_writer, number = prepare_res

    default_writer.write(a=sliceno, b="a")
    default_writer.write_list([number, str(sliceno)])

    # Clamp the day of month so high slice numbers still give a valid date.
    day = min(sliceno + 1, 31)
    named_writer.write(True, date(1536, 12, day))
    named_writer.write_dict({"c": False, "d": date(2236, 5, day)})

    # slice 0 is written in synthesis
    if sliceno <= 0 or sliceno >= test_data.value_cnt:
        return
    row = {}
    for key, values in test_data.data.items():
        row[key] = values[sliceno]
    passed_writer.write_dict(row)