Beispiel #1
0
def fromdatamore(value, fillables, generator=None, pointer_fromequal=False):
    """Fill ``value`` into an existing set of ``fillables`` and return them.

    Args:
        value: the data to fill.
        fillables: mapping of fillables to append to; the same object is
            returned.
        generator: a Generator, or an object exposing ``.generator()``.
            When None, it is inferred from ``value`` with
            ``oamap.inference.fromdata``.
        pointer_fromequal: forwarded unchanged to ``_fromdata_finish``.

    Raises:
        TypeError: if the fillables already have a nonzero forefront and
            the generator is not a ListGenerator.
    """
    if generator is None:
        generator = oamap.inference.fromdata(value).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    # Containers populated in place by _fromdata_initialize.
    pointer_list = []
    pointerobj_names = []
    targetid_names = []
    leaf_first = []
    position_lookup = {}

    _fromdata_initialize(generator, generator, fillables, pointer_list, pointerobj_names, targetid_names, leaf_first, position_lookup)

    # Per-key working state, built only after the key lists are known.
    pointerobjs = {key: [] for key in pointerobj_names}
    targetids = {key: {} for key in targetid_names}
    pointerat = {}

    previously_filled = _fromdata_forefront(generator, fillables, pointerobjs) != 0
    if previously_filled and not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists can only be filled from data once")

    _fromdata_fill(value, generator, fillables, targetids, pointerobjs, (), pointerat)
    _fromdata_finish(fillables, pointer_list, pointerobjs, targetids, pointerat, pointer_fromequal, leaf_first)

    return fillables
Beispiel #2
0
def fromdata(value, generator=None, pointer_fromequal=False):
    """Fill ``value`` into freshly created array fillables and convert them with ``toarrays``.

    Args:
        value: the data to fill.
        generator: a Generator, or an object exposing ``.generator()``.
            When None, it is inferred from ``value`` with
            ``oamap.inference.fromdata``.
        pointer_fromequal: forwarded unchanged to ``fromdatamore``.

    Returns:
        Whatever ``toarrays`` returns for the filled fillables.
    """
    if generator is None:
        generator = oamap.inference.fromdata(value).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    # The generator is resolved here because the fillables are built from it
    # before fromdatamore is called.
    empty = oamap.fillable.arrays(generator)
    filled = fromdatamore(value, empty, generator=generator, pointer_fromequal=pointer_fromequal)
    return toarrays(filled)
Beispiel #3
0
def arrays(generator, chunksize=8192):
    """Build a dict of ``FillableArray`` objects for ``generator``.

    Args:
        generator: a Generator, or an object exposing ``.generator()``.
        chunksize: passed to every ``FillableArray`` as ``chunksize``.

    Returns:
        The dict populated by ``_makefillables``.
    """
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    def makearray(name, dtype):
        # The name is part of the factory signature but is not needed here.
        return FillableArray(dtype, chunksize=chunksize)

    out = {}
    _makefillables(generator, out, makearray)
    return out
Beispiel #4
0
def numpyfiles(generator, directory, chunksize=8192, lendigits=16):
    """Build a dict of ``FillableNumpyFile`` objects stored under ``directory``.

    Args:
        generator: a Generator, or an object exposing ``.generator()``.
        directory: directory for the files; created (one level only) if it
            does not exist yet.
        chunksize: passed to every ``FillableNumpyFile`` as ``chunksize``.
        lendigits: passed to every ``FillableNumpyFile`` as ``lendigits``.

    Returns:
        The dict populated by ``_makefillables``.

    Raises:
        OSError: if ``directory`` cannot be created and is not already a
            directory (for example, a regular file occupies the path).
    """
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    # Try to create the directory instead of checking first: this closes the
    # window in which another process could create it between the check and
    # the mkdir, and it rejects a path that exists but is not a directory.
    try:
        os.mkdir(directory)
    except OSError:
        if not os.path.isdir(directory):
            raise

    fillables = {}
    _makefillables(
        generator, fillables,
        lambda name, dtype: FillableNumpyFile(os.path.join(directory, name),
                                              dtype,
                                              chunksize=chunksize,
                                              lendigits=lendigits))
    return fillables
Beispiel #5
0
def fromiterdata(values, generator=None, limit=lambda entries, arrayitems, arraybytes: True, pointer_fromequal=False):
    """Fill list entries from an iterable, yielding the result in partitions.

    Each item of ``values`` is first filled prospectively. ``limit`` is then
    asked whether the item may stay in the current partition. If not, the
    current partition is finished and yielded, and the item is filled again
    into a fresh set of fillables.

    Args:
        values: iterable of list entries.
            NOTE(review): when ``generator`` is None, ``values`` is handed to
            ``oamap.inference.fromdata`` before it is iterated here; a
            one-shot iterator may already be consumed by then -- confirm.
        generator: a Generator, or an object exposing ``.generator()``.
            When None, it is inferred from ``values``. It must resolve to a
            ListGenerator.
        limit: callable ``limit(entries, arrayitems, arraybytes)``. A truthy
            result accepts the prospective entry into the current partition;
            a falsy result closes the partition. ``entries`` is the entry
            count the partition would have with the prospective entry
            included. ``arrayitems`` maps each fillable name to its
            prospective item count, and ``arraybytes`` maps it to that count
            times the dtype itemsize. The default accepts everything, so all
            values land in one partition. (The previous default returned
            False, which closed a partition on every value and started with
            an empty one.)
        pointer_fromequal: forwarded unchanged to ``_fromdata_finish``.

    Yields:
        ``(number_of_entries, toarrays(fillables))`` for each partition. A
        final partition is always yielded, even when it holds no entries.

    Raises:
        TypeError: if the generator is not a ListGenerator. Because this is
            a generator function, it is raised on first iteration.
    """
    if generator is None:
        generator = oamap.inference.fromdata(values).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()
    if not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists cannot be filled iteratively")

    # starting set of fillables
    fillables = oamap.fillable.arrays(generator)
    # bytes per item for each fillable; computed once and reused for every partition
    factor = dict((n, x.dtype.itemsize) for n, x in fillables.items())

    pointers = []
    pointerobjs_keys = []
    targetids_keys = []
    fillables_leaf_to_root = []
    positions_to_pointerobjs = {}

    _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

    pointerat = {}
    targetids = dict((x, {}) for x in targetids_keys)
    pointerobjs = dict((x, []) for x in pointerobjs_keys)

    # start/stop delimit the entries accepted into the current partition
    start = stop = _fromdata_forefront(generator.content, fillables, pointerobjs)

    for value in values:
        # prospectively fill a value
        _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)

        # sizes handed to limit come from the forefront (_potential_ size), not len (_accepted_ size)
        arrayitems = {}
        arraybytes = {}
        for n, x in fillables.items():
            if n in positions_to_pointerobjs:
                arrayitems[n] = len(pointerobjs[positions_to_pointerobjs[n]])
            else:
                arrayitems[n] = x.forefront()
            arraybytes[n] = arrayitems[n]*factor[n]

        if not limit((stop - start) + 1, arrayitems, arraybytes):
            # limit rejected this entry: close the partition without counting it
            fillables[generator.starts].append(start)
            fillables[generator.stops].append(stop)
            _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)
            # yield the finished partition
            yield stop - start, toarrays(fillables)

            # and make a new set of fillables (along with everything that depends on it)
            fillables = oamap.fillable.arrays(generator)

            pointers = []
            pointerobjs_keys = []
            targetids_keys = []
            fillables_leaf_to_root = []
            positions_to_pointerobjs = {}

            _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

            pointerat = {}
            targetids = dict((x, {}) for x in targetids_keys)
            pointerobjs = dict((x, []) for x in pointerobjs_keys)

            start = stop = _fromdata_forefront(generator.content, fillables, pointerobjs)

            # really fill it in this new partition; the entry that opens a
            # partition is accepted without consulting limit again
            _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)
            stop += 1
            for fillable in fillables_leaf_to_root:
                fillable.update()

        else:
            # else accept the data into the fillables and move on
            stop += 1
            for fillable in fillables_leaf_to_root:
                fillable.update()

    # always yield at the end
    fillables[generator.starts].append(start)
    fillables[generator.stops].append(stop)
    _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)
    yield (stop - start), toarrays(fillables)