def fromdatamore(value, fillables, generator=None, pointer_fromequal=False):
    """Fill an existing set of fillables with ``value`` and return them.

    Parameters:
        value: the data handed to ``_fromdata_fill``.
        fillables: the collection that is filled in place; the same object
            is returned.
        generator: when ``None`` it is obtained from
            ``oamap.inference.fromdata(value).generator()``; anything that is
            not an ``oamap.generator.Generator`` is converted by calling its
            ``generator()`` method.
        pointer_fromequal: forwarded unchanged to ``_fromdata_finish``.

    Raises:
        TypeError: if ``_fromdata_forefront`` reports a nonzero value and the
            generator is not an ``oamap.generator.ListGenerator``.
    """
    if generator is None:
        inferred = oamap.inference.fromdata(value)
        generator = inferred.generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    # Containers populated in place by _fromdata_initialize.
    pointer_list = []
    pointerobj_keys = []
    targetid_keys = []
    leaf_to_root = []
    positions_lookup = {}
    _fromdata_initialize(generator, generator, fillables, pointer_list, pointerobj_keys, targetid_keys, leaf_to_root, positions_lookup)

    # One fresh container per key collected during initialization.
    targetids = {}
    for key in targetid_keys:
        targetids[key] = {}
    pointerobjs = {}
    for key in pointerobj_keys:
        pointerobjs[key] = []
    pointerat = {}

    has_content = _fromdata_forefront(generator, fillables, pointerobjs) != 0
    if has_content and not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists can only be filled from data once")

    _fromdata_fill(value, generator, fillables, targetids, pointerobjs, (), pointerat)
    _fromdata_finish(fillables, pointer_list, pointerobjs, targetids, pointerat, pointer_fromequal, leaf_to_root)

    return fillables
def fromdata(value, generator=None, pointer_fromequal=False):
    """Fill a new set of fillables with ``value`` and convert them with ``toarrays``.

    Parameters:
        value: the data to fill.
        generator: when ``None`` it is obtained from
            ``oamap.inference.fromdata(value).generator()``; anything that is
            not an ``oamap.generator.Generator`` is converted by calling its
            ``generator()`` method.
        pointer_fromequal: forwarded unchanged to ``fromdatamore``.

    Returns:
        Whatever ``toarrays`` returns for the filled fillables.
    """
    if generator is None:
        inferred = oamap.inference.fromdata(value)
        generator = inferred.generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    fresh = oamap.fillable.arrays(generator)
    filled = fromdatamore(value, fresh, generator=generator, pointer_fromequal=pointer_fromequal)
    return toarrays(filled)
def arrays(generator, chunksize=8192):
    """Create a dict of ``FillableArray`` objects for ``generator``.

    Parameters:
        generator: an ``oamap.generator.Generator``, or any object whose
            ``generator()`` method produces one.
        chunksize: passed as ``chunksize`` to every ``FillableArray``.

    Returns:
        The dict populated by ``_makefillables``.
    """
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    def make_fillable(name, dtype):
        # ``name`` belongs to the factory signature; FillableArray is built
        # from the dtype and chunksize only.
        return FillableArray(dtype, chunksize=chunksize)

    out = {}
    _makefillables(generator, out, make_fillable)
    return out
def numpyfiles(generator, directory, chunksize=8192, lendigits=16):
    """Create a dict of ``FillableNumpyFile`` objects located under ``directory``.

    Parameters:
        generator: an ``oamap.generator.Generator``, or any object whose
            ``generator()`` method produces one.
        directory: path under which each fillable's file is placed
            (``os.path.join(directory, name)``). It is created with
            ``os.mkdir`` if it does not exist yet.
        chunksize: passed as ``chunksize`` to every ``FillableNumpyFile``.
        lendigits: passed as ``lendigits`` to every ``FillableNumpyFile``.

    Returns:
        The dict populated by ``_makefillables``.

    Raises:
        OSError: if ``directory`` cannot be created and does not exist.
    """
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    # Attempt the creation directly instead of checking first: a separate
    # exists()/mkdir() pair fails if another process creates the directory in
    # between. An error is only propagated when the path is still missing.
    try:
        os.mkdir(directory)
    except OSError:
        if not os.path.exists(directory):
            raise

    fillables = {}
    _makefillables(
        generator,
        fillables,
        lambda name, dtype: FillableNumpyFile(os.path.join(directory, name), dtype, chunksize=chunksize, lendigits=lendigits))
    return fillables
def _fromiterdata_newpartition(generator):
    """Build an empty set of fillables plus the bookkeeping that depends on it.

    Returns the tuple ``(fillables, pointers, fillables_leaf_to_root,
    positions_to_pointerobjs, pointerat, targetids, pointerobjs, start)``,
    where ``start`` is the value ``_fromdata_forefront`` reports for
    ``generator.content`` on the new fillables.
    """
    fillables = oamap.fillable.arrays(generator)

    # these containers are populated in place by _fromdata_initialize
    pointers = []
    pointerobjs_keys = []
    targetids_keys = []
    fillables_leaf_to_root = []
    positions_to_pointerobjs = {}
    _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

    pointerat = {}
    targetids = dict((x, {}) for x in targetids_keys)
    pointerobjs = dict((x, []) for x in pointerobjs_keys)

    start = _fromdata_forefront(generator.content, fillables, pointerobjs)
    return (fillables, pointers, fillables_leaf_to_root, positions_to_pointerobjs,
            pointerat, targetids, pointerobjs, start)


def fromiterdata(values, generator=None, limit=lambda entries, arrayitems, arraybytes: False, pointer_fromequal=False):
    """Fill list entries one at a time, yielding a new partition whenever ``limit`` says so.

    Parameters:
        values: iterable of entries; each one is filled with
            ``generator.content``.
        generator: when ``None`` it is obtained from
            ``oamap.inference.fromdata(values).generator()``; anything that is
            not an ``oamap.generator.Generator`` is converted by calling its
            ``generator()`` method. It must be an
            ``oamap.generator.ListGenerator``.
            NOTE(review): with ``generator=None`` the iterable ``values`` is
            handed to the inference step before the fill loop iterates it;
            a one-shot iterator would presumably be exhausted by then --
            confirm, or pass ``generator`` explicitly.
        limit: callable ``limit(entries, arrayitems, arraybytes)`` evaluated
            after each entry has been prospectively filled. ``entries`` is the
            number of entries the current partition would hold if the entry
            were accepted; ``arrayitems`` and ``arraybytes`` map each fillable
            name to its prospective item count and to that count multiplied
            by the dtype's itemsize. A falsy result closes the current
            partition before the entry is accepted.
            NOTE(review): the default always returns ``False``, so by default
            every entry ends up in a partition of its own -- confirm that this
            is the intended default.
        pointer_fromequal: forwarded unchanged to ``_fromdata_finish``.

    Yields:
        ``(number_of_entries, toarrays(fillables))`` for each partition. A
        final partition is always yielded once ``values`` is exhausted, even
        if it holds no entries.

    Raises:
        TypeError: if the generator is not a ``ListGenerator``.
    """
    if generator is None:
        generator = oamap.inference.fromdata(values).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()
    if not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists cannot be filled iteratively")

    # starting set of fillables
    (fillables, pointers, fillables_leaf_to_root, positions_to_pointerobjs,
     pointerat, targetids, pointerobjs, start) = _fromiterdata_newpartition(generator)
    stop = start

    # bytes per item of each fillable, used to turn item counts into byte counts
    factor = dict((n, x.dtype.itemsize) for n, x in fillables.items())

    for value in values:
        # prospectively fill a value
        _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)

        # the limit is judged on the forefront (_potential_ size), rather than len (_accepted_ size)
        arrayitems = {}
        arraybytes = {}
        for n, x in fillables.items():
            if n in positions_to_pointerobjs:
                arrayitems[n] = len(pointerobjs[positions_to_pointerobjs[n]])
            else:
                arrayitems[n] = x.forefront()
            arraybytes[n] = arrayitems[n]*factor[n]

        withinlimit = limit((stop - start) + 1, arrayitems, arraybytes)

        # Only close the partition if it already holds at least one accepted
        # entry: closing an empty one would yield a spurious zero-entry
        # partition and then accept the very same entry into a fresh one anyway.
        if not withinlimit and stop > start:
            # accepting this entry would make the partition too large
            fillables[generator.starts].append(start)
            fillables[generator.stops].append(stop)
            _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)

            # yield the finished partition
            yield stop - start, toarrays(fillables)

            # and make a new set of fillables (along with everything that depends on it)
            (fillables, pointers, fillables_leaf_to_root, positions_to_pointerobjs,
             pointerat, targetids, pointerobjs, start) = _fromiterdata_newpartition(generator)
            stop = start

            # really fill it in this new partition
            _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)

        # accept the data into the (possibly new) fillables and move on
        stop += 1
        for fillable in fillables_leaf_to_root:
            fillable.update()

    # always yield at the end
    fillables[generator.starts].append(start)
    fillables[generator.stops].append(stop)
    _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)

    yield (stop - start), toarrays(fillables)