Example #1
0
    def __delitem__(self, key):
        """Remove *key* from the dictionary, trimming the purge queue.

        The purge queue cannot be searched efficiently, so only entries
        sitting at either end of the deque are dropped here; interior
        entries for this key are left for the next cache purge.
        """
        q = self.queue
        # Drop matching entries at the left end of the deque (they would
        # only be re-appended to the right otherwise).
        while q and q[0][0] == key:
            q.popleft()
        # Drop matching entries at the right end for the same reason.
        while q and q[-1][0] == key:
            q.pop()
        return WeakValueDictionary.__delitem__(self, key)
Example #2
0
    def __delitem__(self, key):
        """Delete *key*, discarding queue entries for it at both deque ends.

        Entries for this key in the middle of the queue cannot be located
        cheaply; they simply expire during the next cache purge.
        """
        # Each pass trims one end of the deque while the key matches there.
        for key_at_end, discard in (
            (lambda: self.queue[0][0] == key, self.queue.popleft),
            (lambda: self.queue[-1][0] == key, self.queue.pop),
        ):
            while len(self.queue) and key_at_end():
                discard()
        return WeakValueDictionary.__delitem__(self, key)
Example #3
0
class SilkArray(SilkObject):
    """Array-like Silk container with two interchangeable storage backends.

    "json" storage keeps a plain Python list in ``self._data`` (plus one
    child Silk object per element for non-elementary arrays).  "numpy"
    storage keeps a single structured numpy buffer in ``self._data`` plus
    a length vector ``self._Len`` recording how much of the (possibly
    padded) buffer holds valid data.  A third state, "mixed", arises when
    some children of a JSON-storage array have individually been converted
    to numpy storage.
    """
    _element = None      # class of the contained elements (set by subclass)
    dtype = None         # numpy dtype description (set by subclass)
    _elementary = None   # True if elements are scalars, not Silk objects
    _arity = None        # number of array dimensions (set by subclass)
    __slots__ = [
        "_parent", "_storage_enum", "_storage_nonjson_children",
        "_data", "_children", "_Len", "_is_none", "__weakref__"
    ]

    def __init__(self, *args, _mode="any", **kwargs):
        """Construct the array.

        _mode selects the construction path:
          "parent"     : child of an existing Silk object (storage supplied)
          "from_numpy" : wrap a numpy buffer + length vector
          "any"        : fresh JSON-storage array initialized from *args
          "empty"      : fresh, uninitialized JSON-storage array
          "from_json"  : fresh array initialized from JSON data
        """
        self._storage_enum = None
        self._storage_nonjson_children = set()
        self._children = None
        if _mode == "parent":
            self._init(
                kwargs["parent"],
                kwargs["storage"],
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
        elif _mode == "from_numpy":
            assert "parent" not in kwargs
            self._init(
                None,
                "numpy",
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
        else:
            assert "parent" not in kwargs
            assert "storage" not in kwargs
            assert "data_store" not in kwargs
            self._init(None, "json", None, None)
            if _mode == "any":
                self.set(*args)
            elif _mode == "empty":
                pass
            elif _mode == "from_json":
                self.set(*args, prop_setter=_prop_setter_json, **kwargs)
            else:
                raise ValueError(_mode)

    @property
    def _len(self):
        # The logical length lives in the first slot of the length vector.
        return int(self._Len[0])

    @_len.setter
    def _len(self, value):
        self._Len[0] = value

    def _init(self, parent, storage, data_store, len_data_store):
        """(Re-)initialize storage; called from __init__ and on conversion."""
        if parent is not None:
            if storage == "numpy":
                self._parent = lambda: parent # hard ref
            else:
                self._parent = weakref.ref(parent)
        else:
            self._parent = lambda: None
        self.storage = storage

        self._is_none = False
        self._storage_nonjson_children.clear()

        if self._children is not None:
            # Detach old children so they no longer reference us.
            for child in self._children:
                child._parent = lambda: None
        if storage == "json":
            self._children = []
            if data_store is None:
                data_store = []
            self._data = data_store
            self._Len = [0]
        elif storage == "numpy":
            # Children are created on demand (_get_child); keep them weakly.
            self._children = WeakValueDictionary()
            assert data_store is not None
            assert len_data_store is not None
            assert len(len_data_store), len_data_store
            dtype = np.dtype(self.dtype, align=True)
            assert data_store.dtype == dtype
            self._data = data_store
            self._Len = len_data_store
            return
        else:
            raise ValueError(storage)

        assert storage == "json"
        for n in range(len(self._data)):
            # NOTE(review): "n > len(data_store)" can never hold inside this
            # loop (n < len(self._data) and data_store is self._data);
            # probably a leftover guard -- kept as-is to preserve behavior.
            if n > len(data_store):
                if issubclass(self._element, SilkArray):
                    self._data.append([])
                else:
                    self._data.append({})
            if not self._elementary:
                child = self._element(
                  _mode="parent",
                  storage="json",
                  parent=self,
                  data_store=self._data[n],
                  len_data_store=None,
                )
                self._children.append(child)
        self._len = len(self._data)

    def copy(self, storage="json"):
        """Returns a copy with the storage in the specified format"""
        cls = type(self)
        if storage == "json":
            json = self.json()
            return cls.from_json(json)
        elif storage == "numpy":
            numpydata = self.numpy()
            lengths = self.lengths()
            return cls.from_numpy(numpydata, lengths, copy=False)
        else:
            raise ValueError(storage)

    @classmethod
    def from_json(cls, data):
        """Construct a new array from (filtered) JSON data."""
        data = _filter_json(data)
        return cls(data, _mode="from_json")

    @classmethod
    def _check_numpy_args(cls, arr, lengths, length_can_be_none, self_data):
        """Validate a numpy buffer + length vector against this class.

        Raises TypeError on dimensionality/dtype mismatch, or on a length
        vector whose shape does not match the buffer's outer shape.
        """
        if self_data is not None:
            d = self_data
            if len(arr.shape) != len(d.shape) or arr.dtype != d.dtype:
                err = TypeError((len(arr.shape), len(d.shape), arr.dtype, d.dtype))
                raise err
        if len(arr.shape) != cls._arity:
            raise TypeError("Array must be %d-dimensional" % cls._arity)
        if arr.dtype != np.dtype(cls.dtype, align=True):
            raise TypeError("Array has the wrong dtype")
        if lengths is None and length_can_be_none:
            return
        assert lengths.dtype == np.uint32
        lenarray_shape = (_get_lenarray_size(arr.shape),)
        if lengths.shape != lenarray_shape:
            err = TypeError((lengths.shape, lenarray_shape, arr.shape))
            raise err

    @classmethod
    def from_numpy(cls, arr, lengths=None, *, copy=True, validate=True):
        """Constructs from a numpy array "arr"
        "lengths": The lengths of the array elements
          If not specified, it is assumed that "arr" is unpadded,
            i.e. that all elements have a valid value
        """
        if isinstance(arr, tuple) and len(arr) == 2 and \
          isinstance(arr[0], np.ndarray) and isinstance(arr[1], np.ndarray):
            # Accept a (data, lengths) tuple as a single argument.
            return cls.from_numpy(arr[0], arr[1],
                copy=copy, validate=validate
            )
        cls._check_numpy_args(arr, lengths, length_can_be_none=True, self_data=None)

        if copy:
            arr = datacopy(arr)
        if lengths is None:
            lengths = _get_lenarray_full(arr.shape)
        ret = cls(_mode="from_numpy", data_store=arr, len_data_store=lengths)
        if validate:
            ret.validate()
        return ret

    @classmethod
    def empty(cls):
        """Construct a fresh, empty JSON-storage array."""
        return cls(_mode="empty")

    def _get_child(self, childnr):
        """Return the child Silk object at index *childnr*.

        Supports negative indices; raises IndexError when out of range.
        For numpy storage, children are materialized lazily and cached
        in the weak-value dictionary.
        """
        if not isinstance(childnr, int):
            raise TypeError(childnr)
        if childnr < 0:
            childnr += self._len
        if childnr < 0 or childnr >= self._len:
            raise IndexError(childnr)
        from .silkarray import SilkArray
        if self.storage == "numpy":
            child = self._element(
                _mode="parent",
                parent=self,
                storage="numpy",
                data_store=self._data[childnr],
                len_data_store=self._get_child_lengths(childnr)
            )
            self._children[childnr] = child
        return self._children[childnr]

    def _get_children(self):
        """Yield all children (materializing them for numpy storage)."""
        if self.storage == "numpy":
            for n in range(self._len):
                yield self._get_child(n)
        else:
            for child in self._children:
                yield child

    def set(self, *args, prop_setter=_prop_setter_any, **kwargs):
        """Assign new content to the array.

        Accepts None (marks the array as "none"), a numpy array, a string
        (delegated to self._parse), another SilkArray, or any iterable of
        element values.

        BUGFIX: the signature previously lacked ``**kwargs`` although the
        body referenced ``kwargs`` (and __init__ passes kwargs through),
        which raised NameError and silently disabled the numpy fast path.
        """
        import collections.abc  # local import, matching the file's style
        if len(args) == 1:
            if args[0] is None:
                self._is_none = True
                self._len = 0
                self._clear_data()
                return
        # TODO: make a nice composite exception that stores all exceptions
        try:
            if self.storage == "numpy" and \
              len(args) == 1 and len(kwargs) == 0 and \
              isinstance(args[0], np.ndarray):
                self._construct_from_numpy(args[0], lengths=None)
            else:
                raise TypeError("Not a numpy array")
        except Exception:
            try:
                keep_trying = True
                ok = False
                if len(args) == 1:
                    a = args[0]
                    if isinstance(a, str):
                        self._parse(a)
                    elif isinstance(a, SilkArray):
                        if a.storage == "numpy":
                            if isinstance(a, type(self)):
                                keep_trying = False
                                self._construct_from_numpy(a._data, a._Len)
                            else:
                                # BUGFIX: was
                                # self._construct(prop_setter, a.json(),
                                #                 prop_setter=...)
                                # which always raised TypeError ("multiple
                                # values for argument 'prop_setter'").
                                self._construct(_prop_setter_json, *a.json())
                        else:
                            self._construct(prop_setter, *a)
                    # collections.Iterable was removed in Python 3.10;
                    # use the collections.abc location instead.
                    elif isinstance(a, collections.abc.Iterable) or \
                      isinstance(a, np.void):
                        self._construct(prop_setter, *a)
                    else:
                        raise TypeError(a)
                else:
                    raise TypeError(args)
                ok = True
            except Exception:
                if not ok:
                    if not keep_trying:
                        raise
                    try:
                        self._construct(prop_setter, *args)
                    except Exception:
                        raise
        self.validate()
        self._is_none = False

    def validate(self):
        # Hook for subclasses; the base array imposes no constraints.
        pass

    def json(self):
        """Returns a JSON representation of the Silk object
        """
        if self.storage == "json":
            return _filter_json(self._data)

        if self._elementary:
            return [dd for dd in self._data]
        else:
            d = []
            for child in self._get_children():
                dd = child.json()
                d.append(dd)
        return d

    def numpy(self):
        """Returns a numpy representation of the Silk object
        NOTE: for all numpy arrays,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          the length of each array is stored in the LEN_xxx field
          TODO: document multidimensional length vector, PTR_LEN_xxx
        TODO: add and document SHAPE field
        """
        if self.storage == "numpy":
            return datacopy(self._data)
        new_obj = self.copy("json")
        return new_obj.make_numpy()

    def make_json(self):
        """Convert internal storage to "json" and return the JSON data.

        For numpy storage the parent is shattered first (if numpy) so that
        this object's data can be re-rooted; for mixed storage every
        non-JSON child is converted, which flips our own status to "json".
        """
        if self.storage == "json":
            return self._data
        elif self.storage == "numpy":
            json = _filter_json(self.json(), self)
            parent = self._parent()
            if parent is not None and parent.storage == "numpy":
                parent.numpy_shatter()
            self._init(parent, "json", None, None)
            self.set(json, prop_setter=_prop_setter_json)
            if parent is not None:
                parent._remove_nonjson_child(self)
                myname = parent._find_child(id(self))
                parent._data[myname] = self._data
            return self._data
        elif self.storage == "mixed":
            for child_id in list(self._storage_nonjson_children):  # copy!
                for child in self._get_children():
                    if id(child) == child_id:
                        child.make_json()
                        break
                else:
                    raise Exception("Cannot find child that was marked as 'non-JSON'")
            # Above will automatically update storage status to "json"
            assert self.storage == "json"
            return self._data

    def _get_outer_shape(self):
        """Return the padded outer shape [len, max-len-dim2, ...]."""
        shape = [len(self)]
        d = self
        for n in range(1, self._arity):
            maxlen = max([len(dd) for dd in d])
            shape.append(maxlen)
            d2 = []
            for dd in d:
                for ddd in dd:
                    d2.append(ddd)
            d = d2
        return shape

    def _get_child_lengths(self, child):
        """Return the slice of self._Len holding child *child*'s lengths.

        Only meaningful for numpy storage with arity > 1; otherwise None.
        """
        if self.storage != "numpy":
            return None
        if self._arity == 1:
            return None
        child_size = _get_lenarray_size(self._data.shape[1:])
        start = 1 + child_size * child
        assert start+child_size <= len(self._Len)
        return self._Len[start:start+child_size]

    def _del_child_lengths(self, child):
        """Remove child *child*'s slot from the length vector, shifting
        the slots of all later children down and re-coupling them."""
        if self.storage != "numpy":
            return
        if self._arity == 1:
            return
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        self._Len[offset:lsize-size] = self._Len[offset+size:lsize]
        self._Len[lsize-size:] = 0
        for n in range(child+1, len(self._children)):
            c_offset = 1 + size * n
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset+size]

    def _insert_child_lengths(self, child, child_lengths):
        """Insert *child_lengths* at child *child*'s slot in the length
        vector, shifting later slots up and re-coupling those children."""
        if self.storage != "numpy":
            assert child_lengths is None
            return
        if self._arity == 1:
            assert child_lengths is None
            return
        assert child_lengths is not None
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        self._Len[offset+size:lsize] = self._Len[offset:lsize-size]
        self._Len[offset:offset+size] = child_lengths
        for n in range(child, len(self._children)):
            c_offset = 1 + size * (n+1)
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset+size]

    def _restore_array_coupling(self, data=None, myname=None):
        """
        Array members have their length vector stored in the parent data
        In addition, var_arrays have a pointer to their data stored
        If the parent data gets reallocated or copied, then
         this information gets decoupled, so it must be restored
        """
        assert self.storage == "numpy"
        if data is None:
            parent = self._parent()
            if parent is None:
                return
            if parent.storage != "numpy":
                return
            myname = parent._find_child(id(self))
            if not isinstance(parent, SilkArray) and \
              parent._props[myname].get("var_array", False):
                data = parent._data
                assert data is not None
        if data is not None:
            assert myname is not None
            data[myname] = self._data
            data["PTR_"+myname] = self._data.ctypes.data
            data["LEN_"+myname] = self._Len.copy()
            self._Len = data["LEN_"+myname]
            if self._arity > 1:
                data["SHAPE_"+myname] = self._data.shape
                data["PTR_LEN_"+myname] = self._Len.ctypes.data


    def make_numpy(self, _toplevel=None):
        """Sets the internal storage to 'numpy'
        Returns the numpy array that is used as internal storage buffer
        NOTE: for optional members,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          an extra field "HAS_xxx" indicates if the data is present.
        TODO: update doc
        NOTE: for numpy array members of variable shape,
          an extra field "PTR_xxx" contains a C pointer to the data
          For this, the dimensionality of the array does not matter,
           e.g. both for IntegerArray and IntegerArrayArray,
            the C pointer will be "int *"
           and both for MyStructArray and MyStructArrayArray,
            the C pointer will be "MyStruct *"
        """
        from .silkarray import SilkArray
        if self.storage == "numpy":
            return self._data

        dtype = np.dtype(self.dtype, align=True)
        shape = self._get_outer_shape()
        data = np.zeros(dtype=dtype, shape=shape)
        lengths = _get_lenarray_empty(shape)
        lengths[0] = len(self)
        if self._elementary:
            # BUGFIX: was self._set_numpy_ele_range(self, ...), i.e. an
            # attribute lookup for a module-level helper (called as a
            # global elsewhere in this class, see _construct).
            _set_numpy_ele_range(self, 0, len(self._data), self._data,
                                 self._arity, data)
        else:
            for childnr, child in enumerate(self._get_children()):
                child.make_numpy(_toplevel=False)
                if self._arity > 1:
                    slices = [slice(0, v) for v in child._data.shape]
                    data[childnr][slices] = child._data
                else:
                    try:
                        data[childnr] = child._data
                    except ValueError: #numpy bug
                        for field in child._data.dtype.names:
                            data[childnr][field] = child._data[field]
                if self._arity > 1:
                    child_size = _get_lenarray_size(shape[1:])
                    start = 1 + child_size * childnr
                    arr1 = lengths[start:start+child_size]
                    shape1 = data.shape[1:]
                    arr2 = child._Len
                    shape2 = child._data.shape
                    _lenarray_copypad(arr1, shape1, arr2, shape2)

        self._init(self._parent(), "numpy", data, lengths)
        parent = self._parent()
        if parent is not None:
            if parent.storage != "numpy":
                parent._add_nonjson_child(self)
        for child in self._get_children():
            child._restore_array_coupling()

        return data

    def lengths(self):
        """Return the length vector (numpy storage only)."""
        assert self.storage == "numpy"
        return self._Len

    def realloc(self, *shape):
        """Grow the numpy storage buffer to *shape* (never shrinks)."""
        assert self.storage == "numpy"
        if len(shape) == 1 and isinstance(shape[0], tuple):
            shape = shape[0]
        parent = self._parent()
        if parent is not None:
            myname = parent._find_child(id(self))
            if parent.storage == "numpy":
                if not parent._props[myname].get("var_array", False):
                    raise Exception("Cannot reallocate numpy array that is\
 part of a larger numpy buffer. Use numpy_shatter() on the parent to allow\
 reallocation")
        if len(shape) != self._arity:
            msg = "Shape must have %d dimensions, not %d"
            raise ValueError(msg % (self._arity, len(shape)))
        min_shape = self._data.shape
        for n in range(self._arity):
            msg = "Dimension %d: shape must have at least length %d, not %d"
            if min_shape[n] > shape[n]:
                raise ValueError(msg % (n+1, min_shape[n], shape[n]))
        old_data = self._data
        old_len = self._Len
        # BUGFIX: allocate with align=True; _init below asserts that the
        # buffer dtype equals np.dtype(self.dtype, align=True).
        self._data = np.zeros(dtype=np.dtype(self.dtype, align=True),
                              shape=shape)
        slices = [slice(0, s) for s in min_shape]
        self._data[slices] = old_data
        self._Len = _get_lenarray_empty(shape)
        _lenarray_copypad(self._Len, shape, old_len, old_data.shape)
        self._init(parent, "numpy", self._data, self._Len)
        self._restore_array_coupling()

    def _find_child(self, child_id):
        """Return the key/index of the child whose id() is *child_id*."""
        if self.storage == "numpy":
            for childname, ch in self._children.items():
                if child_id == id(ch):
                    return childname
        else:
            for childname, ch in enumerate(self._children):
                if child_id == id(ch):
                    return childname
        raise KeyError

    def _add_nonjson_child(self, child):
        """Register *child* as non-JSON; flips our storage to "mixed"
        and propagates the notification up the parent chain."""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id not in njc:
            njc.add(child_id)
            if self.storage == "json":
                self.storage = "mixed"
                parent = self._parent()
                if parent is not None:
                    parent._add_nonjson_child(self)

    def _remove_nonjson_child(self, child):
        """Unregister *child*; flips storage back to "json" when no
        non-JSON children remain, propagating up the parent chain."""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id in njc:
            assert self.storage == "mixed", self.storage
            njc.remove(child_id)
            if len(njc) == 0:
                self.storage = "json"
                parent = self._parent()
                if parent is not None:
                    # BUGFIX: "parent" is already the dereferenced object
                    # (see self._parent() above); it was wrongly called
                    # again as parent()._remove_nonjson_child(self).
                    parent._remove_nonjson_child(self)


    def numpy_shatter(self):
        """
        Breaks up a unified numpy storage into one numpy storage per child
        """
        assert self.storage == "numpy"
        assert not self._elementary
        parent = self._parent()
        if parent is not None and parent.storage == "numpy":
            parent.numpy_shatter()
        data = []
        children = []
        for child in self._get_children():
            d = datacopy(child._data)
            data.append(d)
            child._data = d
            children.append(child)
        self._data = data
        self._children = children
        self._storage_nonjson_children = set([id(p) for p in children])
        self.storage = "mixed"

    def _construct(self, prop_setter, *args):
        """Fill the array from *args*, one value per element."""
        old_data = self._data
        old_children = self._children
        with _ArrayConstructContext(self):
            if self.storage == "numpy":
                if len(args) > len(self._data):
                    # BUGFIX: was len(data) -- undefined name.
                    msg = "index {0} is out of bounds for axis with size {1}"\
                          .format(len(args), len(self._data))
                    raise IndexError(msg)
                if self._elementary:
                    _set_numpy_ele_range(self, 0, len(args), args, self._arity)
                else:
                    for anr, a in enumerate(args):
                        child = self._get_child(anr)
                        child.set(args[anr], prop_setter=prop_setter)
            else:
                if self._elementary:
                    newdata = []
                    for anr, a in enumerate(args):
                        v = self._element(a)
                        newdata.append(v)
                    self._data[:] = newdata
                else:
                    for n in range(self._len, len(args)):
                        if issubclass(self._element, SilkArray):
                            self._data.append([])
                        else:
                            self._data.append({})
                        child = self._element(
                          _mode="parent",
                          storage=self.storage,
                          parent=self,
                          data_store=self._data[n],
                          len_data_store=self._get_child_lengths(n)
                        )
                        self._children.append(child)

                    for n in range(len(args)):
                        child = self._children[n]
                        child.set(args[n], prop_setter=prop_setter)

                    if len(args) < self._len:
                        self._children[:] = self._children[:len(args)]
                        # NOTE(review): this branch runs only for non-numpy
                        # storage, so the inner "numpy" test below looks
                        # unreachable -- kept as-is to preserve behavior.
                        if self.storage == "numpy":
                            self._data[len(args):] = \
                              np.zeros_like(self._data[len(args):])
                        else:
                            self._data[:] = self._data[:len(args)]

        self._len = len(args)

    def _construct_from_numpy(self, arr, lengths):
        """Adopt a copy of numpy buffer *arr* (+ *lengths*) as content."""
        if self.storage != "numpy":
            self._init(self._parent(), "numpy", arr, lengths)
            self.make_json()
            return

        self._check_numpy_args(arr, lengths, self_data=self._data,
                               length_can_be_none=False)
        if lengths is None:
            lengths = _get_lenarray_full(arr.shape)

        self._data = datacopy(arr)
        self._Len = lengths.copy()
        self._restore_array_coupling()

    def _parse(self, s):
        raise NotImplementedError  # can be user-defined

    _storage_names = ("numpy", "json", "mixed")

    @property
    def storage(self):
        return self._storage_names[self._storage_enum]

    @storage.setter
    def storage(self, storage):
        assert storage in self._storage_names, storage
        self._storage_enum = self._storage_names.index(storage)

    def __dir__(self):
        return dir(type(self))

    def __setattr__(self, attr, value):
        # Private attributes and "storage" are set directly; everything
        # else is routed through the Silk property machinery.
        if attr.startswith("_") or attr == "storage":
            object.__setattr__(self, attr, value)
        else:
            self._set_prop(attr, value, _prop_setter_any)

    def __getitem__(self, item):
        """Index or slice the array; slices return a new array object."""
        if isinstance(item, slice):
            return type(self)([self[v] for v in range(*item.indices(len(self)))])
        if not isinstance(item, int):
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       item.__class__.__name__))
        if self._elementary:
            if self.storage == "numpy":
                return _get_numpy_ele_prop(self, item, self._len)
            else:
                return self._data[:self._len][item]
        else:
            return self._get_child(item)

    def _set_prop(self, item, value, prop_setter=_prop_setter_any):
        """Set element *item* to *value* (negative indices supported)."""
        if self._elementary:
            if self.storage == "numpy":
                _set_numpy_ele_prop(self, item, value)
            else:
                # BUGFIX: negative indices were normalized with
                # "item = self._len - item" (wrong sign); use the same
                # normalization as _get_child.
                if item < 0:
                    item += self._len
                if item < 0 or item >= self._len:
                    raise IndexError(item)
                self._data[item] = self._element(value)
        else:
            child = self._get_child(item)
            child.set(value, prop_setter=prop_setter)

    def __setitem__(self, item, value):
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            indices = list(range(start, stop, stride))
            if len(indices) != len(value):
                msg = "Cannot assign to a slice of length %d using \
a sequence of length %d"
                raise IndexError(msg % (len(indices), len(value)))
            # BUGFIX: values were taken as value[n] (n = target index),
            # which is wrong whenever the slice does not start at 0;
            # pair each target index with the next value instead.
            for n, v in zip(indices, value):
                self._set_prop(n, v)
            return
        elif isinstance(item, int):
            self._set_prop(item, value)
        else:
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       item.__class__.__name__))

    def __delitem__(self, item):
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            indices = list(range(start, stop, stride))
            # Pop from the back so earlier indices stay valid.
            for n in reversed(indices):
                self.pop(n)
            return
        if not isinstance(item, int):
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       item.__class__.__name__))
        self.pop(item)

    def pop(self, index=-1):
        """Remove and return the element at *index* (default: last)."""
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       index.__class__.__name__))
        if index < 0:
            index += self._len
        if index < 0:
            raise IndexError
        if self.storage == "numpy":
            # Return a detached copy of the element, then shift the buffer
            # left and zero the vacated tail slot.
            ret_data = datacopy(self._data[index])
            ret_lengths = None
            if self._arity > 1:
                ret_lengths = _get_lenarray_empty(ret_data.shape)
            ret = self._element(
                    _mode="from_numpy",
                    data_store=ret_data,
                    len_data_store=ret_lengths,
                )
            self._data[index:self._len-1] = self._data[index+1:self._len]
            try:
                self._data[self._len-1] = np.zeros_like(self._data[self._len-1])
            except ValueError: # numpy bug
                for field in self._data.dtype.fields:
                    self._data[self._len-1][field] = np.zeros_like(self._data[self._len-1][field])
            self._del_child_lengths(index)
        elif self._elementary:
            ret = self._data[:self._len][index]
            self._data.__delitem__(index)
        else:
            ret = self._children[:self._len][index].copy()
            self._children.__delitem__(index)
            self._data.__delitem__(index)
        self._len -= 1
        return ret


    def _Len__(self):
        # NOTE(review): looks like a typo for __len__ (defined below);
        # kept to preserve the existing interface.
        return self._len

    def _print(self, spaces):
        """Render a nested, indented representation of the array."""
        ret = "{0} (\n".format(self.__class__.__name__)
        for n in range(self._len):
            if self._elementary:
                value = self._data[n]
                if self.storage == "numpy":
                    if value.dtype.kind == 'S':
                        substr = '"' + value.decode() + '"'
                    else:
                        substr = str(value)
                else:
                    substr = value._print(spaces+2)
            else:
                value = self._get_child(n)
                substr = value._print(spaces+2)
            ret += "{0}{1},\n".format(" " * (spaces+2), substr)
        ret += "{0})".format(" " * spaces)
        return ret

    def __str__(self):
        return self._print(0)

    def __repr__(self):
        return self._print(0)

    def clear(self):
        self.set([])

    def append(self, item):
        self.insert(self._len, item)

    def insert(self, index, item):
        """Insert *item* at *index*, shifting later elements right."""
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       index.__class__.__name__))
        if index < 0:
            index += self._len
        if index < 0:
            raise IndexError
        if self.storage == "numpy":
            if self._len >= len(self._data):
                raise IndexError("Numpy array overflows allocated space")
            if not self._elementary:
                ele = self._element(item)
                child_data = ele.make_numpy()
                child_lengths = None
                if self._arity > 1:
                    child_lengths = ele.lengths()
                self._data[index+1:self._len+1] = self._data[index:self._len]
                if self._arity > 1:
                    slices = [slice(0, v) for v in child_data.shape]
                    self._data[index][slices] = child_data
                else:
                    self._data[index] = child_data
                self._insert_child_lengths(index, child_lengths)
            else:
                self._data[self._len] = item  # dry run
                self._data[index+1:self._len+1] = self._data[index:self._len]
                self._data[index] = item  # should give no exception now
            self._len += 1
        else:
            with _ArrayInsertContext(self, index):
                if self._elementary:
                    self._data[index] = item
                else:
                    child = self._element(
                      _mode="parent",
                      storage=self.storage,
                      parent=self,
                      data_store=self._data[index],
                      len_data_store=self._get_child_lengths(index)
                    )
                    self._children.insert(index, child)
                    child.set(item)

    def __eq__(self, other):
        if not isinstance(other, SilkArray):
            return False
        if self.storage == other.storage == "json":
            return self._data == other._data
        else: #can't use numpy _data because of PTR and different allocation sizes
            return self.json() == other.json()

    def __len__(self):
        return self._len

    def _clear_data(self):
        """Zero out numpy storage, or recursively clear JSON children."""
        d = self._data
        if self.storage == "numpy":
            d[:] = np.zeros_like(d)
        else:
            for child in self._get_children():
                child._clear_data()
Example #4
0
class thread_container(blist):
    '''
    Base class for thread containers. This container requires "hints" in
    order to work. Okay, it requires more than hints. It needs every
    conversation container to list its conversation id.

    To be clear, thread_containers are used to merge related conversations
    into the same object, and are used to contain all emails/conversations
    in a folder or those found in a query.

    The thread_container holds the conversations inside itself (it is a list)
    and holds metadata that is used to instantly find related conversations
    in self._map when adding new messages to the container.

    While Jamie Zawinski makes some excellent arguments against storing
    which message belongs to which conversation, doing threading his way
    requires either loading every message into ram in order to find every
    message that goes in a conversation, or doing dozens of searches until
    we find everything. This eats up lots of ram unfortunately. :(
    '''
    # A bare string works for __slots__, but the one-element tuple is the
    # conventional, less error-prone spelling.
    __slots__ = ('_map',)

    def __init__(self):
        # Weak values: a conversation dropped from the list must not be
        # kept alive merely because it is indexed in the map.
        self._map = WeakValueDictionary()

    def datesort(self):
        '''
        Sort conversations so newest are at the top.
        '''
        self.sort(key=attrgetter('last_update'), reverse=True)

    def __getitem__(self, idx):
        '''
        If the key we're given is an integer, what's being asked for is a
        message at index idx. Otherwise we've been given a string and are being
        asked to look up something in the lookup table instead.
        '''
        try:
            idx.__int__
        except AttributeError:
            return self._map[idx]
        else:
            return super(thread_container, self).__getitem__(idx)

    def __setitem__(self, idx, value):
        # Same dispatch as __getitem__: ints hit the list, keys hit the map.
        try:
            idx.__int__
        except AttributeError:
            return self._map.__setitem__(idx, value)
        else:
            return super(thread_container, self).__setitem__(idx, value)

    def __delitem__(self, idx):
        try:
            idx.__int__
        except AttributeError:
            return self._map.__delitem__(idx)
        else:
            return super(thread_container, self).__delitem__(idx)

    def join(self, item):
        '''
        To keep things lite and simple (translation: use the least amount of
        of ram possible while still keeping things fast), look conversations
        up only based upon their threadid.
        '''
        if type(item) is conv_container:
            try:
                return self[item.thread].merge(item)
            except KeyError:
                # No existing conversation with this threadid: add as new
                self.append(item)
                self[item.thread] = item
        elif type(item) is msg_container:
            raise TypeError('Unable to thread that.')

    _thread = join

    def thread(self, msgs):
        '''Thread an iterable of messages into this container.'''
        # BUG FIX: map() is lazy on Python 3, so the original
        # map(self._thread, ...) built an iterator that was never
        # consumed — nothing was ever threaded. Use an explicit loop.
        for conv in (conv_factory(x) for x in msgs):
            self._thread(conv)
        self.datesort()
        return
Beispiel #5
0
class SilkArray(SilkObject):
    _element = None
    dtype = None
    _elementary = None
    _arity = None
    __slots__ = [
        "_parent", "_storage_enum", "_storage_nonjson_children", "_data",
        "_children", "_Len", "_is_none", "__weakref__"
    ]

    def __init__(self, *args, _mode="any", **kwargs):
        """Construct a SilkArray in one of several modes.

        _mode="parent":     wrap storage owned by a parent object
        _mode="from_numpy": wrap an existing numpy buffer
        _mode="any"/"empty"/"from_json": fresh JSON-backed array
        """
        self._storage_enum = None
        self._storage_nonjson_children = set()
        self._children = None
        if _mode == "parent":
            self._init(
                kwargs["parent"],
                kwargs["storage"],
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
            return
        if _mode == "from_numpy":
            assert "parent" not in kwargs
            self._init(
                None,
                "numpy",
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
            return
        assert "parent" not in kwargs
        assert "storage" not in kwargs
        assert "data_store" not in kwargs
        self._init(None, "json", None, None)
        if _mode == "any":
            self.set(*args)
        elif _mode == "empty":
            pass
        elif _mode == "from_json":
            self.set(*args, prop_setter=_prop_setter_json, **kwargs)
        else:
            raise ValueError(_mode)

    @property
    def _len(self):
        """Logical length, stored in the first slot of the length vector."""
        first = self._Len[0]
        return int(first)

    @_len.setter
    def _len(self, value):
        self._Len[0] = value

    def _init(self, parent, storage, data_store, len_data_store):
        """(Re)bind this array to a parent, a storage mode, and buffers.

        storage is "json" or "numpy"; data_store/len_data_store are the
        backing buffers (both may be None only for fresh JSON storage).
        """
        if parent is not None:
            if storage == "numpy":
                self._parent = lambda: parent  # hard ref
            else:
                self._parent = weakref.ref(parent)
        else:
            self._parent = lambda: None
        self.storage = storage

        self._is_none = False
        self._storage_nonjson_children.clear()

        # Detach any previous children so they no longer reference us
        if self._children is not None:
            for child in self._children:
                child._parent = lambda: None
        if storage == "json":
            self._children = []
            if data_store is None:
                data_store = []
            self._data = data_store
            self._Len = [0]
        elif storage == "numpy":
            # numpy children are created lazily and cached weakly
            self._children = WeakValueDictionary()
            assert data_store is not None
            assert len_data_store is not None
            assert len(len_data_store), len_data_store
            dtype = np.dtype(self.dtype, align=True)
            assert data_store.dtype == dtype
            self._data = data_store
            self._Len = len_data_store
            return
        else:
            raise ValueError(storage)

        assert storage == "json"
        # Rebuild JSON child wrappers around the supplied data
        for n in range(len(self._data)):
            # NOTE(review): data_store is self._data at this point, so this
            # condition (n > len) appears to never hold; possibly ">=" was
            # intended — TODO confirm
            if n > len(data_store):
                if issubclass(self._element, SilkArray):
                    self._data.append([])
                else:
                    self._data.append({})
            if not self._elementary:
                child = self._element(
                    _mode="parent",
                    storage="json",
                    parent=self,
                    data_store=self._data[n],
                    len_data_store=None,
                )
                self._children.append(child)
        self._len = len(self._data)

    def copy(self, storage="json"):
        """Returns a copy with the storage in the specified format"""
        cls = type(self)
        if storage == "json":
            return cls.from_json(self.json())
        if storage == "numpy":
            return cls.from_numpy(self.numpy(), self.lengths(), copy=False)
        raise ValueError(storage)

    @classmethod
    def from_json(cls, data):
        """Build an instance from (filtered) JSON data."""
        filtered = _filter_json(data)
        return cls(filtered, _mode="from_json")

    @classmethod
    def _check_numpy_args(cls, arr, lengths, length_can_be_none, self_data):
        """Validate a numpy buffer (and its length vector) against cls."""
        if self_data is not None:
            ref = self_data
            shape_mismatch = len(arr.shape) != len(ref.shape)
            if shape_mismatch or arr.dtype != ref.dtype:
                raise TypeError(
                    (len(arr.shape), len(ref.shape), arr.dtype, ref.dtype))
        if len(arr.shape) != cls._arity:
            raise TypeError("Array must be %d-dimensional" % cls._arity)
        if arr.dtype != np.dtype(cls.dtype, align=True):
            raise TypeError("Array has the wrong dtype")
        if lengths is None and length_can_be_none:
            return
        assert lengths.dtype == np.uint32
        expected = (_get_lenarray_size(arr.shape), )
        if lengths.shape != expected:
            raise TypeError((lengths.shape, expected, arr.shape))

    @classmethod
    def from_numpy(cls, arr, lengths=None, *, copy=True, validate=True):
        """Constructs from a numpy array "arr"
        "lengths": The lengths of the array elements
          If not specified, it is assumed that "arr" is unpadded,
            i.e. that all elements have a valid value
        """
        # A (data, lengths) pair may be passed as a single tuple argument
        if isinstance(arr, tuple) and len(arr) == 2:
            if isinstance(arr[0], np.ndarray) and \
              isinstance(arr[1], np.ndarray):
                return cls.from_numpy(arr[0], arr[1],
                                      copy=copy, validate=validate)
        cls._check_numpy_args(
            arr, lengths, length_can_be_none=True, self_data=None)

        data = datacopy(arr) if copy else arr
        if lengths is None:
            lengths = _get_lenarray_full(data.shape)
        ret = cls(_mode="from_numpy", data_store=data, len_data_store=lengths)
        if validate:
            ret.validate()
        return ret

    @classmethod
    def empty(cls):
        """Return a fresh, zero-length instance."""
        return cls(_mode="empty")

    def _get_child(self, childnr):
        """Return the child object at index childnr (negative allowed).

        For numpy storage a fresh wrapper child is created per call and
        cached in the WeakValueDictionary self._children.
        Raises TypeError for non-int indices, IndexError when out of range.
        """
        if not isinstance(childnr, int):
            raise TypeError(childnr)
        if childnr < 0:
            childnr += self._len
        if childnr < 0 or childnr >= self._len:
            raise IndexError(childnr)
        # (removed an unused local "from .silkarray import SilkArray")
        if self.storage == "numpy":
            child = self._element(
                _mode="parent",
                parent=self,
                storage="numpy",
                data_store=self._data[childnr],
                len_data_store=self._get_child_lengths(childnr))
            self._children[childnr] = child
        return self._children[childnr]

    def _get_children(self):
        """Iterate over this array's child objects."""
        if self.storage != "numpy":
            yield from self._children
        else:
            for idx in range(self._len):
                yield self._get_child(idx)

    def set(self, *args, prop_setter=_prop_setter_any):
        """Assign new content to the array.

        A single None argument marks the array as "none" and clears it.
        Otherwise the value may be a numpy array, a string (parsed via
        _parse), another SilkArray, or an iterable of element values.
        """
        if len(args) == 1:
            if args[0] is None:
                self._is_none = True
                self._len = 0
                self._clear_data()
                return
        # TODO: make a nice composite exception that stores all exceptions
        try:
            # BUG FIX: the original condition also tested len(kwargs) == 0,
            # but "kwargs" is undefined in this scope; the NameError was
            # silently swallowed below, disabling this numpy fast path.
            if self.storage == "numpy" and \
              len(args) == 1 and \
              isinstance(args[0], np.ndarray):
                self._construct_from_numpy(args[0], lengths=None)
            else:
                raise TypeError("Not a numpy array")
        except Exception:
            try:
                keep_trying = True
                ok = False
                if len(args) == 1:
                    a = args[0]
                    if isinstance(a, str):
                        self._parse(a)
                    elif isinstance(a, SilkArray):
                        if a.storage == "numpy":
                            if isinstance(a, type(self)):
                                keep_trying = False
                                self._construct_from_numpy(a._data, a._Len)
                            else:
                                # BUG FIX: the original passed prop_setter
                                # both positionally and by keyword, which
                                # is always a TypeError; pass the JSON
                                # setter and unpack the elements instead.
                                self._construct(_prop_setter_json, *a.json())
                        else:
                            self._construct(prop_setter, *a)
                    # NOTE(review): collections.Iterable was removed in
                    # Python 3.10+; collections.abc.Iterable is the modern
                    # spelling — confirm the target Python version
                    elif isinstance(a, collections.Iterable) or isinstance(
                            a, np.void):
                        self._construct(prop_setter, *a)
                    else:
                        raise TypeError(a)
                else:
                    raise TypeError(args)
                ok = True
            except Exception:
                if not ok:
                    if not keep_trying:
                        raise
                    try:
                        self._construct(prop_setter, *args)
                    except Exception:
                        raise
        self.validate()
        self._is_none = False

    def validate(self):
        """Validation hook; subclasses may override. Default: no checks."""
        pass

    def json(self):
        """Returns a JSON representation of the Silk object
        """
        if self.storage == "json":
            return _filter_json(self._data)
        if self._elementary:
            return list(self._data)
        return [child.json() for child in self._get_children()]

    def numpy(self):
        """Returns a numpy representation of the Silk object
        NOTE: for all numpy arrays,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          the length of each array is stored in the LEN_xxx field
          TODO: document multidimensional length vector, PTR_LEN_xxx
        TODO: add and document SHAPE field
        """
        if self.storage != "numpy":
            # Convert a throwaway JSON copy so our own storage mode
            # is left untouched
            return self.copy("json").make_numpy()
        return datacopy(self._data)

    def make_json(self):
        """Convert the internal storage to JSON (in place) and return
        the resulting JSON data structure."""
        if self.storage == "json":
            return self._data
        elif self.storage == "numpy":
            json = _filter_json(self.json(), self)
            parent = self._parent()
            # A numpy parent cannot keep a JSON child inside its unified
            # buffer, so break the parent's buffer apart first
            if parent is not None and parent.storage == "numpy":
                parent.numpy_shatter()
            self._init(parent, "json", None, None)
            self.set(json, prop_setter=_prop_setter_json)
            if parent is not None:
                parent._remove_nonjson_child(self)
                myname = parent._find_child(id(self))
                parent._data[myname] = self._data
            return self._data
        elif self.storage == "mixed":
            for child_id in list(self._storage_nonjson_children):  # copy!
                for child in self._get_children():
                    if id(child) == child_id:
                        child.make_json()
                        break
                else:
                    raise Exception(
                        "Cannot find child that was marked as 'non-JSON'")
            # Above will automatically update storage status to "json"
            assert self.storage == "json"
            return self._data

    def _get_outer_shape(self):
        """Padded outer shape: dimension d is the maximum element
        length found at nesting depth d."""
        shape = [len(self)]
        level = self
        for _ in range(1, self._arity):
            shape.append(max(len(sub) for sub in level))
            # flatten one nesting level for the next pass
            level = [item for sub in level for item in sub]
        return shape

    def _get_child_lengths(self, child):
        """Return the slice of the packed length vector belonging to one
        child, or None when no per-child bookkeeping exists."""
        if self.storage != "numpy" or self._arity == 1:
            return None
        per_child = _get_lenarray_size(self._data.shape[1:])
        begin = 1 + per_child * child
        assert begin + per_child <= len(self._Len)
        return self._Len[begin:begin + per_child]

    def _del_child_lengths(self, child):
        """Remove the length slice of child "child" from the packed
        length vector; re-point later children at their shifted slices."""
        if self.storage != "numpy":
            return
        if self._arity == 1:
            # 1-D arrays keep no per-child length bookkeeping
            return
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        # Shift all following slices down, then zero the vacated tail
        self._Len[offset:lsize - size] = self._Len[offset + size:lsize]
        self._Len[lsize - size:] = 0
        for n in range(child + 1, len(self._children)):
            c_offset = 1 + size * n
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset + size]

    def _insert_child_lengths(self, child, child_lengths):
        """Insert the length slice for a new child at position "child",
        shifting the slices of later children up by one slot."""
        if self.storage != "numpy":
            assert child_lengths is None
            return
        if self._arity == 1:
            # 1-D arrays keep no per-child length bookkeeping
            assert child_lengths is None
            return
        assert child_lengths is not None
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        # Shift the tail up by one slot, then write the new slice
        self._Len[offset + size:lsize] = self._Len[offset:lsize - size]
        self._Len[offset:offset + size] = child_lengths
        for n in range(child, len(self._children)):
            c_offset = 1 + size * (n + 1)
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset + size]

    def _restore_array_coupling(self, data=None, myname=None):
        """
        Array members have their length vector stored in the parent data
        In addition, var_arrays have a pointer to their data stored
        If the parent data gets reallocated or copied, then
         this information gets decoupled, so it must be restored
        """
        assert self.storage == "numpy"
        if data is None:
            parent = self._parent()
            if parent is None:
                return
            if parent.storage != "numpy":
                return
            myname = parent._find_child(id(self))
            # NOTE(review): only non-SilkArray parents carry per-member
            # "var_array" props; for a SilkArray parent, data stays None
            # and nothing is re-coupled — confirm intended
            if not isinstance(parent, SilkArray) and \
              parent._props[myname].get("var_array", False):
                data = parent._data
                assert data is not None
        if data is not None:
            assert myname is not None
            data[myname] = self._data
            # C-visible pointer/length fields must track the new buffers
            data["PTR_" + myname] = self._data.ctypes.data
            data["LEN_" + myname] = self._Len.copy()
            self._Len = data["LEN_" + myname]
            if self._arity > 1:
                data["SHAPE_" + myname] = self._data.shape
                data["PTR_LEN_" + myname] = self._Len.ctypes.data

    def make_numpy(self, _toplevel=None):
        """Sets the internal storage to 'numpy'
        Returns the numpy array that is used as internal storage buffer
        NOTE: for optional members,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          an extra field "HAS_xxx" indicates if the data is present.
        TODO: update doc
        NOTE: for numpy array members of variable shape,
          an extra field "PTR_xxx" contains a C pointer to the data
          For this, the dimensionality of the array does not matter,
           e.g. both for IntegerArray and IntegerArrayArray,
            the C pointer will be "int *"
           and both for MyStructArray and MyStructArrayArray,
            the C pointer will be "MyStruct *"
        """
        from .silkarray import SilkArray
        if self.storage == "numpy":
            return self._data

        dtype = np.dtype(self.dtype, align=True)
        shape = self._get_outer_shape()
        data = np.zeros(dtype=dtype, shape=shape)
        lengths = _get_lenarray_empty(shape)
        lengths[0] = len(self)
        if self._elementary:
            # NOTE(review): passes self twice (bound call plus explicit
            # argument); sibling code calls the module-level
            # _set_numpy_ele_range(self, ...) — confirm which is intended
            self._set_numpy_ele_range(self, 0, len(self._data), self._data,
                                      self._arity, data)
        else:
            for childnr, child in enumerate(self._get_children()):
                child.make_numpy(_toplevel=False)
                if self._arity > 1:
                    # BUG FIX: index with a *tuple* of slices; indexing a
                    # numpy array with a list of slices is deprecated and
                    # rejected by modern numpy
                    slices = tuple(slice(0, v) for v in child._data.shape)
                    data[childnr][slices] = child._data
                else:
                    try:
                        data[childnr] = child._data
                    except ValueError:  #numpy bug
                        for field in child._data.dtype.names:
                            data[childnr][field] = child._data[field]
                if self._arity > 1:
                    # Fold the child's length vector into our packed one
                    child_size = _get_lenarray_size(shape[1:])
                    start = 1 + child_size * childnr
                    arr1 = lengths[start:start + child_size]
                    shape1 = data.shape[1:]
                    arr2 = child._Len
                    shape2 = child._data.shape
                    _lenarray_copypad(arr1, shape1, arr2, shape2)

        self._init(self._parent(), "numpy", data, lengths)
        parent = self._parent()
        if parent is not None:
            if parent.storage != "numpy":
                parent._add_nonjson_child(self)
        for child in self._get_children():
            child._restore_array_coupling()

        return data

    def lengths(self):
        """Return the packed length vector (numpy storage only)."""
        assert self.storage == "numpy"
        return self._Len

    def realloc(self, *shape):
        assert self.storage == "numpy"
        if len(shape) == 1 and isinstance(shape[0], tuple):
            shape = shape[0]
        parent = self._parent()
        if parent is not None:
            myname = parent._find_child(id(self))
            if parent.storage == "numpy":
                if not parent._props[myname].get("var_array", False):
                    raise Exception("Cannot reallocate numpy array that is\
part of a larger numpy buffer. Use numpy_shatter() on the parent to allow\
reallocation")
        if len(shape) != self._arity:
            msg = "Shape must have %d dimensions, not %d"
            raise ValueError(msg % (self._arity, len(shape)))
        min_shape = self._data.shape
        for n in range(self._arity):
            msg = "Dimension %d: shape must have at least length %d, not %d"
            if min_shape[n] > shape[n]:
                raise ValueError(msg % (n + 1, min_shape[n], shape[n]))
        old_data = self._data
        old_len = self._Len
        self._data = np.zeros(dtype=self.dtype, shape=shape)
        slices = [slice(0, s) for s in min_shape]
        self._data[slices] = old_data
        self._Len = _get_lenarray_empty(shape)
        _lenarray_copypad(self._Len, shape, old_len, old_data.shape)
        self._init(parent, "numpy", self._data, self._Len)
        self._restore_array_coupling()

    def _find_child(self, child_id):
        """Return the key (numpy) or index (json) under which the child
        with id() == child_id is stored; raise KeyError if absent."""
        if self.storage == "numpy":
            candidates = self._children.items()
        else:
            candidates = enumerate(self._children)
        for name, candidate in candidates:
            if id(candidate) == child_id:
                return name
        raise KeyError

    def _add_nonjson_child(self, child):
        """Mark child as having non-JSON storage; the first such child
        flips this array to "mixed" and propagates the mark upward."""
        assert self.storage != "numpy"
        registry = self._storage_nonjson_children
        child_id = id(child)
        if child_id in registry:
            return
        registry.add(child_id)
        if self.storage != "json":
            return
        self.storage = "mixed"
        parent = self._parent()
        if parent is not None:
            parent._add_nonjson_child(self)

    def _remove_nonjson_child(self, child):
        """Unmark child as non-JSON; when no such children remain this
        array reverts to "json" storage and notifies its parent."""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id in njc:
            assert self.storage == "mixed", self.storage
            njc.remove(child_id)
            if len(njc) == 0:
                self.storage = "json"
                parent = self._parent()
                if parent is not None:
                    # BUG FIX: "parent" is already the dereferenced object
                    # (cf. _add_nonjson_child); the original called
                    # parent(), which raised TypeError here.
                    parent._remove_nonjson_child(self)

    def numpy_shatter(self):
        """
        Breaks up a unified numpy storage into one numpy storage per child
        """
        assert self.storage == "numpy"
        assert not self._elementary
        # The parent's buffer embeds ours, so it must be shattered first
        parent = self._parent()
        if parent is not None and parent.storage == "numpy":
            parent.numpy_shatter()
        data = []
        children = []
        for child in self._get_children():
            # Give each child its own copy of its slice of the buffer
            d = datacopy(child._data)
            data.append(d)
            child._data = d
            children.append(child)
        self._data = data
        self._children = children
        # Every child now holds non-JSON storage of its own
        self._storage_nonjson_children = set([id(p) for p in children])
        self.storage = "mixed"

    def _construct(self, prop_setter, *args):
        """Fill this array from the element values in args.

        For numpy storage the values are written into the existing
        buffer; for JSON storage children are created or discarded so
        that exactly len(args) elements remain.
        """
        old_data = self._data
        old_children = self._children
        with _ArrayConstructContext(self):
            if self.storage == "numpy":
                if len(args) > len(self._data):
                    # BUG FIX: the message referenced an undefined name
                    # "data" (NameError); use self._data
                    msg = "index {0} is out of bounds for axis with size {1}"\
                          .format(len(args), len(self._data))
                    raise IndexError(msg)
                if self._elementary:
                    _set_numpy_ele_range(self, 0, len(args), args, self._arity)
                else:
                    for anr, a in enumerate(args):
                        child = self._get_child(anr)
                        child.set(args[anr], prop_setter=prop_setter)
            else:
                if self._elementary:
                    newdata = []
                    for anr, a in enumerate(args):
                        v = self._element(a)
                        newdata.append(v)
                    self._data[:] = newdata
                else:
                    # Grow: add empty slots plus child wrappers
                    for n in range(self._len, len(args)):
                        if issubclass(self._element, SilkArray):
                            self._data.append([])
                        else:
                            self._data.append({})
                        child = self._element(
                            _mode="parent",
                            storage=self.storage,
                            parent=self,
                            data_store=self._data[n],
                            len_data_store=self._get_child_lengths(n))
                        self._children.append(child)

                    for n in range(len(args)):
                        child = self._children[n]
                        child.set(args[n], prop_setter=prop_setter)

                    # Shrink: drop surplus children/data
                    if len(args) < self._len:
                        self._children[:] = self._children[:len(args)]
                        if self.storage == "numpy":
                            # NOTE(review): unreachable — this is the
                            # non-numpy branch; kept verbatim for safety
                            self._data[len(args):] = \
                              np.zeros_like(self._data[len(args):])
                        else:
                            self._data[:] = self._data[:len(args)]

        self._len = len(args)

    def _construct_from_numpy(self, arr, lengths):
        """Fill this array from a numpy buffer (the data is copied)."""
        if self.storage != "numpy":
            # Adopt the buffer temporarily, then convert to JSON storage
            self._init(self._parent(), "numpy", arr, lengths)
            self.make_json()
            return

        self._check_numpy_args(arr,
                               lengths,
                               self_data=self._data,
                               length_can_be_none=False)
        # NOTE(review): with length_can_be_none=False the check above does
        # not accept lengths=None, so this branch looks unreachable — confirm
        if lengths is None:
            lengths = _get_lenarray_full(arr.shape)

        self._data = datacopy(arr)
        self._Len = lengths.copy()
        self._restore_array_coupling()

    def _parse(self, s):
        """Parse a string into array content; subclasses may override."""
        raise NotImplementedError  # can be user-defined

    _storage_names = ("numpy", "json", "mixed")

    @property
    def storage(self):
        """Current storage mode: "numpy", "json" or "mixed"."""
        return self._storage_names[self._storage_enum]

    @storage.setter
    def storage(self, storage):
        assert storage in self._storage_names, storage
        enum = self._storage_names.index(storage)
        self._storage_enum = enum

    def __dir__(self):
        # Slots-based instances: expose class-level attributes only
        return dir(type(self))

    def __setattr__(self, attr, value):
        """Route non-private attribute writes through _set_prop."""
        is_internal = attr.startswith("_") or attr == "storage"
        if is_internal:
            object.__setattr__(self, attr, value)
        else:
            self._set_prop(attr, value, _prop_setter_any)

    def __getitem__(self, item):
        """Index or slice access; a slice returns a new array of copies."""
        if isinstance(item, slice):
            picked = [self[v] for v in range(*item.indices(len(self)))]
            return type(self)(picked)
        if not isinstance(item, int):
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, item.__class__.__name__))
        if not self._elementary:
            return self._get_child(item)
        if self.storage == "numpy":
            return _get_numpy_ele_prop(self, item, self._len)
        return self._data[:self._len][item]

    def _set_prop(self, item, value, prop_setter=_prop_setter_any):
        """Assign value to position item (negative indices allowed)."""
        if self._elementary:
            if self.storage == "numpy":
                _set_numpy_ele_prop(self, item, value)
            else:
                # BUG FIX: negative indices were mapped with
                # "item = self._len - item" (e.g. -1 -> len+1); the
                # correct wrap-around is item += self._len, as in
                # _get_child.
                if item < 0:
                    item += self._len
                if item < 0 or item >= self._len:
                    raise IndexError(item)
                self._data[item] = self._element(value)
        else:
            child = self._get_child(item)
            child.set(value, prop_setter=prop_setter)

    def __setitem__(self, item, value):
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            indices = list(range(start, stop, stride))
            if len(indices) != len(value):
                msg = "Cannot assign to a slice of length %d using \
a sequence of length %d"

                raise IndexError(msg % (len(indices), len(value)))
            for n in indices:
                self._set_prop(n, value[n])
            return
        elif isinstance(item, int):
            self._set_prop(item, value)
        else:
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, item.__class__.__name__))

    def __delitem__(self, item):
        """Delete one element, or every element addressed by a slice."""
        if isinstance(item, int):
            self.pop(item)
            return
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            # pop from the back so earlier indices remain valid
            for n in reversed(range(start, stop, stride)):
                self.pop(n)
            return
        msg = "{0} indices must be integers or slices, not {1}"
        raise TypeError(
            msg.format(self.__class__.__name__, item.__class__.__name__))

    def pop(self, index=-1):
        """Remove and return the element at "index" (default: last).

        For numpy storage a copy of the element is returned and the
        remaining elements are shifted down within the buffer.
        """
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, index.__class__.__name__))
        if index < 0:
            index += self._len
        if index < 0:
            raise IndexError
        # NOTE(review): no check for index >= self._len here; the numpy
        # path could read padding for an out-of-range index — confirm
        if self.storage == "numpy":
            ret_data = datacopy(self._data[index])
            ret_lengths = None
            if self._arity > 1:
                # NOTE(review): an all-zero length vector is attached to
                # the popped element, not its actual lengths — confirm
                ret_lengths = _get_lenarray_empty(ret_data.shape)
            ret = self._element(
                _mode="from_numpy",
                data_store=ret_data,
                len_data_store=ret_lengths,
            )
            # Shift the tail left by one and zero the vacated slot
            self._data[index:self._len - 1] = self._data[index + 1:self._len]
            try:
                self._data[self._len - 1] = np.zeros_like(
                    self._data[self._len - 1])
            except ValueError:  # numpy bug
                for field in self._data.dtype.fields:
                    self._data[self._len - 1][field] = np.zeros_like(
                        self._data[self._len - 1][field])
            self._del_child_lengths(index)
        elif self._elementary:
            ret = self._data[:self._len][index]
            self._data.__delitem__(index)
        else:
            ret = self._children[:self._len][index].copy()
            self._children.__delitem__(index)
            self._data.__delitem__(index)
        self._len -= 1
        return ret

    def __len__(self):
        """Number of valid elements; len(self) is required by
        _get_outer_shape and make_numpy."""
        # BUG FIX: this method was apparently mangled to "_Len__",
        # leaving the class without __len__ even though len(self) is
        # used elsewhere in it. The old name is kept as an alias for
        # backward compatibility.
        return self._len

    _Len__ = __len__

    def _print(self, spaces):
        """Render the array as "ClassName ( ... )" with the given left
        indentation (in spaces)."""
        ret = "{0} (\n".format(self.__class__.__name__)
        for n in range(self._len):
            if self._elementary:
                value = self._data[n]
                if self.storage == "numpy":
                    if value.dtype.kind == 'S':
                        # bytes scalar: render as a quoted string
                        substr = '"' + value.decode() + '"'
                    else:
                        substr = str(value)
                else:
                    # NOTE(review): assumes JSON-stored elements expose
                    # _print; plain Python scalars would not — confirm
                    substr = value._print(spaces + 2)
            else:
                value = self._get_child(n)
                substr = value._print(spaces + 2)
            ret += "{0}{1},\n".format(" " * (spaces + 2), substr)
        ret += "{0})".format(" " * spaces)
        return ret

    def __str__(self):
        """Human-readable rendering, delegating to _print at indent 0."""
        return self._print(0)

    def __repr__(self):
        """Same rendering as __str__ (delegates to _print)."""
        return self._print(0)

    def clear(self):
        """Empty the array by assigning it an empty list."""
        self.set([])

    def append(self, item):
        """Add *item* after the current last element."""
        tail = self._len
        self.insert(tail, item)

    def insert(self, index, item):
        """Insert *item* before position *index*.

        Negative indices count from the end (as for list.insert), but an
        index that is still negative after adjustment raises IndexError.
        For numpy storage the tail is shifted one slot to the right inside
        the pre-allocated buffer; for json storage a child/_data entry is
        inserted under an _ArrayInsertContext.

        Raises:
            TypeError: *index* is not an int.
            IndexError: index invalid, or (numpy storage) the pre-allocated
                buffer is already full.
        """
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, index.__class__.__name__))
        if index < 0:
            index += self._len
        if index < 0:
            raise IndexError
        if self.storage == "numpy":
            if self._len >= len(self._data):
                raise IndexError("Numpy array overflows allocated space")
            if not self._elementary:
                ele = self._element(item)
                child_data = ele.make_numpy()
                child_lengths = None
                if self._arity > 1:
                    child_lengths = ele.lengths()
                # open up slot `index` by shifting the tail right by one
                self._data[index + 1:self._len +
                           1] = self._data[index:self._len]
                if self._arity > 1:
                    # child_data may be smaller than the allocated slot:
                    # write only its extent.  Must be a *tuple* of slices —
                    # indexing with a list of slices was deprecated in
                    # NumPy 1.15 and is an error in modern NumPy.
                    slices = tuple(slice(0, v) for v in child_data.shape)
                    self._data[index][slices] = child_data
                else:
                    self._data[index] = child_data
                self._insert_child_lengths(index, child_lengths)
            else:
                self._data[self._len] = item  # dry run
                self._data[index + 1:self._len +
                           1] = self._data[index:self._len]
                self._data[index] = item  # should give no exception now
            self._len += 1
        else:
            with _ArrayInsertContext(self, index):
                if self._elementary:
                    self._data[index] = item
                else:
                    child = self._element(
                        _mode="parent",
                        storage=self.storage,
                        parent=self,
                        data_store=self._data[index],
                        len_data_store=self._get_child_lengths(index))
                    self._children.insert(index, child)
                    child.set(item)

    def __eq__(self, other):
        """Content equality; only other SilkArrays can compare equal."""
        if not isinstance(other, SilkArray):
            return False
        if self.storage == "json" and other.storage == "json":
            # both json: the raw _data stores are directly comparable
            return self._data == other._data
        # numpy _data is not directly comparable (PTR fields, differing
        # allocation sizes) — compare the canonical JSON forms instead
        return self.json() == other.json()

    def __len__(self):
        """Number of elements currently stored."""
        return self._len

    def _clear_data(self):
        """Zero the backing store; recurse into children for non-numpy storage."""
        store = self._data
        if self.storage == "numpy":
            # wipe the whole numpy buffer in place
            store[:] = np.zeros_like(store)
        else:
            for child in self._get_children():
                child._clear_data()