def __setitem__(self, key, value):
    """Assign ``value`` at position(s) ``key`` after a dtype check.

    Args:
        key: An integer position, or any other index object that is passed
            through to ``self._data.iloc`` (for example a slice or list).
        value: The new element, or the new elements for a multi-position
            key.

    Raises:
        KeyError: If ``key`` is a float or a single bool.
        TypeError: If ``key`` is a string or a tuple.
        ValueError: If ``value`` is rejected by the dtype validators.
    """
    # Reject unsupported key types up front.
    if is_float(key):
        msg = 'array index cannot be float; please cast to int'
        raise KeyError(msg)
    if is_bool(key):
        msg = 'logical indexing must provide an iterable of full length'
        raise KeyError(msg)
    if is_string(key):
        msg = 'array index cannot be string'
        raise TypeError(msg)
    if isinstance(key, tuple):
        msg = ('tuple is ambiguous because it can refer to dual-indexing; '
               'convert index to list')
        raise TypeError(msg)

    # An integer key always targets one element; for any other key the value
    # is checked as an element when scalar and as an iterable otherwise.
    if is_integer(key) or is_scalar(value):
        accepted = self._is_valid_dtype_element(value)
    else:
        accepted = self._is_valid_dtype_iterable(value)
    if not accepted:
        msg = 'value type does not match array dtype = {}'
        raise ValueError(msg.format(self.dtype.__name__))

    self._data.iloc[key] = value
    # Keep the backing data as object dtype and re-infer the element type
    # from the updated contents.
    self._data = self._data.astype(object)
    self.dtype = infer_dtype(self._data)
def __delitem__(self, key):
    """Delete the element(s) addressed by ``key``.

    Args:
        key: An integer position (negative values count from the end), a
            slice, or an iterable of ints / a logical (bool) iterable.

    Raises:
        KeyError: If ``key`` is a float or a single bool, or if an integer
            position does not exist.
        TypeError: If ``key`` is a string or a tuple.
        IndexError: If ``key`` is none of the supported kinds.
    """
    if is_float(key):
        msg = 'array index cannot be float; please cast to int'
        raise KeyError(msg)
    elif is_bool(key):
        msg = 'logical indexing must provide an iterable of full length'
        raise KeyError(msg)
    elif is_string(key):
        msg = 'array index cannot be string'
        raise TypeError(msg)
    elif isinstance(key, tuple):
        msg = ('tuple is ambiguous because it can refer to dual-indexing; '
               'convert index to list')
        raise TypeError(msg)
    elif is_integer(key):
        # We can only drop by pd.Series index (not using row numbers).
        # So, we ensure that the Series index is the same as row numbers.
        self._data.reset_index(drop=True, inplace=True)
        # Deletion is label-based, so a negative position has to be turned
        # into the matching non-negative label first. An out-of-range value
        # stays negative and still raises KeyError below.
        if key < 0:
            key += len(self._data)
        del self._data[key]
        self.dtype = infer_dtype(self._data)
    elif isinstance(key, slice):
        key = range(*key.indices(len(self)))
        self._del_by_iterable(key)
        # Refresh dtype here as the other deleting branches do; whether
        # _del_by_iterable already does so is not visible from this method.
        self.dtype = infer_dtype(self._data)
    elif isinstance(key, Iterable):
        if infer_dtype(key) is bool:
            key = self._convert_logical_index_to_int_index(key)
        self._del_by_iterable(key)
        self.dtype = infer_dtype(self._data)
    else:
        msg = 'index can only be int or iterable (int, bool)'
        raise IndexError(msg)
def short_str(x, n_chars=5):
    """Abbreviate a long string by replacing its middle with ``...``.

    Args:
        x: The string to shorten.
        n_chars: Positive integer controlling how much of each end is kept.

    Returns:
        ``x`` unchanged when it is shorter than ``2 * n_chars + 5``
        characters; otherwise the first ``n_chars + 1`` characters, an
        ellipsis, and the last ``n_chars`` characters.
    """
    assert is_string(x)
    assert is_integer(n_chars)
    assert n_chars > 0

    threshold = (2 * n_chars) + 5
    if len(x) >= threshold:
        head = x[:(n_chars + 1)]
        tail = x[-n_chars:]
        return '{}...{}'.format(head, tail)
    return x
def nice_str(x):
    """Return a short display string for ``x``.

    Objects exposing ``__name__`` are shown by that name, strings by their
    ``repr`` (so the quotes are visible), and everything else by ``str``.
    The chosen text is then abbreviated with ``short_str``.
    """
    if hasattr(x, '__name__'):
        return short_str(x.__name__)
    if is_string(x):
        return short_str(repr(x))
    return short_str(str(x))
def isin(self, values):
    """Flag, element by element, whether each item occurs in ``values``.

    Args:
        values: A non-string iterable of candidates. One-shot iterators
            such as generators are supported.

    Returns:
        An ``Array`` of booleans with one entry per element of ``self``;
        an entry is True when ``identical`` reports a match between that
        element and at least one candidate.

    Raises:
        ValueError: If ``values`` is a string or is not iterable.
    """
    if not isinstance(values, Iterable) or is_string(values):
        msg = 'values must be an iterable container'
        raise ValueError(msg)

    # Materialize once: the candidates are scanned again for every element,
    # which would silently exhaust a generator after the first element.
    candidates = list(values)
    # any() tests truthiness, so truthy non-bool results from identical()
    # also count as a match and stop the scan early.
    return Array([any(identical(element, candidate)
                      for candidate in candidates)
                  for element in self])
def _append_new_column(self, name, value):
    """Append ``value`` as a new column called ``name``.

    Args:
        name: Column name; must be a string not already in ``self._names``.
        value: Column content, converted with ``self._create_array``.

    Raises:
        ValueError: If the converted column's length differs from
            ``self._nrow``.
    """
    assert is_string(name)
    assert name not in self._names

    column = self._create_array(value)
    if len(column) != self._nrow:
        msg = 'value does not have match existing number of rows = {}'
        raise ValueError(msg.format(self._nrow))

    # FIXME: Override Array.append by checking for type
    self._names.extend(Array([name]))
    self._data.extend(Array([column]))
    self._update_nrow_ncol()
    self._update_names_to_index()
def __setitem__(self, key, value):
    """Assign whole columns, or cells via a ``(rowkey, colkey)`` tuple.

    Args:
        key: An int column position, a string column name (an unknown name
            appends a new column), a slice or non-tuple iterable of
            columns, or a 2-tuple ``(rowkey, colkey)`` for dual indexing.
        value: A scalar (applied to every selected column), a numpy array,
            a pandas DataFrame, or an iterable with one entry per selected
            column.

    Raises:
        KeyError: If ``key`` is a float, a single bool, a tuple whose
            length is not 2, or any other unsupported type.
        ValueError: If ``value`` is a pandas Series, has a different number
            of entries than ``key``, or is of an unsupported type.
    """
    if is_float(key):
        msg = 'float index is not supported; please cast to int'
        raise KeyError(msg)
    elif is_bool(key):
        msg = 'logical indexing must provide a list of full length'
        raise KeyError(msg)
    elif is_integer(key):
        self._setitem_using_int_key(key, value)
    elif is_string(key):
        if key in self._names:
            self._setitem_using_int_key(self._names_to_index[key], value)
        else:
            self._append_new_column(key, value)
    elif isinstance(key, (slice, Iterable)) and not isinstance(key, tuple):
        key = self._parse_colkey(key)
        if is_scalar(value):
            for k in key:
                self._setitem_using_int_key(k, value)
        # np.ndarray is the array type; np.array is a factory function and
        # makes isinstance() itself raise TypeError.
        elif isinstance(value, np.ndarray):
            self._setitem_using_list_of_int_key_numpy_value(key, value)
        elif isinstance(value, pd.Series):
            msg = ('pandas Series is not supported, please use pandas '
                   'DataFrame instead')
            raise ValueError(msg)
        elif isinstance(value, pd.DataFrame):
            self._setitem_using_list_of_int_key_pandas_value(key, value)
        elif isinstance(value, Iterable):
            if len(key) == len(value):
                for k, v in zip(key, value):
                    self._setitem_using_int_key(k, v)
            else:
                msg = ('key and value do not have the same number '
                       'of columns')
                raise ValueError(msg)
        else:
            msg = 'cannot assign {} type value'.format(type(value))
            raise ValueError(msg)
    elif isinstance(key, tuple):
        # Dual Indexing. Set both rows and columns.
        if len(key) == 2:
            rowkey = key[0]
            colkey = key[1]
            self._setitem_using_rowkey_colkey(rowkey, colkey, value)
        else:
            msg = 'tuple indexing must have exactly 2 elements'
            raise KeyError(msg)
    else:
        # Catchall for all other addresses
        msg = 'key must be int, string, list, slice, or a 2-tuple'
        raise KeyError(msg)
def _setitem_using_rowkey_colkey(self, rowkey, colkey, value):
    """Assign ``value`` to the cells selected by ``rowkey`` and ``colkey``.

    Args:
        rowkey: Row selector, passed unchanged to each selected column's
            own item assignment.
        colkey: An int column position, a string column name, or a slice
            or list of columns.
        value: A scalar (applied to every selected column), a numpy array,
            a pandas DataFrame, or an iterable with one entry per selected
            column.

    Raises:
        KeyError: If ``colkey`` is a float, a single bool, an unknown
            column name, or any other unsupported type.
        ValueError: If ``value`` is a pandas Series, has a different number
            of entries than ``colkey``, or is of an unsupported type.
    """
    if is_float(colkey):
        msg = 'float index is not supported; please cast to int'
        raise KeyError(msg)
    elif is_bool(colkey):
        msg = 'logical indexing must provide a list of full length'
        raise KeyError(msg)
    elif is_integer(colkey):
        self._data[colkey][rowkey] = value
    elif is_string(colkey):
        colkey = self._names_to_index[colkey]
        self._data[colkey][rowkey] = value
    elif isinstance(colkey, (slice, list)):
        colkey = self._parse_colkey(colkey)
        if is_scalar(value):
            for k in colkey:
                self._data[k][rowkey] = value
        # np.ndarray is the array type; np.array is a factory function and
        # makes isinstance() itself raise TypeError.
        elif isinstance(value, np.ndarray):
            self._setitem_elements_using_list_of_int_key_numpy_value(
                rowkey, colkey, value)
        elif isinstance(value, pd.Series):
            msg = ('pandas Series is not supported, please use pandas '
                   'DataFrame instead')
            raise ValueError(msg)
        elif isinstance(value, pd.DataFrame):
            self._setitem_elements_using_list_of_int_key_pandas_value(
                rowkey, colkey, value)
        elif isinstance(value, Iterable):
            if len(colkey) == len(value):
                for k, v in zip(colkey, value):
                    self._data[k][rowkey] = v
            else:
                msg = ('key and value do not have the same number '
                       'of columns')
                raise ValueError(msg)
        else:
            msg = 'cannot assign {} type value'.format(type(value))
            raise ValueError(msg)
    else:
        # Catchall for all other addresses
        msg = 'column key must be int, string, list, or slice'
        raise KeyError(msg)
def __getitem__(self, key):
    """Return one element, or a new array holding the selected elements.

    Args:
        key: An integer position (returns that element), or any other
            index object accepted by ``self._data.iloc``. An iterable of
            bools is first converted to integer positions.

    Raises:
        TypeError: If ``key`` is a float, a single bool, a string, or a
            tuple.
    """
    # Reject unsupported key types up front.
    if is_float(key):
        msg = 'array index cannot be float; please cast to int'
        raise TypeError(msg)
    if is_bool(key):
        msg = 'logical indexing must provide an iterable of full length'
        raise TypeError(msg)
    if is_string(key):
        msg = 'array index cannot be string'
        raise TypeError(msg)
    if isinstance(key, tuple):
        msg = ('tuple is ambiguous because it can refer to dual-indexing; '
               'convert index to list')
        raise TypeError(msg)

    if is_integer(key):
        return self._data.iloc[key]

    # A logical mask becomes integer positions; every remaining key kind
    # then goes through the same positional lookup.
    if isinstance(key, Iterable) and infer_dtype(key) is bool:
        key = self._convert_logical_index_to_int_index(key)
    selection = self._data.iloc[key]
    return type(self)(_ArraySlice(selection))
def __delitem__(self, key):
    """Delete columns, or rows via a ``(rowkey, colkey)`` tuple.

    Args:
        key: An int column position, a string column name, a slice or
            non-tuple iterable of columns, or a 2-tuple
            ``(rowkey, colkey)`` in which at least one side is ``:``.
            ``del df[rows, :]`` deletes rows and ``del df[:, cols]``
            deletes columns.

    Raises:
        KeyError: If ``key`` is a float, a single bool, an unknown column
            name, a tuple whose length is not 2, a 2-tuple in which
            neither side is ``:``, or any other unsupported type.
    """
    if is_float(key):
        msg = 'float index is not supported; please cast to int'
        raise KeyError(msg)
    elif is_bool(key):
        msg = 'logical indexing must provide a list of full length'
        raise KeyError(msg)
    elif is_integer(key):
        self._delitem_colkey([key])
    elif is_string(key):
        self._delitem_colkey([self._names_to_index[key]])
    elif isinstance(key, (slice, Iterable)) and not isinstance(key, tuple):
        self._delitem_colkey(key)
    elif isinstance(key, tuple):
        # Dual Indexing. Delete either rows or columns.
        if len(key) != 2:
            msg = 'tuple indexing must have exactly 2 elements'
            raise KeyError(msg)
        rowkey, colkey = key
        if isinstance(colkey, tuple):
            colkey = list(colkey)
        # Check the type before comparing with slice(None): keys that
        # compare element-wise (e.g. numpy arrays) would otherwise give an
        # ambiguous truth value.
        if isinstance(colkey, slice) and colkey == slice(None):
            # Form: del df[<something>, :]
            self._delitem_rowkey(rowkey)
        elif isinstance(rowkey, slice) and rowkey == slice(None):
            # Form: del df[:, <something-but-not-:>]
            del self[colkey]
        else:
            # Neither colkey nor rowkey is `:`
            msg = 'either row key or column key must be :'
            raise KeyError(msg)
    else:
        # Catchall for all other addresses, matching __setitem__ instead of
        # silently ignoring an unsupported key.
        msg = 'key must be int, string, list, slice, or a 2-tuple'
        raise KeyError(msg)
def __getitem__(self, key):
    """Select columns, or rows and columns via a ``(rowkey, colkey)`` tuple.

    Args:
        key: An int column position or string column name (returns that
            column), a slice or non-tuple iterable of positions/names
            (returns a new frame), or a 2-tuple ``(rowkey, colkey)`` for
            dual indexing.

    Returns:
        A single column, a single column indexed by ``rowkey``, or a new
        instance of this class built from the selected data and names.

    Raises:
        KeyError: If a key is a float, a single bool, an unknown column
            name, selects duplicate column names, is a tuple whose length
            is not 2, or is of any other unsupported type.
    """
    if is_float(key):
        msg = 'float index is not supported; please cast to int'
        raise KeyError(msg)
    elif is_bool(key):
        msg = 'logical indexing must provide a list of full length'
        raise KeyError(msg)
    elif is_integer(key):
        return self._data[key]
    elif is_string(key):
        return self._data[self._names_to_index[key]]
    elif isinstance(key, slice):
        return type(self)(_DataFrameSlice(self._data[key],
                                          self._names[key]))
    elif isinstance(key, Iterable) and not isinstance(key, tuple):
        if is_iterable_string(key):
            key = [self._names_to_index[k] for k in key]
        if not is_iterable_unique(self._names[key]):
            msg = 'duplicate column names found'
            raise KeyError(msg)
        return type(self)(_DataFrameSlice(self._data[key],
                                          self._names[key]))
    elif isinstance(key, tuple):
        # Dual Indexing. Select both rows and columns.
        if len(key) != 2:
            msg = 'tuple indexing must have exactly 2 elements'
            raise KeyError(msg)
        rowkey = key[0]
        colkey = key[1]
        if is_float(colkey):
            msg = ('float column index is not supported; '
                   'please cast to int')
            raise KeyError(msg)
        elif is_bool(colkey):
            msg = ('logical column indexing must provide a '
                   'list of full length')
            raise KeyError(msg)
        elif is_integer(colkey):
            return self._data[colkey][rowkey]
        elif is_string(colkey):
            return self._data[self._names_to_index[colkey]][rowkey]
        elif isinstance(colkey, (slice, Iterable)):
            # Column names are translated to positions; slices and integer
            # iterables are used as they are.
            if isinstance(colkey, Iterable) and is_iterable_string(colkey):
                colkey = [self._names_to_index[k] for k in colkey]
            _names = self._names[colkey]
            if not is_iterable_unique(_names):
                msg = 'duplicate column names found'
                raise KeyError(msg)
            # Wrap a single row position in a list before indexing each
            # selected column for the new frame.
            if is_integer(rowkey):
                rowkey = [rowkey]
            _data = Array(
                [column[rowkey] for column in self._data[colkey]])
            return type(self)(_DataFrameSlice(_data, _names))
        else:
            # Catchall for all other column addresses
            msg = ('column address must be int, string, slice,'
                   ' or iterable')
            raise KeyError(msg)
    else:
        # Catchall for all other addresses. Every iterable, tuple or not,
        # has already been handled by the branches above.
        msg = 'address must be int, string, list, slice, or a 2-tuple'
        raise KeyError(msg)