def __setitem__(self, key, value):
    """Assign ``value`` at position(s) ``key`` after validating it.

    Keys reported as float or bool are rejected with ``KeyError``; keys
    reported as string, and tuple keys, are rejected with ``TypeError``.
    For an integer key, or any other key combined with a scalar value,
    the value must pass ``_is_valid_dtype_element``; otherwise it must
    pass ``_is_valid_dtype_iterable``.  A value that fails its check
    raises ``ValueError``.

    After a successful assignment the backing data is cast to ``object``
    and ``self.dtype`` is recomputed with ``infer_dtype``.
    """
    # Reject unsupported key kinds up front.
    if is_float(key):
        raise KeyError('array index cannot be float; please cast to int')
    if is_bool(key):
        raise KeyError(
            'logical indexing must provide an iterable of full length')
    if is_string(key):
        raise TypeError('array index cannot be string')
    if isinstance(key, tuple):
        raise TypeError(
            'tuple is ambiguous because it can refer to dual-indexing; '
            'convert index to list')

    # An integer key always addresses one element; any other key accepts
    # either a scalar (written to every addressed position) or an
    # iterable of values.
    if is_integer(key) or is_scalar(value):
        accepted = self._is_valid_dtype_element(value)
    else:
        accepted = self._is_valid_dtype_iterable(value)

    if not accepted:
        template = 'value type does not match array dtype = {}'
        raise ValueError(template.format(self.dtype.__name__))

    self._data.iloc[key] = value
    self._data = self._data.astype(object)
    self.dtype = infer_dtype(self._data)
def _init_from_dict(self, data):
    """Initialise the internal columns and names from a dict.

    Each value of ``data`` becomes one column wrapped in ``Array``.  When
    the dict mixes scalar and non-scalar values, every scalar is repeated
    to the common length of the non-scalar columns.

    Raises:
        ValueError: if the non-scalar columns differ in length, if the
            keys are not all strings (per ``is_iterable_string``), or if
            the final columns do not share one length.
        InternalError: if the mixed case yields no non-scalar length.
    """
    values = list(data.values())
    scalar_flags = [is_scalar(item) for item in values]

    if any(scalar_flags) and not all(scalar_flags):
        # Mixed input: wrap the non-scalar values first so their common
        # length is known before the scalars are expanded.
        columns = [None] * len(values)
        distinct_lengths = set()
        for position, (item, flag) in enumerate(zip(values, scalar_flags)):
            if flag:
                continue
            columns[position] = Array(item)
            distinct_lengths.add(len(columns[position]))

        if len(distinct_lengths) > 1:
            raise ValueError('columns do not have the same length')
        if not distinct_lengths:
            raise InternalError('you found a bug, please report it')
        (length,) = distinct_lengths

        # Repeat each scalar to match the non-scalar columns.
        for position, (item, flag) in enumerate(zip(values, scalar_flags)):
            if flag:
                columns[position] = Array([item] * length)
    else:
        # Either every value is scalar or none is: wrap each one as-is.
        columns = [Array(item) for item in values]

    # Column names must all be strings.
    if not is_iterable_string(data.keys()):
        raise ValueError('non string names are not allowed')
    names = data.keys()

    # Every column must end up with the same length.
    if not is_list_same([len(column) for column in columns]):
        raise ValueError('columns do not have the same lengths')

    # Store the curated state, then refresh the derived fields.
    self._data = Array(columns)
    self._names = Array(names)
    self._update_nrow_ncol()
    self._update_names_to_index()
def __mod__(self, other):
    """Return a new ``Array`` holding the element-wise ``self % other``.

    A scalar ``other`` is applied to every element.  An iterable ``other``
    is paired with ``self`` position by position and must have the same
    length (measured with ``get_length``).

    Raises:
        ValueError: if ``other`` is an iterable of a different length, or
            is neither a scalar nor an iterable.
    """
    if is_scalar(other):
        return Array([__mod__(e, other) for e in self])
    elif isinstance(other, Iterable):
        if len(self) == get_length(other):
            return Array([__mod__(x, y) for x, y in zip(self, other)])
        else:
            msg = 'iterables have different lengths'
            raise ValueError(msg)
    else:
        msg = 'cannot perform this operation with {} object'
        # Report the type of the offending operand.  The previous code
        # formatted ``type(object)``, which is always ``<class 'type'>``.
        raise ValueError(msg.format(type(other)))
def __setitem__(self, key, value):
    """Assign ``value`` to the column(s) or cells addressed by ``key``.

    Dispatch by key kind:

    * integer: delegate to ``_setitem_using_int_key``.
    * string: overwrite the column if the name is in ``self._names``,
      otherwise append it with ``_append_new_column``.
    * slice or non-tuple iterable: resolve with ``_parse_colkey`` and
      assign per column; the handling depends on the type of ``value``
      (scalar, numpy array, pandas DataFrame, or generic iterable).
    * 2-tuple: dual indexing ``(rowkey, colkey)``, delegated to
      ``_setitem_using_rowkey_colkey``.

    Raises:
        KeyError: for float or bool keys, tuples whose length is not 2,
            and any other unsupported key.
        ValueError: if ``value`` is a pandas Series, an iterable whose
            length differs from the number of resolved columns, or an
            unsupported type.
    """
    if is_float(key):
        msg = 'float index is not supported; please cast to int'
        raise KeyError(msg)
    elif is_bool(key):
        msg = 'logical indexing must provide a list of full length'
        raise KeyError(msg)
    elif is_integer(key):
        self._setitem_using_int_key(key, value)
    elif is_string(key):
        if key in self._names:
            self._setitem_using_int_key(self._names_to_index[key], value)
        else:
            self._append_new_column(key, value)
    elif isinstance(key, (slice, Iterable)) and not isinstance(key, tuple):
        key = self._parse_colkey(key)
        if is_scalar(value):
            # The same scalar is assigned to every addressed column.
            for k in key:
                self._setitem_using_int_key(k, value)
        elif isinstance(value, np.ndarray):
            # ``np.ndarray`` is the array type.  ``np.array`` is a factory
            # function and makes ``isinstance`` raise ``TypeError``.
            self._setitem_using_list_of_int_key_numpy_value(key, value)
        elif isinstance(value, pd.Series):
            msg = ('pandas Series is not supported, please use pandas '
                   'DataFrame instead')
            raise ValueError(msg)
        elif isinstance(value, pd.DataFrame):
            self._setitem_using_list_of_int_key_pandas_value(key, value)
        elif isinstance(value, Iterable):
            # One value per addressed column.
            if len(key) == len(value):
                for k, v in zip(key, value):
                    self._setitem_using_int_key(k, v)
            else:
                msg = ('key and value do not have the same number '
                       'of columns')
                raise ValueError(msg)
        else:
            msg = 'cannot assign {} type value'.format(type(value))
            raise ValueError(msg)
    elif isinstance(key, tuple):
        # Dual Indexing. Set both rows and columns.
        if len(key) == 2:
            rowkey = key[0]
            colkey = key[1]
            self._setitem_using_rowkey_colkey(rowkey, colkey, value)
        else:
            msg = 'tuple indexing must have exactly 2 elements'
            raise KeyError(msg)
    else:
        # Catchall for all other addresses
        msg = 'key must be int, string, list, slice, or a 2-tuple'
        raise KeyError(msg)
def _setitem_using_rowkey_colkey(self, rowkey, colkey, value):
    """Assign ``value`` to the cells selected by ``rowkey`` and ``colkey``.

    ``colkey`` selects the column(s); ``rowkey`` is passed unchanged as
    the index into each selected column:

    * integer: assign into ``self._data[colkey][rowkey]``.
    * string: translate the name through ``self._names_to_index`` first.
    * slice or list: resolve with ``_parse_colkey`` and assign per
      column; the handling depends on the type of ``value`` (scalar,
      numpy array, pandas DataFrame, or generic iterable).

    Raises:
        KeyError: for float or bool column keys and any other
            unsupported column key.
        ValueError: if ``value`` is a pandas Series, an iterable whose
            length differs from the number of resolved columns, or an
            unsupported type.
    """
    if is_float(colkey):
        msg = 'float index is not supported; please cast to int'
        raise KeyError(msg)
    elif is_bool(colkey):
        msg = 'logical indexing must provide a list of full length'
        raise KeyError(msg)
    elif is_integer(colkey):
        self._data[colkey][rowkey] = value
    elif is_string(colkey):
        colkey = self._names_to_index[colkey]
        self._data[colkey][rowkey] = value
    elif isinstance(colkey, (slice, list)):
        colkey = self._parse_colkey(colkey)
        if is_scalar(value):
            # The same scalar is written into every addressed column.
            for k in colkey:
                self._data[k][rowkey] = value
        elif isinstance(value, np.ndarray):
            # ``np.ndarray`` is the array type.  ``np.array`` is a factory
            # function and makes ``isinstance`` raise ``TypeError``.
            self._setitem_elements_using_list_of_int_key_numpy_value(
                rowkey, colkey, value)
        elif isinstance(value, pd.Series):
            msg = ('pandas Series is not supported, please use pandas '
                   'DataFrame instead')
            raise ValueError(msg)
        elif isinstance(value, pd.DataFrame):
            self._setitem_elements_using_list_of_int_key_pandas_value(
                rowkey, colkey, value)
        elif isinstance(value, Iterable):
            # One value per addressed column.
            if len(colkey) == len(value):
                for k, v in zip(colkey, value):
                    self._data[k][rowkey] = v
            else:
                msg = ('key and value do not have the same number '
                       'of columns')
                raise ValueError(msg)
        else:
            msg = 'cannot assign {} type value'.format(type(value))
            raise ValueError(msg)
    else:
        # Catchall for all other addresses
        msg = 'column key must be int, string, list, or slice'
        raise KeyError(msg)
def _create_array(self, value):
    """Wrap ``value`` in an ``Array``.

    A scalar is first repeated ``self._nrow`` times; any other value is
    passed to ``Array`` unchanged.
    """
    contents = [value] * self._nrow if is_scalar(value) else value
    return Array(contents)