def dict_to_manager(sdict, columns, index):
    """ create and return the block manager from a dict of series,
    columns, index """

    # from BlockManager perspective
    axes = [_ensure_index(columns), _ensure_index(index)]
    return create_block_manager_from_arrays(
        [sdict[c] for c in columns], columns, axes)
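# Hedged usage sketch: the public-API equivalent of building a frame from a
# dict of Series keyed by column (modern pandas routes this through the same
# block-manager construction path). The data below is illustrative only.
import pandas as pd

sdict = {'a': pd.Series([1.0, 2.0]), 'b': pd.Series([3.0, 4.0])}
df = pd.DataFrame(sdict, columns=['a', 'b'])
print(df.columns.tolist())  # ['a', 'b']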
def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'):
    from pandas.core.panel import _get_combined_index
    output = {}
    for item, df in compat.iteritems(frames):
        if not isinstance(df, SparseDataFrame):
            df = SparseDataFrame(df, default_kind=kind,
                                 default_fill_value=fill_value)
        output[item] = df

    if index is None:
        all_indexes = [df.index for df in output.values()]
        index = _get_combined_index(all_indexes)
    if columns is None:
        all_columns = [df.columns for df in output.values()]
        columns = _get_combined_index(all_columns)

    index = _ensure_index(index)
    columns = _ensure_index(columns)

    for item, df in compat.iteritems(output):
        if not (df.index.equals(index) and df.columns.equals(columns)):
            output[item] = df.reindex(index=index, columns=columns)

    return output, index, columns
def __setstate__(self, state):
    frames, items, major, minor, fv, kind = state

    self.default_fill_value = fv
    self.default_kind = kind

    self._items = _ensure_index(com._unpickle_array(items))
    self._major_axis = _ensure_index(com._unpickle_array(major))
    self._minor_axis = _ensure_index(com._unpickle_array(minor))
    self._frames = frames
def __setstate__(self, state):
    frames, items, major, minor, fv, kind = state

    from pandas.io.pickle import _unpickle_array

    self.default_fill_value = fv
    self.default_kind = kind

    self._items = _ensure_index(_unpickle_array(items))
    self._major_axis = _ensure_index(_unpickle_array(major))
    self._minor_axis = _ensure_index(_unpickle_array(minor))
    self._frames = frames
def __init__(self, values, items, ref_items, ndim=2):
    if issubclass(values.dtype.type, basestring):
        values = np.array(values, dtype=object)

    assert(values.ndim == ndim)
    assert(len(items) == len(values))

    self._ref_locs = None
    self.values = values
    self.ndim = ndim
    self.items = _ensure_index(items)
    self.ref_items = _ensure_index(ref_items)
def __init__(self, values, items, ref_items, ndim=2,
             do_integrity_check=False):
    if issubclass(values.dtype.type, basestring):
        values = np.array(values, dtype=object)

    assert values.ndim == ndim
    assert len(items) == len(values)

    self.values = values
    self.ndim = ndim
    self.items = _ensure_index(items)
    self.ref_items = _ensure_index(ref_items)

    if do_integrity_check:
        self._check_integrity()
def __init__(self, values, items, ref_items, ndim=2):
    if issubclass(values.dtype.type, basestring):
        values = np.array(values, dtype=object)

    if values.ndim != ndim:
        raise AssertionError('Wrong number of dimensions')

    if len(items) != len(values):
        raise AssertionError('Wrong number of items passed')

    self._ref_locs = None
    self.values = values
    self.ndim = ndim
    self.items = _ensure_index(items)
    self.ref_items = _ensure_index(ref_items)
def __init__(self, bins, binlabels, filter_empty=False, mutated=False,
             indexer=None):
    self.bins = _ensure_int64(bins)
    self.binlabels = _ensure_index(binlabels)
    self._filter_empty_groups = filter_empty
    self.mutated = mutated
    self.indexer = indexer
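# Hedged illustration: the bins/labels pairing this grouper stores is what
# pd.cut plus groupby exposes publicly (example values are made up).
import pandas as pd

s = pd.Series([1, 5, 9])
grouped = s.groupby(pd.cut(s, bins=[0, 5, 10]), observed=True)
print(grouped.sum())  # one row per occupied bin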
def _interleave(self, items):
    """
    Return ndarray from blocks with specified item order
    Items must be contained in the blocks
    """
    dtype = _interleaved_dtype(self.blocks)
    items = _ensure_index(items)

    result = np.empty(self.shape, dtype=dtype)
    itemmask = np.zeros(len(items), dtype=bool)

    # By construction, all of the items should be covered by one of the
    # blocks
    if items.is_unique:
        for block in self.blocks:
            indexer = items.get_indexer(block.items)
            if (indexer == -1).any():
                raise AssertionError('Items must contain all block items')
            result[indexer] = block.get_values(dtype)
            itemmask[indexer] = 1
    else:
        for block in self.blocks:
            mask = items.isin(block.items)
            indexer = mask.nonzero()[0]
            if len(indexer) != len(block.items):
                raise AssertionError('All items must be in block items')
            result[indexer] = block.get_values(dtype)
            itemmask[indexer] = 1

    if not itemmask.all():
        raise AssertionError('Some items were not contained in blocks')

    return result
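# Minimal numpy sketch of the interleave idea, using assumed toy data (not
# the BlockManager API): each block scatters its rows into the result at the
# positions its items occupy in the requested item order.
import numpy as np

items = np.array(['a', 'b', 'c'])            # requested item order
block_items = np.array(['c', 'a'])           # items owned by one block
block_values = np.array([[30., 31.], [10., 11.]])

result = np.empty((3, 2))
indexer = np.array([np.where(items == it)[0][0] for it in block_items])
result[indexer] = block_values               # rows 2 and 0 are filled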
def _set_levels(self, levels):
    from pandas.core.index import _ensure_index
    levels = _ensure_index(levels)
    if not levels.is_unique:
        raise ValueError("Categorical levels must be unique")
    self._levels = levels
def __init__(self, frames, items=None, major_axis=None, minor_axis=None,
             default_fill_value=np.nan, default_kind='block'):
    assert(isinstance(frames, dict))

    self.default_fill_value = fill_value = default_fill_value
    self.default_kind = kind = default_kind

    # pre-filter, if necessary
    if items is None:
        items = Index(sorted(frames.keys()))
    items = _ensure_index(items)

    (clean_frames,
     major_axis,
     minor_axis) = _convert_frames(frames, major_axis, minor_axis,
                                   kind=kind, fill_value=fill_value)

    self._frames = clean_frames

    # do we want to fill missing ones?
    for item in items:
        if item not in clean_frames:
            raise Exception('column %s not found in data' % item)

    self._items = items
    self.major_axis = major_axis
    self.minor_axis = minor_axis
def reindex(self, index=None, method=None, copy=True, limit=None):
    """
    Conform SparseSeries to new Index

    See Series.reindex docstring for general behavior

    Returns
    -------
    reindexed : SparseSeries
    """
    new_index = _ensure_index(index)

    if self.index.equals(new_index):
        if copy:
            return self.copy()
        else:
            return self

    if len(self.index) == 0:
        # FIXME: inelegant / slow
        values = np.empty(len(new_index), dtype=np.float64)
        values.fill(nan)
        return SparseSeries(values, index=new_index,
                            fill_value=self.fill_value)

    new_index, fill_vec = self.index.reindex(index, method=method,
                                             limit=limit)
    new_values = common.take_1d(self.values, fill_vec)
    return SparseSeries(new_values, index=new_index,
                        fill_value=self.fill_value, name=self.name)
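# The reindex semantics above, demonstrated on a dense Series via the public
# API (SparseSeries itself is long gone from modern pandas): labels absent
# from the old index come back as NaN.
import pandas as pd

s = pd.Series([1.0, 2.0], index=['a', 'b'])
print(s.reindex(['a', 'b', 'c']))  # 'c' is introduced as NaN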
def __init__(self, frame, buf=None, columns=None, col_space=None,
             header=True, index=True, na_rep='NaN', formatters=None,
             justify=None, float_format=None, sparsify=None,
             index_names=True, line_width=None, **kwds):
    self.frame = frame
    self.buf = buf if buf is not None else StringIO()
    self.show_index_names = index_names

    if sparsify is None:
        sparsify = get_option("print.multi_sparse")

    self.sparsify = sparsify

    self.float_format = float_format
    self.formatters = formatters if formatters is not None else {}
    self.na_rep = na_rep
    self.col_space = col_space
    self.header = header
    self.index = index
    self.line_width = line_width

    if justify is None:
        self.justify = get_option("print.colheader_justify")
    else:
        self.justify = justify

    self.kwds = kwds

    if columns is not None:
        self.columns = _ensure_index(columns)
        self.frame = self.frame[self.columns]
    else:
        self.columns = frame.columns
def __init__(self, data=None, index=None, columns=None,
             default_kind='block', default_fill_value=None):
    if default_fill_value is None:
        default_fill_value = np.nan

    self.default_kind = default_kind
    self.default_fill_value = default_fill_value

    if isinstance(data, dict):
        sdict, columns, index = self._init_dict(data, index, columns)
    elif isinstance(data, (np.ndarray, list)):
        sdict, columns, index = self._init_matrix(data, index, columns)
    elif isinstance(data, DataFrame):
        sdict, columns, index = self._init_dict(data, data.index,
                                                data.columns)
    elif data is None:
        sdict = {}

        if index is None:
            index = Index([])
        else:
            index = _ensure_index(index)

        if columns is None:
            columns = Index([])
        else:
            for c in columns:
                sdict[c] = SparseSeries(np.nan, index=index,
                                        kind=self.default_kind,
                                        fill_value=self.default_fill_value)

    self._series = sdict
    self.columns = columns
    self.index = index
def __init__(
    self,
    frame,
    buf=None,
    columns=None,
    col_space=None,
    na_rep="NaN",
    formatters=None,
    float_format=None,
    sparsify=True,
    index_names=True,
):
    self.frame = frame
    self.buf = buf if buf is not None else StringIO()
    self.show_index_names = index_names
    self.sparsify = sparsify
    self.float_format = float_format
    self.formatters = formatters
    self.na_rep = na_rep
    self.col_space = col_space

    if columns is not None:
        self.columns = _ensure_index(columns)
    else:
        self.columns = frame.columns
def __init__(self, frame, buf=None, columns=None, col_space=None,
             header=True, index=True, na_rep='NaN', formatters=None,
             justify=None, float_format=None, sparsify=True,
             index_names=True, **kwds):
    self.frame = frame
    self.buf = buf if buf is not None else StringIO()
    self.show_index_names = index_names
    self.sparsify = sparsify
    self.float_format = float_format
    self.formatters = formatters
    self.na_rep = na_rep
    self.col_space = col_space
    self.header = header
    self.index = index

    if justify is None:
        self.justify = com.print_config.colheader_justify
    else:
        self.justify = justify

    self.kwds = kwds

    if columns is not None:
        self.columns = _ensure_index(columns)
    else:
        self.columns = frame.columns
def reindex_items(self, new_items, copy=True):
    """
    Conform the manager's items (axis 0) to new_items.
    """
    new_items = _ensure_index(new_items)
    data = self
    if not data.is_consolidated():
        data = data.consolidate()
        return data.reindex_items(new_items)

    # TODO: this part could be faster (!)
    new_items, indexer = self.items.reindex(new_items)

    # could have some pathological (MultiIndex) issues here
    new_blocks = []
    if indexer is None:
        for blk in self.blocks:
            if copy:
                new_blocks.append(blk.reindex_items_from(new_items))
            else:
                new_blocks.append(blk)
    else:
        for block in self.blocks:
            newb = block.reindex_items_from(new_items, copy=copy)
            if len(newb.items) > 0:
                new_blocks.append(newb)

        mask = indexer == -1
        if mask.any():
            extra_items = new_items[mask]

            na_block = self._make_na_block(extra_items, new_items)
            new_blocks.append(na_block)
            new_blocks = _consolidate(new_blocks, new_items)

    return BlockManager(new_blocks, [new_items] + self.axes[1:])
def _has_valid_type(self, key, axis): ax = self.obj._get_axis(axis) # valid for a label where all labels are in the index # slice of lables (where start-end in labels) # slice of integers (only if in the lables) # boolean if isinstance(key, slice): if ax.is_floating(): # allowing keys to be slicers with no fallback pass else: if key.start is not None: if key.start not in ax: raise KeyError("start bound [%s] is not the [%s]" % (key.start,self.obj._get_axis_name(axis))) if key.stop is not None: if key.stop not in ax: raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop,self.obj._get_axis_name(axis))) elif com._is_bool_indexer(key): return True elif _is_list_like(key): # mi is just a passthru if isinstance(key, tuple) and isinstance(ax, MultiIndex): return True # require all elements in the index idx = _ensure_index(key) if not idx.isin(ax).all(): raise KeyError("[%s] are not in ALL in the [%s]" % (key,self.obj._get_axis_name(axis))) return True else: def error(): if isnull(key): raise ValueError("cannot use label indexing with a null key") raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis))) try: key = self._convert_scalar_indexer(key, axis) if not key in ax: error() except (TypeError) as e: # python 3 type errors should be raised if 'unorderable' in str(e): # pragma: no cover error() raise except: error() return True
def _get_concat_axis(self):
    if self._is_series:
        if self.axis == 0:
            indexes = [x.index for x in self.objs]
        elif self.keys is None:
            names = []
            for x in self.objs:
                if not isinstance(x, Series):
                    raise TypeError("Cannot concatenate type 'Series' "
                                    "with object of type "
                                    "%r" % type(x).__name__)
                if x.name is not None:
                    names.append(x.name)
                else:
                    return Index(np.arange(len(self.objs)))
            return Index(names)
        else:
            return _ensure_index(self.keys)
    else:
        indexes = [x._data.axes[self.axis] for x in self.objs]

    if self.keys is None:
        concat_axis = _concat_indexes(indexes)
    else:
        concat_axis = _make_concat_multiindex(indexes, self.keys,
                                              self.levels, self.names)

    self._maybe_check_integrity(concat_axis)

    return concat_axis
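# Public-API view of the name-collection branch above: concatenating named
# Series along axis=1 turns the names into the new columns axis.
import pandas as pd

a = pd.Series([1], name='a')
b = pd.Series([2], name='b')
print(pd.concat([a, b], axis=1).columns.tolist())  # ['a', 'b']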
def remove_unused_categories(self, inplace=False):
    """ Removes categories which are not used.

    Parameters
    ----------
    inplace : boolean (default: False)
       Whether or not to drop unused categories inplace or return a copy
       of this categorical with unused categories dropped.

    Returns
    -------
    cat : Categorical with unused categories dropped or None if inplace.

    See also
    --------
    rename_categories
    reorder_categories
    add_categories
    remove_categories
    set_categories
    """
    cat = self if inplace else self.copy()
    _used = sorted(np.unique(cat._codes))
    new_categories = cat.categories.take(com._ensure_platform_int(_used))
    new_categories = _ensure_index(new_categories)
    cat._codes = _get_codes_for_values(cat.__array__(), new_categories)
    cat._categories = new_categories

    if not inplace:
        return cat
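# Present-day public equivalent of the method above (the API survives on
# pd.Categorical under the same name):
import pandas as pd

cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c', 'd'])
trimmed = cat.remove_unused_categories()
print(list(trimmed.categories))  # ['a', 'b']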
def _extract_axis(data, axis=0, intersect=False):
    if len(data) == 0:
        index = Index([])
    elif len(data) > 0:
        raw_lengths = []
        indexes = []

        have_raw_arrays = False
        have_frames = False

        for v in data.values():
            if isinstance(v, DataFrame):
                have_frames = True
                indexes.append(v._get_axis(axis))
            else:
                have_raw_arrays = True
                raw_lengths.append(v.shape[axis])

        if have_frames:
            index = _get_combined_index(indexes, intersect=intersect)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('ndarrays must match shape on axis %d'
                                 % axis)

            if have_frames:
                assert(lengths[0] == len(index))
            else:
                index = Index(np.arange(lengths[0]))

    return _ensure_index(index)
def _get_concat_axis(self):
    if self._is_series:
        if self.axis == 0:
            indexes = [x.index for x in self.objs]
        elif self.keys is None:
            names = []
            for x in self.objs:
                if x.name is not None:
                    names.append(x.name)
                else:
                    return Index(np.arange(len(self.objs)))
            return Index(names)
        else:
            return _ensure_index(self.keys)
    else:
        indexes = [x._data.axes[self.axis] for x in self.objs]

    if self.keys is None:
        concat_axis = _concat_indexes(indexes)
    else:
        concat_axis = _make_concat_multiindex(indexes, self.keys,
                                              self.levels, self.names)

    self._maybe_check_integrity(concat_axis)

    return concat_axis
def reindex_axis(self, new_axis, method=None, axis=0, copy=True):
    new_axis = _ensure_index(new_axis)
    cur_axis = self.axes[axis]

    if new_axis.equals(cur_axis):
        if copy:
            result = self.copy(deep=True)
            result.axes[axis] = new_axis

            if axis == 0:
                # patch ref_items, #1823
                for blk in result.blocks:
                    blk.ref_items = new_axis

            return result
        else:
            return self

    if axis == 0:
        if method is not None:
            raise AssertionError('method argument not supported for '
                                 'axis == 0')
        return self.reindex_items(new_axis)

    new_axis, indexer = cur_axis.reindex(new_axis, method)
    return self.reindex_indexer(new_axis, indexer, axis=axis)
def _interleave(self, items):
    """
    Return ndarray from blocks with specified item order
    Items must be contained in the blocks
    """
    dtype = _interleaved_dtype(self.blocks)
    items = _ensure_index(items)

    result = np.empty(self.shape, dtype=dtype)
    itemmask = np.zeros(len(items), dtype=bool)

    # By construction, all of the items should be covered by one of the
    # blocks
    if items.is_unique:
        for block in self.blocks:
            indexer = items.get_indexer(block.items)
            assert((indexer != -1).all())
            result[indexer] = block.get_values(dtype)
            itemmask[indexer] = 1
    else:
        for block in self.blocks:
            mask = items.isin(block.items)
            indexer = mask.nonzero()[0]
            assert(len(indexer) == len(block.items))
            result[indexer] = block.get_values(dtype)
            itemmask[indexer] = 1

    assert(itemmask.all())
    return result
def set_axis(self, axis, value):
    cur_axis = self.axes[axis]
    if len(value) != len(cur_axis):
        raise Exception("Length mismatch (%d vs %d)"
                        % (len(value), len(cur_axis)))
    self.axes[axis] = _ensure_index(value)

    if axis == 0:
        for block in self.blocks:
            block.set_ref_items(self.items, maybe_rename=True)
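# The same length check surfaces in the public API: in modern pandas,
# DataFrame.set_axis raises when the new labels do not match the axis
# length (a small sketch; the argument order differs from the internal
# method above).
import pandas as pd

df = pd.DataFrame({'x': [1, 2]})
df2 = df.set_axis(['r1', 'r2'], axis=0)  # ok: lengths match
try:
    df.set_axis(['only_one'], axis=0)
except ValueError as e:
    print(e)  # length mismatch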
def remove_unused_levels(self):
    """ Removes levels which are not used.

    The level removal is done inplace.
    """
    _used = sorted(np.unique(self._codes))
    new_levels = self.levels.take(_used)
    new_levels = _ensure_index(new_levels)

    self._codes = _get_codes_for_values(self.__array__(), new_levels)
    self._levels = new_levels
def __init__(self, labels, levels, name=None):
    from pandas.core.index import _ensure_index

    levels = _ensure_index(levels)
    if not levels.is_unique:
        raise ValueError('Factor levels must be unique')

    self.labels = labels
    self.levels = levels
    self.name = name
def __set__(self, obj, value):
    value = _ensure_index(value)

    if isinstance(value, MultiIndex):
        raise NotImplementedError

    for v in compat.itervalues(obj._frames):
        setattr(v, self.frame_attr, value)

    setattr(obj, self.cache_field, value)
def __init__(self, blocks, axes, do_integrity_check=True):
    self.axes = [_ensure_index(ax) for ax in axes]
    self.blocks = blocks

    ndim = len(axes)
    for block in blocks:
        assert(ndim == block.values.ndim)

    if do_integrity_check:
        self._verify_integrity()
def __setstate__(self, state):
    # discard anything after 3rd, support beta pickling format for a
    # little while longer
    ax_arrays, bvalues, bitems = state[:3]

    self.axes = [_ensure_index(ax) for ax in ax_arrays]
    blocks = []
    for values, items in zip(bvalues, bitems):
        blk = make_block(values, items, self.axes[0],
                         do_integrity_check=True)
        blocks.append(blk)
    self.blocks = blocks
def __new__(cls, data, index=None, sparse_index=None, kind='block',
            fill_value=None, name=None, copy=False):

    is_sparse_array = isinstance(data, SparseArray)
    if fill_value is None:
        if is_sparse_array:
            fill_value = data.fill_value
        else:
            fill_value = nan

    if is_sparse_array:
        if isinstance(data, SparseSeries) and index is None:
            index = data.index
        elif index is not None:
            assert (len(index) == len(data))

        sparse_index = data.sp_index
        values = np.asarray(data)
    elif isinstance(data, (Series, dict)):
        if index is None:
            index = data.index

        data = Series(data)
        values, sparse_index = make_sparse(data, kind=kind,
                                           fill_value=fill_value)
    elif np.isscalar(data):  # pragma: no cover
        if index is None:
            raise Exception('must pass index!')

        values = np.empty(len(index))
        values.fill(data)

        # TODO: more efficient
        values, sparse_index = make_sparse(values, kind=kind,
                                           fill_value=fill_value)
    else:
        # array-like
        if sparse_index is None:
            values, sparse_index = make_sparse(data, kind=kind,
                                               fill_value=fill_value)
        else:
            values = data
            assert (len(values) == sparse_index.npoints)

    if index is None:
        index = Index(np.arange(sparse_index.length))
    index = _ensure_index(index)

    # Create array, do *not* copy data by default
    if copy:
        subarr = np.array(values, dtype=np.float64, copy=True)
    else:
        subarr = np.asarray(values, dtype=np.float64)

    if index.is_all_dates:
        cls = SparseTimeSeries

    # Change the class of the array to be the subclass type.
    output = subarr.view(cls)
    output.sp_index = sparse_index
    output.fill_value = np.float64(fill_value)
    output.index = index
    output.name = name
    return output
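# Hedged sketch of the sparse storage this constructor builds, using the
# surviving public type (pd.arrays.SparseArray); SparseSeries itself was
# removed from modern pandas.
import pandas as pd

arr = pd.arrays.SparseArray([0.0, 0.0, 1.0], fill_value=0.0)
print(arr.density)  # fraction of points actually stored (1/3 here)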
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        zipped = zip(*keys)
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact
        # labels for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels,
                      names=new_names)
def __setstate__(self, state):
    items, ref_items, values = state
    self.items = _ensure_index(items)
    self.ref_items = _ensure_index(ref_items)
    self.values = values
    self.ndim = values.ndim
def set_items_norename(self, value):
    value = _ensure_index(value)
    self.axes[0] = value

    for block in self.blocks:
        block.set_ref_items(value, maybe_rename=False)
def __init__(self, data=None, index=None, sparse_index=None, kind='block',
             fill_value=None, name=None, dtype=None, copy=False,
             fastpath=False):

    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()

    else:

        if data is None:
            data = []

        if isinstance(data, Series) and name is None:
            name = data.name

        if isinstance(data, SparseArray):
            if index is not None:
                assert (len(index) == len(data))
            sparse_index = data.sp_index
            if fill_value is None:
                fill_value = data.fill_value

            data = np.asarray(data)

        elif isinstance(data, SparseSeries):
            if index is None:
                index = data.index.view()
            if fill_value is None:
                fill_value = data.fill_value

            # extract the SingleBlockManager
            data = data._data

        elif isinstance(data, (Series, dict)):
            data = Series(data, index=index)
            index = data.index.view()

            res = make_sparse(data, kind=kind, fill_value=fill_value)
            data, sparse_index, fill_value = res

        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res
            else:
                assert (len(data) == sparse_index.npoints)

        elif isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype)
            if index is None:
                index = data.index.view()
            else:
                data = data.reindex(index, copy=False)

        else:
            length = len(index)

            if data == fill_value or (isna(data) and isna(fill_value)):
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                data = np.array([])

            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                v = data
                data = np.empty(length)
                data.fill(v)

        if index is None:
            index = com._default_index(sparse_index.length)
        index = _ensure_index(index)

        # create/copy the manager
        if isinstance(data, SingleBlockManager):
            if copy:
                data = data.copy()
        else:
            # create a sparse array
            if not isinstance(data, SparseArray):
                data = SparseArray(data, sparse_index=sparse_index,
                                   fill_value=fill_value, dtype=dtype,
                                   copy=copy)

            data = SingleBlockManager(data, index)

    generic.NDFrame.__init__(self, data)

    self.index = index
    self.name = name
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        zipped = lzip(*keys)
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Categorical.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact
        # labels for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s'
                                     % (str(key), str(level)))

                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Categorical.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same
            # nlevels
            if not len(set([i.nlevels for i in indexes])) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s'
                             % str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels,
                      names=new_names)
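# What this helper builds, seen through the public API: pd.concat with keys
# produces the hierarchical (MultiIndex) concat axis.
import pandas as pd

a = pd.Series([1, 2], index=['x', 'y'])
b = pd.Series([3, 4], index=['x', 'y'])
out = pd.concat([a, b], keys=['first', 'second'])
print(out.index.tolist())
# [('first', 'x'), ('first', 'y'), ('second', 'x'), ('second', 'y')]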
def __init__(self, data=None, index=None, columns=None, default_kind=None,
             default_fill_value=None, dtype=None, copy=False):

    # pick up the defaults from the Sparse structures
    if isinstance(data, SparseDataFrame):
        if index is None:
            index = data.index
        if columns is None:
            columns = data.columns
        if default_fill_value is None:
            default_fill_value = data.default_fill_value
        if default_kind is None:
            default_kind = data.default_kind
    elif isinstance(data, (SparseSeries, SparseArray)):
        if index is None:
            index = data.index
        if default_fill_value is None:
            default_fill_value = data.fill_value
        if columns is None and hasattr(data, 'name'):
            columns = [data.name]
        if columns is None:
            raise Exception("cannot pass a series w/o a name or columns")
        data = {columns[0]: data}

    if default_fill_value is None:
        default_fill_value = np.nan
    if default_kind is None:
        default_kind = 'block'

    self._default_kind = default_kind
    self._default_fill_value = default_fill_value

    if is_scipy_sparse(data):
        mgr = self._init_spmatrix(data, index, columns, dtype=dtype,
                                  fill_value=default_fill_value)
    elif isinstance(data, dict):
        mgr = self._init_dict(data, index, columns, dtype=dtype)
    elif isinstance(data, (np.ndarray, list)):
        mgr = self._init_matrix(data, index, columns, dtype=dtype)
    elif isinstance(data, SparseDataFrame):
        mgr = self._init_mgr(data._data,
                             dict(index=index, columns=columns),
                             dtype=dtype, copy=copy)
    elif isinstance(data, DataFrame):
        mgr = self._init_dict(data, data.index, data.columns, dtype=dtype)
    elif isinstance(data, BlockManager):
        mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
                             dtype=dtype, copy=copy)
    elif data is None:
        data = DataFrame()

        if index is None:
            index = Index([])
        else:
            index = _ensure_index(index)

        if columns is None:
            columns = Index([])
        else:
            for c in columns:
                data[c] = SparseArray(np.nan, index=index,
                                      kind=self._default_kind,
                                      fill_value=self._default_fill_value)
        mgr = to_manager(data, columns, index)

        if dtype is not None:
            mgr = mgr.astype(dtype)

    generic.NDFrame.__init__(self, mgr)
def __new__(cls, data, index=None, sparse_index=None, kind='block',
            fill_value=None, name=None, copy=False):

    is_sparse_array = isinstance(data, SparseArray)
    if fill_value is None:
        if is_sparse_array:
            fill_value = data.fill_value
        else:
            fill_value = nan

    if is_sparse_array:
        if isinstance(data, SparseSeries) and index is None:
            index = data.index
        elif index is not None:
            if not (len(index) == len(data)):
                raise AssertionError()

        sparse_index = data.sp_index
        values = np.asarray(data)
    elif isinstance(data, (Series, dict)):
        if index is None:
            index = data.index

        data = Series(data)
        values, sparse_index = make_sparse(data, kind=kind,
                                           fill_value=fill_value)
    elif isinstance(data, (tuple, list, np.ndarray)):
        # array-like
        if sparse_index is None:
            values, sparse_index = make_sparse(data, kind=kind,
                                               fill_value=fill_value)
        else:
            values = data
            if not (len(values) == sparse_index.npoints):
                raise AssertionError()
    else:
        if index is None:
            raise TypeError('must pass index!')

        length = len(index)

        if data == fill_value or (isnull(data) and isnull(fill_value)):
            if kind == 'block':
                sparse_index = BlockIndex(length, [], [])
            else:
                sparse_index = IntIndex(length, [])
            values = np.array([])

        else:
            if kind == 'block':
                locs, lens = ([0], [length]) if length else ([], [])
                sparse_index = BlockIndex(length, locs, lens)
            else:
                sparse_index = IntIndex(length, index)
            values = np.empty(length)
            values.fill(data)

    if index is None:
        index = com._default_index(sparse_index.length)
    index = _ensure_index(index)

    # Create array, do *not* copy data by default
    if copy:
        subarr = np.array(values, dtype=np.float64, copy=True)
    else:
        subarr = np.asarray(values, dtype=np.float64)

    if index.is_all_dates:
        cls = SparseTimeSeries

    # Change the class of the array to be the subclass type.
    output = subarr.view(cls)
    output.sp_index = sparse_index
    output.fill_value = np.float64(fill_value)
    output.index = index
    output.name = name
    return output
def _set_columns(self, cols):
    if len(cols) != len(self._series):
        raise Exception('Columns length %d did not match data %d!' %
                        (len(cols), len(self._series)))
    self._columns = _ensure_index(cols)
def _has_valid_type(self, key, axis): ax = self.obj._get_axis(axis) # valid for a label where all labels are in the index # slice of lables (where start-end in labels) # slice of integers (only if in the lables) # boolean if isinstance(key, slice): if ax.is_floating(): # allowing keys to be slicers with no fallback pass else: if key.start is not None: if key.start not in ax: raise KeyError( "start bound [%s] is not the [%s]" % (key.start, self.obj._get_axis_name(axis))) if key.stop is not None: if key.stop not in ax: raise KeyError( "stop bound [%s] is not in the [%s]" % (key.stop, self.obj._get_axis_name(axis))) elif com._is_bool_indexer(key): return True elif _is_list_like(key): # mi is just a passthru if isinstance(key, tuple) and isinstance(ax, MultiIndex): return True # require all elements in the index idx = _ensure_index(key) if not idx.isin(ax).all(): raise KeyError("[%s] are not in ALL in the [%s]" % (key, self.obj._get_axis_name(axis))) return True else: def error(): if isnull(key): raise ValueError( "cannot use label indexing with a null key") raise KeyError("the label [%s] is not in the [%s]" % (key, self.obj._get_axis_name(axis))) try: key = self._convert_scalar_indexer(key, axis) if not key in ax: error() except (TypeError) as e: # python 3 type errors should be raised if 'unorderable' in str(e): # pragma: no cover error() raise except: error() return True
def _validate_levels(cls, levels):
    """ Validates that we have good levels """
    levels = _ensure_index(levels)
    if not levels.is_unique:
        raise ValueError('Categorical levels must be unique')
    return levels
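# The uniqueness rule enforced above is still visible in the public
# constructor (levels were later renamed to categories):
import pandas as pd

try:
    pd.Categorical(['a'], categories=['a', 'a'])
except ValueError as e:
    print(e)  # Categorical categories must be unique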
def _set_index(self, index):
    self._index = _ensure_index(index)

    for v in self._series.values():
        v.index = self._index