def _get_series_result_type(result):
    """
    Pick the class used to hold the result of concatenating Series.

    Parameters
    ----------
    result : dict or array-like
        A dict is inspected value by value; any other input is inspected
        as a single object.

    Returns
    -------
    class
        ``SparseDataFrame`` or ``DataFrame`` for dict input,
        ``SparseSeries`` or ``Series`` otherwise.
    """
    # Imports stay inside the branches so that only the class actually
    # returned gets imported.
    if not isinstance(result, dict):
        # single array-like result
        if com.is_sparse(result):
            from pandas.sparse.api import SparseSeries
            return SparseSeries
        from pandas.core.series import Series
        return Series

    # dict result: the sparse frame is chosen only when every value is
    # sparse (an empty dict also satisfies ``all``)
    every_value_sparse = all(
        com.is_sparse(val) for val in compat.itervalues(result)
    )
    if every_value_sparse:
        from pandas.sparse.api import SparseDataFrame
        return SparseDataFrame

    from pandas.core.frame import DataFrame
    return DataFrame
def get_dtype_kinds(l):
    """
    Collect the kind label of every array in a list.

    Parameters
    ----------
    l : list of arrays
        Each element must expose a ``dtype`` attribute.

    Returns
    -------
    set
        The distinct kind labels that occur in ``l``.
    """
    # NOTE(review): a second ``get_dtype_kinds`` with the same logic appears
    # later in this file; the later definition is the one left bound.

    def _kind_of(arr):
        # The checks run in a fixed order and the first match wins.
        dtype = arr.dtype
        if com.is_categorical_dtype(dtype):
            return 'category'
        if com.is_sparse(arr):
            return 'sparse'
        if com.is_datetimetz(arr):
            return 'datetimetz'
        if com.is_datetime64_dtype(dtype):
            return 'datetime'
        if com.is_timedelta64_dtype(dtype):
            return 'timedelta'
        if com.is_object_dtype(dtype):
            return 'object'
        if com.is_bool_dtype(dtype):
            return 'bool'
        # nothing special matched: fall back to the dtype's own kind
        return dtype.kind

    return set(_kind_of(arr) for arr in l)
def get_dtype_kinds(l):
    """
    Return the set of kind labels present in a list of arrays.

    Parameters
    ----------
    l : list of arrays
        Each element must expose a ``dtype`` attribute.

    Returns
    -------
    set
        One label per distinct kind found in ``l``.
    """
    # NOTE(review): an earlier ``get_dtype_kinds`` with the same logic exists
    # in this file; this later definition is the one left bound.

    # (label, test) pairs tried top to bottom. The tests are wrapped in
    # lambdas so each ``com`` helper is only called when its turn comes.
    ordered_tests = (
        ("category", lambda arr, dtype: com.is_categorical_dtype(dtype)),
        ("sparse", lambda arr, dtype: com.is_sparse(arr)),
        ("datetimetz", lambda arr, dtype: com.is_datetimetz(arr)),
        ("datetime", lambda arr, dtype: com.is_datetime64_dtype(dtype)),
        ("timedelta", lambda arr, dtype: com.is_timedelta64_dtype(dtype)),
        ("object", lambda arr, dtype: com.is_object_dtype(dtype)),
        ("bool", lambda arr, dtype: com.is_bool_dtype(dtype)),
    )

    kinds = set()
    for arr in l:
        dtype = arr.dtype
        for label, test in ordered_tests:
            if test(arr, dtype):
                kinds.add(label)
                break
        else:
            # no test matched: use the dtype's own kind
            kinds.add(dtype.kind)
    return kinds
def _concat_sparse(to_concat, axis=0, typs=None):
    """
    Concatenate sparse / dense arrays, each of which has a single dtype.

    Parameters
    ----------
    to_concat : list of arrays
        Must contain at least one sparse array.
    axis : int, default 0
        Axis along which the densified arrays are concatenated.
    typs : set of str, optional
        Kind labels of ``to_concat`` as returned by ``get_dtype_kinds``.
        Computed from ``to_concat`` when omitted.

    Returns
    -------
    SparseArray or ndarray
        A ``SparseArray`` when ``typs`` contains nothing besides
        ``'sparse'``, ``'f'`` and ``'i'``; otherwise the dense
        concatenation cast to object dtype.
    """
    from pandas.sparse.array import SparseArray, _make_index

    def _densify(arr):
        # SparseArray -> dense values; every input is flattened, then
        # lifted back to 2-D when concatenating along a non-zero axis
        if isinstance(arr, SparseArray):
            arr = arr.get_values()
        arr = arr.ravel()
        if axis > 0:
            arr = np.atleast_2d(arr)
        return arr

    if typs is None:
        # ``get_dtype_kinds`` is defined in this module; call it directly
        # instead of looking it up on ``com``
        typs = get_dtype_kinds(to_concat)

    if len(typs) == 1:
        # a single kind: the inputs can be stitched together without
        # densifying, provided they all share one fill_value
        fill_values = set(c.fill_value for c in to_concat)
        if len(fill_values) == 1:
            first = to_concat[0]

            # shift each input's integer indices by the total length of
            # the inputs that precede it
            shifted = []
            total_length = 0
            for c in to_concat:
                int_index = c.sp_index.to_int_index()
                shifted.append(int_index.indices + total_length)
                total_length += int_index.length

            sp_values = np.concatenate([c.sp_values for c in to_concat])
            indices = np.concatenate(shifted)
            # the first input's index is passed as ``kind`` so the result
            # gets an index built from it
            sp_index = _make_index(total_length, indices,
                                   kind=first.sp_index)

            return SparseArray(sp_values, sparse_index=sp_index,
                               fill_value=first.fill_value)

    # input may be sparse / dense mixed and may have different fill_value
    # input must contain at least one sparse array
    sparses = [c for c in to_concat if com.is_sparse(c)]
    fill_values = [c.fill_value for c in sparses]
    sp_indexes = [c.sp_index for c in sparses]

    # densify and do a regular concat
    dense = [_densify(x) for x in to_concat]
    result = np.concatenate(dense, axis=axis)

    non_numeric = typs - set(['sparse', 'f', 'i'])
    if not non_numeric:
        # sparsify if inputs are sparse and dense numerics;
        # the first sparse input's fill_value and SparseIndex are used
        result = SparseArray(result.ravel(), fill_value=fill_values[0],
                             kind=sp_indexes[0])
    else:
        # coerce to object if needed
        result = result.astype('object')
    return result