def init_dict(data, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. """ if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) data_names = arrays.index missing = arrays.isnull() if index is None: # GH10856 # raise ValueError if only scalars in dict index = extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): if dtype is None or np.issubdtype(dtype, np.flexible): # GH#1783 nan_dtype = object else: nan_dtype = dtype v = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [v] * missing.sum() else: keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) arrays = [data[k] for k in keys] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
def _init_dict(self, data, index, columns, dtype=None): # pre-filter out columns if we passed it if columns is not None: columns = ensure_index(columns) data = {k: v for k, v in data.items() if k in columns} else: keys = com.dict_keys_to_ordered_list(data) columns = Index(keys) if index is None: index = extract_index(list(data.values())) def sp_maker(x): return SparseArray( x, kind=self._default_kind, fill_value=self._default_fill_value, copy=True, dtype=dtype, ) sdict = {} for k, v in data.items(): if isinstance(v, Series): # Force alignment, no copy necessary if not v.index.equals(index): v = v.reindex(index) if not isinstance(v, SparseSeries): v = sp_maker(v.values) elif isinstance(v, SparseArray): v = v.copy() else: if isinstance(v, dict): v = [v.get(i, np.nan) for i in index] v = sp_maker(v) if index is not None and len(v) != len(index): msg = "Length of passed values is {}, index implies {}" raise ValueError(msg.format(len(v), len(index))) sdict[k] = v if len(columns.difference(sdict)): # TODO: figure out how to handle this case, all nan's? # add in any other columns we want to have (completeness) nan_arr = np.empty(len(index), dtype="float64") nan_arr.fill(np.nan) nan_arr = SparseArray( nan_arr, kind=self._default_kind, fill_value=self._default_fill_value, copy=False, ) sdict.update((c, nan_arr) for c in columns if c not in sdict) return to_manager(sdict, columns, index)
def _init_dict(self, data, index, columns, dtype=None): # pre-filter out columns if we passed it if columns is not None: columns = ensure_index(columns) data = {k: v for k, v in compat.iteritems(data) if k in columns} else: keys = com.dict_keys_to_ordered_list(data) columns = Index(keys) if index is None: index = extract_index(list(data.values())) def sp_maker(x): return SparseArray(x, kind=self._default_kind, fill_value=self._default_fill_value, copy=True, dtype=dtype) sdict = {} for k, v in compat.iteritems(data): if isinstance(v, Series): # Force alignment, no copy necessary if not v.index.equals(index): v = v.reindex(index) if not isinstance(v, SparseSeries): v = sp_maker(v.values) elif isinstance(v, SparseArray): v = v.copy() else: if isinstance(v, dict): v = [v.get(i, np.nan) for i in index] v = sp_maker(v) if index is not None and len(v) != len(index): msg = "Length of passed values is {}, index implies {}" raise ValueError(msg.format(len(v), len(index))) sdict[k] = v if len(columns.difference(sdict)): # TODO: figure out how to handle this case, all nan's? # add in any other columns we want to have (completeness) nan_arr = np.empty(len(index), dtype='float64') nan_arr.fill(np.nan) nan_arr = SparseArray(nan_arr, kind=self._default_kind, fill_value=self._default_fill_value, copy=False) sdict.update((c, nan_arr) for c in columns if c not in sdict) return to_manager(sdict, columns, index)
def init_dict(data, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. """ if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) data_names = arrays.index missing = arrays.isna() if index is None: # GH10856 # raise ValueError if only scalars in dict index = extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): if dtype is None or np.issubdtype(dtype, np.flexible): # GH#1783 nan_dtype = object else: nan_dtype = dtype val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() else: keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) arrays = (com.maybe_iterable_to_list(data[k]) for k in keys) # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies arrays = [ arr if not isinstance(arr, ABCIndexClass) else arr._data for arr in arrays ] arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
def init_dict(data, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. """ if columns is not None: from pandas.core.series import Series arrays = Series(data, index=columns, dtype=object) data_names = arrays.index missing = arrays.isnull() if index is None: # GH10856 # raise ValueError if only scalars in dict index = extract_index(arrays[~missing]) else: index = ensure_index(index) # no obvious "empty" int column if missing.any() and not is_integer_dtype(dtype): if dtype is None or np.issubdtype(dtype, np.flexible): # GH#1783 nan_dtype = object else: nan_dtype = dtype val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() else: for key in data: if (isinstance(data[key], ABCDatetimeIndex) and data[key].tz is not None): # GH#24096 need copy to be deep for datetime64tz case # TODO: See if we can avoid these copies data[key] = data[key].copy(deep=True) keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) arrays = [data[k] for k in keys] return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
def __init__( self, objs, axis=0, join="outer", join_axes=None, keys=None, levels=None, names=None, ignore_index=False, verify_integrity=False, copy=True, sort=False, ): if isinstance(objs, (NDFrame, str)): raise TypeError("first argument must be an iterable of pandas " "objects, you passed an object of type " '"{name}"'.format(name=type(objs).__name__)) if join == "outer": self.intersect = False elif join == "inner": self.intersect = True else: # pragma: no cover raise ValueError("Only can inner (intersect) or outer (union) " "join the other axis") if isinstance(objs, dict): if keys is None: keys = com.dict_keys_to_ordered_list(objs) objs = [objs[k] for k in keys] else: objs = list(objs) if len(objs) == 0: raise ValueError("No objects to concatenate") if keys is None: objs = list(com._not_none(*objs)) else: # #1649 clean_keys = [] clean_objs = [] for k, v in zip(keys, objs): if v is None: continue clean_keys.append(k) clean_objs.append(v) objs = clean_objs name = getattr(keys, "name", None) keys = Index(clean_keys, name=name) if len(objs) == 0: raise ValueError("All objects passed were None") # consolidate data & figure out what our result ndim is going to be ndims = set() for obj in objs: if not isinstance(obj, (Series, DataFrame)): msg = ("cannot concatenate object of type '{}';" " only Series and DataFrame objs are valid".format( type(obj))) raise TypeError(msg) # consolidate obj._consolidate(inplace=True) ndims.add(obj.ndim) # get the sample # want the highest ndim that we have, and must be non-empty # unless all objs are empty sample = None if len(ndims) > 1: max_ndim = max(ndims) for obj in objs: if obj.ndim == max_ndim and np.sum(obj.shape): sample = obj break else: # filter out the empties if we have not multi-index possibilities # note to keep empty Series as it affect to result columns / name non_empties = [ obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series) ] if len(non_empties) and (keys is None and names is None and levels is None and not self.intersect): objs = non_empties sample = objs[0] if sample is None: sample = objs[0] self.objs = objs # Standardize axis parameter to int if isinstance(sample, Series): axis = DataFrame._get_axis_number(axis) else: axis = sample._get_axis_number(axis) # Need to flip BlockManager axis in the DataFrame special case self._is_frame = isinstance(sample, DataFrame) if self._is_frame: axis = 1 if axis == 0 else 0 self._is_series = isinstance(sample, Series) if not 0 <= axis <= sample.ndim: raise AssertionError("axis must be between 0 and {ndim}, input was" " {axis}".format(ndim=sample.ndim, axis=axis)) # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed if len(ndims) > 1: current_column = 0 max_ndim = sample.ndim self.objs, objs = [], self.objs for obj in objs: ndim = obj.ndim if ndim == max_ndim: pass elif ndim != max_ndim - 1: raise ValueError("cannot concatenate unaligned mixed " "dimensional NDFrame objects") else: name = getattr(obj, "name", None) if ignore_index or name is None: name = current_column current_column += 1 # doing a row-wise concatenation so need everything # to line up if self._is_frame and axis == 1: name = 0 obj = sample._constructor({name: obj}) self.objs.append(obj) # note: this is the BlockManager axis (since DataFrame is transposed) self.axis = axis self.join_axes = join_axes self.keys = keys self.names = names or getattr(keys, "names", None) self.levels = levels self.sort = sort self.ignore_index = ignore_index self.verify_integrity = verify_integrity self.copy = copy self.new_axes = self._get_new_axes()
def __init__(self, objs, axis=0, join='outer', join_axes=None, keys=None, levels=None, names=None, ignore_index=False, verify_integrity=False, copy=True, sort=False): if isinstance(objs, (NDFrame, str)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you passed an object of type ' '"{name}"'.format(name=type(objs).__name__)) if join == 'outer': self.intersect = False elif join == 'inner': self.intersect = True else: # pragma: no cover raise ValueError('Only can inner (intersect) or outer (union) ' 'join the other axis') if isinstance(objs, dict): if keys is None: keys = com.dict_keys_to_ordered_list(objs) objs = [objs[k] for k in keys] else: objs = list(objs) if len(objs) == 0: raise ValueError('No objects to concatenate') if keys is None: objs = list(com._not_none(*objs)) else: # #1649 clean_keys = [] clean_objs = [] for k, v in zip(keys, objs): if v is None: continue clean_keys.append(k) clean_objs.append(v) objs = clean_objs name = getattr(keys, 'name', None) keys = Index(clean_keys, name=name) if len(objs) == 0: raise ValueError('All objects passed were None') # consolidate data & figure out what our result ndim is going to be ndims = set() for obj in objs: if not isinstance(obj, (Series, DataFrame)): msg = ("cannot concatenate object of type '{}';" ' only Series and DataFrame objs are valid' .format(type(obj))) raise TypeError(msg) # consolidate obj._consolidate(inplace=True) ndims.add(obj.ndim) # get the sample # want the highest ndim that we have, and must be non-empty # unless all objs are empty sample = None if len(ndims) > 1: max_ndim = max(ndims) for obj in objs: if obj.ndim == max_ndim and np.sum(obj.shape): sample = obj break else: # filter out the empties if we have not multi-index possibilities # note to keep empty Series as it affect to result columns / name non_empties = [obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series)] if (len(non_empties) and (keys is None and names is None and levels is None and join_axes is None and not self.intersect)): objs = non_empties sample = objs[0] if sample is None: sample = objs[0] self.objs = objs # Standardize axis parameter to int if isinstance(sample, Series): axis = DataFrame._get_axis_number(axis) else: axis = sample._get_axis_number(axis) # Need to flip BlockManager axis in the DataFrame special case self._is_frame = isinstance(sample, DataFrame) if self._is_frame: axis = 1 if axis == 0 else 0 self._is_series = isinstance(sample, Series) if not 0 <= axis <= sample.ndim: raise AssertionError("axis must be between 0 and {ndim}, input was" " {axis}".format(ndim=sample.ndim, axis=axis)) # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed if len(ndims) > 1: current_column = 0 max_ndim = sample.ndim self.objs, objs = [], self.objs for obj in objs: ndim = obj.ndim if ndim == max_ndim: pass elif ndim != max_ndim - 1: raise ValueError("cannot concatenate unaligned mixed " "dimensional NDFrame objects") else: name = getattr(obj, 'name', None) if ignore_index or name is None: name = current_column current_column += 1 # doing a row-wise concatenation so need everything # to line up if self._is_frame and axis == 1: name = 0 obj = sample._constructor({name: obj}) self.objs.append(obj) # note: this is the BlockManager axis (since DataFrame is transposed) self.axis = axis self.join_axes = join_axes self.keys = keys self.names = names or getattr(keys, 'names', None) self.levels = levels self.sort = sort self.ignore_index = ignore_index self.verify_integrity = verify_integrity self.copy = copy self.new_axes = self._get_new_axes()