Ejemplo n.º 1
def init_dict(data, index, columns, dtype=None):
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Parameters
    ----------
    data : dict
        Mapping of column label to column values.
    index : Index-like or None
        Row labels. When ``columns`` is given and ``index`` is None the
        index is derived from the non-missing entries with
        ``extract_index``; a supplied index is normalised with
        ``ensure_index``. When ``columns`` is None it is passed through
        unchanged.
    columns : Index-like or None
        Requested column labels. When None, the labels come from
        ``com.dict_keys_to_ordered_list(data)``.
    dtype : dtype, optional
        Forwarded to ``arrays_to_mgr``; also decides the dtype of the
        all-NaN placeholder built for requested-but-absent columns.

    Returns
    -------
    Whatever ``arrays_to_mgr`` returns for the assembled arrays.
    """
    if columns is not None:
        from pandas.core.series import Series

        # Align the dict against the requested columns; labels that are
        # absent from ``data`` show up as missing entries.
        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index

        missing = arrays.isnull()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            if dtype is None or np.issubdtype(dtype, np.flexible):
                # GH#1783
                nan_dtype = object
            else:
                nan_dtype = dtype
            v = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                   nan_dtype)
            # Every missing column gets the same NaN placeholder.
            arrays.loc[missing] = [v] * missing.sum()

    else:
        keys = com.dict_keys_to_ordered_list(data)
        columns = data_names = Index(keys)
        arrays = [data[k] for k in keys]

    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
Ejemplo n.º 2
    def _init_dict(self, data, index, columns, dtype=None):
        """
        Convert a dict of column values into sparse columns and hand them
        to ``to_manager``.

        Parameters
        ----------
        data : dict
            Mapping of column label to values (Series, SparseArray, dict
            keyed by index label, or array-like).
        index : Index or None
            Row labels; when None they are derived from the values with
            ``extract_index``.
        columns : Index-like or None
            Requested columns. When given, ``data`` is filtered down to
            these labels; when None, the labels come from
            ``com.dict_keys_to_ordered_list(data)``.
        dtype : dtype, optional
            Passed to ``SparseArray`` when converting dense values.

        Returns
        -------
        Whatever ``to_manager(sdict, columns, index)`` returns.

        Raises
        ------
        ValueError
            If a converted column's length does not match ``index``.
        """
        # pre-filter out columns if we passed it
        if columns is not None:
            columns = ensure_index(columns)
            data = {k: v for k, v in data.items() if k in columns}
        else:
            keys = com.dict_keys_to_ordered_list(data)
            columns = Index(keys)

        if index is None:
            index = extract_index(list(data.values()))

        # NOTE(review): the arguments of both SparseArray calls were cut off
        # in this excerpt; kind/fill_value/copy/dtype below follow the
        # upstream SparseDataFrame implementation -- confirm against the
        # pandas version in use.
        def sp_maker(x):
            return SparseArray(
                x,
                kind=self._default_kind,
                fill_value=self._default_fill_value,
                copy=True,
                dtype=dtype,
            )

        sdict = {}
        for k, v in data.items():
            if isinstance(v, Series):
                # Force alignment, no copy necessary
                if not v.index.equals(index):
                    v = v.reindex(index)

                if not isinstance(v, SparseSeries):
                    v = sp_maker(v.values)
            elif isinstance(v, SparseArray):
                v = v.copy()
            else:
                if isinstance(v, dict):
                    # Align dict values on the index; absent labels -> NaN.
                    v = [v.get(i, np.nan) for i in index]

                v = sp_maker(v)

            if index is not None and len(v) != len(index):
                msg = "Length of passed values is {}, index implies {}"
                raise ValueError(msg.format(len(v), len(index)))
            sdict[k] = v

        if len(columns.difference(sdict)):
            # TODO: figure out how to handle this case, all nan's?
            # add in any other columns we want to have (completeness)
            nan_arr = np.full(len(index), np.nan, dtype="float64")
            nan_arr = SparseArray(
                nan_arr,
                kind=self._default_kind,
                fill_value=self._default_fill_value,
                copy=False,
            )
            # The same all-NaN array object is shared by every added column.
            sdict.update((c, nan_arr) for c in columns if c not in sdict)

        return to_manager(sdict, columns, index)
Ejemplo n.º 3
    def _init_dict(self, data, index, columns, dtype=None):
        """
        Convert a dict of column values into sparse columns and hand them
        to ``to_manager``.

        Parameters
        ----------
        data : dict
            Mapping of column label to values (Series, SparseArray, dict
            keyed by index label, or array-like).
        index : Index or None
            Row labels; when None they are derived from the values with
            ``extract_index``.
        columns : Index-like or None
            Requested columns. When given, ``data`` is filtered down to
            these labels; when None, the labels come from
            ``com.dict_keys_to_ordered_list(data)``.
        dtype : dtype, optional
            Passed to ``SparseArray`` when converting dense values.

        Returns
        -------
        Whatever ``to_manager(sdict, columns, index)`` returns.

        Raises
        ------
        ValueError
            If a converted column's length does not match ``index``.
        """
        # pre-filter out columns if we passed it
        if columns is not None:
            columns = ensure_index(columns)
            data = {k: v for k, v in compat.iteritems(data) if k in columns}
        else:
            keys = com.dict_keys_to_ordered_list(data)
            columns = Index(keys)

        if index is None:
            index = extract_index(list(data.values()))

        # NOTE(review): the ``fill_value`` argument here and the tail of the
        # NaN-column SparseArray call below were not visible in this
        # excerpt; they follow the upstream SparseDataFrame implementation
        # -- confirm against the pandas version in use.
        def sp_maker(x):
            return SparseArray(x, kind=self._default_kind,
                               fill_value=self._default_fill_value,
                               copy=True, dtype=dtype)

        sdict = {}
        for k, v in compat.iteritems(data):
            if isinstance(v, Series):
                # Force alignment, no copy necessary
                if not v.index.equals(index):
                    v = v.reindex(index)

                if not isinstance(v, SparseSeries):
                    v = sp_maker(v.values)
            elif isinstance(v, SparseArray):
                v = v.copy()
            else:
                if isinstance(v, dict):
                    # Align dict values on the index; absent labels -> NaN.
                    v = [v.get(i, np.nan) for i in index]

                v = sp_maker(v)

            if index is not None and len(v) != len(index):
                msg = "Length of passed values is {}, index implies {}"
                raise ValueError(msg.format(len(v), len(index)))
            sdict[k] = v

        if len(columns.difference(sdict)):
            # TODO: figure out how to handle this case, all nan's?
            # add in any other columns we want to have (completeness)
            nan_arr = np.full(len(index), np.nan, dtype='float64')
            nan_arr = SparseArray(nan_arr, kind=self._default_kind,
                                  fill_value=self._default_fill_value,
                                  copy=False)
            # The same all-NaN array object is shared by every added column.
            sdict.update((c, nan_arr) for c in columns if c not in sdict)

        return to_manager(sdict, columns, index)
Ejemplo n.º 4
def init_dict(data, index, columns, dtype=None):
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Parameters
    ----------
    data : dict
        Mapping of column label to column values.
    index : Index-like or None
        Row labels. When ``columns`` is given and ``index`` is None the
        index is derived from the non-missing entries with
        ``extract_index``; a supplied index is normalised with
        ``ensure_index``. When ``columns`` is None it is passed through
        unchanged.
    columns : Index-like or None
        Requested column labels. When None, the labels come from
        ``com.dict_keys_to_ordered_list(data)``.
    dtype : dtype, optional
        Forwarded to ``arrays_to_mgr``; also decides the dtype of the
        all-NaN placeholder built for requested-but-absent columns.

    Returns
    -------
    Whatever ``arrays_to_mgr`` returns for the assembled arrays.
    """
    if columns is not None:
        from pandas.core.series import Series

        # Align the dict against the requested columns; labels that are
        # absent from ``data`` show up as missing entries.
        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index

        missing = arrays.isna()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            if dtype is None or np.issubdtype(dtype, np.flexible):
                # GH#1783
                nan_dtype = object
            else:
                nan_dtype = dtype
            val = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                     nan_dtype)
            # Every missing column gets the same NaN placeholder.
            arrays.loc[missing] = [val] * missing.sum()

    else:
        keys = com.dict_keys_to_ordered_list(data)
        columns = data_names = Index(keys)
        # Lazily normalise each value; consumed by the comprehension below.
        arrays = (com.maybe_iterable_to_list(data[k]) for k in keys)
        # GH#24096 need copy to be deep for datetime64tz case
        # TODO: See if we can avoid these copies
        arrays = [
            arr if not isinstance(arr, ABCIndexClass) else arr._data
            for arr in arrays
        ]
        arrays = [
            arr if not is_datetime64tz_dtype(arr) else arr.copy()
            for arr in arrays
        ]
    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
Ejemplo n.º 5
def init_dict(data, index, columns, dtype=None):
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Parameters
    ----------
    data : dict
        Mapping of column label to column values. When ``columns`` is
        None, tz-aware ``ABCDatetimeIndex`` values are replaced in place
        by deep copies, so the caller's dict is mutated.
    index : Index-like or None
        Row labels. When ``columns`` is given and ``index`` is None the
        index is derived from the non-missing entries with
        ``extract_index``; a supplied index is normalised with
        ``ensure_index``. When ``columns`` is None it is passed through
        unchanged.
    columns : Index-like or None
        Requested column labels. When None, the labels come from
        ``com.dict_keys_to_ordered_list(data)``.
    dtype : dtype, optional
        Forwarded to ``arrays_to_mgr``; also decides the dtype of the
        all-NaN placeholder built for requested-but-absent columns.

    Returns
    -------
    Whatever ``arrays_to_mgr`` returns for the assembled arrays.
    """
    if columns is not None:
        from pandas.core.series import Series

        # Align the dict against the requested columns; labels that are
        # absent from ``data`` show up as missing entries.
        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index

        missing = arrays.isnull()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            if dtype is None or np.issubdtype(dtype, np.flexible):
                # GH#1783
                nan_dtype = object
            else:
                nan_dtype = dtype
            val = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                     nan_dtype)
            # Every missing column gets the same NaN placeholder.
            arrays.loc[missing] = [val] * missing.sum()

    else:

        for key in data:
            if (isinstance(data[key], ABCDatetimeIndex) and
                    data[key].tz is not None):
                # GH#24096 need copy to be deep for datetime64tz case
                # TODO: See if we can avoid these copies
                data[key] = data[key].copy(deep=True)

        keys = com.dict_keys_to_ordered_list(data)
        columns = data_names = Index(keys)
        arrays = [data[k] for k in keys]

    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
Ejemplo n.º 6
    # NOTE(review): the parameter list was missing from this excerpt. The
    # names are the ones the body reads; order and defaults follow the
    # public ``pandas.concat`` signature -- confirm against the target
    # pandas version.
    def __init__(self, objs, axis=0, join="outer", join_axes=None,
                 keys=None, levels=None, names=None,
                 ignore_index=False, verify_integrity=False, copy=True,
                 sort=False):
        """
        Validate and normalise the inputs of a concatenation.

        Parameters
        ----------
        objs : iterable or dict of Series/DataFrame
            Objects to concatenate. For a dict, ``keys`` defaults to
            ``com.dict_keys_to_ordered_list(objs)``. ``None`` entries are
            dropped (together with their key when ``keys`` is given).
        axis : axis label or number
            Resolved to an integer; flipped for DataFrame samples because
            the BlockManager axis is transposed.
        join : {"outer", "inner"}
            Sets ``self.intersect`` (True for "inner").
        join_axes, keys, levels, names, sort, ignore_index,
        verify_integrity, copy
            Stored on the instance for later use.

        Raises
        ------
        TypeError
            If ``objs`` is a single NDFrame or a string, or contains
            anything other than Series/DataFrame.
        ValueError
            If ``join`` is invalid, no objects are left to concatenate, or
            mixed-dimension objects differ by more than one dimension.
        AssertionError
            If the resolved axis is outside ``0..sample.ndim``.
        """
        if isinstance(objs, (NDFrame, str)):
            raise TypeError("first argument must be an iterable of pandas "
                            "objects, you passed an object of type "
                            '"{name}"'.format(name=type(objs).__name__))

        if join == "outer":
            self.intersect = False
        elif join == "inner":
            self.intersect = True
        else:  # pragma: no cover
            raise ValueError("Only can inner (intersect) or outer (union) "
                             "join the other axis")

        if isinstance(objs, dict):
            if keys is None:
                keys = com.dict_keys_to_ordered_list(objs)
            objs = [objs[k] for k in keys]
        else:
            objs = list(objs)

        if len(objs) == 0:
            raise ValueError("No objects to concatenate")

        if keys is None:
            objs = list(com._not_none(*objs))
        else:
            # #1649
            # Drop None objects together with their keys so both stay
            # aligned.
            clean_keys = []
            clean_objs = []
            for k, v in zip(keys, objs):
                if v is None:
                    continue
                clean_keys.append(k)
                clean_objs.append(v)
            objs = clean_objs
            name = getattr(keys, "name", None)
            keys = Index(clean_keys, name=name)

        if len(objs) == 0:
            raise ValueError("All objects passed were None")

        # consolidate data & figure out what our result ndim is going to be
        ndims = set()
        for obj in objs:
            if not isinstance(obj, (Series, DataFrame)):
                msg = ("cannot concatenate object of type '{}';"
                       " only Series and DataFrame objs are valid".format(
                           type(obj)))
                raise TypeError(msg)

            # consolidate
            # NOTE(review): in-place consolidation call follows upstream
            # pandas; it was not visible in this excerpt.
            obj._consolidate(inplace=True)
            ndims.add(obj.ndim)

        # get the sample
        # want the highest ndim that we have, and must be non-empty
        # unless all objs are empty
        sample = None
        if len(ndims) > 1:
            max_ndim = max(ndims)
            for obj in objs:
                if obj.ndim == max_ndim and np.sum(obj.shape):
                    sample = obj
                    break

        else:
            # filter out the empties if we have not multi-index possibilities
            # note to keep empty Series as it affect to result columns / name
            non_empties = [
                obj for obj in objs
                if sum(obj.shape) > 0 or isinstance(obj, Series)
            ]

            if len(non_empties) and (keys is None and names is None and
                                     levels is None and not self.intersect):
                objs = non_empties
                sample = objs[0]

        if sample is None:
            sample = objs[0]
        self.objs = objs

        # Standardize axis parameter to int
        if isinstance(sample, Series):
            axis = DataFrame._get_axis_number(axis)
        else:
            axis = sample._get_axis_number(axis)

        # Need to flip BlockManager axis in the DataFrame special case
        self._is_frame = isinstance(sample, DataFrame)
        if self._is_frame:
            axis = 1 if axis == 0 else 0

        self._is_series = isinstance(sample, Series)
        if not 0 <= axis <= sample.ndim:
            raise AssertionError("axis must be between 0 and {ndim}, input was"
                                 " {axis}".format(ndim=sample.ndim, axis=axis))

        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed
        if len(ndims) > 1:
            current_column = 0
            max_ndim = sample.ndim
            # Rebuild self.objs with every object upcast to max_ndim.
            self.objs, objs = [], self.objs
            for obj in objs:

                ndim = obj.ndim
                if ndim == max_ndim:
                    pass

                elif ndim != max_ndim - 1:
                    raise ValueError("cannot concatenate unaligned mixed "
                                     "dimensional NDFrame objects")

                else:
                    name = getattr(obj, "name", None)
                    if ignore_index or name is None:
                        name = current_column
                        current_column += 1

                    # doing a row-wise concatenation so need everything
                    # to line up
                    if self._is_frame and axis == 1:
                        name = 0
                    obj = sample._constructor({name: obj})

                self.objs.append(obj)

        # note: this is the BlockManager axis (since DataFrame is transposed)
        self.axis = axis
        self.join_axes = join_axes
        self.keys = keys
        self.names = names or getattr(keys, "names", None)
        self.levels = levels
        self.sort = sort

        self.ignore_index = ignore_index
        self.verify_integrity = verify_integrity
        self.copy = copy

        self.new_axes = self._get_new_axes()
Ejemplo n.º 7
    # NOTE(review): the last line of the signature was missing from this
    # excerpt. The body stores ``sort``, so the parameter is required; its
    # ``False`` default follows upstream pandas -- confirm.
    def __init__(self, objs, axis=0, join='outer', join_axes=None,
                 keys=None, levels=None, names=None,
                 ignore_index=False, verify_integrity=False, copy=True,
                 sort=False):
        """
        Validate and normalise the inputs of a concatenation.

        Parameters
        ----------
        objs : iterable or dict of Series/DataFrame
            Objects to concatenate. For a dict, ``keys`` defaults to
            ``com.dict_keys_to_ordered_list(objs)``. ``None`` entries are
            dropped (together with their key when ``keys`` is given).
        axis : axis label or number
            Resolved to an integer; flipped for DataFrame samples because
            the BlockManager axis is transposed.
        join : {'outer', 'inner'}
            Sets ``self.intersect`` (True for 'inner').
        join_axes, keys, levels, names, sort, ignore_index,
        verify_integrity, copy
            Stored on the instance for later use. Empty objects are only
            filtered out when ``keys``, ``names``, ``levels`` and
            ``join_axes`` are all None and the join is 'outer'.

        Raises
        ------
        TypeError
            If ``objs`` is a single NDFrame or a string, or contains
            anything other than Series/DataFrame.
        ValueError
            If ``join`` is invalid, no objects are left to concatenate, or
            mixed-dimension objects differ by more than one dimension.
        AssertionError
            If the resolved axis is outside ``0..sample.ndim``.
        """
        if isinstance(objs, (NDFrame, str)):
            raise TypeError('first argument must be an iterable of pandas '
                            'objects, you passed an object of type '
                            '"{name}"'.format(name=type(objs).__name__))

        if join == 'outer':
            self.intersect = False
        elif join == 'inner':
            self.intersect = True
        else:  # pragma: no cover
            raise ValueError('Only can inner (intersect) or outer (union) '
                             'join the other axis')

        if isinstance(objs, dict):
            if keys is None:
                keys = com.dict_keys_to_ordered_list(objs)
            objs = [objs[k] for k in keys]
        else:
            objs = list(objs)

        if len(objs) == 0:
            raise ValueError('No objects to concatenate')

        if keys is None:
            objs = list(com._not_none(*objs))
        else:
            # #1649
            # Drop None objects together with their keys so both stay
            # aligned.
            clean_keys = []
            clean_objs = []
            for k, v in zip(keys, objs):
                if v is None:
                    continue
                clean_keys.append(k)
                clean_objs.append(v)
            objs = clean_objs
            name = getattr(keys, 'name', None)
            keys = Index(clean_keys, name=name)

        if len(objs) == 0:
            raise ValueError('All objects passed were None')

        # consolidate data & figure out what our result ndim is going to be
        ndims = set()
        for obj in objs:
            if not isinstance(obj, (Series, DataFrame)):
                msg = ("cannot concatenate object of type '{}';"
                       ' only Series and DataFrame objs are valid'
                       .format(type(obj)))
                raise TypeError(msg)

            # consolidate
            # NOTE(review): in-place consolidation call follows upstream
            # pandas; it was not visible in this excerpt.
            obj._consolidate(inplace=True)
            ndims.add(obj.ndim)

        # get the sample
        # want the highest ndim that we have, and must be non-empty
        # unless all objs are empty
        sample = None
        if len(ndims) > 1:
            max_ndim = max(ndims)
            for obj in objs:
                if obj.ndim == max_ndim and np.sum(obj.shape):
                    sample = obj
                    break

        else:
            # filter out the empties if we have not multi-index possibilities
            # note to keep empty Series as it affect to result columns / name
            non_empties = [obj for obj in objs
                           if sum(obj.shape) > 0 or isinstance(obj, Series)]

            if (len(non_empties) and (keys is None and names is None and
                                      levels is None and
                                      join_axes is None and
                                      not self.intersect)):
                objs = non_empties
                sample = objs[0]

        if sample is None:
            sample = objs[0]
        self.objs = objs

        # Standardize axis parameter to int
        if isinstance(sample, Series):
            axis = DataFrame._get_axis_number(axis)
        else:
            axis = sample._get_axis_number(axis)

        # Need to flip BlockManager axis in the DataFrame special case
        self._is_frame = isinstance(sample, DataFrame)
        if self._is_frame:
            axis = 1 if axis == 0 else 0

        self._is_series = isinstance(sample, Series)
        if not 0 <= axis <= sample.ndim:
            raise AssertionError("axis must be between 0 and {ndim}, input was"
                                 " {axis}".format(ndim=sample.ndim, axis=axis))

        # if we have mixed ndims, then convert to highest ndim
        # creating column numbers as needed
        if len(ndims) > 1:
            current_column = 0
            max_ndim = sample.ndim
            # Rebuild self.objs with every object upcast to max_ndim.
            self.objs, objs = [], self.objs
            for obj in objs:

                ndim = obj.ndim
                if ndim == max_ndim:
                    pass

                elif ndim != max_ndim - 1:
                    raise ValueError("cannot concatenate unaligned mixed "
                                     "dimensional NDFrame objects")

                else:
                    name = getattr(obj, 'name', None)
                    if ignore_index or name is None:
                        name = current_column
                        current_column += 1

                    # doing a row-wise concatenation so need everything
                    # to line up
                    if self._is_frame and axis == 1:
                        name = 0
                    obj = sample._constructor({name: obj})

                self.objs.append(obj)

        # note: this is the BlockManager axis (since DataFrame is transposed)
        self.axis = axis
        self.join_axes = join_axes
        self.keys = keys
        self.names = names or getattr(keys, 'names', None)
        self.levels = levels
        self.sort = sort

        self.ignore_index = ignore_index
        self.verify_integrity = verify_integrity
        self.copy = copy

        self.new_axes = self._get_new_axes()