コード例 #1
0
    def _prep_index(self, data, index, columns):
        N, K = data.shape
        if index is None:
            index = com._default_index(N)
        if columns is None:
            columns = com._default_index(K)

        if len(columns) != K:
            raise ValueError('Column length mismatch: {columns} vs. {K}'
                             .format(columns=len(columns), K=K))
        if len(index) != N:
            raise ValueError('Index length mismatch: {index} vs. {N}'
                             .format(index=len(index), N=N))
        return index, columns
コード例 #2
0
ファイル: series.py プロジェクト: timClicks/pandas
    def __new__(cls, data, index=None, dtype=None, name=None, copy=False):
        if isinstance(data, Series):
            if index is None:
                index = data.index
        elif isinstance(data, dict):
            if index is None:
                index = Index(sorted(data.keys()))
            data = [data[idx] for idx in index]

        # Create array, do *not* copy data by default, infer type
        try:
            subarr = np.array(data, dtype=dtype, copy=copy)
        except ValueError:
            if dtype:
                raise

            subarr = np.array(data, dtype=object)

        if subarr.ndim == 0:
            if isinstance(data, list): # pragma: no cover
                subarr = np.array(data, dtype=object)
            elif index is not None:
                value = data

                # If we create an empty array using a string to infer
                # the dtype, NumPy will only allocate one character per entry
                # so this is kind of bad. Alternately we could use np.repeat
                # instead of np.empty (but then you still don't want things
                # coming out as np.str_!
                if isinstance(value, basestring) and dtype is None:
                    dtype = np.object_

                if dtype is None:
                    subarr = np.empty(len(index), dtype=type(value))
                else:
                    subarr = np.empty(len(index), dtype=dtype)
                subarr.fill(value)
            else:
                return subarr.item()

        elif subarr.ndim > 1:
            raise Exception('Data must be 1-dimensional')

        if index is None:
            index = _default_index(len(subarr))

        # This is to prevent mixed-type Series getting all casted to
        # NumPy string type, e.g. NaN --> '-1#IND'.
        if issubclass(subarr.dtype.type, basestring):
            subarr = np.array(data, dtype=object, copy=copy)

        # Change the class of the array to be the subclass type.
        subarr = subarr.view(cls)
        subarr.index = index
        subarr.name = name

        if subarr.index.is_all_dates():
            subarr = subarr.view(TimeSeries)

        return subarr
コード例 #3
0
ファイル: concat.py プロジェクト: ankostis/pandas
    def _get_concat_axis(self):
        """
        Return index to be used along concatenation axis.
        """
        if self._is_series:
            if self.axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.ignore_index:
                idx = com._default_index(len(self.objs))
                return idx
            elif self.keys is None:
                names = [None] * len(self.objs)
                num = 0
                has_names = False
                for i, x in enumerate(self.objs):
                    if not isinstance(x, Series):
                        raise TypeError("Cannot concatenate type 'Series' "
                                        "with object of type "
                                        "%r" % type(x).__name__)
                    if x.name is not None:
                        names[i] = x.name
                        has_names = True
                    else:
                        names[i] = num
                        num += 1
                if has_names:
                    return Index(names)
                else:
                    return com._default_index(len(self.objs))
            else:
                return _ensure_index(self.keys)
        else:
            indexes = [x._data.axes[self.axis] for x in self.objs]

        if self.ignore_index:
            idx = com._default_index(sum(len(i) for i in indexes))
            return idx

        if self.keys is None:
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(indexes, self.keys,
                                                  self.levels, self.names)

        self._maybe_check_integrity(concat_axis)

        return concat_axis
コード例 #4
0
ファイル: concat.py プロジェクト: s4chin/pandas
    def _get_concat_axis(self):
        """
        Return index to be used along concatenation axis.
        """
        if self._is_series:
            if self.axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.ignore_index:
                idx = com._default_index(len(self.objs))
                return idx
            elif self.keys is None:
                names = [None] * len(self.objs)
                num = 0
                has_names = False
                for i, x in enumerate(self.objs):
                    if not isinstance(x, Series):
                        raise TypeError("Cannot concatenate type 'Series' "
                                        "with object of type {type!r}".format(
                                            type=type(x).__name__))
                    if x.name is not None:
                        names[i] = x.name
                        has_names = True
                    else:
                        names[i] = num
                        num += 1
                if has_names:
                    return Index(names)
                else:
                    return com._default_index(len(self.objs))
            else:
                return _ensure_index(self.keys)
        else:
            indexes = [x._data.axes[self.axis] for x in self.objs]

        if self.ignore_index:
            idx = com._default_index(sum(len(i) for i in indexes))
            return idx

        if self.keys is None:
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(indexes, self.keys,
                                                  self.levels, self.names)

        self._maybe_check_integrity(concat_axis)

        return concat_axis
コード例 #5
0
ファイル: sparse_frame.py プロジェクト: kayibal/sparsity
    def __init__(self, data, index=None, columns=None, **kwargs):
        if len(data.shape) > 2:
            raise ValueError("Only two dimensional data supported")

        if len(data.shape) == 1 and isinstance(data, pd.Series):
            data = data.to_frame()

        elif len(data.shape) == 1:
            data = data.reshape(-1, 1)

        self.empty = False
        N, K = data.shape

        if index is None:
            self._index = _default_index(N)
        else:
            # assert len(index) == N
            self._index = _ensure_index(index)

        if columns is None:
            self._columns = _default_index(K)
        else:
            # assert len(columns) == K
            self._columns = _ensure_index(columns)

        if not sparse.isspmatrix_csr(data):
            try:
                self._init_values(data, kwargs)
            except TypeError:
                raise TypeError(traceback.format_exc() +
                                "\nThe error described above occurred while "
                                "converting data to sparse matrix.")
        else:
            self._init_csr(data)

        # register indexers
        self.ndim = 2
        self.iloc = _CsrILocationIndexer(self, 'iloc')
        self.loc = _CsrLocIndexer(self, 'loc')
コード例 #6
0
ファイル: panel.py プロジェクト: hiseba/pandas
    def _init_matrix(self, data, axes, dtype=None, copy=False):
        values = self._prep_ndarray(self, data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        return create_block_manager_from_blocks([values], fixed_axes)
コード例 #7
0
ファイル: panel.py プロジェクト: datajunkie007/pandas
    def _init_matrix(self, data, axes, dtype=None, copy=False):
        values = self._prep_ndarray(self, data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        return create_block_manager_from_blocks([values], fixed_axes)
コード例 #8
0
ファイル: panel.py プロジェクト: EmlynC/pandas
    def _init_matrix(self, data, axes, dtype=None, copy=False):
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        items = fixed_axes[0]
        block = make_block(values, items, items)
        return BlockManager([block], fixed_axes)
コード例 #9
0
    def _init_matrix(self, data, axes, dtype=None, copy=False):
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        items = fixed_axes[0]
        block = make_block(values, items, items)
        return BlockManager([block], fixed_axes)
コード例 #10
0
ファイル: series.py プロジェクト: surfer1-dev/pandas
    def __init__(self,
                 data=None,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            is_sparse_array = isinstance(data, SparseArray)
            if fill_value is None:
                if is_sparse_array:
                    fill_value = data.fill_value
                else:
                    fill_value = np.nan

            if is_sparse_array:
                if isinstance(data, SparseSeries) and index is None:
                    index = data.index.view()
                elif index is not None:
                    assert (len(index) == len(data))

                sparse_index = data.sp_index
                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()

                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index.view()

                data = Series(data)
                data, sparse_index = make_sparse(data,
                                                 kind=kind,
                                                 fill_value=fill_value)

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    data, sparse_index = make_sparse(data,
                                                     kind=kind,
                                                     fill_value=fill_value)
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
コード例 #11
0
ファイル: series.py プロジェクト: AllenDowney/pandas
    def __init__(self, data=None, index=None, sparse_index=None, kind='block',
                 fill_value=None, name=None, dtype=None, copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            if isinstance(data, SparseArray):
                if index is not None:
                    assert (len(index) == len(data))
                sparse_index = data.sp_index
                if fill_value is None:
                    fill_value = data.fill_value

                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()
                if fill_value is None:
                    fill_value = data.fill_value
                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index.view()

                data = Series(data)
                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    res = make_sparse(data, kind=kind, fill_value=fill_value)
                    data, sparse_index, fill_value = res
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data, sparse_index=sparse_index,
                                       fill_value=fill_value, dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
コード例 #12
0
    def __new__(cls, data, index=None, sparse_index=None, kind='block',
                fill_value=None, name=None, copy=False):

        is_sparse_array = isinstance(data, SparseArray)
        if fill_value is None:
            if is_sparse_array:
                fill_value = data.fill_value
            else:
                fill_value = nan

        if is_sparse_array:
            if isinstance(data, SparseSeries) and index is None:
                index = data.index
            elif index is not None:
                assert(len(index) == len(data))

            sparse_index = data.sp_index
            values = np.asarray(data)
        elif isinstance(data, (Series, dict)):
            if index is None:
                index = data.index

            data = Series(data)
            values, sparse_index = make_sparse(data, kind=kind,
                                               fill_value=fill_value)
        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                values, sparse_index = make_sparse(data, kind=kind,
                                                   fill_value=fill_value)
            else:
                values = data
                assert(len(values) == sparse_index.npoints)
        else:
            if index is None:
                raise Exception('must pass index!')

            length = len(index)

            if data == fill_value or (isnull(data)
                    and isnull(fill_value)):
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                values = np.array([])
            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                values = np.empty(length)
                values.fill(data)

        if index is None:
            index = com._default_index(sparse_index.length)
        index = _ensure_index(index)

        # Create array, do *not* copy data by default
        if copy:
            subarr = np.array(values, dtype=np.float64, copy=True)
        else:
            subarr = np.asarray(values, dtype=np.float64)

        if index.is_all_dates:
            cls = SparseTimeSeries

        # Change the class of the array to be the subclass type.
        output = subarr.view(cls)
        output.sp_index = sparse_index
        output.fill_value = np.float64(fill_value)
        output.index = index
        output.name = name
        return output
コード例 #13
0
    def __init__(self,
                 data=None,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            if isinstance(data, SparseArray):
                if index is not None:
                    assert (len(index) == len(data))
                sparse_index = data.sp_index
                if fill_value is None:
                    fill_value = data.fill_value

                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()
                if fill_value is None:
                    fill_value = data.fill_value
                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                data = Series(data, index=index)
                index = data.index.view()

                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    res = make_sparse(data, kind=kind, fill_value=fill_value)
                    data, sparse_index, fill_value = res
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                elif not data.index.equals(index) or copy:  # pragma: no cover
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError('Cannot pass both SingleBlockManager '
                                         '`data` argument and a different '
                                         '`index` argument.  `copy` must '
                                         'be False.')

            else:
                length = len(index)

                if data == fill_value or (isna(data) and isna(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
コード例 #14
0
    def __init__(self, data, index=None, columns=None, **kwargs):
        """Init SparseFrame

        Parameters
        ----------
        data: sparse.csr_matrix | np.ndarray | pandas.DataFrame
            Data to initialize matrix with. Can be one of above types, or
            anything accepted by sparse.csr_matrix along with the correct
            kwargs.
        index: pd.Index or array-like
            Index to use for resulting frame. Will default to RangeIndex if
            input data has no indexing information and no index provided.
        columns : pd.Index or array-like
            Column labels to use for resulting frame. Defaults like in index.
        """
        if len(data.shape) > 2:
            raise ValueError("Only two dimensional data supported")

        if len(data.shape) == 1 and isinstance(data, pd.Series):
            data = data.to_frame()

        elif len(data.shape) == 1:
            data = data.reshape(-1, 1)

        self.empty = False
        N, K = data.shape

        if index is None:
            self._index = _default_index(N)
        elif len(index) != N and data.size:
            if columns is not None:
                implied_axis_1 = len(columns)
            else:
                implied_axis_1 = data.shape[1]
            raise ValueError('Shape of passed values is {},'
                             'indices imply {}'.format(
                                 data.shape, (len(index), implied_axis_1)))
        else:
            self._index = _ensure_index(index)

        if columns is None:
            self._columns = _default_index(K)
        elif len(columns) != K and data.size:
            if index is not None:
                implied_axis_0 = len(index)
            else:
                implied_axis_0 = data.shape[0]
            raise ValueError('Shape of passed values is {},'
                             'indices imply {}'.format(
                                 data.shape, (implied_axis_0, len(columns))))
        else:
            self._columns = _ensure_index(columns)

        if not sparse.isspmatrix_csr(data):
            try:
                self._init_values(data,
                                  init_index=index is None,
                                  init_columns=columns is None,
                                  **kwargs)
            except TypeError:
                raise TypeError(traceback.format_exc() +
                                "\nThe error described above occurred while "
                                "converting data to sparse matrix.")
        else:
            self.empty = True if _is_empty(data) else False
            self._init_csr(data)

        self.ndim = 2
コード例 #15
0
 def reset_index(self, drop=False):
     if not drop:
         raise NotImplementedError("drop=False is not supported.")
     new_idx = _default_index(len(self))
     return SparseFrame(self.data, index=new_idx, columns=self.columns)