Esempio n. 1
0
    def get_result(self):
        """
        Assemble the concatenated object.

        Series inputs are stacked into a single Series when ``self.axis`` is
        0, and are otherwise laid out as the columns of a DataFrame. Any other
        input is combined through its block managers, with every axis except
        the concatenation axis reindexed to ``self.new_axes`` where needed.

        Returns
        -------
        Series, DataFrame, or the object built by
        ``self.objs[0]._from_axes``, finalized with ``method='concat'``.
        """
        if self._is_series and self.axis == 0:
            stacked = com._concat_compat([s.get_values() for s in self.objs])
            name = com._consensus_name_attr(self.objs)
            result = Series(stacked, index=self.new_axes[0], name=name)
            return result.__finalize__(self, method='concat')

        if self._is_series:
            # key each Series by its position; labels are applied afterwards
            by_position = dict(enumerate(self.objs))
            row_labels, col_labels = self.new_axes
            frame = DataFrame(by_position, index=row_labels)
            if col_labels is not None:
                frame.columns = col_labels
            return frame.__finalize__(self, method='concat')

        pairs = []
        for obj in self.objs:
            mgr = obj._data
            reindexers = {}
            for ax, target in enumerate(self.new_axes):
                # Suppress reindexing on concat axis
                if ax != self.axis:
                    current = mgr.axes[ax]
                    if not target.equals(current):
                        reindexers[ax] = current.reindex(target)[1]
            pairs.append((mgr, reindexers))

        merged = concatenate_block_managers(
            pairs, self.new_axes, concat_axis=self.axis, copy=self.copy)
        if not self.copy:
            merged._consolidate_inplace()

        built = self.objs[0]._from_axes(merged, self.new_axes)
        return built.__finalize__(self, method='concat')
Esempio n. 2
0
    def _concat_blocks(self, blocks):
        """
        Join the values of ``blocks`` along ``self.axis`` into one block.

        ``None`` entries in ``blocks`` are left out of the concatenation.

        Raises
        ------
        Exception
            If ``self.axis > 0`` and the blocks do not all carry the same
            items.
        """
        present = [blk.values for blk in blocks if blk is not None]
        joined = com._concat_compat(present, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            item_sets = [blk.items for blk in blocks]
            if _all_indexes_same(item_sets):
                return make_block(joined, blocks[0].items, self.new_axes[0])
            raise Exception('dtypes are not consistent throughout '
                            'DataFrames')

        # shift each block's locations by the combined length of the
        # objects that precede it
        lengths = [len(obj._data.axes[0]) for obj in self.objs]
        starts = np.r_[0, np.cumsum(lengths)]
        shifted = [starts[pos] + blk.ref_locs
                   for pos, blk in enumerate(blocks)
                   if blk is not None]
        locs = np.concatenate(shifted)

        if self.ignore_index:
            return make_block(joined, locs, self._get_fresh_axis())

        labels = self.new_axes[0].take(locs)
        return make_block(joined, labels, self.new_axes[0])
Esempio n. 3
0
    def _concat_single_item(self, item):
        """
        Concatenate the values stored under ``item`` across ``self.objs``.

        Objects whose ``_data.get(item)`` raises ``KeyError`` contribute a
        NaN-filled placeholder shaped like ``obj._data.shape[1:]``. The
        placeholder dtype is object when any value that was found has object
        or bool dtype, and float64 otherwise.

        Parameters
        ----------
        item : label passed to ``obj._data.get``

        Returns
        -------
        The result of ``com._concat_compat`` along ``self.axis - 1``.

        Raises
        ------
        AssertionError
            If ``self.axis`` is less than 1.
        """
        # this method only gets called with axis >= 1; raise explicitly
        # (instead of ``assert``) so the check is not stripped by ``python -O``
        if not self.axis >= 1:
            raise AssertionError('_concat_single_item requires axis >= 1, '
                                 'got %r' % (self.axis,))

        all_values = []
        dtypes = set()
        for obj in self.objs:
            try:
                values = obj._data.get(item)
            except KeyError:
                all_values.append(None)
            else:
                dtypes.add(values.dtype)
                all_values.append(values)

        # this stinks
        have_object = any(issubclass(dtype.type, (np.object_, np.bool_))
                          for dtype in dtypes)
        empty_dtype = np.object_ if have_object else np.float64

        to_concat = []
        for obj, item_values in zip(self.objs, all_values):
            if item_values is None:
                # stand-in for an object that does not hold ``item``
                item_values = np.empty(obj._data.shape[1:], dtype=empty_dtype)
                item_values.fill(np.nan)
            to_concat.append(item_values)

        return com._concat_compat(to_concat, axis=self.axis - 1)
Esempio n. 4
0
    def _concat_blocks(self, blocks):
        """
        Join the values of ``blocks`` along ``self.axis`` into one block.

        ``None`` entries in ``blocks`` are left out of the concatenation.
        When the resulting first axis is not unique, the computed locations
        are stored on the new block as ``_ref_locs``.

        Raises
        ------
        Exception
            If ``self.axis > 0`` and the blocks do not all carry the same
            items.
        """
        present = [blk.values for blk in blocks if blk is not None]
        joined = com._concat_compat(present, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            item_sets = [blk.items for blk in blocks]
            if _all_indexes_same(item_sets):
                return make_block(joined, blocks[0].items, self.new_axes[0])
            raise Exception("dtypes are not consistent throughout " "DataFrames")

        # shift each block's locations by the combined length of the
        # objects that precede it
        lengths = [len(obj._data.axes[0]) for obj in self.objs]
        starts = np.r_[0, np.cumsum(lengths)]
        shifted = [starts[pos] + blk.ref_locs
                   for pos, blk in enumerate(blocks)
                   if blk is not None]
        locs = np.concatenate(shifted)

        if self.ignore_index:
            return make_block(joined, locs, self._get_fresh_axis())

        labels = self.new_axes[0].take(locs)
        block = make_block(joined, labels, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block._ref_locs = locs

        return block
Esempio n. 5
0
    def _fast_union(self, other):
        """
        Combine ``self`` and ``other`` into one index.

        If either operand is empty, the other one is returned as a view of
        ``type(self)``. Otherwise the operand with the smaller (or equal)
        first element is treated as ``left``. When ``self.offset`` should be
        cached, a new ``type(self)`` is built from the left start to the
        later of the two ends; otherwise the part of ``right`` that lies
        after the end of ``left`` is appended to ``left``'s values.

        NOTE(review): presumably both operands are sorted and overlap or
        touch -- confirm against callers.
        """
        if len(other) == 0:
            return self.view(type(self))
        if len(self) == 0:
            return other.view(type(self))

        # to make our life easier, "sort" the two ranges
        left, right = (self, other) if self[0] <= other[0] else (other, self)

        left_start = left[0]
        left_end = left[-1]
        right_end = right[-1]

        if self.offset._should_cache():
            return type(self)(start=left_start,
                              end=max(left_end, right_end),
                              freq=left.offset)

        # concatenate dates
        if not left_end < right_end:
            return left

        cut = right.searchsorted(left_end, side='right')
        tail = right.values[cut:]
        dates = com._concat_compat((left.values, tail))
        return self._view_like(dates)
Esempio n. 6
0
    def get_result(self):
        """
        Assemble the concatenated object.

        Series inputs are stacked into a single Series when ``self.axis`` is
        0, and are otherwise laid out as the columns of a DataFrame. Any other
        input is combined through its block managers (always copying), with
        every axis except the concatenation axis reindexed to
        ``self.new_axes`` where needed.

        Returns
        -------
        Series, DataFrame, or the object built by
        ``self.objs[0]._from_axes``, finalized with ``method='concat'``.
        """
        if not self._is_series:
            pairs = []
            for obj in self.objs:
                mgr = obj._data
                reindexers = {}
                for ax, target in enumerate(self.new_axes):
                    # Suppress reindexing on concat axis
                    if ax != self.axis:
                        current = mgr.axes[ax]
                        if not target.equals(current):
                            reindexers[ax] = current.reindex(target)[1]
                pairs.append((mgr, reindexers))

            merged = concatenate_block_managers(
                pairs, self.new_axes, concat_axis=self.axis, copy=True)
            built = self.objs[0]._from_axes(merged, self.new_axes)
            return built.__finalize__(self, method='concat')

        if self.axis == 0:
            stacked = com._concat_compat([s.get_values() for s in self.objs])
            name = com._consensus_name_attr(self.objs)
            result = Series(stacked, index=self.new_axes[0], name=name)
            return result.__finalize__(self, method='concat')

        # key each Series by its position; labels are applied afterwards
        by_position = dict(enumerate(self.objs))
        row_labels, col_labels = self.new_axes
        frame = DataFrame(by_position, index=row_labels)
        if col_labels is not None:
            frame.columns = col_labels
        return frame.__finalize__(self, method='concat')
Esempio n. 7
0
    def _concat_single_item(self, item):
        """
        Concatenate the values stored under ``item`` across ``self.objs``.

        Objects whose ``_data.get(item)`` raises ``KeyError`` contribute a
        NaN-filled placeholder shaped like ``obj._data.shape[1:]``. The
        placeholder dtype is object when any value that was found has object
        or bool dtype, and float64 otherwise.

        Parameters
        ----------
        item : label passed to ``obj._data.get``

        Returns
        -------
        The result of ``com._concat_compat`` along ``self.axis - 1``.

        Raises
        ------
        AssertionError
            If ``self.axis`` is less than 1.
        """
        # this method only gets called with axis >= 1; an explicit raise is
        # used because a bare ``assert`` disappears under ``python -O``
        if not self.axis >= 1:
            raise AssertionError('_concat_single_item requires axis >= 1, '
                                 'got %r' % (self.axis,))

        all_values = []
        dtypes = set()
        for obj in self.objs:
            try:
                values = obj._data.get(item)
            except KeyError:
                all_values.append(None)
            else:
                dtypes.add(values.dtype)
                all_values.append(values)

        # this stinks
        have_object = any(issubclass(dtype.type, (np.object_, np.bool_))
                          for dtype in dtypes)
        empty_dtype = np.object_ if have_object else np.float64

        to_concat = []
        for obj, item_values in zip(self.objs, all_values):
            if item_values is None:
                # stand-in for an object that does not hold ``item``
                item_values = np.empty(obj._data.shape[1:], dtype=empty_dtype)
                item_values.fill(np.nan)
            to_concat.append(item_values)

        return com._concat_compat(to_concat, axis=self.axis - 1)
Esempio n. 8
0
    def append(self, other):
        """
        Concatenate this Index with one or more others.

        The result keeps ``self.name`` only when every Index taking part
        has that same name; otherwise the result is unnamed.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        from pandas.core.index import _ensure_compat_concat

        if isinstance(other, (list, tuple)):
            pieces = [self] + list(other)
        else:
            pieces = [self, other]

        name = self.name
        if any(isinstance(p, Index) and p.name != name for p in pieces):
            name = None

        pieces = _ensure_compat_concat(pieces)
        # unwrap Index objects to their underlying values
        arrays = [p.values if isinstance(p, Index) else p for p in pieces]

        return Index(com._concat_compat(arrays), name=name)
Esempio n. 9
0
    def _concat_blocks(self, blocks):
        """
        Join the values of ``blocks`` along ``self.axis`` into one block.

        ``None`` entries in ``blocks`` are left out of the concatenation.

        Raises
        ------
        Exception
            If ``self.axis > 0`` and the blocks do not all carry the same
            items.
        """
        joined = com._concat_compat(
            [blk.values for blk in blocks if blk is not None],
            axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([blk.items for blk in blocks]):
                raise Exception('dtypes are not consistent throughout '
                                'DataFrames')
            first = blocks[0]
            return make_block(joined, first.items, self.new_axes[0])

        # cumulative lengths give the starting location of every object
        starts = np.r_[
            0, np.cumsum([len(obj._data.axes[0]) for obj in self.objs])]
        locs = np.concatenate([
            starts[pos] + blk.ref_locs for pos, blk in enumerate(blocks)
            if blk is not None
        ])

        if not self.ignore_index:
            labels = self.new_axes[0].take(locs)
            return make_block(joined, labels, self.new_axes[0])

        fresh = self._get_fresh_axis()
        return make_block(joined, locs, fresh)
Esempio n. 10
0
    def _fast_union(self, other):
        """
        Combine ``self`` and ``other`` into one index.

        An empty operand short-circuits: the other operand is returned as a
        view of ``type(self)``. Otherwise the operand whose first element is
        smaller (or equal) becomes ``left``. If ``self.offset`` should not be
        cached, the elements of ``right`` located after ``left``'s last
        element are appended to ``left``'s values; if it should, a new
        ``type(self)`` spanning the left start to the later end is created.

        NOTE(review): presumably both operands are sorted and overlap or
        touch -- confirm against callers.
        """
        if len(other) == 0:
            return self.view(type(self))
        if len(self) == 0:
            return other.view(type(self))

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left = self
            right = other
        else:
            left = other
            right = self

        left_start = left[0]
        left_end = left[-1]
        right_end = right[-1]

        if self.offset._should_cache():
            latest = max(left_end, right_end)
            return type(self)(start=left_start, end=latest, freq=left.offset)

        # concatenate dates
        if left_end < right_end:
            cut = right.searchsorted(left_end, side='right')
            tail = right.values[cut:]
            return self._view_like(com._concat_compat((left.values, tail)))

        return left
Esempio n. 11
0
    def append(self, other):
        """
        Concatenate this Index with one or more others.

        The result keeps ``self.name`` only when every Index taking part
        has that same name; otherwise the result is unnamed.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        if isinstance(other, (list, tuple)):
            pieces = [self] + list(other)
        else:
            pieces = [self, other]

        name = self.name
        if any(isinstance(p, Index) and p.name != name for p in pieces):
            name = None

        compatible = self._ensure_compat_concat(pieces)
        return Index(com._concat_compat(compatible), name=name)
Esempio n. 12
0
    def get_result(self):
        """
        Assemble the concatenated object.

        Series inputs are stacked into a single Series when ``self.axis`` is
        0, and are otherwise laid out as the columns of a DataFrame. In the
        DataFrame case, default integer column labels are replaced by the
        Series names unless ``self.ignore_index`` is set. Any other input is
        combined through its block managers.

        Returns
        -------
        Series, DataFrame, or the object built by
        ``self.objs[0]._from_axes``, finalized with ``method='concat'``.
        """
        # series only, stacked end to end
        if self._is_series and self.axis == 0:
            stacked = com._concat_compat([s._values for s in self.objs])
            name = com._consensus_name_attr(self.objs)
            result = Series(stacked, index=self.new_axes[0],
                            name=name, dtype=stacked.dtype)
            return result.__finalize__(self, method='concat')

        # series only, combined as columns in a frame
        if self._is_series:
            data = dict(enumerate(self.objs))
            index, columns = self.new_axes
            frame = DataFrame(data, index=index)
            # ``columns`` already holds real labels when they were supplied
            # through the 'key' argument of 'concat'. If it is still the
            # default integer range, fall back to the series names.
            default_cols = Index(np.arange(len(self.objs)))
            if columns.equals(default_cols) and not self.ignore_index:
                columns = np.array([data[pos].name
                                    for pos in range(len(data))],
                                   dtype='object')
                unnamed = isnull(columns)
                if unnamed.any():
                    # number the unnamed series 0, 1, 2, ...
                    columns[unnamed] = np.arange(len(unnamed[unnamed]))
            frame.columns = columns
            return frame.__finalize__(self, method='concat')

        # combine block managers
        pairs = []
        for obj in self.objs:
            mgr = obj._data
            reindexers = {}
            for ax, target in enumerate(self.new_axes):
                # Suppress reindexing on concat axis
                if ax != self.axis:
                    current = mgr.axes[ax]
                    if not target.equals(current):
                        reindexers[ax] = current.reindex(target)[1]
            pairs.append((mgr, reindexers))

        merged = concatenate_block_managers(
            pairs, self.new_axes,
            concat_axis=self.axis, copy=self.copy)
        if not self.copy:
            merged._consolidate_inplace()

        built = self.objs[0]._from_axes(merged, self.new_axes)
        return built.__finalize__(self, method='concat')
Esempio n. 13
0
    def get_result(self):
        """
        Assemble the concatenated object.

        Series inputs are stacked into a single Series when ``self.axis`` is
        0 (empty Series are skipped unless all of them are empty), and are
        otherwise laid out as the columns of a DataFrame. Any other input is
        combined through its block managers.

        Returns
        -------
        Series, DataFrame, or the object built by
        ``self.objs[0]._from_axes``, finalized with ``method='concat'``.
        """
        # series only, stacked end to end
        if self._is_series and self.axis == 0:
            # concat Series with length to keep dtype as much
            non_empties = [s for s in self.objs if len(s) > 0]
            sources = non_empties or self.objs
            stacked = com._concat_compat([s._values for s in sources])

            name = com._consensus_name_attr(self.objs)
            result = Series(stacked,
                            index=self.new_axes[0],
                            name=name,
                            dtype=stacked.dtype)
            return result.__finalize__(self, method='concat')

        # series only, combined as columns in a frame
        if self._is_series:
            by_position = dict(enumerate(self.objs))
            row_labels, col_labels = self.new_axes
            frame = DataFrame(by_position, index=row_labels)
            frame.columns = col_labels
            return frame.__finalize__(self, method='concat')

        # combine block managers
        pairs = []
        for obj in self.objs:
            mgr = obj._data
            reindexers = {}
            for ax, target in enumerate(self.new_axes):
                # Suppress reindexing on concat axis
                if ax != self.axis:
                    current = mgr.axes[ax]
                    if not target.equals(current):
                        reindexers[ax] = current.reindex(target)[1]
            pairs.append((mgr, reindexers))

        merged = concatenate_block_managers(pairs,
                                            self.new_axes,
                                            concat_axis=self.axis,
                                            copy=self.copy)
        if not self.copy:
            merged._consolidate_inplace()

        built = self.objs[0]._from_axes(merged, self.new_axes)
        return built.__finalize__(self, method='concat')
Esempio n. 14
0
 def get_result(self):
     """
     Assemble the concatenated object.

     Series inputs are stacked into a single Series when ``self.axis`` is
     0, and are otherwise laid out as the columns of a DataFrame labelled
     by ``self.new_axes[1]``. Any other input is built from
     ``self._get_concatenated_data()``.

     Returns
     -------
     Series, DataFrame, or the object built by
     ``self.objs[0]._from_axes``.
     """
     if self._is_series and self.axis == 0:
         new_data = com._concat_compat([x.values for x in self.objs])
         name = com._consensus_name_attr(self.objs)
         return Series(new_data, index=self.new_axes[0], name=name)
     elif self._is_series:
         # Key the Series by position and assign the column labels
         # afterwards: a dict keyed by label would silently keep only the
         # last Series for any label that occurs more than once.
         data = dict(zip(range(len(self.objs)), self.objs))
         tmpdf = DataFrame(data, index=self.new_axes[0])
         tmpdf.columns = self.new_axes[1]
         return tmpdf
     else:
         new_data = self._get_concatenated_data()
         return self.objs[0]._from_axes(new_data, self.new_axes)
Esempio n. 15
0
    def get_result(self):
        """
        Assemble the concatenated object.

        Series inputs are stacked into a single Series when ``self.axis`` is
        0 (empty Series are skipped unless all of them are empty), and are
        otherwise laid out as the columns of a DataFrame. Any other input is
        combined through its block managers.

        Returns
        -------
        Series, DataFrame, or the object built by
        ``self.objs[0]._from_axes``, finalized with ``method='concat'``.
        """
        if not self._is_series:
            # combine block managers
            pairs = []
            for obj in self.objs:
                mgr = obj._data
                reindexers = {}
                for ax, target in enumerate(self.new_axes):
                    # Suppress reindexing on concat axis
                    if ax != self.axis:
                        current = mgr.axes[ax]
                        if not target.equals(current):
                            reindexers[ax] = current.reindex(target)[1]
                pairs.append((mgr, reindexers))

            merged = concatenate_block_managers(
                pairs, self.new_axes,
                concat_axis=self.axis, copy=self.copy)
            if not self.copy:
                merged._consolidate_inplace()

            built = self.objs[0]._from_axes(merged, self.new_axes)
            return built.__finalize__(self, method='concat')

        if self.axis != 0:
            # combine as columns in a frame
            by_position = dict(enumerate(self.objs))
            row_labels, col_labels = self.new_axes
            frame = DataFrame(by_position, index=row_labels)
            frame.columns = col_labels
            return frame.__finalize__(self, method='concat')

        # stack blocks
        # concat Series with length to keep dtype as much
        non_empties = [s for s in self.objs if len(s) > 0]
        sources = non_empties or self.objs
        stacked = com._concat_compat([s._values for s in sources])

        name = com._consensus_name_attr(self.objs)
        result = Series(stacked, index=self.new_axes[0],
                        name=name, dtype=stacked.dtype)
        return result.__finalize__(self, method='concat')
Esempio n. 16
0
 def get_result(self):
     """
     Assemble the concatenated object.

     Series inputs are stacked into a single Series when ``self.axis`` is
     0, and are otherwise laid out as the columns of a DataFrame labelled
     by ``self.new_axes[1]``. Any other input is built from
     ``self._get_concatenated_data()``.

     Returns
     -------
     Series, DataFrame, or the object built by
     ``self.objs[0]._from_axes``.
     """
     if self._is_series and self.axis == 0:
         new_data = com._concat_compat([x.values for x in self.objs])
         name = com._consensus_name_attr(self.objs)
         return Series(new_data, index=self.new_axes[0], name=name)
     elif self._is_series:
         # Build the frame by position first and label the columns after:
         # using the labels as dict keys collapses Series that share a
         # label into a single entry.
         data = dict(zip(range(len(self.objs)), self.objs))
         tmpdf = DataFrame(data, index=self.new_axes[0])
         tmpdf.columns = self.new_axes[1]
         return tmpdf
     else:
         new_data = self._get_concatenated_data()
         return self.objs[0]._from_axes(new_data, self.new_axes)
Esempio n. 17
0
 def get_result(self):
     """
     Assemble the concatenated object.

     Series inputs are stacked into a single Series when ``self.axis`` is
     0, and are otherwise laid out as the columns of a DataFrame labelled
     by ``self.new_axes[1]``. Any other input is built from
     ``self._get_concatenated_data()``.

     Returns
     -------
     Series, DataFrame, or the object built by
     ``self.objs[0]._from_axes``.
     """
     if self._is_series and self.axis == 0:
         new_data = com._concat_compat([x.values for x in self.objs])
         name = com._consensus_name_attr(self.objs)
         return Series(new_data, index=self.new_axes[0], name=name)
     elif self._is_series:
         # Key the Series by position; the real labels are assigned below.
         # ``zip``/``range`` behave the same on Python 2 and 3, unlike
         # ``itertools.izip``/``xrange`` which exist only on Python 2.
         data = dict(zip(range(len(self.objs)), self.objs))
         tmpdf = DataFrame(data, index=self.new_axes[0])
         tmpdf.columns = self.new_axes[1]
         return tmpdf
     else:
         new_data = self._get_concatenated_data()
         return self.objs[0]._from_axes(new_data, self.new_axes)
Esempio n. 18
0
 def get_result(self):
     """
     Assemble the concatenated object.

     Series inputs are stacked into a single Series when ``self.axis`` is
     0, and are otherwise laid out as the columns of a DataFrame labelled
     by ``self.new_axes[1]``. Any other input is built from
     ``self._get_concatenated_data()``.

     Returns
     -------
     Series, DataFrame, or the object built by
     ``self.objs[0]._from_axes``.
     """
     if self._is_series and self.axis == 0:
         new_data = com._concat_compat([x.values for x in self.objs])
         name = com._consensus_name_attr(self.objs)
         return Series(new_data, index=self.new_axes[0], name=name)
     elif self._is_series:
         # Position-keyed dict; column labels are applied afterwards.
         # Built with ``zip``/``range`` because ``itertools.izip`` and
         # ``xrange`` are not available on Python 3.
         data = dict(zip(range(len(self.objs)), self.objs))
         tmpdf = DataFrame(data, index=self.new_axes[0])
         tmpdf.columns = self.new_axes[1]
         return tmpdf
     else:
         new_data = self._get_concatenated_data()
         return self.objs[0]._from_axes(new_data, self.new_axes)
Esempio n. 19
0
 def get_result(self):
     """
     Assemble the concatenated object.

     Series inputs are stacked into a single Series when ``self.axis`` is
     0, and are otherwise laid out as the columns of a DataFrame. Any other
     input is built from ``self._get_concatenated_data()``. The stacked
     Series values and the concatenated data both pass through
     ``self._post_merge``.

     Returns
     -------
     Series, DataFrame, or the object built by
     ``self.objs[0]._from_axes``.
     """
     if not self._is_series:
         merged = self._get_concatenated_data()
         merged = self._post_merge(merged)
         return self.objs[0]._from_axes(merged, self.new_axes)

     if self.axis == 0:
         stacked = com._concat_compat([s.get_values() for s in self.objs])
         name = com._consensus_name_attr(self.objs)
         stacked = self._post_merge(stacked)
         return Series(stacked, index=self.new_axes[0], name=name)

     # key each Series by its position; labels are applied afterwards
     by_position = dict(enumerate(self.objs))
     row_labels, col_labels = self.new_axes
     frame = DataFrame(by_position, index=row_labels)
     if col_labels is not None:
         frame.columns = col_labels
     return frame
Esempio n. 20
0
 def get_result(self):
     """
     Assemble the concatenated object.

     Series inputs are stacked into a single Series when ``self.axis`` is
     0, and are otherwise laid out as the columns of a DataFrame. Any other
     input is built from ``self._get_concatenated_data()``. The stacked
     Series values and the concatenated data both pass through
     ``self._post_merge``.

     Returns
     -------
     Series, DataFrame, or the object built by
     ``self.objs[0]._from_axes``.
     """
     if self._is_series:
         if self.axis != 0:
             # key each Series by its position; labels are applied last
             by_position = dict(enumerate(self.objs))
             row_labels, col_labels = self.new_axes
             frame = DataFrame(by_position, index=row_labels)
             if col_labels is not None:
                 frame.columns = col_labels
             return frame

         stacked = com._concat_compat([s.get_values() for s in self.objs])
         name = com._consensus_name_attr(self.objs)
         stacked = self._post_merge(stacked)
         return Series(stacked, index=self.new_axes[0], name=name)

     merged = self._post_merge(self._get_concatenated_data())
     return self.objs[0]._from_axes(merged, self.new_axes)
Esempio n. 21
0
    def _concat_blocks(self, blocks):
        """
        Join the values of ``blocks`` along ``self.axis`` into one block.

        ``None`` entries in ``blocks`` are left out of the concatenation.
        When the resulting first axis is not unique, the computed locations
        are registered on the new block through ``set_ref_locs``.

        Raises
        ------
        PandasError
            If ``self.axis > 0`` and the blocks do not all carry the same
            items.
        """
        present = [blk.get_values() for blk in blocks if blk is not None]
        joined = com._concat_compat(present, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            item_sets = [blk.items for blk in blocks]
            if _all_indexes_same(item_sets):
                first = blocks[0]
                return make_block(joined,
                                  first.items,
                                  self.new_axes[0],
                                  placement=first._ref_locs)
            # TODO: Either profile this piece or remove.
            # FIXME: Need to figure out how to test whether this line exists
            #        or does not...(unclear if even possible or maybe would
            #        require performance test)
            raise PandasError('dtypes are not consistent throughout '
                              'DataFrames')

        # shift each block's locations by the combined length of the
        # objects that precede it
        lengths = [len(obj._data.axes[0]) for obj in self.objs]
        starts = np.r_[0, np.cumsum(lengths)]
        shifted = [starts[pos] + blk.ref_locs
                   for pos, blk in enumerate(blocks)
                   if blk is not None]
        locs = np.concatenate(shifted)

        if self.ignore_index:
            return make_block(joined, locs, self._get_fresh_axis())

        labels = self.new_axes[0].take(locs)
        block = make_block(joined, labels, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block.set_ref_locs(locs)

        return block
Esempio n. 22
0
    def _concat_blocks(self, blocks):
        """
        Join the values of ``blocks`` along ``self.axis`` into one block.

        ``None`` entries in ``blocks`` are left out of the concatenation.
        When the resulting first axis is not unique, the computed locations
        are registered on the new block through ``set_ref_locs``.

        Raises
        ------
        PandasError
            If ``self.axis > 0`` and the blocks do not all carry the same
            items.
        """
        joined = com._concat_compat(
            [blk.get_values() for blk in blocks if blk is not None],
            axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([blk.items for blk in blocks]):
                # TODO: Either profile this piece or remove.
                # FIXME: Need to figure out how to test whether this line
                #        exists or does not...(unclear if even possible or
                #        maybe would require performance test)
                raise PandasError('dtypes are not consistent throughout '
                                  'DataFrames')
            lead = blocks[0]
            return make_block(joined,
                              lead.items,
                              self.new_axes[0],
                              placement=lead._ref_locs)

        # cumulative lengths give the starting location of every object
        starts = np.r_[0, np.cumsum([len(obj._data.axes[0])
                                     for obj in self.objs])]
        locs = np.concatenate([starts[pos] + blk.ref_locs
                               for pos, blk in enumerate(blocks)
                               if blk is not None])

        if self.ignore_index:
            fresh = self._get_fresh_axis()
            return make_block(joined, locs, fresh)

        labels = self.new_axes[0].take(locs)
        block = make_block(joined, labels, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block.set_ref_locs(locs)

        return block
Esempio n. 23
0
    def _concat_single_item(self, objs, item):
        """
        Concatenate the values stored under ``item`` across ``objs``.

        When ``self.objs[0]`` is a SparseDataFrame, ``objs`` is replaced by
        the ``_data`` of every element of ``self.objs``. Entries whose
        original object does not contain ``item`` contribute a NaN-filled
        placeholder shaped like ``obj.shape[1:]``; its dtype is object when
        any value that was found has object or bool dtype, else float64.

        Returns
        -------
        The result of ``com._concat_compat`` along ``self.axis - 1``.

        Raises
        ------
        AssertionError
            If ``self.axis`` is less than 1.
        """
        # le sigh
        if isinstance(self.objs[0], SparseDataFrame):
            objs = [frame._data for frame in self.objs]

        found = []
        seen_dtypes = set()
        for source, original in zip(objs, self.objs):
            if item not in original:
                found.append(None)
                continue
            values = source.get(item)
            seen_dtypes.add(values.dtype)
            found.append(values)

        # this stinks
        have_object = any(issubclass(dt.type, (np.object_, np.bool_))
                          for dt in seen_dtypes)
        empty_dtype = np.object_ if have_object else np.float64

        to_concat = []
        for source, values in zip(objs, found):
            if values is None:
                # stand-in for an object that does not hold ``item``
                values = np.empty(source.shape[1:], dtype=empty_dtype)
                values.fill(np.nan)
            to_concat.append(values)

        # this method only gets called with axis >= 1
        if not ((self.axis >= 1)):
            raise AssertionError()
        return com._concat_compat(to_concat, axis=self.axis - 1)
Esempio n. 24
0
    def _concat_single_item(self, objs, item):
        """
        Concatenate the values stored under ``item`` across ``objs``.

        When ``self.objs[0]`` is a SparseDataFrame, ``objs`` is replaced by
        the ``_data`` of every element of ``self.objs``. Entries whose
        original object does not contain ``item`` contribute a NaN-filled
        placeholder shaped like ``obj.shape[1:]``; its dtype is object when
        any value that was found has object or bool dtype, else float64.

        Returns
        -------
        The result of ``com._concat_compat`` along ``self.axis - 1``.

        Raises
        ------
        AssertionError
            If ``self.axis`` is less than 1.
        """
        # le sigh
        if isinstance(self.objs[0], SparseDataFrame):
            objs = [frame._data for frame in self.objs]

        per_object = []
        dtypes = set()
        for data, orig in zip(objs, self.objs):
            if item in orig:
                values = data.get(item)
                dtypes.add(values.dtype)
            else:
                values = None
            per_object.append(values)

        # this stinks
        if any(issubclass(dt.type, (np.object_, np.bool_)) for dt in dtypes):
            empty_dtype = np.object_
        else:
            empty_dtype = np.float64

        to_concat = []
        for data, values in zip(objs, per_object):
            if values is not None:
                to_concat.append(values)
                continue
            # stand-in for an object that does not hold ``item``
            filler = np.empty(data.shape[1:], dtype=empty_dtype)
            filler.fill(np.nan)
            to_concat.append(filler)

        # this method only gets called with axis >= 1
        if not ((self.axis >= 1)):
            raise AssertionError()
        return com._concat_compat(to_concat, axis=self.axis - 1)
Esempio n. 25
0
File: period.py Progetto: DT021/wau
    def append(self, other):
        """
        Concatenate this index with one or more others.

        The result keeps ``self.name`` only when every Index taking part
        has that same name. If the first piece is a PeriodIndex and all
        pieces share one ``freq``, a PeriodIndex is returned; with differing
        frequencies the pieces are converted through ``asobject`` and a
        plain Index is returned.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        if isinstance(other, (list, tuple)):
            pieces = [self] + list(other)
        else:
            pieces = [self, other]

        name = self.name
        if any(isinstance(p, Index) and p.name != name for p in pieces):
            name = None

        pieces = self._ensure_compat_concat(pieces)

        if isinstance(pieces[0], PeriodIndex):
            distinct_freqs = set(p.freq for p in pieces)
            if len(distinct_freqs) <= 1:
                joined = np.concatenate([p.values for p in pieces])
                return PeriodIndex(joined, freq=self.freq, name=name)
            # box
            pieces = [p.asobject.values for p in pieces]

        arrays = [p.values if isinstance(p, Index) else p for p in pieces]
        return Index(com._concat_compat(arrays), name=name)
Esempio n. 26
0
    def append(self, other):
        """
        Concatenate this index with one or more others.

        The result keeps ``self.name`` only when every Index taking part
        has that same name. If the first piece is a PeriodIndex and all
        pieces share one ``freq``, a PeriodIndex is returned; with differing
        frequencies the pieces are converted through ``asobject`` and a
        plain Index is returned.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        pieces = [self]
        if isinstance(other, (list, tuple)):
            pieces.extend(other)
        else:
            pieces.append(other)

        name = self.name
        for piece in pieces:
            if not isinstance(piece, Index):
                continue
            if piece.name != name:
                # names disagree, so the result is unnamed
                name = None
                break

        pieces = self._ensure_compat_concat(pieces)

        if isinstance(pieces[0], PeriodIndex):
            freqs = set(p.freq for p in pieces)
            if len(freqs) > 1:
                # box
                pieces = [p.asobject.values for p in pieces]
            else:
                joined = np.concatenate([p.values for p in pieces])
                return PeriodIndex(joined, freq=self.freq, name=name)

        unwrapped = [p.values if isinstance(p, Index) else p
                     for p in pieces]
        return Index(com._concat_compat(unwrapped), name=name)
Esempio n. 27
0
    def _concat_blocks(self, blocks):
        """
        Join the values of ``blocks`` along ``self.axis`` into one block.

        ``None`` entries in ``blocks`` are left out of the concatenation.
        When the resulting first axis is not unique, the computed locations
        are stored on the new block as ``_ref_locs``.

        Raises
        ------
        Exception
            If ``self.axis > 0`` and the blocks do not all carry the same
            items.
        """
        joined = com._concat_compat(
            [blk.values for blk in blocks if blk is not None],
            axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([blk.items for blk in blocks]):
                raise Exception('dtypes are not consistent throughout '
                                'DataFrames')
            lead = blocks[0]
            return make_block(joined, lead.items, self.new_axes[0])

        # cumulative lengths give the starting location of every object
        starts = np.r_[
            0, np.cumsum([len(obj._data.axes[0]) for obj in self.objs])]
        locs = np.concatenate([
            starts[pos] + blk.ref_locs for pos, blk in enumerate(blocks)
            if blk is not None
        ])

        if self.ignore_index:
            fresh = self._get_fresh_axis()
            return make_block(joined, locs, fresh)

        labels = self.new_axes[0].take(locs)
        block = make_block(joined, labels, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block._ref_locs = locs

        return block
Esempio n. 28
0
    def _fast_union(self, other):
        """
        Combine ``self`` and ``other`` into one index.

        If either operand is empty, the other one is returned as a view of
        ``type(self)``. Otherwise the operand with the smaller (or equal)
        first element is treated as ``left``; the elements of ``right``
        located after ``left``'s last element are appended to ``left``'s
        values. When ``right`` does not extend past ``left``, ``left`` is
        returned unchanged.

        NOTE(review): presumably both operands are sorted and overlap or
        touch -- confirm against callers.
        """
        if len(other) == 0:
            return self.view(type(self))
        if len(self) == 0:
            return other.view(type(self))

        # to make our life easier, "sort" the two ranges
        left, right = (self, other) if self[0] <= other[0] else (other, self)

        left_end = left[-1]
        right_end = right[-1]

        if not left_end < right_end:
            return left

        # concatenate
        cut = right.searchsorted(left_end, side='right')
        tail = right.values[cut:]
        dates = com._concat_compat((left.values, tail))
        return self._shallow_copy(dates)
Esempio n. 29
0
    def _fast_union(self, other):
        """
        Combine ``self`` and ``other`` into one index.

        If either operand is empty, the other one is returned as a view of
        ``type(self)``. Otherwise the operand with the smaller (or equal)
        first element is treated as ``left``; the elements of ``right``
        located after ``left``'s last element are appended to ``left``'s
        values. When ``right`` does not extend past ``left``, ``left`` is
        returned unchanged.

        NOTE(review): presumably both operands are sorted and overlap or
        touch -- confirm against callers.
        """
        if len(other) == 0:
            return self.view(type(self))

        if len(self) == 0:
            return other.view(type(self))

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        # only the end points matter here; the left start is never used
        left_end = left[-1]
        right_end = right[-1]

        # concatenate
        if left_end < right_end:
            loc = right.searchsorted(left_end, side='right')
            right_chunk = right.values[loc:]
            dates = com._concat_compat((left.values, right_chunk))
            return self._shallow_copy(dates)
        else:
            return left
Esempio n. 30
0
def lreshape(data, groups, dropna=True, label=None):
    """
    Reshape wide-format data to long. Generalized inverse of DataFrame.pivot

    Each entry of ``groups`` names a new column that is built by stacking
    the listed source columns end to end; every remaining column of
    ``data`` is repeated once per stacked column.

    Parameters
    ----------
    data : DataFrame
    groups : dict
        {new_name : list_of_columns}; an iterable of
        (new_name, list_of_columns) pairs is accepted as well
    dropna : boolean, default True
        Drop the rows in which any of the new columns is null
    label : optional
        Not used by this function

    Examples
    --------
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {'year': ['year1', 'year2'],
                           'hr': ['hr1', 'hr2']})
          team   hr  year
    0  Red Sox  514  2007
    1  Yankees  573  2007
    2  Red Sox  545  2008
    3  Yankees  526  2008

    Returns
    -------
    reshaped : DataFrame

    Raises
    ------
    ValueError
        If the column lists in ``groups`` do not all have the same length
    """
    if isinstance(groups, dict):
        # materialize the views: ``values`` is indexed below, which a
        # Python 3 dict view does not support
        keys = list(groups.keys())
        values = list(groups.values())
    else:
        keys, values = zip(*groups)

    all_cols = list(set.union(*[set(x) for x in values]))
    # NOTE(review): later pandas releases spell this Index.difference --
    # confirm against the pandas version this module targets
    id_cols = list(data.columns.diff(all_cols))

    K = len(values[0])

    for seq in values:
        if len(seq) != K:
            raise ValueError('All column lists must be same length')

    mdata = {}
    pivot_cols = []

    for target, names in zip(keys, values):
        mdata[target] = com._concat_compat([data[col].values for col in names])
        pivot_cols.append(target)

    # identifier columns are repeated once per stacked source column
    for col in id_cols:
        mdata[col] = np.tile(data[col].values, K)

    if dropna:
        mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
        for c in pivot_cols:
            mask &= notnull(mdata[c])
        if not mask.all():
            # ``items`` exists on both Python 2 and 3 (``iteritems`` does not)
            mdata = dict((k, v[mask]) for k, v in mdata.items())

    return DataFrame(mdata, columns=id_cols + pivot_cols)
Esempio n. 31
0
    def _concat_single_item(self, objs, item):
        """
        Concatenate the values stored under ``item`` across ``objs``.

        This is called if we don't have consistent dtypes in a row-wise
        append. Objects that lack ``item`` contribute an array of missing
        values whose dtype and fill value are chosen from the dtypes of the
        objects that do have it.

        Parameters
        ----------
        objs : sequence
            Containers positionally aligned with ``self.objs``; each is
            accessed through ``get(item)`` and ``shape``.
        item : object
            Label of the item to concatenate.

        Returns
        -------
        The result of ``com._concat_compat`` along ``self.axis - 1``.

        Raises
        ------
        AssertionError
            If no fill dtype can be determined, or if ``self.axis < 1``.
        """
        datetimelike = ('datetime', 'timedelta')

        def classify(dtype):
            # coarse dtype family used to choose the fill dtype/value
            if issubclass(dtype.type, (np.object_, np.bool_)):
                return 'object'
            if is_datetime64_dtype(dtype):
                return 'datetime'
            if is_timedelta64_dtype(dtype):
                return 'timedelta'
            return 'other'

        all_values = []
        # kinds[i] is the family of all_values[i] (None where the item is
        # missing); keeping the lists parallel guarantees every value is
        # judged by its own dtype
        kinds = []

        for data, orig in zip(objs, self.objs):
            if item in orig:
                values = data.get(item)
                if hasattr(values, 'to_dense'):
                    values = values.to_dense()
                all_values.append(values)
                kinds.append(classify(values.dtype))
            else:
                all_values.append(None)
                kinds.append(None)

        # figure out the resulting dtype of the combination
        alls = set(kind for kind in kinds if kind is not None)

        has_datetimelike = 'datetime' in alls or 'timedelta' in alls
        if has_datetimelike and ('object' in alls or 'other' in alls):
            for v, kind in zip(all_values, kinds):
                # missing items and date/time-like values are not
                # candidates for the all-null check
                if v is None or kind in datetimelike:
                    continue

                # if we have all null, then leave a date/time like type
                # if we have only that type left
                if isnull(v).all():
                    # discard, not remove: only one of the two families
                    # needs to be present to get here
                    alls.discard('other')
                    alls.discard('object')
                    break

        # create the result
        if 'object' in alls:
            empty_dtype, fill_value = np.object_, np.nan
        elif 'other' in alls:
            empty_dtype, fill_value = np.float64, np.nan
        elif 'datetime' in alls:
            empty_dtype, fill_value = 'M8[ns]', tslib.iNaT
        elif 'timedelta' in alls:
            empty_dtype, fill_value = 'm8[ns]', tslib.iNaT
        else: # pragma
            raise AssertionError("invalid dtype determination in concat_single_item")

        to_concat = []
        for obj, item_values in zip(objs, all_values):
            if item_values is None:
                # stand-in for the missing item: drop the leading axis of
                # the container's shape and fill with the missing marker
                shape = obj.shape[1:]
                missing_arr = np.empty(shape, dtype=empty_dtype)
                missing_arr.fill(fill_value)
                to_concat.append(missing_arr)
            else:
                to_concat.append(item_values)

        # this method only gets called with axis >= 1
        if self.axis < 1:
            raise AssertionError("axis must be >= 1, input was"
                                 " {0}".format(self.axis))
        return com._concat_compat(to_concat, axis=self.axis - 1)
Esempio n. 32
0
def lreshape(data, groups, dropna=True, label=None):
    """
    Reshape wide-format data to long. Generalized inverse of DataFrame.pivot

    Every key of ``groups`` becomes one output column, produced by
    concatenating the source columns listed for it; all other columns of
    ``data`` are tiled so they line up with the stacked values.

    Parameters
    ----------
    data : DataFrame
    groups : dict
        {new_name : list_of_columns}; an iterable of
        (new_name, list_of_columns) pairs is accepted as well
    dropna : boolean, default True
        Drop the rows in which any of the new columns is null
    label : optional
        Not used by this function

    Examples
    --------
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {'year': ['year1', 'year2'],
                           'hr': ['hr1', 'hr2']})
          team   hr  year
    0  Red Sox  514  2007
    1  Yankees  573  2007
    2  Red Sox  545  2008
    3  Yankees  526  2008

    Returns
    -------
    reshaped : DataFrame

    Raises
    ------
    ValueError
        If the column lists in ``groups`` do not all have the same length
    """
    if isinstance(groups, dict):
        # dict views cannot be indexed on Python 3, and ``values[0]`` is
        # needed below, so turn both views into lists
        keys = list(groups.keys())
        values = list(groups.values())
    else:
        keys, values = zip(*groups)

    all_cols = list(set.union(*[set(x) for x in values]))
    # NOTE(review): later pandas releases spell this Index.difference --
    # confirm against the pandas version this module targets
    id_cols = list(data.columns.diff(all_cols))

    K = len(values[0])

    for seq in values:
        if len(seq) != K:
            raise ValueError('All column lists must be same length')

    mdata = {}
    pivot_cols = []

    for target, names in zip(keys, values):
        mdata[target] = com._concat_compat([data[col].values for col in names])
        pivot_cols.append(target)

    # repeat the identifier columns K times to match the stacked length
    for col in id_cols:
        mdata[col] = np.tile(data[col].values, K)

    if dropna:
        mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
        for c in pivot_cols:
            mask &= notnull(mdata[c])
        if not mask.all():
            # ``items`` works on Python 2 and 3; ``iteritems`` is 2-only
            mdata = dict((k, v[mask]) for k, v in mdata.items())

    return DataFrame(mdata, columns=id_cols + pivot_cols)
Esempio n. 33
0
    def _concat_single_item(self, objs, item):
        """
        Concatenate the values stored under ``item`` across ``objs``.

        This is called if we don't have consistent dtypes in a row-wise
        append. Objects that lack ``item`` contribute an array of missing
        values whose dtype and fill value are chosen from the dtypes of the
        objects that do have it.

        Parameters
        ----------
        objs : sequence
            Containers positionally aligned with ``self.objs``; each is
            accessed through ``get(item)`` and ``shape``.
        item : object
            Label of the item to concatenate.

        Returns
        -------
        The result of ``com._concat_compat`` along ``self.axis - 1``.

        Raises
        ------
        AssertionError
            If no fill dtype can be determined, or if ``self.axis < 1``.
        """
        datetimelike = ('datetime', 'timedelta')

        def classify(dtype):
            # coarse dtype family used to choose the fill dtype/value
            if issubclass(dtype.type, (np.object_, np.bool_)):
                return 'object'
            if is_datetime64_dtype(dtype):
                return 'datetime'
            if is_timedelta64_dtype(dtype):
                return 'timedelta'
            return 'other'

        all_values = []
        # kinds[i] is the family of all_values[i] (None where the item is
        # missing); the previous per-dtype list was not aligned with
        # all_values, so values could be judged by another value's dtype
        kinds = []

        for data, orig in zip(objs, self.objs):
            if item in orig:
                values = data.get(item)
                if hasattr(values, 'to_dense'):
                    values = values.to_dense()
                all_values.append(values)
                kinds.append(classify(values.dtype))
            else:
                all_values.append(None)
                kinds.append(None)

        # figure out the resulting dtype of the combination
        alls = set(kind for kind in kinds if kind is not None)

        has_datetimelike = 'datetime' in alls or 'timedelta' in alls
        if has_datetimelike and ('object' in alls or 'other' in alls):
            for v, kind in zip(all_values, kinds):
                # missing items and date/time-like values are not
                # candidates for the all-null check
                if v is None or kind in datetimelike:
                    continue

                # if we have all null, then leave a date/time like type
                # if we have only that type left
                if isnull(v).all():
                    # discard, not remove: only one of the two families
                    # needs to be present to get here
                    alls.discard('other')
                    alls.discard('object')
                    break

        # create the result
        if 'object' in alls:
            empty_dtype, fill_value = np.object_, np.nan
        elif 'other' in alls:
            empty_dtype, fill_value = np.float64, np.nan
        elif 'datetime' in alls:
            empty_dtype, fill_value = 'M8[ns]', tslib.iNaT
        elif 'timedelta' in alls:
            empty_dtype, fill_value = 'm8[ns]', tslib.iNaT
        else: # pragma
            raise AssertionError("invalid dtype determination in concat_single_item")

        to_concat = []
        for obj, item_values in zip(objs, all_values):
            if item_values is None:
                # stand-in for the missing item: drop the leading axis of
                # the container's shape and fill with the missing marker
                shape = obj.shape[1:]
                missing_arr = np.empty(shape, dtype=empty_dtype)
                missing_arr.fill(fill_value)
                to_concat.append(missing_arr)
            else:
                to_concat.append(item_values)

        # this method only gets called with axis >= 1
        if self.axis < 1:
            raise AssertionError("axis must be >= 1, input was"
                                 " {0}".format(self.axis))
        return com._concat_compat(to_concat, axis=self.axis - 1)
Esempio n. 34
0
    def _concat_single_item(self, objs, item):
        """
        Concatenate the values stored under ``item`` across ``objs``.

        This is called if we don't have consistent dtypes in a row-wise
        append. Objects that lack ``item`` contribute an array of missing
        values whose dtype and fill value are chosen from the dtypes of the
        objects that do have it.

        Parameters
        ----------
        objs : sequence
            Containers positionally aligned with ``self.objs``; each is
            accessed through ``get(item)`` and ``shape``.
        item : object
            Label of the item to concatenate.

        Returns
        -------
        The result of ``com._concat_compat`` along ``self.axis - 1``.

        Raises
        ------
        AssertionError
            If no fill dtype can be determined, or if ``self.axis < 1``.
        """
        datetimelike = ("datetime", "timedelta")

        def classify(dtype):
            # coarse dtype family used to choose the fill dtype/value
            if issubclass(dtype.type, (np.object_, np.bool_)):
                return "object"
            if is_datetime64_dtype(dtype):
                return "datetime"
            if is_timedelta64_dtype(dtype):
                return "timedelta"
            return "other"

        all_values = []
        # kinds[i] is the family of all_values[i] (None where the item is
        # missing); keeping the lists parallel guarantees every value is
        # judged by its own dtype
        kinds = []

        for data, orig in zip(objs, self.objs):
            if item in orig:
                values = data.get(item)
                if hasattr(values, "to_dense"):
                    values = values.to_dense()
                all_values.append(values)
                kinds.append(classify(values.dtype))
            else:
                all_values.append(None)
                kinds.append(None)

        # figure out the resulting dtype of the combination
        alls = set(kind for kind in kinds if kind is not None)

        has_datetimelike = "datetime" in alls or "timedelta" in alls
        if has_datetimelike and ("object" in alls or "other" in alls):
            for v, kind in zip(all_values, kinds):
                # missing items and date/time-like values are not
                # candidates for the all-null check
                if v is None or kind in datetimelike:
                    continue

                # if we have all null, then leave a date/time like type
                # if we have only that type left
                if isnull(v).all():
                    # discard, not remove: only one of the two families
                    # needs to be present to get here
                    alls.discard("other")
                    alls.discard("object")
                    break

        # create the result
        if "object" in alls:
            empty_dtype, fill_value = np.object_, np.nan
        elif "other" in alls:
            empty_dtype, fill_value = np.float64, np.nan
        elif "datetime" in alls:
            empty_dtype, fill_value = "M8[ns]", tslib.iNaT
        elif "timedelta" in alls:
            empty_dtype, fill_value = "m8[ns]", tslib.iNaT
        else:  # pragma
            raise AssertionError("invalid dtype determination in concat_single_item")

        to_concat = []
        for obj, item_values in zip(objs, all_values):
            if item_values is None:
                # stand-in for the missing item: drop the leading axis of
                # the container's shape and fill with the missing marker
                shape = obj.shape[1:]
                missing_arr = np.empty(shape, dtype=empty_dtype)
                missing_arr.fill(fill_value)
                to_concat.append(missing_arr)
            else:
                to_concat.append(item_values)

        # this method only gets called with axis >= 1
        if self.axis < 1:
            raise AssertionError("axis must be >= 1, input was {0}".format(self.axis))
        return com._concat_compat(to_concat, axis=self.axis - 1)