Example 1
    def _concat_blocks(self, blocks):
        values_list = [b.values for b in blocks if b is not None]
        concat_values = com._concat_compat(values_list, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([b.items for b in blocks]):
                raise Exception('dtypes are not consistent throughout '
                                'DataFrames')
            return make_block(concat_values, blocks[0].items, self.new_axes[0])
        else:
            offsets = np.r_[0, np.cumsum([len(x._data.axes[0]) for
                                            x in self.objs])]
            indexer = np.concatenate([offsets[i] + b.ref_locs
                                      for i, b in enumerate(blocks)
                                      if b is not None])
            if self.ignore_index:
                concat_items = indexer
            else:
                concat_items = self.new_axes[0].take(indexer)

            if self.ignore_index:
                ref_items = self._get_fresh_axis()
                return make_block(concat_values, concat_items, ref_items)

            return make_block(concat_values, concat_items, self.new_axes[0])
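
Note on the axis-0 branch above: the offsets/indexer arithmetic converts each block's item positions, which are local to its own frame, into positions in the concatenated item axis. A minimal NumPy sketch of that mapping, with invented lengths and ref_locs (the names offsets, ref_locs and indexer mirror the snippet):

import numpy as np

# item-axis lengths of the concatenated objects, as in len(x._data.axes[0])
lengths = [2, 3]
offsets = np.r_[0, np.cumsum(lengths)]   # array([0, 2, 5])

# each block's item positions within its own frame (b.ref_locs)
ref_locs = [np.array([0, 1]), np.array([0, 1, 2])]

# shift each block's local positions by its object's offset and join them
indexer = np.concatenate([offsets[i] + locs
                          for i, locs in enumerate(ref_locs)])
print(indexer)  # [0 1 2 3 4]
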
Example 2
    def _concat_blocks(self, blocks):
        values_list = [b.values for b in blocks if b is not None]
        concat_values = com._concat_compat(values_list, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([b.items for b in blocks]):
                raise Exception("dtypes are not consistent throughout " "DataFrames")
            return make_block(concat_values, blocks[0].items, self.new_axes[0])
        else:

            offsets = np.r_[0, np.cumsum([len(x._data.axes[0]) for x in self.objs])]
            indexer = np.concatenate([offsets[i] + b.ref_locs
                                      for i, b in enumerate(blocks)
                                      if b is not None])
            if self.ignore_index:
                concat_items = indexer
            else:
                concat_items = self.new_axes[0].take(indexer)

            if self.ignore_index:
                ref_items = self._get_fresh_axis()
                return make_block(concat_values, concat_items, ref_items)

            block = make_block(concat_values, concat_items, self.new_axes[0])

            # we need to set the ref_locs in this block so we have the mapping
            # as we now have a non-unique index across dtypes, and we need to
            # map the column location to the block location
            # GH3602
            if not self.new_axes[0].is_unique:
                block._ref_locs = indexer

            return block
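
The GH3602 branch at the end is what distinguishes this variant from Example 1: when the concatenated item axis is not unique, take(indexer) returns repeated labels, so label-based lookup can no longer recover column positions and the block must store the integer locations itself in _ref_locs. A small illustration with a plain pandas Index (the data is made up):

import pandas as pd

idx = pd.Index(['a', 'b', 'a'])
print(idx.is_unique)      # False
print(idx.take([0, 2]))   # Index(['a', 'a'], dtype='object') -- both entries
                          # read 'a', so the positional mapping is lost
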
Example 3
    def _concat_blocks(self, blocks):
        values_list = [b.values for b in blocks if b is not None]
        concat_values = com._concat_compat(values_list, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([b.items for b in blocks]):
                raise Exception('dtypes are not consistent throughout '
                                'DataFrames')
            return make_block(concat_values, blocks[0].items, self.new_axes[0])
        else:
            offsets = np.r_[
                0, np.cumsum([len(x._data.axes[0]) for x in self.objs])]
            indexer = np.concatenate([
                offsets[i] + b.ref_locs for i, b in enumerate(blocks)
                if b is not None
            ])
            if self.ignore_index:
                concat_items = indexer
            else:
                concat_items = self.new_axes[0].take(indexer)

            if self.ignore_index:
                ref_items = self._get_fresh_axis()
                return make_block(concat_values, concat_items, ref_items)

            return make_block(concat_values, concat_items, self.new_axes[0])
Example 4
    def _concat_blocks(self, blocks):

        values_list = [b.get_values() for b in blocks if b is not None]
        concat_values = com._concat_compat(values_list, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([b.items for b in blocks]):
                # TODO: Either profile this piece or remove.
                # FIXME: Unclear how to test whether this check is still
                #        needed; it may only be answerable with a
                #        performance test.
                raise PandasError('dtypes are not consistent throughout '
                                  'DataFrames')
            return make_block(concat_values,
                              blocks[0].items,
                              self.new_axes[0],
                              placement=blocks[0]._ref_locs)
        else:

            offsets = np.r_[
                0, np.cumsum([len(x._data.axes[0]) for x in self.objs])]
            indexer = np.concatenate([
                offsets[i] + b.ref_locs for i, b in enumerate(blocks)
                if b is not None
            ])
            if self.ignore_index:
                concat_items = indexer
            else:
                concat_items = self.new_axes[0].take(indexer)

            if self.ignore_index:
                ref_items = self._get_fresh_axis()
                return make_block(concat_values, concat_items, ref_items)

            block = make_block(concat_values, concat_items, self.new_axes[0])

            # we need to set the ref_locs in this block so we have the mapping
            # as we now have a non-unique index across dtypes, and we need to
            # map the column location to the block location
            # GH3602
            if not self.new_axes[0].is_unique:
                block.set_ref_locs(indexer)

            return block
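
Every variant funnels the raw arrays through com._concat_compat, which (as an assumption about this internal helper) wraps np.concatenate with pandas-aware dtype handling for types NumPy handles poorly, such as datetimes and object columns. Plain NumPy already covers the numeric baseline; a sketch of that baseline promotion only:

import numpy as np

a = np.array([[1, 2]], dtype='int64')
b = np.array([[1.5, 2.5]], dtype='float64')

# NumPy promotes to the common dtype; _concat_compat presumably layers
# pandas-specific dtype handling on top of this behaviour
print(np.concatenate([a, b], axis=0).dtype)  # float64
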
Example 5
    def _concat_blocks(self, blocks):

        values_list = [b.get_values() for b in blocks if b is not None]
        concat_values = com._concat_compat(values_list, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([b.items for b in blocks]):
                # TODO: Either profile this piece or remove.
                # FIXME: Unclear how to test whether this check is still
                #        needed; it may only be answerable with a
                #        performance test.
                raise PandasError('dtypes are not consistent throughout '
                                  'DataFrames')
            return make_block(concat_values,
                              blocks[0].items,
                              self.new_axes[0],
                              placement=blocks[0]._ref_locs)
        else:

            offsets = np.r_[0, np.cumsum([len(x._data.axes[0]) for
                                          x in self.objs])]
            indexer = np.concatenate([offsets[i] + b.ref_locs
                                      for i, b in enumerate(blocks)
                                      if b is not None])
            if self.ignore_index:
                concat_items = indexer
            else:
                concat_items = self.new_axes[0].take(indexer)

            if self.ignore_index:
                ref_items = self._get_fresh_axis()
                return make_block(concat_values, concat_items, ref_items)

            block = make_block(concat_values, concat_items, self.new_axes[0])

            # we need to set the ref_locs in this block so we have the mapping
            # as we now have a non-unique index across dtypes, and we need to
            # map the column location to the block location
            # GH3602
            if not self.new_axes[0].is_unique:
                block.set_ref_locs(indexer)

            return block
Example 6
    def _concat_blocks(self, blocks):
        values_list = [b.values for b in blocks if b is not None]
        concat_values = com._concat_compat(values_list, axis=self.axis)

        if self.axis > 0:
            # Not safe to remove this check, need to profile
            if not _all_indexes_same([b.items for b in blocks]):
                raise Exception('dtypes are not consistent throughout '
                                'DataFrames')
            return make_block(concat_values, blocks[0].items, self.new_axes[0])
        else:

            offsets = np.r_[
                0, np.cumsum([len(x._data.axes[0]) for x in self.objs])]
            indexer = np.concatenate([
                offsets[i] + b.ref_locs for i, b in enumerate(blocks)
                if b is not None
            ])
            if self.ignore_index:
                concat_items = indexer
            else:
                concat_items = self.new_axes[0].take(indexer)

            if self.ignore_index:
                ref_items = self._get_fresh_axis()
                return make_block(concat_values, concat_items, ref_items)

            block = make_block(concat_values, concat_items, self.new_axes[0])

            # we need to set the ref_locs in this block so we have the mapping
            # as we now have a non-unique index across dtypes, and we need to
            # map the column location to the block location
            # GH3602
            if not self.new_axes[0].is_unique:
                block._ref_locs = indexer

            return block
Example 7
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        zipped = lzip(*keys)
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Categorical.from_array(
                zp, ordered=True).categories for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s'
                                     % (str(key), str(level)))

                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Categorical.from_array(concat_index, ordered=True)
            levels.append(factor.categories)
            label_list.append(factor.codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len(set([idx.nlevels for idx in indexes])) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names,
                          verify_integrity=False)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s'
                             % str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names,
                      verify_integrity=False)
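
When all the indexes are identical, the function takes the fast path at the end and builds the MultiIndex labels arithmetically instead of concatenating per key: the outer level repeats each key's code n times, and the inner level tiles the shared positions once per piece. The repeat/tile pattern in isolation (n, kpieces and mapped are invented values):

import numpy as np

n = 3        # length of the shared index
kpieces = 2  # number of objects being concatenated
mapped = np.array([0, 1])   # each key's position in its level

outer = np.repeat(mapped, n)            # [0 0 0 1 1 1]
inner = np.tile(np.arange(n), kpieces)  # [0 1 2 0 1 2]
print(outer, inner)
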
Example 8
def _make_concat_multiindex(indexes, keys, levels=None, names=None):

    if (levels is None
            and isinstance(keys[0], tuple)) or (levels is not None
                                                and len(levels) > 1):
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            _, levels = _factorize_from_iterables(zipped)
        else:
            levels = [ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [ensure_index(keys)]
        else:
            levels = [ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        codes_list = []

        # things are potentially different sizes, so compute the exact codes
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError(
                        "Key {key!s} not in level {level!s}".format(
                            key=key, level=level))

                to_concat.append(np.repeat(i, len(index)))
            codes_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            codes_list.extend(concat_index.codes)
        else:
            codes, categories = _factorize_from_iterable(concat_index)
            levels.append(categories)
            codes_list.append(codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len({idx.nlevels for idx in indexes}) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels,
                          codes=codes_list,
                          names=names,
                          verify_integrity=False)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct codes
    new_codes = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        hlevel = ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError(
                "Values not found in passed level: {hlevel!s}".format(
                    hlevel=hlevel[mask]))

        new_codes.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
    else:
        new_levels.append(new_index)
        new_codes.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels,
                      codes=new_codes,
                      names=new_names,
                      verify_integrity=False)
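
Example 8 is the same function after pandas renamed the MultiIndex labels argument to codes (the keyword changed around pandas 0.24) and moved level construction to _factorize_from_iterables. The equivalent public construction, assuming a pandas version that accepts the codes keyword:

import pandas as pd

mi = pd.MultiIndex(levels=[['x', 'y'], [0, 1, 2]],
                   codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
                   names=['key', None])
print(mi)
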
Example 9
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if (levels is None and isinstance(keys[0], tuple)) or (levels is not None and len(levels) > 1):
        zipped = list(zip(*keys))  # materialize: len() is used below, and zip is lazy in Python 3
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
Example 10
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple))
            or (levels is not None and len(levels) > 1)):
        zipped = list(zip(*keys))  # materialize: len() is used below, and zip is lazy in Python 3
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
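
All of these variants sit behind the public pd.concat(..., keys=...) API, which is where _make_concat_multiindex is invoked to build the hierarchical row index. A short usage example of that entry point:

import pandas as pd

df1 = pd.DataFrame({'a': [1, 2]})
df2 = pd.DataFrame({'a': [3, 4]})
out = pd.concat([df1, df2], keys=['x', 'y'])
print(out.index.tolist())  # [('x', 0), ('x', 1), ('y', 0), ('y', 1)]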