def _concat_blocks(self, blocks):
    """Concatenate the values of ``blocks`` along ``self.axis`` and wrap
    the result in a single new block.
    """
    pieces = [blk.values for blk in blocks if blk is not None]
    merged = com._concat_compat(pieces, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([blk.items for blk in blocks]):
            raise Exception('dtypes are not consistent throughout '
                            'DataFrames')
        return make_block(merged, blocks[0].items, self.new_axes[0])

    # axis 0: translate each block's item locations into positions on the
    # concatenated axis using per-object offsets
    starts = np.r_[0, np.cumsum([len(obj._data.axes[0])
                                 for obj in self.objs])]
    locs = np.concatenate([starts[num] + blk.ref_locs
                           for num, blk in enumerate(blocks)
                           if blk is not None])

    if self.ignore_index:
        # fresh ref axis; item positions are simply the raw locations
        return make_block(merged, locs, self._get_fresh_axis())

    return make_block(merged, self.new_axes[0].take(locs), self.new_axes[0])
def _concat_blocks(self, blocks):
    """Concatenate the values of ``blocks`` along ``self.axis``, returning
    one new block; fixes up ``_ref_locs`` for non-unique result axes.
    """
    data = [blk.values for blk in blocks if blk is not None]
    joined = com._concat_compat(data, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([blk.items for blk in blocks]):
            raise Exception("dtypes are not consistent throughout "
                            "DataFrames")
        return make_block(joined, blocks[0].items, self.new_axes[0])

    # axis 0: compute the merged-axis position of every item in every block
    bounds = np.r_[0, np.cumsum([len(obj._data.axes[0])
                                 for obj in self.objs])]
    indexer = np.concatenate([bounds[pos] + blk.ref_locs
                              for pos, blk in enumerate(blocks)
                              if blk is not None])

    if self.ignore_index:
        return make_block(joined, indexer, self._get_fresh_axis())

    result = make_block(joined, self.new_axes[0].take(indexer),
                        self.new_axes[0])
    # we need to set the ref_locs in this block so we have the mapping
    # as we now have a non-unique index across dtypes, and we need to
    # map the column location to the block location
    # GH3602
    if not self.new_axes[0].is_unique:
        result._ref_locs = indexer
    return result
def _concat_blocks(self, blocks):
    """Build a single block holding the concatenation of ``blocks`` along
    ``self.axis``.
    """
    arrays = [b.values for b in blocks if b is not None]
    combined = com._concat_compat(arrays, axis=self.axis)

    if self.axis <= 0:
        # axis 0: offset each block's ref_locs by the cumulative lengths of
        # the objects preceding it
        sizes = [len(o._data.axes[0]) for o in self.objs]
        offsets = np.r_[0, np.cumsum(sizes)]
        parts = [offsets[pos] + b.ref_locs
                 for pos, b in enumerate(blocks) if b is not None]
        indexer = np.concatenate(parts)

        if self.ignore_index:
            return make_block(combined, indexer, self._get_fresh_axis())

        items = self.new_axes[0].take(indexer)
        return make_block(combined, items, self.new_axes[0])

    # Not safe to remove this check, need to profile
    if not _all_indexes_same([b.items for b in blocks]):
        raise Exception('dtypes are not consistent throughout '
                        'DataFrames')
    return make_block(combined, blocks[0].items, self.new_axes[0])
def _concat_blocks(self, blocks):
    """Merge the values of ``blocks`` into one new block along
    ``self.axis``.
    """
    arrays = [blk.get_values() for blk in blocks if blk is not None]
    combined = com._concat_compat(arrays, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        # TODO: either profile this piece or remove it.
        # FIXME: unclear how to test for this line's presence short of a
        # performance test.
        if not _all_indexes_same([blk.items for blk in blocks]):
            raise PandasError('dtypes are not consistent throughout '
                              'DataFrames')
        return make_block(combined, blocks[0].items, self.new_axes[0],
                          placement=blocks[0]._ref_locs)

    # axis 0: translate per-object item locations into the merged axis
    bounds = np.r_[0, np.cumsum([len(obj._data.axes[0])
                                 for obj in self.objs])]
    indexer = np.concatenate([bounds[k] + blk.ref_locs
                              for k, blk in enumerate(blocks)
                              if blk is not None])

    if self.ignore_index:
        return make_block(combined, indexer, self._get_fresh_axis())

    out = make_block(combined, self.new_axes[0].take(indexer),
                     self.new_axes[0])
    # we need to set the ref_locs in this block so we have the mapping
    # as we now have a non-unique index across dtypes, and we need to
    # map the column location to the block location
    # GH3602
    if not self.new_axes[0].is_unique:
        out.set_ref_locs(indexer)
    return out
def _concat_blocks(self, blocks):
    """Concatenate ``blocks`` along ``self.axis`` into a single block."""
    vals = [piece.get_values() for piece in blocks if piece is not None]
    stacked = com._concat_compat(vals, axis=self.axis)

    if self.axis <= 0:
        # axis 0: build an indexer mapping every block item to its slot on
        # the concatenated axis
        cum = np.cumsum([len(o._data.axes[0]) for o in self.objs])
        offsets = np.r_[0, cum]
        chunks = []
        for num, piece in enumerate(blocks):
            if piece is not None:
                chunks.append(offsets[num] + piece.ref_locs)
        indexer = np.concatenate(chunks)

        if self.ignore_index:
            fresh = self._get_fresh_axis()
            return make_block(stacked, indexer, fresh)

        new_block = make_block(stacked, self.new_axes[0].take(indexer),
                               self.new_axes[0])
        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            new_block.set_ref_locs(indexer)
        return new_block

    # Not safe to remove this check, need to profile
    # TODO: either profile this piece or remove it.
    # FIXME: unclear how to test for this line's presence short of a
    # performance test.
    if not _all_indexes_same([piece.items for piece in blocks]):
        raise PandasError('dtypes are not consistent throughout '
                          'DataFrames')
    return make_block(stacked, blocks[0].items, self.new_axes[0],
                      placement=blocks[0]._ref_locs)
def _concat_blocks(self, blocks):
    """Return one block holding the concatenation of ``blocks`` along
    ``self.axis``; records ``_ref_locs`` when the result axis is
    non-unique.
    """
    present = [b for b in blocks if b is not None]
    glued = com._concat_compat([b.values for b in present], axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([b.items for b in blocks]):
            raise Exception('dtypes are not consistent throughout '
                            'DataFrames')
        return make_block(glued, blocks[0].items, self.new_axes[0])

    # axis 0: offsets of each source object on the concatenated axis
    offsets = np.r_[0, np.cumsum([len(x._data.axes[0])
                                  for x in self.objs])]
    indexer = np.concatenate([offsets[i] + b.ref_locs
                              for i, b in enumerate(blocks)
                              if b is not None])

    if self.ignore_index:
        return make_block(glued, indexer, self._get_fresh_axis())

    blk = make_block(glued, self.new_axes[0].take(indexer),
                     self.new_axes[0])
    # we need to set the ref_locs in this block so we have the mapping
    # as we now have a non-unique index across dtypes, and we need to
    # map the column location to the block location
    # GH3602
    if not self.new_axes[0].is_unique:
        blk._ref_locs = indexer
    return blk
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """Build the MultiIndex for the concatenation axis from ``indexes``
    and the per-object ``keys``.

    Parameters: ``indexes`` is the list of axis indexes being concatenated;
    ``keys`` labels each object (tuples produce multiple new levels);
    ``levels``/``names`` optionally pin the new levels and level names.
    Returns a MultiIndex; raises ValueError for a key missing from a passed
    level, AssertionError when indexes have mismatched nlevels and names
    must be inferred.
    """
    # Decide whether the keys contribute one new level or several: tuples
    # (or multiple explicit levels) are unzipped into per-level sequences.
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        zipped = lzip(*keys)
        if names is None:
            names = [None] * len(zipped)
        if levels is None:
            # derive each level's categories from the observed key values
            levels = [Categorical.from_array(
                zp, ordered=True).categories for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]
        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s'
                                     % (str(key), str(level)))

                # each object's key is repeated once per row of that object
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            # factorize the flat concatenated index into a trailing level
            factor = Categorical.from_array(concat_index, ordered=True)
            levels.append(factor.categories)
            label_list.append(factor.codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len(set([idx.nlevels for idx in indexes])) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names,
                          verify_integrity=False)

    # Fast path: all indexes are identical, so the result is a simple
    # cross-product of the keys with the (shared) first index.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        # -1 marks key values absent from the user-supplied level
        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s'
                             % str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names,
                      verify_integrity=False)
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """Construct the concatenation-axis MultiIndex from ``indexes`` keyed
    by ``keys``.

    Parameters: ``indexes`` is the list of axis indexes being concatenated;
    ``keys`` labels each object (tuples yield multiple new levels);
    ``levels``/``names`` optionally fix the new levels and their names.
    Returns a MultiIndex; raises ValueError when a key is absent from a
    passed level, AssertionError when indexes disagree on nlevels and names
    must be inferred.
    """
    # Tuples in keys (or more than one explicit level) mean the keys span
    # several new index levels and must be unzipped per level.
    if (levels is None and isinstance(keys[0], tuple)) or \
            (levels is not None and len(levels) > 1):
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)
        if levels is None:
            # factorize each unzipped key sequence into its own level
            _, levels = _factorize_from_iterables(zipped)
        else:
            levels = [ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]
        if levels is None:
            levels = [ensure_index(keys)]
        else:
            levels = [ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        codes_list = []

        # things are potentially different sizes, so compute the exact codes
        # for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError(
                        "Key {key!s} not in level {level!s}".format(
                            key=key, level=level))

                # repeat the object's key code once per row of that object
                to_concat.append(np.repeat(i, len(index)))
            codes_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            codes_list.extend(concat_index.codes)
        else:
            # factorize the flat concatenated index into a trailing level
            codes, categories = _factorize_from_iterable(concat_index)
            levels.append(categories)
            codes_list.append(codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len({idx.nlevels for idx in indexes}) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, codes=codes_list, names=names,
                          verify_integrity=False)

    # Fast path: every index is identical, so the result is the keys
    # crossed with the shared first index.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct codes
    new_codes = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        hlevel = ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        # -1 marks key values absent from the user-supplied level
        mask = mapped == -1
        if mask.any():
            raise ValueError(
                "Values not found in passed level: {hlevel!s}".format(
                    hlevel=hlevel[mask]))

        new_codes.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_codes.extend([np.tile(lab, kpieces)
                          for lab in new_index.codes])
    else:
        new_levels.append(new_index)
        new_codes.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, codes=new_codes, names=new_names,
                      verify_integrity=False)
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """Build the MultiIndex for the concatenation axis from ``indexes``
    labelled by ``keys``.

    Parameters: ``indexes`` is the list of axis indexes being concatenated;
    ``keys`` labels each object (tuples produce multiple new levels);
    ``levels``/``names`` optionally pin the new levels and level names.
    Returns a MultiIndex.
    """
    if (levels is None and isinstance(keys[0], tuple)) or \
            (levels is not None and len(levels) > 1):
        # FIX: materialize the zip — on Python 3 `zip` returns an iterator,
        # which would break `len(zipped)` below and the later re-iteration.
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)
        if levels is None:
            # derive each level from the observed key values
            levels = [Factor.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]
        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                # each object's key is repeated once per row of that object
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            # factorize the flat concatenated index into a trailing level
            factor = Factor.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    # Fast path: all indexes are identical — the result is the keys
    # crossed with the shared first index.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """Construct the concatenation-axis MultiIndex from ``indexes`` keyed
    by ``keys``.

    Parameters: ``indexes`` is the list of axis indexes being concatenated;
    ``keys`` labels each object (tuples yield multiple new levels);
    ``levels``/``names`` optionally fix the new levels and their names.
    Returns a MultiIndex.
    """
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        # FIX: materialize the zip — on Python 3 `zip` returns an iterator,
        # which would break `len(zipped)` below and the later re-iteration.
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)
        if levels is None:
            # derive each level from the observed key values
            levels = [Factor.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]
        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                # each object's key is repeated once per row of that object
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            # factorize the flat concatenated index into a trailing level
            factor = Factor.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    # Fast path: all indexes are identical — the result is the keys
    # crossed with the shared first index.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)