def pandas_series_resolver(obj, resolver):
    """Rebuild a ``pd.Series`` from a stored object's metadata and members.

    The series name is JSON-decoded from ``obj.meta['name']``; the index and
    the values are obtained by running ``resolver`` on the ``index_`` and
    ``value_`` members respectively.

    Args:
        obj: Object exposing ``meta`` (subscriptable by key) and
            ``member(name)``.
        resolver: Object whose ``run(member)`` yields the resolved value.

    Returns:
        A ``pd.Series`` built on a ``SingleBlockManager`` that holds a single
        block over the resolved values.
    """
    series_name = json.loads(obj.meta['name'])
    series_index = resolver.run(obj.member('index_'))
    values = resolver.run(obj.member('value_'))

    # The block's placement spans every element of the resolved values;
    # ndim=1 is the Series / SingleBlockManager layout.
    placement = slice(0, len(values), 1)
    manager = SingleBlockManager(Block(values, placement, ndim=1), series_index)
    return pd.Series(manager, name=series_name)
def pandas_dataframe_resolver(obj, resolver):
    """Rebuild a ``pd.DataFrame`` from per-column members of a stored object.

    ``obj.meta['columns_']`` is decoded with ``from_json``; when the decoded
    value is falsy an empty ``pd.DataFrame`` is returned. Otherwise, for each
    column position ``i`` the column label is decoded from
    ``meta['__values_-key-<i>']`` and the column data is resolved from the
    member ``'__values_-value-<i>'``. Every column becomes its own block.

    Args:
        obj: Object exposing ``meta`` (supports ``in`` and key lookup) and
            ``member(name)``.
        resolver: Object whose ``run(member)`` yields the resolved value.

    Returns:
        A ``pd.DataFrame`` built on a ``BlockManager`` over the per-column
        blocks. Its row axis is the resolved ``index_`` member when that key
        is present in the metadata, otherwise a ``pd.RangeIndex`` whose
        length is taken from the last resolved column.
    """
    meta = obj.meta
    columns = from_json(meta['columns_'])
    if not columns:
        return pd.DataFrame()

    column_labels = []
    # One block per column; the intent (per the original note) is to wrap
    # the resolved data without copying it.
    blocks = []
    row_count = 0
    for position, _ in enumerate(columns):
        column_labels.append(from_json(meta['__values_-key-%d' % position]))
        column = resolver.run(obj.member('__values_-value-%d' % position))
        row_count = len(column)

        # Use BlockPlacement when the name is truthy, else a bare slice.
        span = slice(position, position + 1, 1)
        placement = BlockPlacement(span) if BlockPlacement else span

        # ndim: 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame,
        # so the column is turned into a single-row 2-D array first.
        if DatetimeArray is not None and isinstance(column, DatetimeArray):
            values = column.reshape(1, -1)
            block_type = DatetimeLikeBlock
        else:
            values = np.expand_dims(column, 0).view(ndarray)
            block_type = Block

        # Carry the '__vineyard_ref' attribute (or None) over from the
        # resolved column to the reshaped values.
        setattr(values, '__vineyard_ref', getattr(column, '__vineyard_ref', None))
        blocks.append(block_type(values, placement, ndim=2))

    if 'index_' in meta:
        index = resolver.run(obj.member('index_'))
    else:
        index = pd.RangeIndex(row_count)

    axes = [ensure_index(column_labels), index]
    return pd.DataFrame(BlockManager(blocks, axes))
def pandas_dataframe_resolver(obj, resolver):
    """Rebuild a ``pd.DataFrame`` from per-column members of a stored object.

    ``obj.meta['columns_']`` is decoded with ``from_json`` and used as the
    column labels; when the decoded value is falsy an empty ``pd.DataFrame``
    is returned. For each column position ``i`` the data is resolved from the
    member ``'__values_-value-<i>'`` and wrapped in its own block.

    Args:
        obj: Object exposing ``meta`` (supports ``in`` and key lookup) and
            ``member(name)``.
        resolver: Object whose ``run(member)`` yields the resolved value.

    Returns:
        A ``pd.DataFrame`` built on a ``BlockManager`` over the per-column
        blocks. Its row axis is the resolved ``index_`` member when that key
        is present in the metadata, otherwise a ``pd.RangeIndex`` whose
        length is taken from the last resolved column.
    """
    meta = obj.meta
    columns = from_json(meta['columns_'])
    if not columns:
        return pd.DataFrame()

    # One block per column; the intent (per the original note) is to wrap
    # the resolved data without copying it.
    blocks = []
    row_count = 0
    for position, _ in enumerate(columns):
        column = resolver.run(obj.member('__values_-value-%d' % position))
        row_count = len(column)

        # Use BlockPlacement when the name is truthy, else a bare slice.
        span = slice(position, position + 1, 1)
        placement = BlockPlacement(span) if BlockPlacement else span

        # ndim: 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame,
        # hence the extra leading axis on the column data.
        values = np.expand_dims(column, 0).view(ndarray)
        # Carry the '__vineyard_ref' attribute (or None) over from the
        # resolved column to the reshaped view.
        setattr(values, '__vineyard_ref', getattr(column, '__vineyard_ref', None))
        blocks.append(Block(values, placement, ndim=2))

    if 'index_' in meta:
        index = resolver.run(obj.member('index_'))
    else:
        index = pd.RangeIndex(row_count)

    axes = [pd.Index(columns), index]
    return pd.DataFrame(BlockManager(blocks, axes))
def pandas_series_resolver(obj, resolver):
    """Rebuild a ``pd.Series`` from a stored object's metadata and members.

    The series name is decoded from ``obj.meta['name']`` with ``from_json``;
    the index and the values are obtained by running ``resolver`` on the
    ``index_`` and ``value_`` members respectively.

    Args:
        obj: Object exposing ``meta`` (subscriptable by key) and
            ``member(name)``.
        resolver: Object whose ``run(member)`` yields the resolved value.

    Returns:
        A ``pd.Series`` built on a ``SingleBlockManager`` that holds a single
        block over the resolved values.
    """
    series_name = from_json(obj.meta['name'])
    series_index = resolver.run(obj.member('index_'))
    values = resolver.run(obj.member('value_'))

    # The placement spans every element; wrap it in BlockPlacement when that
    # name is truthy, otherwise pass the bare slice.
    span = slice(0, len(values), 1)
    placement = BlockPlacement(span) if BlockPlacement else span

    # ndim=1 is the Series / SingleBlockManager layout.
    manager = SingleBlockManager(Block(values, placement, ndim=1), series_index)
    return pd.Series(manager, name=series_name)
def dataframe_resolver(obj, resolver):
    """Rebuild a ``pd.DataFrame`` from per-column members of a stored object.

    ``obj.meta['columns_']`` is JSON-decoded and used as the column axis;
    when the decoded value is falsy an empty ``pd.DataFrame`` is returned.
    For each column position ``i`` the data is resolved from the member
    ``'__values_-value-<i>'`` and wrapped in its own block.

    Args:
        obj: Object exposing ``meta`` (subscriptable by key) and
            ``member(name)``.
        resolver: Object whose ``run(member)`` yields the resolved value.

    Returns:
        A ``pd.DataFrame`` built on a ``BlockManager`` over the per-column
        blocks, with ``np.arange(n)`` as the row axis, where ``n`` is the
        length of the last resolved column.
    """
    column_names = json.loads(obj.meta['columns_'])
    if not column_names:
        return pd.DataFrame()

    # One block per column; the intent (per the original note) is to wrap
    # the resolved data without copying it.
    blocks = []
    row_count = 0
    for position, _ in enumerate(column_names):
        column = resolver.run(obj.member('__values_-value-%d' % position))
        # Add a leading axis so the column becomes a single-row 2-D array.
        row = np.expand_dims(column, 0)
        blocks.append(Block(row, slice(position, position + 1, 1)))
        row_count = len(column)

    axes = [column_names, np.arange(row_count)]
    return pd.DataFrame(BlockManager(blocks, axes))
def pandas_dataframe_resolver(obj, resolver):
    """Rebuild a ``pd.DataFrame`` from per-column members of a stored object.

    ``obj.meta['columns_']`` is decoded with ``from_json`` and used as the
    column axis, and the ``index_`` member is resolved as the row axis. When
    the decoded columns value is falsy an empty ``pd.DataFrame`` is returned.
    Otherwise, for each column position ``i`` the data is resolved from the
    member ``'__values_-value-<i>'`` and wrapped in its own block.

    Args:
        obj: Object exposing ``meta`` (subscriptable by key) and
            ``member(name)``.
        resolver: Object whose ``run(member)`` yields the resolved value.

    Returns:
        A ``pd.DataFrame`` built on a ``BlockManager`` over the per-column
        blocks, or an empty ``pd.DataFrame`` when there are no columns.
    """
    column_names = from_json(obj.meta['columns_'])
    # NOTE: the index is resolved before the empty check, so the resolver
    # runs on 'index_' even when the result below is an empty frame.
    row_index = resolver.run(obj.member('index_'))
    if not column_names:
        return pd.DataFrame()

    # One block per column; the intent (per the original note) is to wrap
    # the resolved data without copying it.
    blocks = []
    for position, _ in enumerate(column_names):
        column = resolver.run(obj.member('__values_-value-%d' % position))
        # ndim: 1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame,
        # hence the extra leading axis on the column data.
        row = np.expand_dims(column, 0)
        blocks.append(Block(row, slice(position, position + 1, 1), ndim=2))

    return pd.DataFrame(BlockManager(blocks, [column_names, row_index]))
def execute(cls, ctx, op):
    """Fetch a dataframe chunk through vineyard and store it in ``ctx``.

    Connects to vineyard at ``op.vineyard_socket``, gets the object
    identified by ``op.object_id``, converts it into a ``pd.DataFrame`` and
    stores the result under ``op.outputs[0].key`` in ``ctx``.

    Args:
        cls: The class this is invoked on (unused in the body).
        ctx: Mapping that receives the resulting DataFrame.
        op: Operand providing ``vineyard_socket``, ``object_id`` and
            ``outputs``.

    Raises:
        RuntimeError: If the module-level ``vineyard`` name is ``None``.
    """
    if vineyard is None:
        raise RuntimeError('vineyard is not available')

    client = vineyard.connect(op.vineyard_socket)
    # chunk has no tensor chunk
    df_chunk = client.get(op.object_id)

    # No columns: store an empty frame and stop.
    if not df_chunk.columns:
        ctx[op.outputs[0].key] = pd.DataFrame()
        return

    # One block per column; the intent (per the original note) is to wrap
    # the column data without copying it.
    blocks = []
    row_count = 0
    for position, column_name in enumerate(df_chunk.columns):
        column = df_chunk[column_name].numpy()
        # Add a leading axis so the column becomes a single-row 2-D array.
        row = np.expand_dims(column, 0)
        blocks.append(Block(row, slice(position, position + 1, 1)))
        row_count = len(column)

    # Row axis is a plain 0..n-1 range sized from the last column read.
    axes = [df_chunk.columns, np.arange(row_count)]
    ctx[op.outputs[0].key] = pd.DataFrame(BlockManager(blocks, axes))