def execute(cls, ctx, op):
    """Store the op's first input chunk in vineyard and publish its object id.

    The vineyard socket and the initial ``needs_put`` decision come from
    ``resolve_vineyard_socket(ctx, op)``.  When a put is not requested, the
    chunk meta's ``object_ref`` is looked up; if that lookup raises
    ``KeyError`` or the referenced object does not exist in vineyard, the
    chunk is put anyway.  Otherwise the existing object's metadata is copied
    (minus ``id``, ``signature`` and ``instance_id``) into a new metadata
    object carrying this chunk's partition index.

    The resulting object id is persisted and written to
    ``ctx[op.outputs[0].key]`` as a single-cell ``pd.DataFrame``.

    Raises:
        RuntimeError: if the ``vineyard`` module is not available.
    """
    if vineyard is None:
        raise RuntimeError("vineyard is not available")

    socket, needs_put = resolve_vineyard_socket(ctx, op)
    client = vineyard.connect(socket)
    source_chunk = op.inputs[0]

    # some op might be fused and executed twice on different workers
    if not needs_put:
        # might be fused
        try:  # pragma: no cover
            chunk_meta = ctx.get_chunks_meta([source_chunk.key])[0]
            df_id = vineyard.ObjectID(chunk_meta["object_ref"])
            needs_put = not client.exists(df_id)
        except KeyError:
            needs_put = True

    if needs_put:
        df_id = client.put(
            ctx[source_chunk.key], partition_index=source_chunk.index
        )
    else:  # pragma: no cover
        # Re-publish the existing object under fresh metadata, dropping the
        # identity fields so that a new object id is assigned.
        excluded = ("id", "signature", "instance_id")
        existing = client.get_meta(df_id)
        replica = vineyard.ObjectMeta()
        for name, entry in existing.items():
            if name in excluded:
                continue
            if isinstance(entry, vineyard.ObjectMeta):
                replica.add_member(name, entry)
            else:
                replica[name] = entry
        replica["partition_index_"] = to_json(source_chunk.index)
        df_id = client.create_metadata(replica).id

    client.persist(df_id)
    ctx[op.outputs[0].key] = pd.DataFrame({0: [df_id]})
def torch_tensor_builder(client, value, **kw):
    """Create ``vineyard::Tensor`` metadata from a dataset of (data, label) pairs.

    ``value`` is wrapped in a ``DataLoader`` whose batch size equals
    ``len(value)``; each yielded ``(data, label)`` batch is converted to numpy
    and stored through ``build_numpy_buffer``.

    Args:
        client: vineyard client used to build buffers and create the metadata.
        value: sized dataset accepted by ``DataLoader``.
        **kw: ``partition_index`` is recorded in the metadata (default ``[]``).

    Returns:
        The result of ``client.create_metadata`` for the assembled metadata.
    """
    meta = ObjectMeta()
    meta['typename'] = 'vineyard::Tensor'
    meta['partition_index_'] = to_json(kw.get('partition_index', []))

    # batch_size == len(value): presumably the whole dataset arrives as a
    # single batch, so the keys below are written once.
    loader = DataLoader(value, batch_size=len(value))
    for features, labels in loader:
        feature_array = features.numpy()
        meta.add_member('buffer_data_',
                        build_numpy_buffer(client, feature_array))
        label_array = labels.numpy()
        meta.add_member('buffer_label_',
                        build_numpy_buffer(client, label_array))

        meta['data_shape_'] = to_json(feature_array.shape)
        meta['label_shape_'] = to_json(label_array.shape)
        meta['data_type_'] = feature_array.dtype.name
        meta['label_type_'] = label_array.dtype.name
        meta['data_type_meta_'] = feature_array.dtype.str
        meta['label_type_meta_'] = label_array.dtype.str

    return client.create_metadata(meta)
def dali_tensor_builder(client, value, **kw):
    """Create ``vineyard::Tensor`` metadata from a (data, label) pair.

    ``value[0]`` and ``value[1]`` are converted with ``np.array`` and stored
    through ``build_numpy_buffer``; their shapes and dtypes are recorded
    alongside the buffers.

    Args:
        client: vineyard client used to build buffers and create the metadata.
        value: indexable whose first two items are the data and the label.
        **kw: ``partition_index`` is recorded in the metadata (default ``[]``).

    Returns:
        The result of ``client.create_metadata`` for the assembled metadata.

    Raises:
        AssertionError: if the ``dali`` module is not available.
    """
    assert dali is not None, "Nvidia DALI is not available"

    meta = ObjectMeta()
    meta['typename'] = 'vineyard::Tensor'
    meta['partition_index_'] = to_json(kw.get('partition_index', []))

    data_array = np.array(value[0])
    label_array = np.array(value[1])

    meta.add_member('buffer_data_', build_numpy_buffer(client, data_array))
    meta.add_member('buffer_label_', build_numpy_buffer(client, label_array))

    # Shape and dtype descriptors, written in the same key order as before.
    descriptors = (
        ('data_shape_', to_json(data_array.shape)),
        ('label_shape_', to_json(label_array.shape)),
        ('data_type_', data_array.dtype.name),
        ('label_type_', label_array.dtype.name),
        ('data_type_meta_', data_array.dtype.str),
        ('label_type_meta_', label_array.dtype.str),
    )
    for key, entry in descriptors:
        meta[key] = entry

    return client.create_metadata(meta)
def execute(cls, ctx, op):
    """Store the op's first input chunk in vineyard and publish its object id.

    The vineyard socket and the initial ``needs_put`` decision come from
    ``resolve_vineyard_socket(ctx, op)``.  When a put is not requested, the
    chunk meta's ``object_ref`` is looked up; if that lookup raises
    ``KeyError`` or the referenced object does not exist in vineyard, the
    chunk is put anyway.  Otherwise the existing object's metadata is copied
    (minus ``id``, ``signature`` and ``instance_id``) into a new metadata
    object carrying this chunk's partition index.

    The resulting object id is persisted and written to
    ``ctx[op.outputs[0].key]`` inside a one-element object ndarray.

    Raises:
        RuntimeError: if the ``vineyard`` module is not available.
    """
    if vineyard is None:
        raise RuntimeError('vineyard is not available')

    socket, needs_put = resolve_vineyard_socket(ctx, op)
    client = vineyard.connect(socket)
    source_chunk = op.inputs[0]

    # some op might be fused and executed twice on different workers
    if not needs_put:
        # might be fused
        try:  # pragma: no cover
            chunk_meta = ctx.get_chunks_meta([source_chunk.key])[0]
            tensor_id = vineyard.ObjectID(chunk_meta['object_ref'])
            needs_put = not client.exists(tensor_id)
        except KeyError:
            needs_put = True

    if needs_put:
        tensor_id = client.put(
            ctx[source_chunk.key], partition_index=source_chunk.index
        )
    else:  # pragma: no cover
        # Re-publish the existing object under fresh metadata, dropping the
        # identity fields so that a new object id is assigned.
        excluded = ('id', 'signature', 'instance_id')
        existing = client.get_meta(tensor_id)
        replica = vineyard.ObjectMeta()
        for name, entry in existing.items():
            if name in excluded:
                continue
            if isinstance(entry, vineyard.ObjectMeta):
                replica.add_member(name, entry)
            else:
                replica[name] = entry
        replica['partition_index_'] = to_json(source_chunk.index)
        tensor_id = client.create_metadata(replica).id

    client.persist(tensor_id)

    # Wrap the id in an object-dtype array of shape (1,).
    result = np.empty((1, ), dtype=object)
    result[0] = tensor_id
    ctx[op.outputs[0].key] = result
def tf_tensor_builder(client, value, **kw):
    """Create ``vineyard::Tensor`` metadata from a dataset of (data, label) items.

    ``value`` is re-batched with ``value.batch(len(value))``; for each batch,
    components ``[0]`` (data) and ``[1]`` (label) are converted to numpy and
    stored through ``build_numpy_buffer``.

    Args:
        client: vineyard client used to build buffers and create the metadata.
        value: sized dataset exposing ``batch`` (presumably a
            ``tf.data.Dataset`` -- confirm against callers).
        **kw: ``partition_index`` is recorded in the metadata (default ``[]``).

    Returns:
        The result of ``client.create_metadata`` for the assembled metadata.
    """
    meta = ObjectMeta()
    meta['typename'] = 'vineyard::Tensor'
    meta['num'] = to_json(len(value))
    meta['partition_index_'] = to_json(kw.get('partition_index', []))

    # Batch size == len(value): presumably everything arrives in one batch,
    # so the keys below are written once.
    batched = value.batch(len(value))
    for batch in batched:
        data_array = batch[0].numpy()
        meta.add_member('buffer_data_',
                        build_numpy_buffer(client, data_array))
        label_array = batch[1].numpy()
        meta.add_member('buffer_label_',
                        build_numpy_buffer(client, label_array))

        meta['data_shape_'] = to_json(data_array.shape)
        meta['label_shape_'] = to_json(label_array.shape)
        meta['data_type_'] = data_array.dtype.name
        meta['label_type_'] = label_array.dtype.name
        meta['data_type_meta_'] = data_array.dtype.str
        meta['label_type_meta_'] = label_array.dtype.str

    return client.create_metadata(meta)
def mxnet_dataframe_builder(client, value, builder, **kw):
    """Create ``vineyard::DataFrame`` metadata from a (features, label) pair.

    ``value[1]`` is stored as the last column under the ``label`` name;
    ``value[0][:, i]`` is stored as column ``i`` for every other entry of
    ``cols``.

    Args:
        client: vineyard client used to create the metadata.
        value: indexable where ``value[0]`` supports ``[:, i]`` slicing and
            ``value[1]`` holds the label values.
        builder: object whose ``run(client, obj)`` builds each column.
        **kw: ``cols`` (column names, label slot last), ``label`` (label
            column name), ``partition_index`` (default ``[0, 0]``) and
            ``row_batch_index`` (default ``0``).

    Returns:
        The result of ``client.create_metadata`` for the assembled metadata.
    """
    meta = ObjectMeta()
    meta['typename'] = 'vineyard::DataFrame'

    cols = kw.get('cols')
    label = kw.get('label')
    meta['label'] = to_json(label)
    meta['columns_'] = to_json(cols)

    # The label occupies the final column slot and is written first.
    last = len(cols) - 1
    meta['__values_-key-%d' % last] = to_json(label)
    meta.add_member('__values_-value-%d' % last,
                    builder.run(client, value[1]))

    features = value[0]
    for position in range(last):
        meta['__values_-key-%d' % position] = to_json(cols[position])
        meta.add_member('__values_-value-%d' % position,
                        builder.run(client, features[:, position]))

    meta['__values_-size'] = len(cols)

    partition = kw.get('partition_index', [0, 0])
    meta['partition_index_row_'] = partition[0]
    meta['partition_index_column_'] = partition[1]
    meta['row_batch_index_'] = kw.get('row_batch_index', 0)
    return client.create_metadata(meta)
def tf_dataframe_builder(client, value, builder, **kw):
    """Create ``vineyard::DataFrame`` metadata from a (features, label) dataset.

    Every element of ``value`` is a ``(feat, labels)`` pair where ``feat`` is
    a mapping from column name to a value exposing ``.numpy()``.  The column
    list is the key list of the first element plus a trailing ``'label'``
    column that holds ``labels``.

    The dataset is traversed exactly once and all columns are filled from the
    same traversal.  Iterating once per column (as earlier revisions did)
    costs one full pass per column and, for datasets that yield a different
    order on each iteration, produces columns whose rows do not line up.

    Args:
        client: vineyard client used to create the metadata.
        value: sized dataset exposing ``take`` (presumably a
            ``tf.data.Dataset`` -- confirm against callers).
        builder: object whose ``run(client, obj)`` builds each column.
        **kw: ``partition_index`` (default ``[0, 0]``) and
            ``row_batch_index`` (default ``0``).

    Returns:
        The result of ``client.create_metadata`` for the assembled metadata.

    Raises:
        ValueError: if ``value`` yields no elements, so the columns cannot be
            determined.
    """
    meta = ObjectMeta()
    meta['typename'] = 'vineyard::DataFrame'

    cols = None
    columns = []
    for feat, labels in value.take(len(value)):
        if cols is None:
            # The first element defines the schema.
            cols = list(feat.keys())
            cols.append('label')
            columns = [[] for _ in cols]
        for name, column in zip(cols, columns):
            if name == 'label':
                column.append(labels.numpy())
            else:
                column.append(feat[name].numpy())

    if cols is None:
        raise ValueError(
            'cannot build a vineyard::DataFrame from an empty dataset')

    meta['columns_'] = to_json(cols)
    for i, (name, column) in enumerate(zip(cols, columns)):
        meta['__values_-key-%d' % i] = to_json(name)
        meta.add_member('__values_-value-%d' % i, builder.run(client, column))
    meta['__values_-size'] = len(cols)

    partition_index = kw.get('partition_index', [0, 0])
    meta['partition_index_row_'] = partition_index[0]
    meta['partition_index_column_'] = partition_index[1]
    meta['row_batch_index_'] = kw.get('row_batch_index', 0)
    return client.create_metadata(meta)
def torch_dataframe_builder(client, value, builder, **kw):
    """Create ``vineyard::DataFrame`` metadata from an iterable of (x, y) pairs.

    For each column name in ``cols``: the column equal to ``label`` is filled
    with ``y.numpy()``, every other column ``i`` with ``x[i].numpy()``.

    ``value`` is traversed exactly once and all columns are filled from the
    same traversal.  Iterating once per column (as earlier revisions did)
    repeats the dataset's work for every column, leaves later columns empty
    when ``value`` can only be consumed once, and can misalign rows when
    items are not reproduced identically on every pass.

    Args:
        client: vineyard client used to create the metadata.
        value: iterable yielding ``(x, y)`` pairs whose members expose
            ``.numpy()`` (presumably a torch dataset -- confirm against
            callers).
        builder: object whose ``run(client, obj)`` builds each column.
        **kw: ``cols`` (column names), ``label`` (label column name),
            ``partition_index`` (default ``[0, 0]``) and
            ``row_batch_index`` (default ``0``).

    Returns:
        The result of ``client.create_metadata`` for the assembled metadata.
    """
    meta = ObjectMeta()
    meta['typename'] = 'vineyard::DataFrame'
    cols = kw.get('cols')
    label = kw.get('label')
    meta['label'] = to_json(label)
    meta['columns_'] = to_json(cols)

    # One value list per column, all filled during a single traversal.
    columns = [[] for _ in cols]
    for x, y in value:
        for i, name in enumerate(cols):
            if name == label:
                columns[i].append(y.numpy())
            else:
                columns[i].append(x[i].numpy())

    for i, (name, column) in enumerate(zip(cols, columns)):
        meta['__values_-key-%d' % i] = to_json(name)
        meta.add_member('__values_-value-%d' % i, builder.run(client, column))
    meta['__values_-size'] = len(cols)

    partition_index = kw.get('partition_index', [0, 0])
    meta['partition_index_row_'] = partition_index[0]
    meta['partition_index_column_'] = partition_index[1]
    meta['row_batch_index_'] = kw.get('row_batch_index', 0)
    return client.create_metadata(meta)
def mars_sparse_matrix_builder(client, value, builder, **kw):
    """Create ``vineyard::SparseMatrix<dtype>`` metadata for a sparse matrix.

    Args:
        client: vineyard client used to create the metadata.
        value: object exposing ``dtype``, ``shape`` and ``spmatrix``.
        builder: object whose ``run(client, obj, **kw)`` builds the wrapped
            ``spmatrix`` member.
        **kw: forwarded unchanged to ``builder.run``.

    Returns:
        The result of ``client.create_metadata`` for the assembled metadata.
    """
    meta = ObjectMeta()

    typename = 'vineyard::SparseMatrix<%s>' % value.dtype.name
    meta['typename'] = typename

    shape_json = to_json(value.shape)
    meta['shape_'] = shape_json

    inner = builder.run(client, value.spmatrix, **kw)
    meta.add_member('spmatrix', inner)

    return client.create_metadata(meta)