Example #1
0
def merge_global_object(vineyard_endpoint,
                        results: List[List[ObjectID]]) -> ObjectID:
    """Merge per-worker lists of global-object chunks into one global object.

    Flattens ``results``, connects to the vineyard cluster at
    ``vineyard_endpoint``, and composes a new persisted global object whose
    members are the union of the members of all input global objects.

    Raises:
        ValueError: if there is nothing to merge, if the inputs mix global
            and non-global objects, or if more than one non-global object
            is given.
    """
    if not results:
        raise ValueError("No available sub objects to merge")

    chunks = [chunk for subresults in results for chunk in subresults]
    if not chunks:
        raise ValueError("No available sub objects to merge")

    if len(chunks) == 1:
        # fastpath: a single chunk needs no merging at all
        only = chunks[0]
        return only if isinstance(only, ObjectID) else ObjectID(only)

    client = vineyard.connect(vineyard_endpoint)
    metadatas = [
        client.get_meta(chunk if isinstance(chunk, ObjectID) else ObjectID(chunk))
        for chunk in chunks
    ]

    # map each member object's id to its member name, deduplicating members
    # shared between the input global objects
    members, seen_global = {}, False
    for meta in metadatas:
        if meta.isglobal:
            seen_global = True
            for member_name, member_value in meta.items():
                if isinstance(member_value, ObjectMeta):
                    members[member_value.id] = member_name
        elif seen_global:
            raise ValueError('Not all sub objects are global objects: %s' %
                             results)

    if not seen_global:
        raise ValueError(
            "Unable to merge more than one non-global objects: %s" % results)

    # seed the merged metadata from the first input's scalar entries,
    # dropping per-object identity keys
    merged = ObjectMeta()
    merged.set_global(True)
    for key, value in metadatas[0].items():
        if isinstance(value, ObjectMeta):
            continue
        if key in ['id', 'signature', 'instance_id']:
            continue
        merged[key] = value
    for member_id, member_name in members.items():
        merged.add_member(member_name, member_id)
    meta = client.create_metadata(merged)
    client.persist(meta.id)
    return meta.id
Example #2
0
def make_global_dataframe(client, blocks, extra_meta=None) -> ObjectMeta:
    """Create and persist a global dataframe from its partition chunks.

    Args:
        client: A connected vineyard client, used to create and persist
            the resulting metadata.
        blocks: Sequence of partition chunks; each element may be an
            :class:`ObjectMeta`, :class:`ObjectID`, :class:`Object`, or any
            value accepted by the ``ObjectID`` constructor.
        extra_meta: Optional mapping of additional metadata entries to
            attach to the global dataframe.

    Returns:
        The :class:`ObjectMeta` of the newly created global dataframe.
    """
    meta = ObjectMeta()
    meta['typename'] = 'vineyard::GlobalDataFrame'
    meta.set_global(True)
    meta['partitions_-size'] = len(blocks)
    if extra_meta:
        for k, v in extra_meta.items():
            meta[k] = v

    for idx, block in enumerate(blocks):
        if not isinstance(block, (ObjectMeta, ObjectID, Object)):
            block = ObjectID(block)
        meta.add_member('partitions_-%d' % idx, block)

    # renamed from ``gtensor_meta`` — a copy-paste leftover from the
    # global-tensor variant; this function builds a global *dataframe*.
    # NOTE(review): ``persist`` is given the metadata object here while
    # merge_global_object persists ``meta.id`` — presumably both are
    # accepted; confirm against the client API before unifying.
    gdf_meta = client.create_metadata(meta)
    client.persist(gdf_meta)
    return gdf_meta
Example #3
0
def traverse_to_rebuild(client, stream_id: ObjectID,
                        blobs: "Dict[ObjectID, Tuple[str, Blob]]"):
    """Recursively rebuild an object tree from a stored stream collection.

    For a :class:`StreamCollection` node, composes a fresh :class:`ObjectMeta`
    from the collection's metadata (minus bookkeeping keys) and the rebuilt
    children; for a leaf, looks the already-received blob up in ``blobs``.

    Returns:
        A ``(memberpath, value)`` pair — ``memberpath`` is the last component
        of the collection's stored path (``''`` for the root) and ``value`` is
        either the created :class:`ObjectMeta` or the leaf's blob.

    NOTE(review): values of ``blobs`` are unpacked below as
    ``(memberpath, blob)`` tuples, so the original ``Dict[ObjectID, Blob]``
    annotation was inaccurate; written as a string forward reference since
    ``Tuple`` may not be imported at the top of this file — confirm.
    """
    stream = client.get(stream_id)
    if isinstance(stream, StreamCollection):
        # the member name under the parent is the last path component of
        # the full path stored on the collection
        fullpath = stream.meta[StreamCollection.KEY_OF_PATH]
        if fullpath:
            memberpath = fullpath.split(os.path.sep)[-1]
        else:
            memberpath = ''
        logger.info('rebuilding object %s as %s', fullpath, memberpath)
        meta = ObjectMeta()
        for k, v in stream.meta.items():
            # erase existing identifiers
            if k in [
                    'typename',
                    'id',
                    'signature',
                    'instance_id',
                    'transient',
                    StreamCollection.KEY_OF_GLOBAL,
                    StreamCollection.KEY_OF_PATH,
                    StreamCollection.KEY_OF_STREAMS,
            ]:
                continue
            if k == '__typename':
                # the original typename was stashed under '__typename'
                # when the collection was serialized; restore it here
                meta['typename'] = v
            else:
                meta[k] = v
        isglobal = stream.meta[StreamCollection.KEY_OF_GLOBAL]
        if isglobal:
            meta.set_global(True)
        for s in stream.streams:
            name, member = traverse_to_rebuild(client, s, blobs)
            if isglobal:
                # members of a global object must be persisted before they
                # can be added to its metadata
                client.persist(member.id)
            meta.add_member(name, member)
        meta = client.create_metadata(meta)
        return memberpath, meta
    else:
        # leaf: the blob payload was received earlier and indexed by stream id
        memberpath, blob = blobs[stream.id]
        return memberpath, blob