Beispiel #1
0
def read_byte_stream(
    client,
    fs: AbstractFileSystem,
    stream: ByteStream,
    path: str,
    chunk_size: int = CHUNK_SIZE,
):
    """Copy the file at ``path`` into ``stream``, one chunk at a time.

    The file is opened in binary mode through ``fs``. Its size is taken from
    the opened file object, and the content is read with ``read_block`` in
    pieces of at most ``chunk_size`` bytes. Each piece is copied into a chunk
    obtained from the stream writer.

    Args:
        client: Passed to ``stream.open_writer`` to obtain the writer.
        fs: Filesystem used to open ``path``.
        stream: Destination stream; a writer is opened on it for the copy.
        path: Location of the blob to read.
        chunk_size: Upper bound on the number of bytes requested per read.

    Returns:
        The total size reported by the opened file.

    Note:
        Any exception raised while copying is reported via
        ``report_exception``, the writer is marked as failed, and the process
        exits with status ``-1`` (``sys.exit``); it is not re-raised to the
        caller.
    """
    logger.info('start reading blob at %s', path)
    with fs.open(path, mode="rb") as f:
        # Some file objects expose ``size`` as a method and others as a plain
        # attribute; calling a non-callable raises TypeError, which selects
        # the attribute form.
        try:
            total_size = f.size()
        except TypeError:
            total_size = f.size

        writer = stream.open_writer(client)
        try:
            begin, end = 0, total_size
            while begin < end:
                buffer = read_block(f, begin, min(chunk_size, end - begin))
                if len(buffer) == 0:
                    # Without this check ``begin`` would never advance and the
                    # loop would spin forever (e.g. the file is shorter than
                    # its reported size, or chunk_size is not positive).
                    raise IOError(
                        'unexpected empty read at offset %d of %d from %s'
                        % (begin, end, path)
                    )
                chunk = writer.next(len(buffer))
                vineyard.memory_copy(chunk, 0, buffer)
                # Advance by what was actually read, which may be less than
                # what was requested.
                begin += len(buffer)
        except Exception:
            report_exception()
            writer.fail()
            sys.exit(-1)

        writer.finish()
        return total_size
Beispiel #2
0
def read_stream_collections(
    client,
    fs: AbstractFileSystem,
    queue: "ConcurrentQueue[Tuple[ByteStream, str]]",
    base_prefix: str,
    prefix: str,
):
    """Recursively build streams for the directory tree rooted at ``prefix``.

    A directory that contains ``metadata.json`` is treated as a collection:
    its metadata is read, every sub-directory is processed recursively, and a
    ``StreamCollection`` is created from the metadata and the child ids.

    A directory without ``metadata.json`` is treated as a leaf: its ``blob``
    file is opened to obtain the size, a ``ByteStream`` is created whose
    params carry the blob path relative to ``base_prefix`` and the length, and
    the ``(stream, blob_path)`` pair is put on ``queue``.

    Args:
        client: Passed through to ``StreamCollection.new`` / ``ByteStream.new``.
        fs: Filesystem used for existence checks, listing and opening files.
        queue: Receives one ``(stream, blob_path)`` tuple per leaf blob.
        base_prefix: Root against which blob paths are made relative.
        prefix: Directory currently being processed.

    Returns:
        The ``id`` of the created stream collection or byte stream.
    """
    blob_path = os.path.join(prefix, 'blob')
    metadata_path = os.path.join(prefix, 'metadata.json')

    if not fs.exists(metadata_path):
        # Leaf directory: describe the blob and queue it.
        with fs.open(blob_path, 'rb') as blob:
            # ``size`` may be a method or a plain attribute depending on the
            # file object; TypeError from the call selects the attribute.
            try:
                blob_size = blob.size()
            except TypeError:
                blob_size = blob.size

            stream_params = {
                StreamCollection.KEY_OF_PATH: os.path.relpath(
                    blob_path, base_prefix
                ),
                'length': blob_size,
            }
            byte_stream = ByteStream.new(client, params=stream_params)
            queue.put((byte_stream, blob_path))
            return byte_stream.id

    # Collection directory: read metadata first, then descend into children.
    metadata = read_metadata(fs, metadata_path)
    child_ids = [
        read_stream_collections(client, fs, queue, base_prefix, entry['name'])
        for entry in fs.listdir(prefix)
        if entry['type'] == 'directory'
    ]
    collection = StreamCollection.new(client, metadata, child_ids)
    return collection.id
Beispiel #3
0
def read_metadata(fs: AbstractFileSystem, path: str) -> Dict:
    """Load the JSON document stored at ``path`` on ``fs``.

    The file is read as bytes and decoded as UTF-8 with ``errors='ignore'``,
    so bytes that are not valid UTF-8 are dropped before the text is parsed.

    Args:
        fs: Filesystem used to open the file.
        path: Location of the JSON metadata file.

    Returns:
        The value produced by ``json.loads`` for the file content.
    """
    logger.info('start reading metadata at %s', path)
    with fs.open(path, mode="rb") as handle:
        raw_bytes = handle.read()
        text = raw_bytes.decode('utf-8', errors='ignore')
        return json.loads(text)