Example #1
0
def test_files_has_been_entangled_enough(monkeypatch):
    """A block satisfies the check while the requested pointer count does not exceed its entanglement level."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    monkeypatch.setattr(FakeRedisPipeline, "execute", mock_pipeline_get_block)
    monkeypatch.setattr(files.redis, "pipeline", get_me_a_fake_redis_pipeline)
    # (pointers requested, expected verdict)
    for pointers, verdict in [(0, True), (1, True), (2, False)]:
        assert files.has_been_entangled_enough("path", pointers) is verdict
Example #2
0
def test_files_get_provider_returns_empty_list_if_no_blocks(monkeypatch):
    """Querying a provider when no blocks exist yields an empty list."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    # Simulate a metadata store with no blocks at all
    monkeypatch.setattr(files, "list_blocks", lambda: [])
    result = files.get_blocks_from_provider("NonExistingProvider")
    assert isinstance(result, list)
    assert not result
Example #3
0
def test_files_list_blocks(monkeypatch):
    """list_blocks returns the block names stored in the redis sorted set."""
    expected = ["block1", "block2", "block3"]
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    def fake_zrange(sorted_set, start, end):
        # Return a copy so the test data cannot be mutated by the callee
        return list(expected)
    monkeypatch.setattr(files.redis, "zrange", fake_zrange)
    blocks = files.list_blocks()
    assert isinstance(blocks, list)
    assert len(blocks) == 3
    assert blocks == expected
Example #4
0
def audit():
    """
    Downloads and checks the integrity of the blocks stored on the storage nodes.
    Failing blocks that cannot be recovered by copying from a replica are grouped
    by documents and added to a queue returned by the function.
    Returns:
        dict(str, set(int)): Documents and the indices of the blocks that need
                              to be reconstructed
    """
    reconstruction_needed = []
    dispatcher = Dispatcher(get_dispatcher_configuration())
    files = Files(host="metadata")
    # List every block known to the metadata service
    blocks = files.get_blocks(files.list_blocks())
    for block in blocks:
        LOGGER.debug("Looking at block {:s}".format(block.key))
        # Check every replica of the block
        providers = list(set(block.providers))
        for provider_name in providers:
            LOGGER.debug("Looking at replica of {:s} on {:s}".format(block.key, provider_name))
            # Download the replica and compute its checksum
            replica = dispatcher.providers[provider_name].get(block.key)
            computed_checksum = None
            if replica:
                computed_checksum = hashlib.sha256(replica).digest()
            # If the replica is missing or does not match its checksum
            if not replica or computed_checksum != block.checksum:
                repaired = False
                # NOTE: Logger.warn is a deprecated alias; use warning()
                if not replica:
                    LOGGER.warning("Could not load replica of {:s} on {:s}".format(block.key, provider_name))
                if computed_checksum:
                    LOGGER.warning("Replica of {:s} on {:s} does not match expected checksum (actual = {:s}, expected = {:s})".format(block.key, provider_name, convert_binary_to_hex_digest(computed_checksum), convert_binary_to_hex_digest(block.checksum)))
                # Look for sane replicas on the other providers
                other_providers = list(set(providers).difference(set([provider_name])))
                if other_providers:
                    for other_provider in other_providers:
                        candidate_replica = dispatcher.providers[other_provider].get(block.key)
                        if not candidate_replica:
                            continue
                        candidate_checksum = hashlib.sha256(candidate_replica).digest()
                        if candidate_checksum == block.checksum:
                            # Overwrite the damaged replica with the valid copy
                            dispatcher.providers[provider_name].put(candidate_replica, block.key)
                            repaired = True
                            break
                if not repaired:
                    # No sane replica found: queue the block for reconstruction
                    LOGGER.warning("Replica of {:s} on {:s} must be reconstructed".format(block.key, provider_name))
                    reconstruction_needed.append(block.key)
            else:
                LOGGER.debug("Replica of {:s} on {:s} is OK".format(block.key, provider_name))
    return group_blocks_by_path(reconstruction_needed)
Example #5
0
def test_files_get_provider(monkeypatch):
    """Blocks hosted on a given provider are returned for that provider name."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    monkeypatch.setattr(files, "list_blocks", lambda: ["doc-00"])
    def fake_get_blocks(block_list):
        # Every listed block is hosted on the fake provider
        fake_blocks = []
        for key in block_list:
            fake_blocks.append(MetaBlock(key, providers=["FakeProvider"]))
        return fake_blocks
    monkeypatch.setattr(files, "get_blocks", fake_get_blocks)
    result = files.get_blocks_from_provider("FakeProvider")
    assert isinstance(result, list)
    assert len(result) == 1
Example #6
0
def test_files_exists(monkeypatch):
    """exists mirrors the value reported by the underlying redis client."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    # First patch redis to deny existence, then to confirm it
    monkeypatch.setattr(files.redis, "exists", lambda _: False)
    assert files.exists("path") == False
    monkeypatch.delattr(files.redis, "exists")
    monkeypatch.setattr(files.redis, "exists", lambda _: True)
    assert files.exists("path") == True
Example #7
0
def get_block_metadata(path, index):
    """
    Returns the block metadata
    Args:
        path(str): Path to the file
        index(int): Index of the block in the file
    Returns:
        MetaBlock: Block metadata
    """
    block_key = compute_block_key(path, index)
    return Files().get_block(block_key)
Example #8
0
def test_files_exists_bad_path(monkeypatch):
    """exists rejects arguments that are not non-empty strings."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    error_message = "path argument must be a non empty string"
    # None, empty string, dict and list must all be refused
    for bad_path in (None, "", {"a": 0}, []):
        with pytest.raises(ValueError, match=error_message):
            files.exists(bad_path)
Example #9
0
def find_pointing_documents(path, index):
    """
    Returns the Metadata of the documents that use a given block as a pointer.
    Args:
        path(str): Path to the file
        index(int): Index of the block in the file
    Returns:
        list(Metadata): List of documents that used the block as a pointer
    """
    block_metadata = get_block_metadata(path, index)
    # The block's entangled_with field records the pointing documents
    return Files().get_files(block_metadata.entangled_with)
Example #10
0
def reconstruct_as_pointer(path, index):
    """
    Reconstruct a block by reassembling a codeword it was part of and plucking it
    from the result.
    Args:
        path(str): Path to the file the block belongs to
        index(int): Index of the block
    Returns:
        bytes: The constructed block
    Raises:
        ValueError: If path is not a string or is empty or if the index is
                    negative
        RuntimeError: If the block has never been pointed at
    """
    if not isinstance(path, str) or not path:
        raise ValueError("path argument must be a non empty string")
    if not isinstance(index, int) or index < 0:
        raise ValueError("index argument must be an integer greater or equal to 0")
    metablock = get_block_metadata(path, index)
    # Any document that used this block as a pointer can serve as a source
    documents = find_pointing_documents(path, index)
    if not documents:
        raise RuntimeError("Could not find any pointing document")
    # Read the entanglement parameters: s source blocks, t pointer blocks
    with open(os.path.join(os.path.dirname(__file__), "..", "dispatcher.json"), "r") as handle:
        entanglement_configuration = json.load(handle)["entanglement"]["configuration"]
    source_blocks = entanglement_configuration["s"]
    pointer_blocks = entanglement_configuration["t"]
    offset = source_blocks  + pointer_blocks
    files = Files()
    for document in documents:
        # Rebuild the pointing document's codeword and extract the strip that
        # corresponds to the missing block
        index_to_reconstruct = get_position_in_codeword(document, metablock)
        coder_client = CoderClient()
        reconstructed_block = coder_client.reconstruct(document.path, [index_to_reconstruct])[index_to_reconstruct].data
        elements = split_strip_header(reconstructed_block)
        fragment_header = elements[2]
        metadata = files.get(path)
        # Rewrite the fragment header so the strip matches its original
        # position and size in the damaged document
        fragment_header.metadata.index = offset + index
        fragment_size = int(math.ceil(metadata.original_size / float(source_blocks)))
        if (fragment_size % 2) == 1:
            # presumably the coder pads fragments to an even size -- TODO confirm
            fragment_size += 1
        fragment_header.metadata.size = fragment_size
        fragment_header.metadata.orig_data_size = fragment_size * (offset)
        # Reassemble header + payload, then trim to the expected block size
        brand_new = json.dumps(metadata.entangling_blocks) + HEADER_DELIMITER + \
                    str(metadata.original_size) + HEADER_DELIMITER + \
                    fragment_header.pack() + \
                    get_fragment(reconstructed_block)
        brand_new = brand_new[:metablock.size]
        # Only accept the candidate if it matches the recorded checksum
        computed_checksum = hashlib.sha256(brand_new).digest()
        if computed_checksum == metablock.checksum:
            return brand_new
    raise RuntimeError("Could not reconstruct {:s} from pointing documents".format(metablock.key))
Example #11
0
def seed_system():
    """
    Checks if the system has enough pointers at disposal and creates extra anchor files if needed.
    """
    seed_logger = logging.getLogger("seed")
    seed_logger.info("Checking if the system needs to be seeded")
    with open(os.path.join(os.path.dirname(__file__),
                           "./dispatcher.json")) as handle:
        configuration = json.load(handle)
    # Seeding only applies to step entanglement
    if "entanglement" not in configuration or "type" not in configuration["entanglement"] or \
            configuration["entanglement"]["type"] != "step":
        # FIX: message previously read "No need to see the system"
        seed_logger.info("No need to seed the system")
        return
    files = Files(host="metadata")
    pointers_available = files.get_number_of_blocks_available()
    pointers_needed = configuration["entanglement"]["configuration"]["t"]
    difference = pointers_needed - pointers_available
    if difference <= 0:
        seed_logger.info("Seeding done")
        return

    # Encode a zero-padded anchor block with no source/pointer inputs
    storage = offline.Storage("/tmp/")
    parities = configuration["entanglement"]["configuration"]["p"]
    driver = pyproxy.coder.entangled_driver.StepEntangler(
        storage, 1, 0, parities)
    raw_block = pyproxy.coder.entangled_driver.pad("", 1024 * 1024)
    coded_strips = driver.encode(raw_block)
    seed_logger.info("Creating {:d} anchoring blocks".format(difference))
    # Each anchor document contributes `parities` pointer candidates
    documents_needed = int(math.ceil(float(pointers_needed) / parities))
    for index in xrange(documents_needed):
        path = "anchor-{:d}".format(index)
        strips = []
        for block_index, coded_strip in enumerate(coded_strips):
            strip = Strip()
            strip.id = "{:s}-{:02d}".format(path, block_index)
            strip.data = coded_strip
            strip.checksum = hashlib.sha256(coded_strip).digest()
            strip.type = Strip.DATA
            strips.append(strip)
        encoded_file = playcloud_pb2.File(path=path,
                                          strips=strips,
                                          original_size=len(raw_block))
        # Store the anchor and record its entanglement data in the metadata
        metadata = DISPATCHER.put(path, encoded_file)
        metadata.entangling_blocks = extract_entanglement_data(
            encoded_file.strips[0].data)
        FILES.put(path, metadata)

    seed_logger.info("Seeding done")
Example #12
0
def test_files_get(monkeypatch):
    """get returns the metadata record matching the requested path."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    fake_record = {
        "path": "path",
        "original_size": "0",
        "creation_date": "2017-01-01 00:00:00.42",
        "blocks": "",
        "providers": "",
        "block_type": BlockType.DATA.name,
        "entangling_blocks": "[]"
    }
    def mock_pipeline_get(path):
        return [fake_record]
    monkeypatch.setattr(FakeRedisPipeline, "execute", mock_pipeline_get)
    monkeypatch.setattr(files.redis, "pipeline", get_me_a_fake_redis_pipeline)
    metadata = files.get("path")
    assert metadata.path == "path"
Example #13
0
def _push_block_to_providers(dispatcher, files, path, index, block_data):
    """
    Writes reconstructed block data back to every provider hosting a replica.
    Args:
        dispatcher(Dispatcher): Dispatcher giving access to the storage providers
        files(Files): Metadata client used to look up the block
        path(str): Path to the document
        index(int): Index of the block in the document
        block_data(bytes): Reconstructed block data to store
    """
    metablock = files.get_block(compute_block_key(path, index))
    for provider_name in set(metablock.providers):
        dispatcher.providers[provider_name].put(block_data, metablock.key)


def repair(path, indices):
    """
    Repairs one or multiple blocks of a document
    Args:
        path(str): Path to the document
        indices(list(int)): Indices of the blocks to retrieve
    Raises:
        ValueError: If path is not a non-empty string or indices is not a
                    list of integers
    """
    if not isinstance(path, str) or not path:
        raise ValueError("Argument path must be a non-empty string")
    if not isinstance(indices, list):
        raise ValueError("Argument indices must be list")
    if not indices:
        return
    for index, index_to_reconstruct in enumerate(indices):
        if not isinstance(index_to_reconstruct, int):
            error_message = "indices[{:d}] is not an integer".format(index)
            raise ValueError(error_message)
    erasures_threshold = get_erasure_threshold()
    dispatcher = Dispatcher(get_dispatcher_configuration())
    files = Files()
    if len(indices) <= erasures_threshold:
        # Few enough erasures: a single Reed-Solomon pass repairs them all
        reconstructed_blocks = reconstruct_with_RS(path, indices)
        for index in reconstructed_blocks:
            _push_block_to_providers(dispatcher, files, path, index,
                                     reconstructed_blocks[index])
    else:
        #FIXME order of blocks to reach e erasures than do RS reconstuction
        # Repair blocks one at a time as pointers until the remaining number
        # of erasures falls within the Reed-Solomon threshold
        indices.sort()
        while len(indices) > erasures_threshold:
            index = indices.pop(0)
            reconstructed_block = reconstruct_as_pointer(path, index)
            _push_block_to_providers(dispatcher, files, path, index,
                                     reconstructed_block)
        reconstructed_blocks = reconstruct_with_RS(path, indices)
        for index in reconstructed_blocks:
            _push_block_to_providers(dispatcher, files, path, index,
                                     reconstructed_blocks[index])
Example #14
0
def test_files_has_been_entangled_enough_raises_ValueError_if_pointers_is_lower_than_0():
    """A negative pointer count must be rejected with a ValueError."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    expected_error = "pointers argument must be a valid integer greater or equal to 0"
    with pytest.raises(ValueError, match=expected_error):
        files.has_been_entangled_enough("path", -1)
Example #15
0
def test_files_has_been_entangled_enough_raises_ValueError_if_block_key_is_None():
    """A None path must be rejected with a ValueError."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    expected_error = "path argument must be a valid non-empty string"
    with pytest.raises(ValueError, match=expected_error):
        files.has_been_entangled_enough(None, 1)
Example #16
0
def test_files_put_raises_ValueError_if_path_is_empty():
    """put must refuse an empty path."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    expected_error = "path argument must be a valid non-empty string"
    with pytest.raises(ValueError, match=expected_error):
        files.put("", MetaDocument("path"))
Example #17
0
def test_files_put_raises_ValueError_if_metadata_is_None():
    """put must refuse a None metadata argument."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    expected_error = "metadata argument must be a valid Metadata object"
    with pytest.raises(ValueError, match=expected_error):
        files.put("path", None)
Example #18
0
def test_files_get_raises_keyerror_when_path_not_in_files(monkeypatch):
    """Fetching metadata for an unknown path raises KeyError."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    monkeypatch.setattr(files.redis, "pipeline", get_me_a_fake_redis_pipeline)
    expected_error = "path NonExistingKey not found"
    with pytest.raises(KeyError, match=expected_error):
        files.get("NonExistingKey")
Example #19
0
def test_files_get_raises_valueerror_when_path_is_empty():
    """get must refuse an empty path."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    expected_error = "path argument must be a valid non-empty string"
    with pytest.raises(ValueError, match=expected_error):
        files.get("")
Example #20
0
def test_files_has_been_entangled_enough_raises_KeyError_if_block_key_does_not_match_any_existing_key(monkeypatch):
    """Checking entanglement of an unknown block key raises KeyError."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    monkeypatch.setattr(files.redis, "pipeline", get_me_a_fake_redis_pipeline)
    expected_error = "key NonExistingKey not found"
    with pytest.raises(KeyError, match=expected_error):
        files.has_been_entangled_enough("NonExistingKey", 2)
Example #21
0
def test_files_get_provider_with_empty_string_as_provider():
    """get_blocks_from_provider must refuse an empty provider name."""
    with mock.patch("socket.gethostbyname", return_value="127.0.0.1"):
        files = Files()
    expected_error = "provider argument must be a non empty string"
    with pytest.raises(ValueError, match=expected_error):
        files.get_blocks_from_provider("")
Example #22
0
                       os.path.join(os.path.dirname(__file__), "logging.conf"))
logging.config.fileConfig(LOG_CONFIG)
LOGGER = logging.getLogger("proxy")

# GRPC setup
DEFAULT_GRPC_TIMEOUT_IN_SECONDS = 60
GRPC_MESSAGE_SIZE = (2 * 1024 * 1024 * 1024) - 1  # 2^31 - 1
GRPC_OPTIONS = [("grpc.max_receive_message_length", GRPC_MESSAGE_SIZE),
                ("grpc.max_send_message_length", GRPC_MESSAGE_SIZE)]
CODER_CLIENT = pyproxy.coder_client.LocalCoderClient()

# Loading dispatcher
DISPATCHER = pyproxy.pyproxy_globals.get_dispatcher_instance()

# Loading the Files metadata structure
FILES = Files()

# Bottle webapp configuration
# Allow request bodies of up to 1 GiB to be buffered in memory
bottle.BaseRequest.MEMFILE_MAX = 1024 * 1024 * 1024
APP = bottle.app()

# Setup kazoo
# NOTE(review): presumably replaced with a real zookeeper client at startup -- confirm
KAZOO = None
HOSTNAME = os.uname()[1]

#FIXME persist empty files metadata
EMPTY_FILES = set()


@APP.route("/<key:path>/__meta", method="GET")
def get_file_metadata(key):