Example #1
0
def test_update_failed(some_data_set: DownloadableContent):
    """Applying a FAILED result leaves no destination and copies type and message onto the content."""
    failed_result = FetcherResult(FetcherStatus.FAILED, FetchedType.FILE, ERROR)
    failed_result.update(some_data_set)

    # No destination is expected on the content after a failure
    assert not some_data_set.dst
    assert some_data_set.type == FetchedType.FILE
    assert some_data_set.message == ERROR
Example #2
0
        def on_content_locked(content: DownloadableContent, lock: RWLock):
            """Start the download of ``content`` now that ``lock`` is held for it.

            The content size is estimated first. If that fails, the content is
            marked FAILED, ``on_done`` is called and the lock is released right
            away. Otherwise an ephemeral ZooKeeper node is created for the
            download, its state is watched, and the fetch is dispatched.

            :param content: the content to download; updated in place
            :param lock: the lock held for the content; released once the download is done
            """

            def _on_done_and_unlock(content: DownloadableContent):
                # Notify the caller, clean up the dispatched work, then give the lock back
                on_done(content)
                self._download_dispatcher.cleanup(content, event)
                lock.release()

            try:
                content.size_info = self._size_estimator(content.src)
            except Exception as e:
                msg = f"Failed to estimate the size of content {content.src}: {str(e)}"
                logger.exception(msg)
                FetcherResult(FetcherStatus.FAILED, None, msg).update(content)
                on_done(content)
                lock.release()
                return

            # This node will be killed if I die
            zk_node_path = self._get_node_path(event.client_id, event.action_id, content)
            self._zk.create(zk_node_path, DownloadManager.INITIAL_DATA, ephemeral=True, makepath=True)

            self.__handle_node_state(zk_node_path, _on_done_and_unlock, content)

            # size_info is already set by the guarded estimate above. It is deliberately
            # not re-estimated here: an exception at this point would leave the lock held
            # and on_done uncalled.
            self._download_dispatcher.dispatch_fetch(content, event, zk_node_path)
Example #3
0
def test_fetcher_updates_zk(mock_http_to_s3, mock_update_zk_node):
    """retrying_fetch calls the mocked ZK update with a DONE/FILE result for an HTTP source."""
    job_config = FetcherJobConfig(
        HTTP_SRC,
        DST,
        zk_node_path=ZK_NODE_PATH,
        zookeeper_ensemble_hosts=ZK_ENSEMBLE,
    )

    retrying_fetch(job_config)

    expected_result = FetcherResult(FetcherStatus.DONE, FetchedType.FILE, SUCCESS_MESSAGE)
    mock_update_zk_node.assert_called_with(ZK_NODE_PATH, ZK_ENSEMBLE, expected_result)
Example #4
0
def test_fetcher_updates_zk_fail(mock_http_to_s3_client_error, mock_update_zk_node):
    """retrying_fetch calls the mocked ZK update with a FAILED result carrying FILE_NOT_FOUND."""
    job_config = FetcherJobConfig(
        HTTP_SRC,
        DST,
        zk_node_path=ZK_NODE_PATH,
        zookeeper_ensemble_hosts=ZK_ENSEMBLE,
    )

    retrying_fetch(job_config)

    expected_result = FetcherResult(status=FetcherStatus.FAILED, message=FILE_NOT_FOUND)
    mock_update_zk_node.assert_called_with(ZK_NODE_PATH, ZK_ENSEMBLE, expected_result)
Example #5
0
def test_fetcher_updates_zk_with_directory(mock_s3_to_s3, mock_update_zk_node):
    """When the S3-to-S3 transfer reports a directory, the DONE result carries FetchedType.DIRECTORY."""
    # It's a directory
    mock_s3_to_s3.return_value = FetchedType.DIRECTORY

    job_config = FetcherJobConfig(
        S3_SRC,
        DST,
        zk_node_path=ZK_NODE_PATH,
        zookeeper_ensemble_hosts=ZK_ENSEMBLE,
    )

    retrying_fetch(job_config)

    expected_result = FetcherResult(FetcherStatus.DONE, FetchedType.DIRECTORY, SUCCESS_MESSAGE)
    mock_update_zk_node.assert_called_with(ZK_NODE_PATH, ZK_ENSEMBLE, expected_result)
Example #6
0
def test_fetcher_updates_zk_once(mock_http_to_s3_server_error, mock_update_zk_node):
    """With max_attempts=1, the mocked ZK update receives a FAILED result carrying SERVER_ERROR."""
    single_attempt = RetryConfig(max_attempts=1)
    job_config = FetcherJobConfig(
        HTTP_SRC, DST, zk_node_path=ZK_NODE_PATH, zookeeper_ensemble_hosts=ZK_ENSEMBLE, retry=single_attempt
    )

    retrying_fetch(job_config)

    expected_result = FetcherResult(status=FetcherStatus.FAILED, message=SERVER_ERROR)
    mock_update_zk_node.assert_called_with(ZK_NODE_PATH, ZK_ENSEMBLE, expected_result)
Example #7
0
def retrying_fetch(cfg: FetcherJobConfig):
    """Run the fetch described by ``cfg`` under a retry policy and publish the outcome.

    The fetch is wrapped in ``retry`` using the multiplier, maximum wait and
    attempt limit from ``cfg.retry``; the retry predicate accepts only
    ``RetryableError``. The final outcome is passed to ``_update_zk_node``:
    FAILED with the exception text when a ``RetryableError`` or
    ``UnRetryableError`` escapes, otherwise DONE with the fetched type.

    :param cfg: the fetcher job configuration
    """

    def _is_retryable(exc):
        return isinstance(exc, RetryableError)

    @retry(
        retry_on_exception=_is_retryable,
        wait_exponential_multiplier=cfg.retry.exp_multiplier,
        wait_exponential_max=cfg.retry.exp_max,
        stop_max_attempt_number=cfg.retry.max_attempts,
    )
    def _attempt_fetch(job_cfg) -> FetchedType:
        return _fetch(job_cfg)

    try:
        fetched_type = _attempt_fetch(cfg)
    except (RetryableError, UnRetryableError) as ex:
        logger.exception("Download error. Unretryable or out of attempts")
        failure = FetcherResult(status=FetcherStatus.FAILED, message=str(ex))
        _update_zk_node(cfg, failure)
    else:
        success = FetcherResult(status=FetcherStatus.DONE, message=SUCCESS_MESSAGE, type=fetched_type)
        _update_zk_node(cfg, success)
Example #8
0
    def __handle_node_state(self, zk_node_path: str, on_done: DownloadOnDone, content: DownloadableContent):
        """Read the node's current state, re-arm the watch, and finish the download if it is final.

        :param zk_node_path: path of the ZooKeeper node holding the serialized result
        :param on_done: callback invoked with ``content`` once the status is final
        :param content: the content the node belongs to; updated in place from the result
        """

        def _watcher(evt):
            self.__on_zk_changed(evt, on_done, content)

        # Reading with a watcher means later changes of the node come back through _watcher
        raw_state, _ = self._zk.get(zk_node_path, _watcher)
        result: FetcherResult = FetcherResult.from_binary(raw_state)

        logger.info("Fetch request %s result = %s", content, result)

        if not result.status.final:
            return

        result.update(content)
        # We clean up
        self._zk.delete(zk_node_path)
        on_done(content)
Example #9
0
    def _update_nodes_to_cancel(self, client_id: str, action_id: str) -> int:
        """Set every non-final child node of the action to CANCELED.

        Each write is conditional on the node version that was read; on a
        version conflict the node is re-read and the attempt repeated. Nodes
        that are already final, or that disappear meanwhile, are left alone.

        :param client_id: client owning the action
        :param action_id: action whose download nodes should be canceled
        :return: number of nodes that were switched to CANCELED
        """
        # As always with stop-flags, we can face a bunch of race conditions
        parent_path = self._get_node_path(client_id, action_id)
        canceled_count = 0

        try:
            for child in self._zk.get_children(parent_path):
                node_path = parent_path + "/" + child
                logger.info(f"Updating node {node_path}")

                try:
                    while True:
                        raw_state, node_stat = self._zk.get(node_path)
                        result: FetcherResult = FetcherResult.from_binary(raw_state)

                        # The guy is final - it will not take long for us to cancel it.
                        # The job is finished.
                        # So now we are in a race with a zookeeper listener, that will pass the results downstream.
                        if result.status.final:
                            logger.info(f"{node_path}: not to be canceled - already finished")
                            break

                        result.status = FetcherStatus.CANCELED
                        payload = result.to_binary()
                        try:
                            self._zk.set(node_path, payload, version=node_stat.version)
                        except BadVersionError:
                            # Somebody else wrote first - read again and retry
                            logger.info(f"{node_path}: the node was updated meanwhile")
                            continue

                        canceled_count += 1
                        logger.info(f"{node_path}: canceled")
                        break
                except NoNodeError:
                    # The task was just finished - status was reported to customer and the node got deleted.
                    # OK. It's not our deal anymore
                    logger.info(f"{node_path}: the node was deleted meanwhile")
        except NoNodeError:
            # Absorb NoNodeError
            logger.info(f"{parent_path}: node not found")

        return canceled_count
Example #10
0
import pytest
from pytest import fixture

from bai_kafka_utils.events import FetcherStatus, FetchedType, DownloadableContent
from bai_zk_utils.states import FetcherResult

# Message text available to failure-path tests.
ERROR = "Error"

# Reference result: status DONE, type FILE, message "Success".
FETCHER_DONE_RESULT = FetcherResult(FetcherStatus.DONE, FetchedType.FILE,
                                    "Success")

# Serialized form that FETCHER_DONE_RESULT.to_binary() is expected to produce (see test_serialize_state).
STATE_DONE_BIN = b'{"status": "DONE", "type": "FILE", "message": "Success"}'

# Payload carrying only a RUNNING status, with no type or message keys.
STATE_RUNNING_BIN = b'{"status": "RUNNING"}'

# Payload with the status value "STRANGE" - presumably not a valid FetcherStatus; confirm against the enum.
STATE_STRANGE_BIN = b'{"status": "STRANGE"}'


def test_serialize_state():
    """Serializing FETCHER_DONE_RESULT yields exactly STATE_DONE_BIN."""
    serialized = FETCHER_DONE_RESULT.to_binary()
    assert STATE_DONE_BIN == serialized


def test_deserialize_state():
    """Deserializing STATE_DONE_BIN yields a result equal to FETCHER_DONE_RESULT."""
    decoded = FetcherResult.from_binary(STATE_DONE_BIN)
    assert decoded == FETCHER_DONE_RESULT


def test_deserialize_state_final():
    """The status decoded from STATE_DONE_BIN is flagged as final."""
    decoded_status = FetcherResult.from_binary(STATE_DONE_BIN).status
    assert decoded_status.final

Example #11
0
def update_zk_node(zk_node_path: str, zookeeper_ensemble: str, state: FetcherResult):
    """Write ``state`` to the ZooKeeper node at ``zk_node_path``.

    A client is created for ``zookeeper_ensemble`` and used for a single
    ``set``. It is always stopped and closed afterwards, even when the write
    raises, so that a failed update does not leak the connection.

    :param zk_node_path: path of the node to update
    :param zookeeper_ensemble: hosts string passed to ``KazooClient``
    :param state: result to serialize with ``to_binary()`` and store in the node
    """
    zk = KazooClient(hosts=zookeeper_ensemble)
    zk.start()
    try:
        zk.set(zk_node_path, state.to_binary())
    finally:
        # stop() ends the session; close() frees the resources of the stopped client
        zk.stop()
        zk.close()
Example #12
0
def test_deserialize_state_final():
    """The status decoded from STATE_DONE_BIN is flagged as final."""
    decoded_status = FetcherResult.from_binary(STATE_DONE_BIN).status
    assert decoded_status.final
Example #13
0
def test_deserialize_state():
    """Deserializing STATE_DONE_BIN yields a result equal to FETCHER_DONE_RESULT."""
    decoded = FetcherResult.from_binary(STATE_DONE_BIN)
    assert decoded == FETCHER_DONE_RESULT
Example #14
0
def test_deserialize_state_not_final():
    """The status decoded from STATE_RUNNING_BIN is not flagged as final."""
    decoded_status = FetcherResult.from_binary(STATE_RUNNING_BIN).status
    assert not decoded_status.final
Example #15
0
from kazoo.client import KazooClient
from unittest.mock import patch, create_autospec

from bai_zk_utils import zk_client
from bai_zk_utils.states import FetcherResult
from bai_kafka_utils.events import FetcherStatus
from bai_zk_utils.zk_client import update_zk_node

# Result written by the test. The message is passed by keyword because the second
# positional slot of FetcherResult is used for the fetched type elsewhere.
FETCHER_RESULT = FetcherResult(FetcherStatus.DONE, message="Success")
# Node path and ensemble hosts string handed to update_zk_node.
ZK_NODE_PATH = "/zk/path"
ZK_ENSEMBLE = "Z1"


@patch.object(zk_client, "KazooClient")
def test_update_zk_node(mockKazooClient):
    """update_zk_node builds a client for the ensemble, starts it, sets the node data and stops it."""
    client_double = create_autospec(KazooClient)
    mockKazooClient.return_value = client_double

    update_zk_node(ZK_NODE_PATH, ZK_ENSEMBLE, FETCHER_RESULT)

    mockKazooClient.assert_called_with(hosts=ZK_ENSEMBLE)

    client_double.start.assert_called_once()
    expected_payload = FETCHER_RESULT.to_binary()
    client_double.set.assert_called_with(ZK_NODE_PATH, expected_payload)
    client_double.stop.assert_called_once()
Example #16
0
def test_deserialize_state_strange():
    """Deserializing STATE_STRANGE_BIN must raise."""
    # Any Exception subclass is accepted; the exact error type is not pinned here
    with pytest.raises(Exception):
        FetcherResult.from_binary(STATE_STRANGE_BIN)
Example #17
0
class DownloadManager:
    """Coordinates content downloads through ZooKeeper nodes, a dispatcher and per-content locks.

    For every fetch request a write lock is acquired for the content, an
    ephemeral ZooKeeper node is created and watched, and the download is handed
    to the dispatcher. Once the node holds a final status, the result is copied
    onto the content, the node is deleted and the caller's callback is invoked.
    """

    @staticmethod
    def __get_node_path(client_id: str, action_id: str = None, content: DownloadableContent = None) -> str:
        """Build the default node path ``/downloads/<client_id>[/<action_id>[/<md5 of content>]]``.

        ``content`` only contributes to the path when ``action_id`` is given.
        """
        # MD5 has impact on the node - so different locks etc.
        path = f"/downloads/{client_id}"
        if action_id:
            path += f"/{action_id}"
            if content:
                path += f"/{md5sum(str(content))}"
        return path

    # Data every freshly created download node starts with: a serialized PENDING result
    INITIAL_DATA = FetcherResult(FetcherStatus.PENDING).to_binary()

    @staticmethod
    def _set_failed(content: DownloadableContent, message: str):
        """Mark ``content`` as FAILED with ``message`` and clear its destination."""
        content.message = message
        content.status = FetcherStatus.FAILED
        content.dst = None

    def __init__(
        self,
        zk: KazooClient,
        download_dispatcher: DownloadDispatcher,
        lock_manager: RWLockManager,
        get_node_path: NodePathSource = None,
        size_estimator: ContentSizeEstimator = None,
    ):
        """
        :param zk: ZooKeeper client used for the download nodes
        :param download_dispatcher: dispatches, cleans up and cancels the actual fetches
        :param lock_manager: provides the write lock taken for a content before fetching
        :param get_node_path: builds node paths; defaults to the built-in ``/downloads/...`` scheme
        :param size_estimator: estimates the size of a source; defaults to ``estimate_fetch_size``
        """
        self._zk = zk
        self._download_dispatcher = download_dispatcher
        self._get_node_path = get_node_path or DownloadManager.__get_node_path

        self._lock_manager = lock_manager
        self._size_estimator = size_estimator or estimate_fetch_size

    def start(self) -> None:
        """Start the ZooKeeper client."""
        logger.info("Start")
        self._zk.start()

    def fetch(self, content: DownloadableContent, event: BenchmarkEvent, on_done: DownloadOnDone) -> None:
        """Request the download of ``content`` on behalf of ``event``.

        The work starts once the write lock for the content is granted.

        :param content: the content to download; updated in place with the outcome
        :param event: the event that requested the download
        :param on_done: callback invoked with ``content`` when the download reached a final state
        """
        logger.info("Fetch request %s", content)

        def on_content_locked(content: DownloadableContent, lock: RWLock):
            def _on_done_and_unlock(content: DownloadableContent):
                # Notify the caller, clean up the dispatched work, then give the lock back
                on_done(content)
                self._download_dispatcher.cleanup(content, event)
                lock.release()

            try:
                content.size_info = self._size_estimator(content.src)
            except Exception as e:
                msg = f"Failed to estimate the size of content {content.src}: {str(e)}"
                logger.exception(msg)
                FetcherResult(FetcherStatus.FAILED, None, msg).update(content)
                on_done(content)
                lock.release()
                return

            # This node will be killed if I die
            zk_node_path = self._get_node_path(event.client_id, event.action_id, content)
            self._zk.create(zk_node_path, DownloadManager.INITIAL_DATA, ephemeral=True, makepath=True)

            self.__handle_node_state(zk_node_path, _on_done_and_unlock, content)

            # size_info is already set by the guarded estimate above. It is deliberately
            # not re-estimated here: an exception at this point would leave the lock held
            # and on_done uncalled.
            self._download_dispatcher.dispatch_fetch(content, event, zk_node_path)

        self._lock_manager.acquire_write_lock(content, on_content_locked)

    def __on_zk_changed(self, event: WatchedEvent, on_done: DownloadOnDone, content: DownloadableContent):
        """React to a watch event of a download node.

        Deletions end the handling (an error is logged when the content has no
        status yet); any other event re-reads the node state.
        """
        if event.type == EventType.DELETED:
            if not content.status:  # Something not final - and deleted???
                logger.error("Deleted node %s for the not finalized content %s", event.path, content)
                # TODO More sophisticated handling of that?
            return

        self.__handle_node_state(event.path, on_done, content)

    def __handle_node_state(self, zk_node_path: str, on_done: DownloadOnDone, content: DownloadableContent):
        """Read the node's current state, re-arm the watch, and finish the download if it is final.

        :param zk_node_path: path of the ZooKeeper node holding the serialized result
        :param on_done: callback invoked with ``content`` once the status is final
        :param content: the content the node belongs to; updated in place from the result
        """

        def _on_zk_changed(evt):
            self.__on_zk_changed(evt, on_done, content)

        # Reading with a watcher means later changes of the node come back through _on_zk_changed
        data, _ = self._zk.get(zk_node_path, _on_zk_changed)

        result: FetcherResult = FetcherResult.from_binary(data)

        logger.info("Fetch request %s result = %s", content, result)

        if result.status.final:
            result.update(content)

            # We clean up
            self._zk.delete(zk_node_path)

            on_done(content)

    def stop(self) -> None:
        """Stop the ZooKeeper client."""
        logger.info("Stop")
        self._zk.stop()

    def cancel(self, client_id: str, action_id: str) -> Tuple[List[str], int]:
        """Cancel all downloads of an action.

        :return: a pair of the dispatcher's ``cancel_all`` result and the number of
                 ZooKeeper nodes switched to CANCELED
        """
        logger.info(f"Canceling action {client_id}/{action_id}")
        return (
            self._download_dispatcher.cancel_all(client_id, action_id),
            self._update_nodes_to_cancel(client_id, action_id),
        )

    def _update_nodes_to_cancel(self, client_id: str, action_id: str) -> int:
        """Set every non-final child node of the action to CANCELED.

        Each write is conditional on the node version that was read; on a
        version conflict the node is re-read and the attempt repeated. Nodes
        that are already final, or that disappear meanwhile, are left alone.

        :return: number of nodes that were switched to CANCELED
        """
        # As always with stop-flags, we can face a bunch of race conditions
        zk_node_path = self._get_node_path(client_id, action_id)

        number_of_nodes_updated = 0

        try:
            for child in self._zk.get_children(zk_node_path):
                abs_path = zk_node_path + "/" + child

                logger.info(f"Updating node {abs_path}")

                try:
                    while True:
                        data, zk_stat = self._zk.get(abs_path)

                        result: FetcherResult = FetcherResult.from_binary(data)

                        # The guy is final - it will not take long for us to cancel it.
                        # The job is finished.
                        # So now we are in a race with a zookeeper listener, that will pass the results downstream.
                        if result.status.final:
                            logger.info(f"{abs_path}: not to be canceled - already finished")
                            break
                        result.status = FetcherStatus.CANCELED

                        new_data = result.to_binary()
                        try:
                            self._zk.set(abs_path, new_data, version=zk_stat.version)
                            number_of_nodes_updated += 1
                        except BadVersionError:
                            # Somebody else wrote first - read again and retry
                            logger.info(f"{abs_path}: the node was updated meanwhile")
                            continue
                        logger.info(f"{abs_path}: canceled")
                        break

                except NoNodeError:
                    logger.info(f"{abs_path}: the node was deleted meanwhile")
                    # The task was just finished - status was reported to customer and the node got deleted.
                    # OK. It's not our deal anymore
                    continue
        except NoNodeError:
            # Absorb NoNodeError
            logger.info(f"{zk_node_path}: node not found")

        return number_of_nodes_updated
Example #18
0
def _mock_result_binary(status: FetcherStatus, msg: str = None):
    """Return the serialized form of a FetcherResult with the given status and message.

    The message is passed by keyword: the second positional slot of
    ``FetcherResult`` is used for the fetched type elsewhere, so a positional
    ``msg`` would end up in the wrong field.

    :param status: status of the mocked result
    :param msg: optional message of the mocked result
    """
    return FetcherResult(status, message=msg).to_binary()