Example #1
def test_docker_child_task():
    dp = DockerProvider()

    task = dp.spawn(TaskDefinition(
        name=TEST_TASK,
        image=TEST_IMAGE,
        inputs={'child': True},
    ))

    # wait for the child to spawn
    child = None
    for i in range(0, 10):
        children = dp.find_child_containers(task.id)
        if len(children) > 0:
            child = children[0]
            break
        time.sleep(0.5)

    # make sure we got a child
    assert child is not None

    # test list tasks
    tasks = dp.list_all()
    task_ids = list(map(lambda t: t.id, tasks))
    assert task.id in task_ids
    assert child.labels[LABEL_TASK_ID] in task_ids

    # kill the whole family
    dp.destroy(task.id)

    children = dp.find_child_containers(task.id)
    assert len(children) == 0
Example #2
    async def init_node(self):
        cluster = env_get_cluster_provider()
        parent = env_get_task_definition()
        token = os.getenv(ENV_KERNEL_TOKEN)

        taskdef = TaskDefinition(
            name='kernel',
            image=parent.image,
            parent=parent.id,
            volumes=parent.volumes,
            env=parent.env,
            upstream=get_local_url(),
            meta={
                **parent.meta,
                'virtual': True,
            },
        )

        # set up notebook node
        self.node = NotebookNode(taskdef)
        await self.node.start(token)

        # instantiate kernel task
        self.task = KernelTask(node=self.node,
                               cluster=cluster,
                               taskdef=taskdef)

        # write globals
        self.shell.push({
            'cowait': cowait,
            'kernel': self.task,
            'tasks': self.task.subtasks,
            'NotebookRunner': NotebookRunner,
        })
Example #3
def extract_pod_taskdef(pod) -> TaskDefinition:
    for container in pod.spec.containers:
        for env in container.env:
            if env.name == ENV_TASK_DEFINITION:
                taskdef = env_unpack(env.value)
                return TaskDefinition(**taskdef)
    raise Exception('Failed to extract pod task definition')
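A minimal usage sketch of the helper above, assuming the standard Kubernetes Python client; the pod name and namespace are placeholders:

from kubernetes import client

core = client.CoreV1Api()
pod = core.read_namespaced_pod('agent', 'cowait')  # placeholder pod name / namespace
taskdef = extract_pod_taskdef(pod)
print(taskdef.name, taskdef.image)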
Example #4
    async def run(self,
                  name: str,
                  image: str,
                  env: dict = {},
                  routes: dict = {},
                  ports: dict = {},
                  cpu: any = 0,
                  memory: any = 0,
                  **inputs):
        taskdef = TaskDefinition(
            name=name,
            image=image,
            parent=self.id,
            inputs=inputs,
            ports=ports,
            routes=routes,
            cpu=cpu,
            memory=memory,
            env={
                **self.env,
                **env,
            },
        )

        # run it
        task = self.cluster.spawn(taskdef)

        # wait for container to exit
        await self.watch(task)

        # clean up
        self.cluster.destroy(task)
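A hedged usage sketch of the helper above; 'parent' stands for whatever object exposes run(), and the task name, image, and input are hypothetical:

async def example(parent):
    # extra keyword arguments (here 'duration') are collected by **inputs
    await parent.run(
        name='my_package.my_task',   # hypothetical task module
        image='cowait/task',         # hypothetical image
        duration=5,
    )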
Example #5
def agent(
    config: CowaitConfig,
    detach: bool = False,
    upstream: str = None,
) -> None:
    logger = RunLogger(quiet=False, raw=False)
    try:
        cluster = config.get_cluster()

        if cluster.type == 'api':
            raise CliError("Error: Can't deploy agent using an API cluster")

        token = uuid()
        if cluster.type == 'docker':
            token = ''

        cluster.destroy('agent')

        # create task definition
        taskdef = TaskDefinition(
            id='agent',
            name='cowait.tasks.agent',
            image=DEFAULT_BASE_IMAGE,
            upstream=upstream,
            routes={
                '/': 80,
            },
            meta={
                'http_token': token,
            },
        )

        # submit task to cluster
        task = cluster.spawn(taskdef)

        if detach:
            logger.header('detached')
            return

        def destroy(*args):
            logger.header('interrupt')
            cluster.destroy(task.id)
            sys.exit(0)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task)
            logger.header('task output')
            for log in logs:
                logger.handle(log)

        logger.header()

    except ProviderError as e:
        raise CliError(f'Provider error: {e}')

    except TaskCreationError as e:
        raise CliError(f'Task creation error: {e}')
Example #6
def spawn_test_task(name, **taskdef: dict) -> Task:
    """ Spawns a task using the test cluster provider """
    provider = get_test_provider()
    return provider.spawn(
        TaskDefinition(**{
            'name': name,
            'image': DEFAULT_BASE_IMAGE,
            **taskdef
        }))
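A hypothetical call from a test; the task name is a placeholder, and any extra keyword arguments override the defaults merged into the TaskDefinition above:

task = spawn_test_task(
    'cowait.test.tasks.utility_task',   # placeholder task name
    inputs={'forever': False},
)
assert task.id is not None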
Example #7
def agent(
    config: CowaitConfig,
    detach: bool = False,
    upstream: str = None,
) -> None:
    try:
        context = CowaitContext.open()
        cluster_name = context.get('cluster', config.default_cluster)
        cluster = config.get_cluster(cluster_name)

        if cluster.type == 'api':
            raise CliError("Error: Can't deploy agent using an API cluster")

        cluster.destroy('agent')

        # create task definition
        taskdef = TaskDefinition(
            id='agent',
            name='cowait.tasks.agent',
            image=DEFAULT_BASE_IMAGE,
            upstream=upstream,
            routes={
                '/': 80,
            },
            meta={
                'http_token': uuid(),
            },
        )

        # submit task to cluster
        task = cluster.spawn(taskdef)

        if detach:
            printheader('detached')
            return

        def destroy(*args):
            print()
            printheader('interrupt')
            cluster.destroy(task.id)
            sys.exit(0)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task)
            printheader('task output')
            for log in logs:
                print(log, flush=True)

        printheader()

    except ProviderError as e:
        raise CliError(f'Provider error: {e}')

    except TaskCreationError as e:
        raise CliError(f'Task creation error: {e}')
Example #8
def test_create_docker_task():
    dp = DockerProvider()
    docker = dp.docker

    env_vars = {'hello': 'team'}

    taskdef = TaskDefinition(
        name=TEST_TASK,
        image=TEST_IMAGE,
        parent='parent',
        env=env_vars,
        inputs={
            'hello': '123',
            'child': False,
        },

        # disables any output.
        # this is hacky and should be refactored
        # we need a proper way to disable all logging
        upstream='disabled',
    )

    # run task
    task = dp.spawn(taskdef)

    assert task.id == taskdef.id
    assert hasattr(task, 'container')
    assert hasattr(task.container, 'id')

    # try to grab the container from docker api
    container = docker.containers.get(task.container.id)
    assert task.container == container

    # make sure container is properly labeled
    assert container.labels == {
        LABEL_TASK_ID: task.id,
        LABEL_PARENT_ID: 'parent',
    }

    # wait for container to execute
    result = container.wait()
    assert result['StatusCode'] == 0

    # test task will dump info as json, so we can pick it up
    # make sure it matches what we put in.
    logs = container.logs()
    task_dump = json.loads(logs)

    # taskdef
    assert taskdef.serialize() == task_dump['taskdef']

    # actual environment variables
    for key, val in env_vars.items():
        assert task_dump['env'][key] == val
Example #9
def test_rpc():
    dp = DockerProvider()

    task = dp.spawn(TaskDefinition(
        name='cowait.test.tasks.rpc_parent',
        image=DEFAULT_BASE_IMAGE,
    ))

    # wait for execution
    result = task.container.wait()
    assert result['StatusCode'] == 0
Example #10
def test_max_env_length():
    """ Passing too large inputs should raise a ProviderError """
    random_data = uuid(2 * MAX_ENV_LENGTH, lower=False)

    with pytest.raises(ProviderError):
        cp = ClusterProvider('test')
        cp.create_env(TaskDefinition(
            'test-task',
            image='imaginary-image',
            inputs={
                'ohshit': random_data,
            },
        ))
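The test above relies on the provider refusing to pack an oversized task definition into an environment variable. For illustration only, a rough sketch of such a guard (the real ClusterProvider.create_env and its packing format may differ):

import json

def create_env_sketch(taskdef: TaskDefinition, max_length: int = MAX_ENV_LENGTH) -> dict:
    # stand-in packing; only the length check matters for the test
    packed = json.dumps(taskdef.serialize())
    if len(packed) > max_length:
        raise ProviderError('Task definition too large to fit in the environment')
    return {'TASK_DEFINITION': packed}  # variable name is a placeholder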
Example #11
def test_create_docker_task():
    dp = DockerProvider()
    docker = dp.docker

    env_vars = {'hello': 'team'}

    taskdef = TaskDefinition(
        name=TEST_TASK,
        image=TEST_IMAGE,
        parent='parent',
        env=env_vars,
        inputs={
            'hello': '123',
            'child': False,
        },
    )

    # run task
    task = dp.spawn(taskdef)

    assert task.id == taskdef.id
    assert hasattr(task, 'container')
    assert hasattr(task.container, 'id')

    # try to grab the container from docker api
    container = docker.containers.get(task.container.id)
    assert task.container == container

    # make sure container is properly labeled
    assert container.labels[LABEL_TASK_ID] == task.id
    assert container.labels[LABEL_PARENT_ID] == 'parent'

    # test task will dump info as json, so we can pick it up
    # make sure it matches what we put in.
    task_dump = None
    for msg in task.logs():
        print(msg)
        if msg['type'] == 'task/log':
            task_dump = json.loads(msg['data'])

    # wait for container to execute
    result = container.wait()
    assert result['StatusCode'] == 0

    # taskdef
    assert task_dump is not None
    assert taskdef.serialize() == task_dump['taskdef']

    # actual environment variables
    for key, val in env_vars.items():
        assert task_dump['env'][key] == val
Example #12
def test_docker_task_error():
    dp = DockerProvider()

    task = dp.spawn(TaskDefinition(
        name=TEST_TASK,
        image=TEST_IMAGE,
        inputs={'error': True},
    ))

    container = dp.docker.containers.get(task.container.id)
    assert task.container == container

    result = container.wait()
    assert result['StatusCode'] != 0
Example #13
def test_docker_child_error():
    dp = DockerProvider()

    task = dp.spawn(TaskDefinition(
        name=TEST_TASK,
        image=TEST_IMAGE,
        inputs={'child_error': True},
    ))

    container = dp.docker.containers.get(task.container.id)
    assert task.container == container

    # child error should cause the parent to fail
    result = container.wait()
    assert result['StatusCode'] != 0
Example #14
def test(
    config: CowaitConfig,
    push: bool,
):
    logger = TestLogger()
    try:
        context = CowaitContext.open()
        cluster = config.get_cluster()

        if push:
            run_push()
        else:
            run_build()

        # execute the test task within the current image
        task = cluster.spawn(
            TaskDefinition(
                name='cowait.test',
                image=context.image,
            ))

        def destroy(*args):
            logger.header('interrupt')
            cluster.destroy(task.id)
            sys.exit(1)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task)
            logger.header('task output')
            for msg in logs:
                logger.handle(msg)

        logger.header()

        # grab task result
        passing = cluster.wait(task)
        sys.exit(0 if passing else 1)

    except ProviderError as e:
        logger.print_exception(f'Provider Error: {e}')
        sys.exit(1)

    except TaskCreationError as e:
        logger.print_exception(f'Error creating task: {e}')
        sys.exit(1)
Example #15
    async def spawn(
        self,
        name: str,
        image: str,
        id: str = None,
        ports: dict = {},
        routes: dict = {},
        inputs: dict = {},
        meta: dict = {},
        env: dict = {},
        cpu: str = '0',
        memory: str = '0',
        owner: str = '',
        **kwargs: dict,
    ) -> dict:
        if not isinstance(name, str) and issubclass(name, Task):
            name = name.__module__

        # todo: throw error if any input is a coroutine

        task = self.cluster.spawn(
            TaskDefinition(
                id=id,
                name=name,
                image=image,
                upstream=get_local_url(),
                meta=meta,
                ports=ports,
                routes=routes,
                env=env,
                cpu=cpu,
                memory=memory,
                owner=owner,
                inputs={
                    **inputs,
                    **kwargs,
                },
            ))

        # authorize id
        self.node.server.auth.add_token(id)

        # register with subtask manager
        self.subtasks.watch(task)

        return task.serialize()
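A hedged example of calling the method above; 'api' and the task class are hypothetical. As the isinstance check shows, a Task subclass can be passed in place of a name string, in which case its module path is used as the task name:

async def example(api):
    result = await api.spawn(
        MyChildTask,            # resolved to MyChildTask.__module__
        image='cowait/task',    # hypothetical image
        greeting='hello',       # collected into inputs via **kwargs
    )
    return result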
Example #16
def test(
    config: CowaitConfig,
    push: bool,
):
    try:
        context = CowaitContext.open()
        cluster_name = context.get('cluster', config.default_cluster)
        cluster = config.get_cluster(cluster_name)

        if push:
            run_push()
        else:
            run_build()

        # execute the test task within the current image
        task = cluster.spawn(
            TaskDefinition(
                name='cowait.test',
                image=context.get_image_name(),
            ))

        def destroy(*args):
            print()
            printheader('interrupt')
            cluster.destroy(task.id)
            os._exit(1)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task)
            printheader('task output')
            for log in logs:
                print(log, flush=True)

    except TaskCreationError as e:
        printheader('error')
        print('Error creating task:', str(e))

    except ProviderError as e:
        printheader('error')
        print('Provider error:', str(e))

    finally:
        printheader()
Example #17
def test_kill_docker_task():
    dp = DockerProvider()

    task = dp.spawn(
        TaskDefinition(
            name=TEST_TASK,
            image=TEST_IMAGE,
            inputs={'forever': True},
        ))

    # ensure container exists
    dp.docker.containers.get(task.container.id)

    # destroy it
    dp.destroy(task.id)

    # ensure it no longer exists
    with pytest.raises(docker.errors.NotFound):
        dp.docker.containers.get(task.container.id)
Example #18
def test_kill_docker_task():
    dp = DockerProvider()

    task = dp.spawn(TaskDefinition(
        name=TEST_TASK,
        image=TEST_IMAGE,
        inputs={'forever': True},
    ))

    # ensure container exists
    dp.docker.containers.get(task.container.id)

    # destroy it
    dp.destroy(task.id)

    # ensure it no longer exists
    with pytest.raises(docker.errors.NotFound):
        try:
            dp.docker.containers.get(task.container.id)
        except requests.exceptions.ChunkedEncodingError:
            # workaround for docker for mac bug:
            # https://github.com/docker/docker-py/issues/2696
            raise docker.errors.NotFound('Not found')
Example #19
def run(
    config: CowaitConfig,
    task: str,
    name: str = None,
    inputs: dict = {},
    env: dict = {},
    ports: dict = {},
    routes: dict = {},
    build: bool = False,
    upstream: str = None,
    detach: bool = False,
    cpu: str = None,
    cpu_limit: str = None,
    memory: str = None,
    memory_limit: str = None,
    raw: bool = False,
    quiet: bool = False,
    affinity: str = None,
):
    logger = RunLogger(raw, quiet)
    try:
        context = CowaitContext.open()
        cluster = config.get_cluster()

        # figure out image name
        remote_image = True
        image, task = parse_task_image_name(task, None)
        if image is None:
            if build:
                build_cmd(quiet=quiet or raw)
            image = context.image
            remote_image = False

        volumes = context.get('volumes', {})
        if not isinstance(volumes, dict):
            raise TaskCreationError('Invalid volume configuration')
        if not remote_image:
            volumes['/var/task'] = {
                'bind': {
                    'src': context.root_path,
                    'mode': 'rw',
                },
            }

        if not affinity:
            affinity = {}

        elif affinity.lower() == 'spread':
            affinity = {
                "type": "spread",
                "label": {
                    "key": task + "-key",
                    "value": task + "-value"
                }
            }

        elif affinity.lower() == 'group':
            affinity = {
                "type": "group",
                "label": {
                    "key": task + "-key",
                    "value": task + "-value"
                }
            }

        # default to agent as upstream
        agent = cluster.find_agent()

        # create task definition
        taskdef = TaskDefinition(
            id=name,
            name=task,
            image=image,
            inputs=inputs,
            env={
                **context.environment,
                **env,
            },
            ports=ports,
            routes=routes,
            parent=None,  # root task
            upstream=context.coalesce('upstream', upstream, agent),
            owner=getpass.getuser(),
            volumes=volumes,
            cpu=context.override('cpu', cpu),
            cpu_limit=context.override('cpu_limit', cpu_limit),
            memory=context.override('memory', memory),
            memory_limit=context.override('memory_limit', memory_limit),
            storage=context.get('storage', {}),
            affinity=affinity)

        # print execution info
        logger.print_info(taskdef, config.default_cluster)

        # submit task to cluster
        task = cluster.spawn(taskdef)

        if detach:
            logger.header('detached')
            return

        def destroy(*args):
            logger.header('interrupt')
            cluster.destroy(task.id)
            sys.exit(1)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task)
            logger.header('task output')
            for msg in logs:
                logger.handle(msg)

        logger.header()

    except ProviderError as e:
        logger.print_exception(f'Provider Error: {e}')

    except TaskCreationError as e:
        logger.print_exception(f'Error creating task: {e}')
Example #20
def test(
    config: Config,
    cluster_name: str = None,
    mount: bool = True,
    cpu: str = None,
    cpu_limit: str = None,
    memory: str = None,
    memory_limit: str = None,
    marks: str = None,
    verbose: bool = None,
    capture: bool = None,
):
    logger = TestLogger()
    try:
        context = Context.open(config)
        cluster = context.get_cluster(cluster_name)

        volumes = {}
        if mount and cluster.type == 'docker':
            # when testing in docker, mount the local directory
            # this avoids the problem of having to constantly rebuild in order to test
            print('** Mounting', context.root_path)
            volumes['/var/task'] = {
                'bind': {
                    'src': context.root_path,
                    'mode': 'rw',
                    'inherit': 'same-image',
                },
            }

        # execute the test task within the current image
        task = cluster.spawn(
            TaskDefinition(
                name='cowait.test',
                image=context.image,
                owner=getpass.getuser(),
                env={
                    **context.environment,
                    **context.dotenv,
                },
                volumes={
                    **context.get('volumes', {}),
                    **volumes,
                },
                inputs={
                    'marks': marks,
                    'verbose': verbose,
                    'capture': capture,
                },
                cpu=context.override('cpu', cpu),
                cpu_limit=context.override('cpu_limit', cpu_limit),
                memory=context.override('memory', memory),
                memory_limit=context.override('memory_limit', memory_limit),
            ))

        def destroy(*args):
            logger.header('interrupt')
            cluster.destroy(task.id)
            sys.exit(1)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task.id)
            logger.header('task output')
            for msg in logs:
                logger.handle(msg)

        logger.header()

        # grab task result
        passing = cluster.wait(task)
        sys.exit(0 if passing else 1)

    except ProviderError as e:
        logger.print_exception(f'Provider Error: {e}')
        sys.exit(1)

    except TaskCreationError as e:
        logger.print_exception(f'Error creating task: {e}')
        sys.exit(1)
Example #21
def extract_container_taskdef(container) -> TaskDefinition:
    for env in container.attrs['Config']['Env']:
        if ENV_TASK_DEFINITION == env[0:len(ENV_TASK_DEFINITION)]:
            data = env[len(ENV_TASK_DEFINITION) + 1:]
            return TaskDefinition(**env_unpack(data))
    raise Exception('Unable to unpack container task definition')
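A hypothetical usage of the helper above with the Docker SDK; the container id is a placeholder:

import docker

client = docker.from_env()
container = client.containers.get('some-task-id')   # placeholder container id
taskdef = extract_container_taskdef(container)
print(taskdef.id, taskdef.image)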
Example #22
def notebook(config, image: str = None, cluster_name: str = None) -> None:
    context = Context.open(config)

    if not context.notebook:
        print('Notebook functionality is not enabled.')
        print('To enable, set features.notebook to True in cowait.yml and rebuild.')
        sys.exit(1)

    if image is not None:
        print('Remote images are currently not supported')
        sys.exit(1)

    volumes = {
        '/var/task': {
            'bind': {
                'src': os.getcwd(),
                'mode': 'rw',
                'inherit': 'same-image',
            },
        }
    }

    cluster = context.get_cluster(cluster_name)

    # Docker
    if cluster.type == 'docker':
        return run_cmd(
            config=config,
            task='cowait.notebook',
            build=False,
            image=image,
            routes={
                '/': '8888',
            },
            cluster_name=cluster_name,
            volumes=volumes,
        )

    # check for clientfs
    clientfs_executable = './clientfs-' + platform.system().lower()
    if not os.path.exists(clientfs_executable):
        print('Kubernetes notebooks are not supported in this build of Cowait')
        sys.exit(1)

    # Kubernetes
    core = client.CoreV1Api()
    notebook_id = 'notebook-' + uuid(4)

    core.create_namespaced_persistent_volume_claim(
        namespace=cluster.namespace,
        body=client.V1PersistentVolumeClaim(
            metadata=client.V1ObjectMeta(
                name=notebook_id,
                namespace=cluster.namespace,
            ),
            spec=client.V1PersistentVolumeClaimSpec(
                storage_class_name='clientfs',
                access_modes=['ReadWriteMany'],
                resources=client.V1ResourceRequirements(
                    requests={
                        'storage': '1G',
                    },
                ),
            ),
        ),
    )

    def delete_pvc(task_id):
        print('destroy', task_id)
        if task_id != notebook_id:
            return

        print('* stopping clientfs')
        clientfs.terminate()

        print('* deleting volume')
        core.delete_namespaced_persistent_volume_claim(notebook_id, cluster.namespace)

    cluster.on('kill', delete_pvc)

    pvc_id = None

    while True:
        time.sleep(1)
        volume = core.read_namespaced_persistent_volume_claim(notebook_id, cluster.namespace)
        if volume.status.phase == 'Bound':
            pvc_id = 'pvc-' + volume.metadata.uid
            print('* created volume', notebook_id, '/', pvc_id)
            break

    volumes['/var/task'] = {
        'persistent_volume_claim': {
            'claim_name': notebook_id,
        },
    }

    # start clientfs
    clientfs_host = cluster.args.get('clientfs', {}).get('host')
    print(f'* connecting clientfs volume to {clientfs_host}...')
    clientfs = subprocess.Popen([
        clientfs_executable,
        f"--proxy={clientfs_host}",
        f"--volume={pvc_id}"
    ])

    logger = RunLogger()
    try:
        # default to agent as upstream
        agent = cluster.find_agent()

        # create task definition
        taskdef = TaskDefinition(
            id=notebook_id,
            name='cowait.notebook',
            image=context.image,
            env={
                **context.extend('environment', {}),
                **context.dotenv,
            },
            routes={
                '/': '8888',
            },
            parent=None,  # root task
            upstream=agent,
            owner=getpass.getuser(),
            volumes=context.extend('volumes', volumes),
        )

        # print execution info
        logger.print_info(taskdef, cluster)

        # submit task to cluster
        task = cluster.spawn(taskdef)

        detach = False
        if detach:
            logger.header('detached')
            return

        def destroy(*args):
            logger.header('interrupt')
            cluster.destroy(task.id)
            sys.exit(1)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task.id)
            logger.header('task output')
            for msg in logs:
                logger.handle(msg)

    except Exception:
        traceback.print_exc()
        sys.exit(1)
Example #23
def run(
    config: CowaitConfig,
    task: str,
    name: str = None,
    cluster_name: str = None,
    inputs: dict = {},
    env: dict = {},
    ports: dict = {},
    routes: dict = {},
    build: bool = False,
    upstream: str = None,
    detach: bool = False,
    cpu: str = '0',
    memory: str = '0',
):
    try:
        context = CowaitContext.open()
        cluster_name = context.get('cluster', config.default_cluster)
        cluster = config.get_cluster(cluster_name)

        # figure out image name
        image, task = parse_task_image_name(task, None)
        if image is None:
            if build:
                build_cmd()
            image = context.get_image_name()

        # default to agent as upstream
        agent = cluster.find_agent()

        # create task definition
        taskdef = TaskDefinition(
            id=name,
            name=task,
            image=image,
            inputs=inputs,
            env={
                **context.get('environment', {}),
                **env,
            },
            ports=ports,
            routes=routes,
            upstream=context.coalesce('upstream', upstream, agent),
            parent=None,  # root task
            owner=os.getlogin(),
            cpu=cpu,
            memory=memory,
        )

        # print execution info
        printheader('task')
        print('   task:      ', taskdef.id)
        print('   cluster:   ', cluster_name)
        if taskdef.upstream:
            print('   upstream:  ', taskdef.upstream)
        print('   image:     ', image)
        print('   inputs:    ', inputs)
        print('   env:       ', env)

        # submit task to cluster
        task = cluster.spawn(taskdef)

        if detach:
            printheader('detached')
            return

        def destroy(*args):
            print()
            printheader('interrupt')
            cluster.destroy(task.id)
            sys.exit(0)

        with ExitTrap(destroy):
            # capture & print logs
            logs = cluster.logs(task)
            printheader('task output')
            for log in logs:
                print(log, flush=True)

    except ProviderError as e:
        printheader('error')
        print('Provider error:', str(e))

    except TaskCreationError as e:
        printheader('error')
        print('Error creating task:', str(e))

    finally:
        printheader()