Example #1
def local_port_forward_postgres():
    print('Port-forwarding postgres')
    postgres_pod_name = (check_output([
        'kubectl',
        'get',
        'pods',
        '--namespace',
        helm_namespace,
        '-l',
        'app=postgresql,release=dagster',
        '-o',
        'jsonpath="{.items[0].metadata.name}"',
    ]).decode('utf-8').strip('"'))
    forward_port = find_free_port()

    wait_for_pod(postgres_pod_name, namespace=helm_namespace)

    p = None
    try:
        p = subprocess.Popen([
            'kubectl',
            'port-forward',
            '--namespace',
            helm_namespace,
            postgres_pod_name,
            '{forward_port}:5432'.format(forward_port=forward_port),
        ])

        # Validate port forwarding works
        start = time.time()

        while True:
            if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                raise Exception(
                    'Timed out while waiting for postgres port forwarding')

            print(
                'Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be'
                ' available...' % (postgres_pod_name, forward_port))
            try:
                conn = psycopg2.connect(
                    database='test',
                    user='******',
                    password='******',
                    host='localhost',
                    port=forward_port,
                )
                conn.close()
                break
            except:  # pylint: disable=bare-except, broad-except
                time.sleep(1)
                continue

        yield forward_port

    finally:
        print('Terminating port-forwarding')
        if p is not None:
            p.terminate()
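Since the function yields the forwarded port and tears the tunnel down in its finally block, it is meant to be consumed as a context manager. A minimal usage sketch, assuming it is wrapped with contextlib.contextmanager and that helm_namespace is bound at module scope:

from contextlib import contextmanager

local_port_forward_postgres_cm = contextmanager(local_port_forward_postgres)

with local_port_forward_postgres_cm() as local_port:
    # The kubectl port-forward subprocess is alive inside this block, so
    # postgres is reachable on localhost:local_port.
    print('postgres available on localhost:%d' % local_port)
# Exiting the block terminates the kubectl subprocess.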
Example #2
def local_port_forward_postgres(namespace):
    print("Port-forwarding postgres")
    postgres_pod_name = (check_output([
        "kubectl",
        "get",
        "pods",
        "--namespace",
        namespace,
        "-l",
        "app=postgresql,release=dagster",
        "-o",
        'jsonpath="{.items[0].metadata.name}"',
    ]).decode("utf-8").strip('"'))
    forward_port = find_free_port()

    wait_for_pod(postgres_pod_name, namespace=namespace)

    p = None
    try:
        p = subprocess.Popen([
            "kubectl",
            "port-forward",
            "--namespace",
            namespace,
            postgres_pod_name,
            "{forward_port}:5432".format(forward_port=forward_port),
        ])

        # Validate port forwarding works
        start = time.time()

        while True:
            if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                raise Exception(
                    "Timed out while waiting for postgres port forwarding")

            print(
                "Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be"
                " available..." % (postgres_pod_name, forward_port))
            try:
                conn = psycopg2.connect(
                    database="test",
                    user="******",
                    password="******",
                    host="localhost",
                    port=forward_port,
                )
                conn.close()
                break
            except:  # pylint: disable=bare-except, broad-except
                time.sleep(1)
                continue

        yield forward_port

    finally:
        print("Terminating port-forwarding")
        if p is not None:
            p.terminate()
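Both variants rely on a find_free_port helper to choose the local end of the tunnel. A minimal sketch of such a helper, an assumption rather than dagster's actual implementation:

import socket

def find_free_port():
    # Binding to port 0 lets the OS pick any available port; the socket is
    # closed immediately, so there is a small race before kubectl binds it.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("", 0))
        return s.getsockname()[1]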
Example #3
def test_wait_for_pod(cluster_provider):  # pylint: disable=unused-argument
    api = kubernetes.client.CoreV1Api()

    with test_namespace() as namespace:
        # Without this sleep, we get the following error on kind:
        # HTTP response body:
        # {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"No API
        # token found for service account \"default\", retry after the token is automatically
        # created and added to the service
        # account","reason":"ServerTimeout","details":{"name":"create
        # pod","kind":"serviceaccounts","retryAfterSeconds":1},"code":500}
        time.sleep(5)

        try:
            api.create_namespaced_pod(body=construct_pod_manifest(
                'sayhi1', 'echo "hello world"'),
                                      namespace=namespace)
            wait_for_pod('sayhi1', namespace=namespace)
            assert retrieve_pod_logs('sayhi1',
                                     namespace=namespace) == 'hello world\n'

            api.create_namespaced_pod(body=construct_pod_manifest(
                'sayhi2', 'echo "hello world"'),
                                      namespace=namespace)
            wait_for_pod('sayhi2',
                         namespace=namespace,
                         wait_for_state=WaitForPodState.Terminated)

            with pytest.raises(
                    DagsterK8sError,
                    match='Timed out while waiting for pod to become ready'):
                api.create_namespaced_pod(
                    body=construct_pod_manifest('sayhi3',
                                                'sleep 5; echo "hello world"'),
                    namespace=namespace,
                )
                wait_for_pod('sayhi3', namespace=namespace, wait_timeout=1)

            with pytest.raises(
                    DagsterK8sError,
                    match=
                    'Pod did not exit successfully. Failed with message: None and pod logs: whoops!',
            ):
                api.create_namespaced_pod(
                    body=construct_pod_manifest('fail',
                                                'echo "whoops!"; exit 1'),
                    namespace=namespace,
                )
                wait_for_pod('fail',
                             namespace=namespace,
                             wait_for_state=WaitForPodState.Terminated)

        finally:
            for pod_name in ['sayhi1', 'sayhi2', 'sayhi3', 'fail']:
                try:
                    api.delete_namespaced_pod(pod_name, namespace=namespace)
                except kubernetes.client.rest.ApiException:
                    pass
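The test leans on a construct_pod_manifest(name, command) helper that is not shown. A hypothetical minimal version that would satisfy the assertions above (the busybox image and field layout are assumptions):

def construct_pod_manifest(pod_name, shell_command):
    # One-shot pod: runs the shell command in busybox and never restarts,
    # so the pod can reach the Terminated state the test waits for.
    return {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {'name': pod_name},
        'spec': {
            'containers': [{
                'name': pod_name,
                'image': 'busybox',
                'command': ['/bin/sh', '-c', shell_command],
            }],
            'restartPolicy': 'Never',
        },
    }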
Example #4
def test_wait_for_pod(cluster_provider):  # pylint: disable=unused-argument
    api = kubernetes.client.CoreV1Api()

    with get_helm_test_namespace() as namespace:
        # Without this sleep, we get the following error on kind:
        # HTTP response body:
        # {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"No API
        # token found for service account \"default\", retry after the token is automatically
        # created and added to the service
        # account","reason":"ServerTimeout","details":{"name":"create
        # pod","kind":"serviceaccounts","retryAfterSeconds":1},"code":500}
        time.sleep(5)

        try:
            api.create_namespaced_pod(body=construct_pod_manifest(
                "sayhi1", 'echo "hello world"'),
                                      namespace=namespace)
            wait_for_pod("sayhi1", namespace=namespace)
            assert retrieve_pod_logs("sayhi1",
                                     namespace=namespace) == "hello world\n"

            api.create_namespaced_pod(body=construct_pod_manifest(
                "sayhi2", 'echo "hello world"'),
                                      namespace=namespace)
            wait_for_pod("sayhi2",
                         namespace=namespace,
                         wait_for_state=WaitForPodState.Terminated)

            with pytest.raises(
                    DagsterK8sError,
                    match="Timed out while waiting for pod to become ready"):
                api.create_namespaced_pod(
                    body=construct_pod_manifest("sayhi3",
                                                'sleep 5; echo "hello world"'),
                    namespace=namespace,
                )
                wait_for_pod("sayhi3", namespace=namespace, wait_timeout=1)

            with pytest.raises(DagsterK8sError) as exc_info:
                api.create_namespaced_pod(
                    body=construct_pod_manifest("fail",
                                                'echo "whoops!"; exit 1'),
                    namespace=namespace,
                )
                wait_for_pod("fail",
                             namespace=namespace,
                             wait_for_state=WaitForPodState.Terminated)

            # Not matching the full message because this is an integration test;
            # unit tests cover the complete log message.
            assert "Pod did not exit successfully." in str(exc_info.value)

        finally:
            for pod_name in ["sayhi1", "sayhi2", "sayhi3", "fail"]:
                try:
                    api.delete_namespaced_pod(pod_name, namespace=namespace)
                except kubernetes.client.rest.ApiException:
                    pass
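wait_for_pod is dagster's own utility, but in spirit it is a polling loop over the pod's status conditions. A rough sketch of that idea (the name, timeout default, and plain Exception are assumptions, not dagster's exact behavior):

import time
import kubernetes

def wait_for_pod_ready(pod_name, namespace, wait_timeout=300):
    # Poll the pod until it reports the Ready condition, or raise on timeout.
    api = kubernetes.client.CoreV1Api()
    start = time.time()
    while True:
        if time.time() - start > wait_timeout:
            raise Exception("Timed out while waiting for pod to become ready")
        pod = api.read_namespaced_pod(name=pod_name, namespace=namespace)
        conditions = pod.status.conditions or []
        if any(c.type == "Ready" and c.status == "True" for c in conditions):
            return
        time.sleep(1)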
Example #5
def helm_chart(namespace, docker_image, should_cleanup=True):
    '''Install dagster-k8s helm chart.
    '''
    check.str_param(namespace, 'namespace')
    check.str_param(docker_image, 'docker_image')
    check.bool_param(should_cleanup, 'should_cleanup')

    print('--- \033[32m:helm: Installing Helm chart\033[0m')

    dagster_k8s_path = os.path.join(
        git_repository_root(), 'python_modules', 'libraries', 'dagster-k8s'
    )

    try:
        repository, tag = docker_image.split(':')
        pull_policy = image_pull_policy()

        helm_config = {
            'dagit': {
                'image': {'repository': repository, 'tag': tag, 'pullPolicy': pull_policy},
                'env': {'TEST_SET_ENV_VAR': 'test_dagit_env_var'},
                'env_config_maps': [TEST_CONFIGMAP_NAME],
                'env_secrets': [TEST_SECRET_NAME],
                'livenessProbe': {
                    'tcpSocket': {'port': 80},
                    'periodSeconds': 20,
                    'failureThreshold': 3,
                },
                'startupProbe': {
                    'tcpSocket': {'port': 80},
                    'failureThreshold': 6,
                    'periodSeconds': 10,
                },
            },
            'celery': {
                'image': {'repository': repository, 'tag': tag, 'pullPolicy': pull_policy},
                'extraWorkerQueues': [{'name': 'extra-queue-1', 'replicaCount': 1},],
            },
            'pipeline_run': {
                'image': {'repository': repository, 'tag': tag, 'pullPolicy': pull_policy},
                'env': {'TEST_SET_ENV_VAR': 'test_pipeline_run_env_var'},
                'env_config_maps': [TEST_CONFIGMAP_NAME],
                'env_secrets': [TEST_SECRET_NAME],
            },
            'serviceAccount': {'name': 'dagit-admin'},
            'postgresqlPassword': '******',
            'postgresqlDatabase': 'test',
            'postgresqlUser': '******',
        }
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)

        helm_cmd = [
            'helm',
            'install',
            '--namespace',
            namespace,
            '-f',
            '-',
            'dagster',
            os.path.join(dagster_k8s_path, 'helm', 'dagster'),
        ]

        print('Running Helm Install: \n', ' '.join(helm_cmd), '\nWith config:\n', helm_config_yaml)

        p = subprocess.Popen(
            helm_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        stdout, stderr = p.communicate(six.ensure_binary(helm_config_yaml))
        print('Helm install completed with stdout: ', stdout)
        print('Helm install completed with stderr: ', stderr)
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        print('Waiting for Dagit pod to be ready...')
        dagit_pod = None
        while dagit_pod is None:
            pods = kube_api.list_namespaced_pod(namespace=namespace)
            pod_names = [p.metadata.name for p in pods.items if 'dagit' in p.metadata.name]
            if pod_names:
                dagit_pod = pod_names[0]
            time.sleep(1)

        # Wait for Celery worker queues to become ready
        print('Waiting for celery workers')
        pods = kubernetes.client.CoreV1Api().list_namespaced_pod(namespace=namespace)
        pod_names = [p.metadata.name for p in pods.items if 'celery-workers' in p.metadata.name]
        for pod_name in pod_names:
            print('Waiting for Celery worker pod %s' % pod_name)
            wait_for_pod(pod_name, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print('Uninstalling helm chart')
            check_output(
                ['helm', 'uninstall', 'dagster', '--namespace', namespace], cwd=dagster_k8s_path,
            )
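Note the '-f -' argument in helm_cmd: Helm reads the values file from stdin, which is why the rendered YAML is passed through p.communicate() instead of a temp file. Like the port-forward helper, this generator is consumed as a context manager; a usage sketch, assuming a contextlib.contextmanager wrapper and an illustrative image name:

from contextlib import contextmanager

helm_chart_cm = contextmanager(helm_chart)

with helm_chart_cm('dagster-test', 'dagster.io/dagster-k8s:latest'):
    # The chart is installed and its pods are ready inside this block;
    # on exit, the finally clause runs helm uninstall.
    run_integration_tests()  # hypothetical test entry point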
Example #6
File: helm.py Project: zuik/dagster
def _helm_chart_helper(namespace,
                       should_cleanup,
                       helm_config,
                       helm_install_name,
                       chart_name="helm/dagster"):
    """Install helm chart."""
    check.str_param(namespace, "namespace")
    check.bool_param(should_cleanup, "should_cleanup")
    check.str_param(helm_install_name, "helm_install_name")

    print("--- \033[32m:helm: Installing Helm chart {}\033[0m".format(
        helm_install_name))

    release_name = chart_name.split("/")[-1]

    try:
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)
        helm_cmd = [
            "helm",
            "install",
            "--namespace",
            namespace,
            "--debug",
            "-f",
            "-",
            release_name,
            os.path.join(git_repository_root(), chart_name),
        ]

        print("Running Helm Install: \n", " ".join(helm_cmd),
              "\nWith config:\n", helm_config_yaml)

        p = subprocess.Popen(helm_cmd,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        stdout, stderr = p.communicate(helm_config_yaml.encode("utf-8"))
        print("Helm install completed with stdout: ", stdout.decode("utf-8"))
        print("Helm install completed with stderr: ", stderr.decode("utf-8"))
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        if chart_name == "helm/dagster":
            print("Waiting for Dagit pod to be ready...")
            dagit_pod = None
            while dagit_pod is None:
                pods = kube_api.list_namespaced_pod(namespace=namespace)
                pod_names = [
                    p.metadata.name for p in pods.items
                    if "dagit" in p.metadata.name
                ]
                if pod_names:
                    dagit_pod = pod_names[0]
                time.sleep(1)

            # Wait for Celery worker queues to become ready
            pods = kubernetes.client.CoreV1Api().list_namespaced_pod(
                namespace=namespace)
            deployments = kubernetes.client.AppsV1Api(
            ).list_namespaced_deployment(namespace=namespace)

            pod_names = [
                p.metadata.name for p in pods.items
                if CELERY_WORKER_NAME_PREFIX in p.metadata.name
            ]
            if helm_config.get("runLauncher", {}).get(
                    "type") == "CeleryK8sRunLauncher":
                worker_queues = (helm_config["runLauncher"].get(
                    "config", {}).get("celeryK8sRunLauncher", {}).get(
                        "workerQueues", []))
                for queue in worker_queues:
                    num_pods_for_queue = len([
                        pod_name for pod_name in pod_names
                        if f"{CELERY_WORKER_NAME_PREFIX}-{queue.get('name')}"
                        in pod_name
                    ])
                    assert queue.get("replicaCount") == num_pods_for_queue

                    labels = queue.get("labels")
                    if labels:
                        target_deployments = []
                        for item in deployments.items:
                            if queue.get("name") in item.metadata.name:
                                target_deployments.append(item)

                        assert len(target_deployments) > 0
                        for target in target_deployments:
                            for key in labels:
                                assert target.spec.template.metadata.labels.get(
                                    key) == labels.get(key)

                print("Waiting for celery workers")
                for pod_name in pod_names:
                    print("Waiting for Celery worker pod %s" % pod_name)
                    wait_for_pod(pod_name, namespace=namespace)

                rabbitmq_enabled = ("rabbitmq" not in helm_config
                                    ) or helm_config.get("rabbitmq")
                if rabbitmq_enabled:
                    print("Waiting for rabbitmq pod to exist...")
                    while True:
                        pods = kube_api.list_namespaced_pod(
                            namespace=namespace)
                        pod_names = [
                            p.metadata.name for p in pods.items
                            if "rabbitmq" in p.metadata.name
                        ]
                        if pod_names:
                            assert len(pod_names) == 1
                            print("Waiting for rabbitmq pod to be ready: " +
                                  str(pod_names[0]))

                            wait_for_pod(pod_names[0], namespace=namespace)
                            break
                        time.sleep(1)

            else:
                assert (
                    len(pod_names) == 0
                ), "celery-worker pods {pod_names} exist when celery is not enabled.".format(
                    pod_names=pod_names)

        dagster_user_deployments_values = helm_config.get(
            "dagster-user-deployments", {})
        if (dagster_user_deployments_values.get("enabled")
                and dagster_user_deployments_values.get("enableSubchart")
                or release_name == "dagster"):
            # Wait for user code deployments to be ready
            print("Waiting for user code deployments")
            pods = kubernetes.client.CoreV1Api().list_namespaced_pod(
                namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items
                if "user-code-deployment" in p.metadata.name
            ]
            for pod_name in pod_names:
                print("Waiting for user code deployment pod %s" % pod_name)
                wait_for_pod(pod_name, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print("Uninstalling helm chart")
            check_output(
                ["helm", "uninstall", release_name, "--namespace", namespace],
                cwd=git_repository_root(),
            )
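The shape of helm_config this helper inspects can be read off the lookups above. An illustrative config that would exercise the CeleryK8sRunLauncher branch (the values are assumptions; only the keys come from the code):

helm_config = {
    "runLauncher": {
        "type": "CeleryK8sRunLauncher",
        "config": {
            "celeryK8sRunLauncher": {
                "workerQueues": [
                    {"name": "dagster", "replicaCount": 2},
                    {"name": "extra-queue-1", "replicaCount": 1,
                     "labels": {"celery-queue": "extra"}},
                ],
            },
        },
    },
    # Omitting the "rabbitmq" key means the rabbitmq wait is enabled.
}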
Example #7
def _helm_chart_helper(namespace, should_cleanup, helm_config):
    """Install helm chart.
    """
    check.str_param(namespace, "namespace")
    check.bool_param(should_cleanup, "should_cleanup")

    print("--- \033[32m:helm: Installing Helm chart\033[0m")

    try:
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)

        helm_cmd = [
            "helm",
            "install",
            "--namespace",
            namespace,
            "-f",
            "-",
            "dagster",
            os.path.join(git_repository_root(), "helm", "dagster"),
        ]

        print("Running Helm Install: \n", " ".join(helm_cmd),
              "\nWith config:\n", helm_config_yaml)

        p = subprocess.Popen(helm_cmd,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        stdout, stderr = p.communicate(six.ensure_binary(helm_config_yaml))
        print("Helm install completed with stdout: ", stdout)
        print("Helm install completed with stderr: ", stderr)
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        print("Waiting for Dagit pod to be ready...")
        dagit_pod = None
        while dagit_pod is None:
            pods = kube_api.list_namespaced_pod(namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items
                if "dagit" in p.metadata.name
            ]
            if pod_names:
                dagit_pod = pod_names[0]
            time.sleep(1)

        # Wait for Celery worker queues to become ready
        pods = kubernetes.client.CoreV1Api().list_namespaced_pod(
            namespace=namespace)
        pod_names = [
            p.metadata.name for p in pods.items
            if "celery-workers" in p.metadata.name
        ]
        if helm_config.get("celery", {}).get("enabled"):
            extra_worker_queues = helm_config.get("celery").get(
                "extraWorkerQueues", [])
            for queue in extra_worker_queues:
                num_pods_for_queue = len([
                    pod_name for pod_name in pod_names
                    if queue.get("name") in pod_name
                ])
                assert queue.get("replicaCount") == num_pods_for_queue

            print("Waiting for celery workers")
            for pod_name in pod_names:
                print("Waiting for Celery worker pod %s" % pod_name)
                wait_for_pod(pod_name, namespace=namespace)
        else:
            assert (
                len(pod_names) == 0
            ), "celery-worker pods {pod_names} exist when celery is not enabled.".format(
                pod_names=pod_names)

        if helm_config.get("userDeployments", {}).get("enabled"):
            # Wait for user code deployments to be ready
            print("Waiting for user code deployments")
            pods = kubernetes.client.CoreV1Api().list_namespaced_pod(
                namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items
                if "user-code-deployment" in p.metadata.name
            ]
            for pod_name in pod_names:
                print("Waiting for user code deployment pod %s" % pod_name)
                wait_for_pod(pod_name, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print("Uninstalling helm chart")
            check_output(
                ["helm", "uninstall", "dagster", "--namespace", namespace],
                cwd=git_repository_root(),
            )
Example #8
def helm_chart(namespace,
               image_pull_policy,
               docker_image,
               should_cleanup=True):
    '''Install dagster-k8s helm chart.
    '''
    check.str_param(namespace, 'namespace')
    check.str_param(image_pull_policy, 'image_pull_policy')
    check.str_param(docker_image, 'docker_image')
    check.bool_param(should_cleanup, 'should_cleanup')

    print('--- \033[32m:helm: Installing Helm chart\033[0m')

    try:
        repository, tag = docker_image.split(':')

        helm_config = {
            'imagePullPolicy': image_pull_policy,
            'dagit': {
                'image': {
                    'repository': repository,
                    'tag': tag
                },
                'env': {
                    'TEST_SET_ENV_VAR': 'test_dagit_env_var'
                },
                'env_config_maps': ['test-env-configmap'],
                'env_secrets': ['test-env-secret'],
            },
            'job_runner': {
                'image': {
                    'repository': repository,
                    'tag': tag
                },
                'env': {
                    'TEST_SET_ENV_VAR': 'test_job_runner_env_var'
                },
                'env_config_maps': ['test-env-configmap'],
                'env_secrets': ['test-env-secret'],
            },
            'serviceAccount': {
                'name': 'dagit-admin'
            },
            'postgresqlPassword': '******',
            'postgresqlDatabase': 'test',
            'postgresqlUser': '******',
            'celery': {
                'extraWorkerQueues': [
                    {
                        'name': 'extra-queue-1',
                        'replicaCount': 1
                    },
                    {
                        'name': 'extra-queue-2',
                        'replicaCount': 2
                    },
                ]
            },
        }
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)

        helm_cmd = [
            'helm',
            'install',
            '--namespace',
            namespace,
            '-f',
            '-',
            'dagster',
            'helm/dagster/',
        ]

        print('Running Helm Install: \n', ' '.join(helm_cmd),
              '\nWith config:\n', helm_config_yaml)

        p = subprocess.Popen(
            helm_cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate(six.ensure_binary(helm_config_yaml))
        print('\n\nHelm install stdout:\n', six.ensure_str(stdout))
        print('\n\nHelm install stderr:\n', six.ensure_str(stderr))
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        print('Waiting for Dagit pod to be ready...')
        dagit_pod = None
        while dagit_pod is None:
            pods = kube_api.list_namespaced_pod(namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items
                if 'dagit' in p.metadata.name
            ]
            if pod_names:
                dagit_pod = pod_names[0]
            time.sleep(1)

        # Wait for additional Celery worker queues to become ready
        pods = kubernetes.client.CoreV1Api().list_namespaced_pod(
            namespace=namespace)
        for extra_queue in helm_config['celery']['extraWorkerQueues']:
            pod_names = [
                p.metadata.name for p in pods.items
                if extra_queue['name'] in p.metadata.name
            ]
            assert len(pod_names) == extra_queue['replicaCount']
            for pod in pod_names:
                print('Waiting for pod %s' % pod)
                wait_for_pod(pod, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print('Uninstalling helm chart')
            check_output(
                ['helm', 'uninstall', 'dagster', '--namespace', namespace],
                cwd=os.path.join(git_repository_root(),
                                 'python_modules/libraries/dagster-k8s/'),
            )