def local_port_forward_postgres():
    print('Port-forwarding postgres')
    postgres_pod_name = (
        check_output([
            'kubectl',
            'get',
            'pods',
            '--namespace',
            helm_namespace,
            '-l',
            'app=postgresql,release=dagster',
            '-o',
            'jsonpath="{.items[0].metadata.name}"',
        ])
        .decode('utf-8')
        .strip('"')
    )
    forward_port = find_free_port()
    wait_for_pod(postgres_pod_name, namespace=helm_namespace)
    try:
        p = subprocess.Popen([
            'kubectl',
            'port-forward',
            '--namespace',
            helm_namespace,
            postgres_pod_name,
            '{forward_port}:5432'.format(forward_port=forward_port),
        ])

        # Validate port forwarding works
        start = time.time()

        while True:
            if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                raise Exception('Timed out while waiting for postgres port forwarding')

            print(
                'Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be'
                ' available...' % (postgres_pod_name, forward_port)
            )
            try:
                conn = psycopg2.connect(
                    database='test',
                    user='******',
                    password='******',
                    host='localhost',
                    port=forward_port,
                )
                conn.close()
                break
            except:  # pylint: disable=bare-except, broad-except
                time.sleep(1)
                continue

        yield forward_port
    finally:
        print('Terminating port-forwarding')
        p.terminate()
def local_port_forward_postgres(namespace):
    print("Port-forwarding postgres")
    postgres_pod_name = (
        check_output([
            "kubectl",
            "get",
            "pods",
            "--namespace",
            namespace,
            "-l",
            "app=postgresql,release=dagster",
            "-o",
            'jsonpath="{.items[0].metadata.name}"',
        ])
        .decode("utf-8")
        .strip('"')
    )
    forward_port = find_free_port()
    wait_for_pod(postgres_pod_name, namespace=namespace)
    try:
        p = subprocess.Popen([
            "kubectl",
            "port-forward",
            "--namespace",
            namespace,
            postgres_pod_name,
            "{forward_port}:5432".format(forward_port=forward_port),
        ])

        # Validate port forwarding works
        start = time.time()

        while True:
            if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                raise Exception("Timed out while waiting for postgres port forwarding")

            print(
                "Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be"
                " available..." % (postgres_pod_name, forward_port)
            )
            try:
                conn = psycopg2.connect(
                    database="test",
                    user="******",
                    password="******",
                    host="localhost",
                    port=forward_port,
                )
                conn.close()
                break
            except:  # pylint: disable=bare-except, broad-except
                time.sleep(1)
                continue

        yield forward_port
    finally:
        print("Terminating port-forwarding")
        p.terminate()
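# A minimal usage sketch, not part of the original source: since the function above yields,
# it is presumably meant to be consumed as a context manager (e.g. wrapped with
# contextlib.contextmanager). The namespace value and connection credentials below are
# illustrative placeholders.
import contextlib

import psycopg2


def _example_query_over_port_forward():
    forward_postgres = contextlib.contextmanager(local_port_forward_postgres)
    with forward_postgres(namespace="dagster-test") as forward_port:
        # Connect through the forwarded local port rather than the in-cluster service.
        conn = psycopg2.connect(
            database="test",
            user="test",  # placeholder credentials
            password="test",
            host="localhost",
            port=forward_port,
        )
        conn.close()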
def test_wait_for_pod(cluster_provider):  # pylint: disable=unused-argument
    api = kubernetes.client.CoreV1Api()

    with test_namespace() as namespace:
        # Without this sleep, we get the following error on kind:
        # HTTP response body:
        # {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"No API
        # token found for service account \"default\", retry after the token is automatically
        # created and added to the service
        # account","reason":"ServerTimeout","details":{"name":"create
        # pod","kind":"serviceaccounts","retryAfterSeconds":1},"code":500}
        time.sleep(5)

        try:
            api.create_namespaced_pod(
                body=construct_pod_manifest('sayhi1', 'echo "hello world"'), namespace=namespace
            )
            wait_for_pod('sayhi1', namespace=namespace)
            assert retrieve_pod_logs('sayhi1', namespace=namespace) == 'hello world\n'

            api.create_namespaced_pod(
                body=construct_pod_manifest('sayhi2', 'echo "hello world"'), namespace=namespace
            )
            wait_for_pod('sayhi2', namespace=namespace, wait_for_state=WaitForPodState.Terminated)

            with pytest.raises(
                DagsterK8sError, match='Timed out while waiting for pod to become ready'
            ):
                api.create_namespaced_pod(
                    body=construct_pod_manifest('sayhi3', 'sleep 5; echo "hello world"'),
                    namespace=namespace,
                )
                wait_for_pod('sayhi3', namespace=namespace, wait_timeout=1)

            with pytest.raises(
                DagsterK8sError,
                match='Pod did not exit successfully. Failed with message: None and pod logs: whoops!',
            ):
                api.create_namespaced_pod(
                    body=construct_pod_manifest('fail', 'echo "whoops!"; exit 1'),
                    namespace=namespace,
                )
                wait_for_pod('fail', namespace=namespace, wait_for_state=WaitForPodState.Terminated)

        finally:
            for pod_name in ['sayhi1', 'sayhi2', 'sayhi3', 'fail']:
                try:
                    api.delete_namespaced_pod(pod_name, namespace=namespace)
                except kubernetes.client.rest.ApiException:
                    pass
def test_wait_for_pod(cluster_provider):  # pylint: disable=unused-argument
    api = kubernetes.client.CoreV1Api()

    with get_helm_test_namespace() as namespace:
        # Without this sleep, we get the following error on kind:
        # HTTP response body:
        # {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"No API
        # token found for service account \"default\", retry after the token is automatically
        # created and added to the service
        # account","reason":"ServerTimeout","details":{"name":"create
        # pod","kind":"serviceaccounts","retryAfterSeconds":1},"code":500}
        time.sleep(5)

        try:
            api.create_namespaced_pod(
                body=construct_pod_manifest("sayhi1", 'echo "hello world"'), namespace=namespace
            )
            wait_for_pod("sayhi1", namespace=namespace)
            assert retrieve_pod_logs("sayhi1", namespace=namespace) == "hello world\n"

            api.create_namespaced_pod(
                body=construct_pod_manifest("sayhi2", 'echo "hello world"'), namespace=namespace
            )
            wait_for_pod("sayhi2", namespace=namespace, wait_for_state=WaitForPodState.Terminated)

            with pytest.raises(
                DagsterK8sError, match="Timed out while waiting for pod to become ready"
            ):
                api.create_namespaced_pod(
                    body=construct_pod_manifest("sayhi3", 'sleep 5; echo "hello world"'),
                    namespace=namespace,
                )
                wait_for_pod("sayhi3", namespace=namespace, wait_timeout=1)

            with pytest.raises(DagsterK8sError) as exc_info:
                api.create_namespaced_pod(
                    body=construct_pod_manifest("fail", 'echo "whoops!"; exit 1'),
                    namespace=namespace,
                )
                wait_for_pod("fail", namespace=namespace, wait_for_state=WaitForPodState.Terminated)

            # not doing total match because integration test. unit tests test full log message
            assert "Pod did not exit successfully." in str(exc_info.value)

        finally:
            for pod_name in ["sayhi1", "sayhi2", "sayhi3", "fail"]:
                try:
                    api.delete_namespaced_pod(pod_name, namespace=namespace)
                except kubernetes.client.rest.ApiException:
                    pass
def helm_chart(namespace, docker_image, should_cleanup=True):
    '''Install dagster-k8s helm chart.'''
    check.str_param(namespace, 'namespace')
    check.str_param(docker_image, 'docker_image')
    check.bool_param(should_cleanup, 'should_cleanup')

    print('--- \033[32m:helm: Installing Helm chart\033[0m')

    try:
        repository, tag = docker_image.split(':')
        pull_policy = image_pull_policy()
        helm_config = {
            'dagit': {
                'image': {'repository': repository, 'tag': tag, 'pullPolicy': pull_policy},
                'env': {'TEST_SET_ENV_VAR': 'test_dagit_env_var'},
                'env_config_maps': [TEST_CONFIGMAP_NAME],
                'env_secrets': [TEST_SECRET_NAME],
                'livenessProbe': {
                    'tcpSocket': {'port': 80},
                    'periodSeconds': 20,
                    'failureThreshold': 3,
                },
                'startupProbe': {
                    'tcpSocket': {'port': 80},
                    'failureThreshold': 6,
                    'periodSeconds': 10,
                },
            },
            'celery': {
                'image': {'repository': repository, 'tag': tag, 'pullPolicy': pull_policy},
                'extraWorkerQueues': [{'name': 'extra-queue-1', 'replicaCount': 1}],
            },
            'pipeline_run': {
                'image': {'repository': repository, 'tag': tag, 'pullPolicy': pull_policy},
                'env': {'TEST_SET_ENV_VAR': 'test_pipeline_run_env_var'},
                'env_config_maps': [TEST_CONFIGMAP_NAME],
                'env_secrets': [TEST_SECRET_NAME],
            },
            'serviceAccount': {'name': 'dagit-admin'},
            'postgresqlPassword': '******',
            'postgresqlDatabase': 'test',
            'postgresqlUser': '******',
        }

        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)

        dagster_k8s_path = os.path.join(
            git_repository_root(), 'python_modules', 'libraries', 'dagster-k8s'
        )

        helm_cmd = [
            'helm',
            'install',
            '--namespace',
            namespace,
            '-f',
            '-',
            'dagster',
            os.path.join(dagster_k8s_path, 'helm', 'dagster'),
        ]

        print('Running Helm Install: \n', ' '.join(helm_cmd), '\nWith config:\n', helm_config_yaml)

        p = subprocess.Popen(
            helm_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        stdout, stderr = p.communicate(six.ensure_binary(helm_config_yaml))
        print('Helm install completed with stdout: ', stdout)
        print('Helm install completed with stderr: ', stderr)
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        print('Waiting for Dagit pod to be ready...')
        dagit_pod = None
        while dagit_pod is None:
            pods = kube_api.list_namespaced_pod(namespace=namespace)
            pod_names = [p.metadata.name for p in pods.items if 'dagit' in p.metadata.name]
            if pod_names:
                dagit_pod = pod_names[0]
            time.sleep(1)

        # Wait for Celery worker queues to become ready
        print('Waiting for celery workers')
        pods = kubernetes.client.CoreV1Api().list_namespaced_pod(namespace=namespace)
        pod_names = [p.metadata.name for p in pods.items if 'celery-workers' in p.metadata.name]
        for pod_name in pod_names:
            print('Waiting for Celery worker pod %s' % pod_name)
            wait_for_pod(pod_name, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print('Uninstalling helm chart')
            check_output(
                ['helm', 'uninstall', 'dagster', '--namespace', namespace],
                cwd=dagster_k8s_path,
            )
def _helm_chart_helper(
    namespace, should_cleanup, helm_config, helm_install_name, chart_name="helm/dagster"
):
    """Install helm chart."""
    check.str_param(namespace, "namespace")
    check.bool_param(should_cleanup, "should_cleanup")
    check.str_param(helm_install_name, "helm_install_name")

    print("--- \033[32m:helm: Installing Helm chart {}\033[0m".format(helm_install_name))

    try:
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)
        release_name = chart_name.split("/")[-1]

        helm_cmd = [
            "helm",
            "install",
            "--namespace",
            namespace,
            "--debug",
            "-f",
            "-",
            release_name,
            os.path.join(git_repository_root(), chart_name),
        ]

        print("Running Helm Install: \n", " ".join(helm_cmd), "\nWith config:\n", helm_config_yaml)

        p = subprocess.Popen(
            helm_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        stdout, stderr = p.communicate(helm_config_yaml.encode("utf-8"))
        print("Helm install completed with stdout: ", stdout.decode("utf-8"))
        print("Helm install completed with stderr: ", stderr.decode("utf-8"))
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        if chart_name == "helm/dagster":
            print("Waiting for Dagit pod to be ready...")
            dagit_pod = None
            while dagit_pod is None:
                pods = kube_api.list_namespaced_pod(namespace=namespace)
                pod_names = [p.metadata.name for p in pods.items if "dagit" in p.metadata.name]
                if pod_names:
                    dagit_pod = pod_names[0]
                time.sleep(1)

            # Wait for Celery worker queues to become ready
            pods = kubernetes.client.CoreV1Api().list_namespaced_pod(namespace=namespace)
            deployments = kubernetes.client.AppsV1Api().list_namespaced_deployment(
                namespace=namespace
            )
            pod_names = [
                p.metadata.name for p in pods.items if CELERY_WORKER_NAME_PREFIX in p.metadata.name
            ]
            if helm_config.get("runLauncher").get("type") == "CeleryK8sRunLauncher":
                worker_queues = (
                    helm_config.get("runLauncher")
                    .get("config")
                    .get("celeryK8sRunLauncher")
                    .get("workerQueues", [])
                )
                for queue in worker_queues:
                    num_pods_for_queue = len(
                        [
                            pod_name
                            for pod_name in pod_names
                            if f"{CELERY_WORKER_NAME_PREFIX}-{queue.get('name')}" in pod_name
                        ]
                    )
                    assert queue.get("replicaCount") == num_pods_for_queue

                    labels = queue.get("labels")
                    if labels:
                        target_deployments = []
                        for item in deployments.items:
                            if queue.get("name") in item.metadata.name:
                                target_deployments.append(item)

                        assert len(target_deployments) > 0
                        for target in target_deployments:
                            for key in labels:
                                assert target.spec.template.metadata.labels.get(key) == labels.get(
                                    key
                                )

                print("Waiting for celery workers")
                for pod_name in pod_names:
                    print("Waiting for Celery worker pod %s" % pod_name)
                    wait_for_pod(pod_name, namespace=namespace)

                rabbitmq_enabled = ("rabbitmq" not in helm_config) or helm_config.get("rabbitmq")
                if rabbitmq_enabled:
                    print("Waiting for rabbitmq pod to exist...")
                    while True:
                        pods = kube_api.list_namespaced_pod(namespace=namespace)
                        pod_names = [
                            p.metadata.name for p in pods.items if "rabbitmq" in p.metadata.name
                        ]
                        if pod_names:
                            assert len(pod_names) == 1
                            print("Waiting for rabbitmq pod to be ready: " + str(pod_names[0]))
                            wait_for_pod(pod_names[0], namespace=namespace)
                            break
                        time.sleep(1)
            else:
                assert (
                    len(pod_names) == 0
                ), "celery-worker pods {pod_names} exists when celery is not enabled.".format(
                    pod_names=pod_names
                )

        dagster_user_deployments_values = helm_config.get("dagster-user-deployments", {})
        if (
            dagster_user_deployments_values.get("enabled")
            and dagster_user_deployments_values.get("enableSubchart")
            or release_name == "dagster"
        ):
            # Wait for user code deployments to be ready
            print("Waiting for user code deployments")
            pods = kubernetes.client.CoreV1Api().list_namespaced_pod(namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items if "user-code-deployment" in p.metadata.name
            ]
            for pod_name in pod_names:
                print("Waiting for user code deployment pod %s" % pod_name)
                wait_for_pod(pod_name, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print("Uninstalling helm chart")
            check_output(
                ["helm", "uninstall", release_name, "--namespace", namespace],
                cwd=git_repository_root(),
            )
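# A minimal sketch, not from the original source, of the helm_config shape that the
# assertions in _helm_chart_helper above read: runLauncher.config.celeryK8sRunLauncher
# .workerQueues drives the celery-worker pod count and label checks, the "rabbitmq" key
# toggles the rabbitmq wait, and "dagster-user-deployments" gates the user code deployment
# wait. All values are illustrative placeholders, not the chart's real defaults.
EXAMPLE_HELM_CONFIG = {
    "runLauncher": {
        "type": "CeleryK8sRunLauncher",
        "config": {
            "celeryK8sRunLauncher": {
                "workerQueues": [
                    {"name": "dagster", "replicaCount": 2},
                    {"name": "extra-queue-1", "replicaCount": 1, "labels": {"purpose": "extra"}},
                ],
            },
        },
    },
    "rabbitmq": {"enabled": True},
    "dagster-user-deployments": {"enabled": True, "enableSubchart": True},
}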
def _helm_chart_helper(namespace, should_cleanup, helm_config):
    """Install helm chart."""
    check.str_param(namespace, "namespace")
    check.bool_param(should_cleanup, "should_cleanup")

    print("--- \033[32m:helm: Installing Helm chart\033[0m")

    try:
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)

        helm_cmd = [
            "helm",
            "install",
            "--namespace",
            namespace,
            "-f",
            "-",
            "dagster",
            os.path.join(git_repository_root(), "helm", "dagster"),
        ]

        print("Running Helm Install: \n", " ".join(helm_cmd), "\nWith config:\n", helm_config_yaml)

        p = subprocess.Popen(
            helm_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        stdout, stderr = p.communicate(six.ensure_binary(helm_config_yaml))
        print("Helm install completed with stdout: ", stdout)
        print("Helm install completed with stderr: ", stderr)
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        print("Waiting for Dagit pod to be ready...")
        dagit_pod = None
        while dagit_pod is None:
            pods = kube_api.list_namespaced_pod(namespace=namespace)
            pod_names = [p.metadata.name for p in pods.items if "dagit" in p.metadata.name]
            if pod_names:
                dagit_pod = pod_names[0]
            time.sleep(1)

        # Wait for Celery worker queues to become ready
        pods = kubernetes.client.CoreV1Api().list_namespaced_pod(namespace=namespace)
        pod_names = [p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name]
        if helm_config.get("celery", {}).get("enabled"):
            extra_worker_queues = helm_config.get("celery").get("extraWorkerQueues", [])
            for queue in extra_worker_queues:
                num_pods_for_queue = len(
                    [pod_name for pod_name in pod_names if queue.get("name") in pod_name]
                )
                assert queue.get("replicaCount") == num_pods_for_queue

            print("Waiting for celery workers")
            for pod_name in pod_names:
                print("Waiting for Celery worker pod %s" % pod_name)
                wait_for_pod(pod_name, namespace=namespace)
        else:
            assert (
                len(pod_names) == 0
            ), "celery-worker pods {pod_names} exists when celery is not enabled.".format(
                pod_names=pod_names
            )

        if helm_config.get("userDeployments") and helm_config.get("userDeployments", {}).get(
            "enabled"
        ):
            # Wait for user code deployments to be ready
            print("Waiting for user code deployments")
            pods = kubernetes.client.CoreV1Api().list_namespaced_pod(namespace=namespace)
            pod_names = [
                p.metadata.name for p in pods.items if "user-code-deployment" in p.metadata.name
            ]
            for pod_name in pod_names:
                print("Waiting for user code deployment pod %s" % pod_name)
                wait_for_pod(pod_name, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print("Uninstalling helm chart")
            check_output(
                ["helm", "uninstall", "dagster", "--namespace", namespace],
                cwd=git_repository_root(),
            )
def helm_chart(namespace, image_pull_policy, docker_image, should_cleanup=True):
    '''Install dagster-k8s helm chart.'''
    check.str_param(namespace, 'namespace')
    check.str_param(image_pull_policy, 'image_pull_policy')
    check.str_param(docker_image, 'docker_image')
    check.bool_param(should_cleanup, 'should_cleanup')

    print('--- \033[32m:helm: Installing Helm chart\033[0m')

    try:
        repository, tag = docker_image.split(':')
        helm_config = {
            'imagePullPolicy': image_pull_policy,
            'dagit': {
                'image': {'repository': repository, 'tag': tag},
                'env': {'TEST_SET_ENV_VAR': 'test_dagit_env_var'},
                'env_config_maps': ['test-env-configmap'],
                'env_secrets': ['test-env-secret'],
            },
            'job_runner': {
                'image': {'repository': repository, 'tag': tag},
                'env': {'TEST_SET_ENV_VAR': 'test_job_runner_env_var'},
                'env_config_maps': ['test-env-configmap'],
                'env_secrets': ['test-env-secret'],
            },
            'serviceAccount': {'name': 'dagit-admin'},
            'postgresqlPassword': '******',
            'postgresqlDatabase': 'test',
            'postgresqlUser': '******',
            'celery': {
                'extraWorkerQueues': [
                    {'name': 'extra-queue-1', 'replicaCount': 1},
                    {'name': 'extra-queue-2', 'replicaCount': 2},
                ]
            },
        }
        helm_config_yaml = yaml.dump(helm_config, default_flow_style=False)

        helm_cmd = [
            'helm',
            'install',
            '--namespace',
            namespace,
            '-f',
            '-',
            'dagster',
            'helm/dagster/',
        ]
        print('Running Helm Install: \n', ' '.join(helm_cmd), '\nWith config:\n', helm_config_yaml)

        p = subprocess.Popen(
            helm_cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate(six.ensure_binary(helm_config_yaml))
        print('\n\nHelm install stdout:\n', six.ensure_str(stdout))
        print('\n\nHelm install stderr:\n', six.ensure_str(stderr))
        assert p.returncode == 0

        # Wait for Dagit pod to be ready (won't actually stay up w/out js rebuild)
        kube_api = kubernetes.client.CoreV1Api()

        print('Waiting for Dagit pod to be ready...')
        dagit_pod = None
        while dagit_pod is None:
            pods = kube_api.list_namespaced_pod(namespace=namespace)
            pod_names = [p.metadata.name for p in pods.items if 'dagit' in p.metadata.name]
            if pod_names:
                dagit_pod = pod_names[0]
            time.sleep(1)

        # Wait for additional Celery worker queues to become ready
        pods = kubernetes.client.CoreV1Api().list_namespaced_pod(namespace=namespace)
        for extra_queue in helm_config['celery']['extraWorkerQueues']:
            pod_names = [
                p.metadata.name for p in pods.items if extra_queue['name'] in p.metadata.name
            ]
            assert len(pod_names) == extra_queue['replicaCount']
            for pod in pod_names:
                print('Waiting for pod %s' % pod)
                wait_for_pod(pod, namespace=namespace)

        yield

    finally:
        # Can skip this step as a time saver when we're going to destroy the cluster anyway, e.g.
        # w/ a kind cluster
        if should_cleanup:
            print('Uninstalling helm chart')
            check_output(
                ['helm', 'uninstall', 'dagster', '--namespace', namespace],
                cwd=os.path.join(git_repository_root(), 'python_modules/libraries/dagster-k8s/'),
            )
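# A hedged sketch, not from the original source, of how the chart-installing generator above
# might be wired into a test suite as a pytest fixture. The fixture name, the
# cluster_namespace fixture, and the image reference are hypothetical placeholders.
import pytest


@pytest.fixture(scope='session')
def example_dagster_helm_chart(cluster_namespace):  # hypothetical fixture names
    # Delegate setup and teardown to the generator: the helm release is installed before the
    # yield and uninstalled (when should_cleanup is True) when the fixture is torn down.
    yield from helm_chart(
        namespace=cluster_namespace,
        image_pull_policy='IfNotPresent',
        docker_image='dagster/dagster-k8s:latest',  # placeholder image
        should_cleanup=True,
    )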