def execute(self, context):
    """Airflow operator entry point: run the pipeline plan in a container and ingest its events.

    ``context`` is the Airflow task context dict. Returns the list of Dagster
    events produced by the containerized ``executePlan`` call; raises if the
    result contains errors, failures, or unexpected skips.
    """
    # Allow an explicit run_id override via operator params; otherwise reuse the
    # Airflow dag_run's run_id so every task in the DAG shares one Dagster run.
    if "run_id" in self.params:
        self._run_id = self.params["run_id"]
    elif "dag_run" in context and context["dag_run"] is not None:
        self._run_id = context["dag_run"].run_id
    try:
        if self.instance:
            # Tag the run with the Airflow execution date ("ts") when present.
            tags = {AIRFLOW_EXECUTION_DATE_STR: context.get("ts")} if "ts" in context else {}
            # Register the managed run with the instance before launching so
            # events emitted by the container can be attached to it.
            run = self.instance.register_managed_run(
                pipeline_name=self.pipeline_name,
                run_id=self.run_id,
                run_config=self.run_config,
                mode=self.mode,
                solids_to_execute=None,
                step_keys_to_execute=None,
                tags=tags,
                root_run_id=None,
                parent_run_id=None,
                pipeline_snapshot=self.pipeline_snapshot,
                execution_plan_snapshot=self.execution_plan_snapshot,
                parent_pipeline_snapshot=self.parent_pipeline_snapshot,
            )
        # Launch the container; raw_res is presumably its captured log lines
        # (it is fed straight to parse_raw_log_lines) -- TODO confirm.
        raw_res = self.execute_raw(context)
        self.log.info("Finished executing container.")
        res = parse_raw_log_lines(raw_res)
        try:
            handle_execution_errors(res, "executePlan")
        except DagsterGraphQLClientError as err:
            # Surface the failure as an engine event on the run, then re-raise.
            if self.instance:
                self.instance.report_engine_event(
                    str(err),
                    run,
                    EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(sys.exc_info())
                    ),
                    self.__class__,
                )
            raise
        events = handle_execute_plan_result_raw(res)
        if self.instance:
            # Persist every event record to the instance's event log.
            for event in events:
                self.instance.handle_new_event(event)
        events = [e.dagster_event for e in events]
        check_events_for_failures(events)
        check_events_for_skips(events)
        return events
    finally:
        # Always clear the cached run id so the operator can be re-executed.
        self._run_id = None
def execute(self, context):
    """Airflow operator entry point: execute the pipeline plan in a Docker container.

    dagster_graphql is imported lazily so a missing dependency surfaces as a
    clear AirflowException instead of a module-load failure. Returns the list
    of Dagster events produced by ``executePlan``; raises on errors, failures,
    or unexpected skips.
    """
    try:
        from dagster_graphql.implementation.pipeline_execution_manager import (
            build_synthetic_pipeline_error_record,
        )
        from dagster_graphql.client.mutations import (
            DagsterGraphQLClientError,
            handle_execution_errors,
            handle_execute_plan_result_raw,
        )
    except ImportError:
        raise AirflowException(
            'To use the DagsterDockerOperator, dagster and dagster_graphql must be installed '
            'in your Airflow environment.'
        )
    # Allow an explicit run_id override via operator params; otherwise reuse
    # the Airflow dag_run's run_id so tasks in the DAG share one Dagster run.
    if 'run_id' in self.params:
        self._run_id = self.params['run_id']
    elif 'dag_run' in context and context['dag_run'] is not None:
        self._run_id = context['dag_run'].run_id
    try:
        if self.instance:
            # Create (or fetch) the PipelineRun record, marked MANAGED,
            # before launching the container.
            self.instance.get_or_create_run(
                PipelineRun(
                    pipeline_name=self.pipeline_name,
                    run_id=self.run_id,
                    environment_dict=self.environment_dict,
                    mode=self.mode,
                    selector=ExecutionSelector(self.pipeline_name),
                    reexecution_config=None,
                    step_keys_to_execute=None,
                    tags=None,
                    status=PipelineRunStatus.MANAGED,
                )
            )
        # Delegate the actual container run to the superclass (presumably
        # Airflow's DockerOperator -- base class not visible here).
        raw_res = super(DagsterDockerOperator, self).execute(context)
        self.log.info('Finished executing container.')
        # Extract the GraphQL executePlan result from the container output.
        res = parse_raw_res(raw_res)
        try:
            handle_execution_errors(res, 'executePlan')
        except DagsterGraphQLClientError:
            # Record a synthetic error event against the run, then re-raise.
            event = build_synthetic_pipeline_error_record(
                self.run_id,
                serializable_error_info_from_exc_info(sys.exc_info()),
                self.pipeline_name,
            )
            if self.instance:
                self.instance.handle_new_event(event)
            raise
        events = handle_execute_plan_result_raw(res)
        if self.instance:
            # Persist every event record to the instance's event log.
            for event in events:
                self.instance.handle_new_event(event)
        events = [e.dagster_event for e in events]
        check_events_for_failures(events)
        check_events_for_skips(events)
        return events
    finally:
        # Always clear the cached run id so the operator can be re-executed.
        self._run_id = None
def execute(self, context):
    """Airflow operator entry point: execute the pipeline plan in a Docker container.

    dagster_graphql is imported lazily so a missing dependency surfaces as a
    clear AirflowException. Returns the list of Dagster events produced by
    ``executePlan``; raises on errors, failures, or unexpected skips.
    """
    try:
        from dagster_graphql.client.mutations import (
            DagsterGraphQLClientError,
            handle_execution_errors,
            handle_execute_plan_result_raw,
        )
    except ImportError:
        raise AirflowException(
            'To use the DagsterDockerOperator, dagster and dagster_graphql must be installed '
            'in your Airflow environment.')
    # Allow an explicit run_id override via operator params; otherwise reuse
    # the Airflow dag_run's run_id so tasks in the DAG share one Dagster run.
    if 'run_id' in self.params:
        self._run_id = self.params['run_id']
    elif 'dag_run' in context and context['dag_run'] is not None:
        self._run_id = context['dag_run'].run_id
    try:
        if self.instance:
            # Register the managed run with the instance before launching so
            # events emitted by the container can be attached to it.
            run = self.instance.register_managed_run(
                pipeline_name=self.pipeline_name,
                run_id=self.run_id,
                environment_dict=self.environment_dict,
                mode=self.mode,
                solids_to_execute=None,
                step_keys_to_execute=None,
                tags=None,
                root_run_id=None,
                parent_run_id=None,
                pipeline_snapshot=self.pipeline_snapshot,
                execution_plan_snapshot=self.execution_plan_snapshot,
                parent_pipeline_snapshot=self.parent_pipeline_snapshot,
            )
        # Delegate the actual container run to the superclass (presumably
        # Airflow's DockerOperator -- base class not visible here).
        raw_res = super(DagsterDockerOperator, self).execute(context)
        self.log.info('Finished executing container.')
        res = parse_raw_log_lines(raw_res)
        try:
            handle_execution_errors(res, 'executePlan')
        except DagsterGraphQLClientError as err:
            # Surface the failure as an engine event on the run, then re-raise.
            if self.instance:
                self.instance.report_engine_event(
                    str(err),
                    run,
                    EngineEventData.engine_error(
                        serializable_error_info_from_exc_info(
                            sys.exc_info())),
                    self.__class__,
                )
            raise
        events = handle_execute_plan_result_raw(res)
        if self.instance:
            # Persist every event record to the instance's event log.
            for event in events:
                self.instance.handle_new_event(event)
        events = [e.dagster_event for e in events]
        check_events_for_failures(events)
        check_events_for_skips(events)
        return events
    finally:
        # Always clear the cached run id so the operator can be re-executed.
        self._run_id = None
def execute(self, context):
    """Airflow operator entry point: run the pipeline plan in a Kubernetes pod.

    Builds the pod spec from operator attributes, launches it, retries reading
    its logs (k8s log servers are flaky), and ingests the resulting Dagster
    events. Returns the list of Dagster events; raises on errors, failures,
    or unexpected skips.
    """
    try:
        from dagster_graphql.client.mutations import (
            DagsterGraphQLClientError,
            handle_execution_errors,
            handle_execute_plan_result_raw,
        )
    except ImportError:
        raise AirflowException(
            'To use the DagsterKubernetesPodOperator, dagster and dagster_graphql must be'
            ' installed in your Airflow environment.'
        )

    # Allow an explicit run_id override via operator params; otherwise reuse
    # the Airflow dag_run's run_id so tasks in the DAG share one Dagster run.
    if 'run_id' in self.params:
        self._run_id = self.params['run_id']
    elif 'dag_run' in context and context['dag_run'] is not None:
        self._run_id = context['dag_run'].run_id

    # return to original execute code:
    try:
        client = kube_client.get_kube_client(
            in_cluster=self.in_cluster,
            cluster_context=self.cluster_context,
            config_file=self.config_file,
        )
        gen = pod_generator.PodGenerator()

        for mount in self.volume_mounts:
            gen.add_mount(mount)
        for volume in self.volumes:
            gen.add_volume(volume)

        # Pod command arguments carry the executePlan GraphQL query,
        # parameterized with the Airflow execution date.
        pod = gen.make_pod(
            namespace=self.namespace,
            image=self.image,
            pod_id=self.name,
            cmds=self.cmds,
            arguments=self.query(context.get('ts')),
            labels=self.labels,
        )

        # Copy through the standard KubernetesPodOperator pod settings.
        pod.service_account_name = self.service_account_name
        pod.secrets = self.secrets
        pod.envs = self.env_vars
        pod.image_pull_policy = self.image_pull_policy
        pod.image_pull_secrets = self.image_pull_secrets
        pod.annotations = self.annotations
        pod.resources = self.resources
        pod.affinity = self.affinity
        pod.node_selectors = self.node_selectors
        pod.hostnetwork = self.hostnetwork
        pod.tolerations = self.tolerations
        pod.configmaps = self.configmaps
        pod.security_context = self.security_context

        launcher = pod_launcher.PodLauncher(kube_client=client, extract_xcom=self.xcom_push)
        try:
            if self.instance:
                # Tag the run with the Airflow execution date ("ts") when present.
                tags = (
                    {AIRFLOW_EXECUTION_DATE_STR: context.get('ts')} if 'ts' in context else {}
                )
                # Register the managed run before launching so events emitted
                # by the pod can be attached to it.
                run = self.instance.register_managed_run(
                    pipeline_name=self.pipeline_name,
                    run_id=self.run_id,
                    run_config=self.run_config,
                    mode=self.mode,
                    solids_to_execute=None,
                    step_keys_to_execute=None,
                    tags=tags,
                    root_run_id=None,
                    parent_run_id=None,
                    pipeline_snapshot=self.pipeline_snapshot,
                    execution_plan_snapshot=self.execution_plan_snapshot,
                    parent_pipeline_snapshot=self.parent_pipeline_snapshot,
                )

            # we won't use the "result", which is the pod's xcom json file
            (final_state, _) = launcher.run_pod(
                pod, startup_timeout=self.startup_timeout_seconds, get_logs=self.get_logs
            )

            # fetch the last line independently of whether logs were read
            # unbelievably, if you set tail_lines=1, the returned json has its double quotes
            # turned into unparseable single quotes
            # Retry log retrieval: k8s log servers are extremely flaky and may
            # serve an empty/partial body right after pod completion.
            res = None
            num_attempts = 0
            while not res and num_attempts < LOG_RETRIEVAL_MAX_ATTEMPTS:
                raw_res = client.read_namespaced_pod_log(
                    name=pod.name, namespace=pod.namespace, container='base'
                )
                res = parse_raw_log_lines(raw_res.split('\n'))
                num_attempts += 1
                # Only wait when we still need another attempt; sleeping after
                # a successful parse (or the final attempt) is pure dead time.
                if not res and num_attempts < LOG_RETRIEVAL_MAX_ATTEMPTS:
                    time.sleep(LOG_RETRIEVAL_WAITS_BETWEEN_ATTEMPTS_SEC)

            try:
                handle_execution_errors(res, 'executePlan')
            except DagsterGraphQLClientError as err:
                # Guarded on self.instance: without an instance there is no
                # `run` and no event log, and an unguarded call here would
                # raise NameError/AttributeError and mask the real failure
                # (matches the guard used by the sibling Docker operator).
                if self.instance:
                    self.instance.report_engine_event(
                        str(err),
                        run,
                        EngineEventData.engine_error(
                            serializable_error_info_from_exc_info(sys.exc_info())
                        ),
                        self.__class__,
                    )
                raise

            events = handle_execute_plan_result_raw(res)

            if self.instance:
                # Persist every event record to the instance's event log.
                for event in events:
                    self.instance.handle_new_event(event)

            events = [e.dagster_event for e in events]
            check_events_for_failures(events)
            check_events_for_skips(events)

            return events
        finally:
            # Always clear the cached run id so the operator can be re-executed.
            self._run_id = None

        # NOTE(review): the tail below is unreachable -- the inner try either
        # returns or raises -- so the pod is never deleted here and the
        # final_state check never fires. Kept byte-for-byte pending a decision
        # on whether pod cleanup should move into the finally block.
        if self.is_delete_operator_pod:
            launcher.delete_pod(pod)

        if final_state != State.SUCCESS:
            raise AirflowException('Pod returned a failure: {state}'.format(state=final_state))
        # note the lack of returning the default xcom
    except AirflowException as ex:
        raise AirflowException('Pod Launching failed: {error}'.format(error=ex))
def execute(self, context):
    """Airflow operator entry point: run the pipeline plan in a Kubernetes pod.

    Builds the pod spec from operator attributes, launches it, reads its logs,
    and ingests the resulting Dagster events. dagster_graphql is imported
    lazily so a missing dependency surfaces as a clear AirflowException.
    Returns the list of raw event records from ``executePlan``.
    """
    try:
        from dagster_graphql.implementation.pipeline_execution_manager import (
            build_synthetic_pipeline_error_record,
        )
        from dagster_graphql.client.mutations import (
            DagsterGraphQLClientError,
            handle_execution_errors,
            handle_execute_plan_result_raw,
        )
    except ImportError:
        raise AirflowException(
            'To use the DagsterKubernetesPodOperator, dagster and dagster_graphql must be'
            ' installed in your Airflow environment.')

    # Allow an explicit run_id override via operator params; otherwise reuse
    # the Airflow dag_run's run_id so tasks in the DAG share one Dagster run.
    if 'run_id' in self.params:
        self._run_id = self.params['run_id']
    elif 'dag_run' in context and context['dag_run'] is not None:
        self._run_id = context['dag_run'].run_id

    # return to original execute code:
    try:
        client = kube_client.get_kube_client(
            in_cluster=self.in_cluster,
            cluster_context=self.cluster_context,
            config_file=self.config_file,
        )
        gen = pod_generator.PodGenerator()

        for mount in self.volume_mounts:
            gen.add_mount(mount)
        for volume in self.volumes:
            gen.add_volume(volume)

        # Pod command arguments carry the executePlan GraphQL query.
        pod = gen.make_pod(
            namespace=self.namespace,
            image=self.image,
            pod_id=self.name,
            cmds=self.cmds,
            arguments=self.query,
            labels=self.labels,
        )

        # Copy through the standard KubernetesPodOperator pod settings.
        pod.service_account_name = self.service_account_name
        pod.secrets = self.secrets
        pod.envs = self.env_vars
        pod.image_pull_policy = self.image_pull_policy
        pod.image_pull_secrets = self.image_pull_secrets
        pod.annotations = self.annotations
        pod.resources = self.resources
        pod.affinity = self.affinity
        pod.node_selectors = self.node_selectors
        pod.hostnetwork = self.hostnetwork
        pod.tolerations = self.tolerations
        pod.configmaps = self.configmaps
        pod.security_context = self.security_context

        launcher = pod_launcher.PodLauncher(kube_client=client, extract_xcom=self.xcom_push)
        try:
            if self.instance:
                # Create (or fetch) the PipelineRun record, marked MANAGED,
                # before launching the pod.
                self.instance.get_or_create_run(
                    PipelineRun(
                        pipeline_name=self.pipeline_name,
                        run_id=self.run_id,
                        environment_dict=self.environment_dict,
                        mode=self.mode,
                        selector=ExecutionSelector(self.pipeline_name),
                        reexecution_config=None,
                        step_keys_to_execute=None,
                        tags=None,
                        status=PipelineRunStatus.MANAGED,
                    ))

            # we won't use the "result", which is the pod's xcom json file
            (final_state, _) = launcher.run_pod(
                pod, startup_timeout=self.startup_timeout_seconds, get_logs=self.get_logs)

            # fetch the last line independently of whether logs were read
            # unbelievably, if you set tail_lines=1, the returned json has its double quotes
            # turned into unparseable single quotes
            # TODO: add retries - k8s log servers are _extremely_ flaky
            raw_res = client.read_namespaced_pod_log(
                name=pod.name, namespace=pod.namespace, container='base', tail_lines=5)

            res = parse_raw_res(raw_res.split('\n'))

            try:
                handle_execution_errors(res, 'executePlan')
            except DagsterGraphQLClientError:
                # Record a synthetic error event against the run, then re-raise.
                event = build_synthetic_pipeline_error_record(
                    self.run_id,
                    serializable_error_info_from_exc_info(sys.exc_info()),
                    self.pipeline_name,
                )
                if self.instance:
                    self.instance.handle_new_event(event)
                raise

            events = handle_execute_plan_result_raw(res)

            if self.instance:
                # Persist every event record to the instance's event log.
                for event in events:
                    self.instance.handle_new_event(event)

            check_raw_events_for_skips(events)

            return events
        finally:
            # Always clear the cached run id so the operator can be re-executed.
            self._run_id = None

        # NOTE(review): this tail appears unreachable -- the inner try either
        # returns or raises -- so the pod would never be deleted here and the
        # final_state check never fires. Confirm intended structure against
        # upstream history before relying on pod cleanup.
        if self.is_delete_operator_pod:
            launcher.delete_pod(pod)

        if final_state != State.SUCCESS:
            raise AirflowException(
                'Pod returned a failure: {state}'.format(
                    state=final_state))
        # note the lack of returning the default xcom
    except AirflowException as ex:
        raise AirflowException(
            'Pod Launching failed: {error}'.format(error=ex))