def queue_task_instance(
    self,
    task_instance: TaskInstance,
    mark_success: bool = False,
    pickle_id: Optional[str] = None,
    ignore_all_deps: bool = False,
    ignore_depends_on_past: bool = False,
    ignore_task_deps: bool = False,
    ignore_ti_state: bool = False,
    pool: Optional[str] = None,
    cfg_path: Optional[str] = None,
) -> None:
    """Build the shell command for ``task_instance`` and enqueue it on this executor."""
    effective_pool = pool or task_instance.pool
    # TODO (edgarRd): AIRFLOW-1985:
    # cfg_path has to be forwarded so that config values survive impersonation
    # (run_as_user), since tasks can execute through different code paths.
    # The long-term fix is tracked in AIRFLOW-1986.
    run_command = task_instance.command_as_list(
        local=True,
        mark_success=mark_success,
        ignore_all_deps=ignore_all_deps,
        ignore_depends_on_past=ignore_depends_on_past,
        ignore_task_deps=ignore_task_deps,
        ignore_ti_state=ignore_ti_state,
        pool=effective_pool,
        pickle_id=pickle_id,
        cfg_path=cfg_path,
    )
    self.queue_command(
        SimpleTaskInstance(task_instance),
        run_command,
        priority=task_instance.task.priority_weight_total,
        queue=task_instance.task.queue,
    )
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor"""
    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    output_root = args.output_path
    kube_config = KubeConfig()
    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=create_pod_id(args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            args=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            scheduler_job_id="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=PodGenerator.deserialize_model_file(kube_config.pod_template_file),
        )
        # Give cluster-configured pod mutation hooks a chance to adjust the pod
        # before it is serialized.
        pod_mutation_hook(pod)
        client = ApiClient()
        date_label = pod_generator.datetime_to_label_safe_datestring(execution_date)
        file_name = f"{args.dag_id}_{ti.task_id}_{date_label}.yml"
        os.makedirs(os.path.dirname(output_root + "/airflow_yaml_output/"), exist_ok=True)
        with open(output_root + "/airflow_yaml_output/" + file_name, "w") as output:
            output.write(yaml.dump(client.sanitize_for_serialization(pod)))
    print(f"YAML output can be found at {output_root}/airflow_yaml_output/")
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor"""
    # Imports are kept function-local so that merely importing the CLI module
    # does not pull in the kubernetes client stack.
    from kubernetes.client.api_client import ApiClient

    from airflow.executors.kubernetes_executor import AirflowKubernetesScheduler, KubeConfig
    from airflow.kubernetes import pod_generator
    from airflow.kubernetes.pod_generator import PodGenerator
    from airflow.kubernetes.worker_configuration import WorkerConfiguration
    from airflow.settings import pod_mutation_hook

    when = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    out_root = args.output_path
    kube_config = KubeConfig()
    for task in dag.tasks:
        task_instance = TaskInstance(task, when)
        # _create_pod_id is the executor's canonical pod naming scheme.
        pod_id = AirflowKubernetesScheduler._create_pod_id(  # pylint: disable=W0212
            args.dag_id, task_instance.task_id)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=task_instance.task_id,
            pod_id=pod_id,
            try_number=task_instance.try_number,
            kube_image=kube_config.kube_image,
            date=task_instance.execution_date,
            command=task_instance.command_as_list(),
            pod_override_object=PodGenerator.from_obj(task_instance.executor_config),
            worker_uuid="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=WorkerConfiguration(kube_config=kube_config).as_pod(),
        )
        # Give cluster-configured pod mutation hooks a chance to adjust the pod
        # before it is serialized.
        pod_mutation_hook(pod)
        date_label = pod_generator.datetime_to_label_safe_datestring(when)
        file_name = f"{args.dag_id}_{task_instance.task_id}_{date_label}.yml"
        os.makedirs(os.path.dirname(out_root + "/airflow_yaml_output/"), exist_ok=True)
        with open(out_root + "/airflow_yaml_output/" + file_name, "w") as output:
            output.write(yaml.dump(ApiClient().sanitize_for_serialization(pod)))
    print(f"YAML output can be found at {out_root}/airflow_yaml_output/")
def _op_to_task(self, dag_id, image, op, node_map):
    """Convert an Airflow operator into a Flyte SDK task plus its node.

    :param Text dag_id: ID of the owning DAG; reused as the pickle id of the
        generated `airflow run` command.
    :param Text image: container image the generated task runs in.
    :param airflow.models.BaseOperator op: operator to convert.
    :param dict(Text, SdkNode) node_map: upstream nodes keyed by op name.
        Currently unused — reserved for wiring operator inputs/outputs once
        interface mapping is implemented.
    :rtype: Tuple(base_tasks.SdkTask, SdkNode)
    """
    # Input/output wiring between operators is not implemented yet, so the
    # generated task is given an empty typed interface and no input bindings.
    interface_inputs = {}
    interface_outputs = {}
    input_mappings = {}

    # Translate the operator's Airflow resource spec into Flyte resource
    # requests; an operator with no resources yields an empty request list.
    requests = []
    if op.resources:
        requests = [
            task_model.Resources.ResourceEntry(name, value)
            for name, value in (
                (task_model.Resources.ResourceName.Cpu, op.resources.cpus),
                (task_model.Resources.ResourceName.Memory, op.resources.ram),
                (task_model.Resources.ResourceName.Gpu, op.resources.gpus),
                (task_model.Resources.ResourceName.Storage, op.resources.disk),
            )
        ]

    # Build the `airflow run` command line the container will execute. All
    # dependency checks are ignored because scheduling/ordering is delegated
    # to the Flyte engine rather than the Airflow scheduler.
    task_instance = TaskInstance(op, datetime.datetime.now())
    command = task_instance.command_as_list(
        local=True,
        mark_success=False,
        ignore_all_deps=True,
        ignore_depends_on_past=True,
        ignore_task_deps=True,
        ignore_ti_state=True,
        pool=task_instance.pool,
        pickle_id=dag_id,
        cfg_path=None,
    )

    task = base_tasks.SdkTask(
        op.task_id,
        SingleStepTask,
        "airflow_op",
        task_model.TaskMetadata(
            False,
            task_model.RuntimeMetadata(
                type=task_model.RuntimeMetadata.RuntimeType.Other,
                version=airflow.version.version,
                flavor='airflow',
            ),
            datetime.timedelta(seconds=0),
            literals_model.RetryStrategy(0),
            '1',
            None,
        ),
        interface_common.TypedInterface(inputs=interface_inputs, outputs=interface_outputs),
        custom=None,
        container=task_model.Container(
            image=image,
            command=command,
            args=[],
            resources=task_model.Resources(limits=[], requests=requests),
            env={},
            config={},
        ),
    )
    # Instantiate the node (no inputs yet) and pin its id to the operator's
    # task_id so the Flyte graph mirrors the Airflow DAG.
    return task, task(**input_mappings).assign_id_and_return(op.task_id)