def __init__(self, local_task_job):
    """
    Build the command line that runs the job's task instance, prefixing it
    with ``sudo`` when the task has to execute as a different user.

    :param local_task_job: The local task job associated with running the
        associated task instance.
    :type local_task_job: airflow.jobs.LocalTaskJob
    """
    task_instance = local_task_job.task_instance
    # Pass task instance context into log handlers to setup the logger.
    super(BaseTaskRunner, self).__init__(task_instance)
    self._task_instance = task_instance

    # A user set on the task instance wins; otherwise fall back to the
    # configured default impersonation user (None when that lookup fails).
    impersonated_user = task_instance.run_as_user
    if not impersonated_user:
        try:
            impersonated_user = conf.get('core', 'default_impersonation')
        except conf.AirflowConfigException:
            impersonated_user = None
    self.run_as_user = impersonated_user

    sudo_prefix = []
    cfg_path = None

    # Add sudo commands to change user if we need to. Needed to handle SubDagOperator
    # case using a SequentialExecutor.
    if self.run_as_user and self.run_as_user != getpass.getuser():
        self.log.debug("Planning to run as the %s user", self.run_as_user)
        full_cfg = conf.as_dict(display_sensitive=True)
        cfg_subset = {
            section: full_cfg.get(section, {})
            for section in ('core', 'smtp', 'scheduler', 'webserver')
        }
        cfg_fd, cfg_path = mkstemp()

        # Give ownership of file to user; only they can read and write
        for sudo_cmd in (
            ['sudo', 'chown', self.run_as_user, cfg_path],
            ['sudo', 'chmod', '600', cfg_path],
        ):
            subprocess.call(sudo_cmd, close_fds=True)

        with os.fdopen(cfg_fd, 'w') as cfg_file:
            json.dump(cfg_subset, cfg_file)

        # propagate PYTHONPATH environment variable
        pythonpath_value = os.environ.get(PYTHONPATH_VAR, '')
        sudo_prefix = ['sudo', '-H', '-u', self.run_as_user]
        if pythonpath_value:
            sudo_prefix.append('{}={}'.format(PYTHONPATH_VAR, pythonpath_value))

    self._cfg_path = cfg_path
    task_command = task_instance.command_as_list(
        raw=True,
        pickle_id=local_task_job.pickle_id,
        mark_success=local_task_job.mark_success,
        job_id=local_task_job.id,
        pool=local_task_job.pool,
        cfg_path=cfg_path,
    )
    self._command = sudo_prefix + task_command
    # No child process exists yet.
    self.process = None
def __init__(self, local_task_job):
    """
    Prepare the command that runs the job's task instance, prefixing it with
    ``sudo`` when the task has to run as another user.

    :param local_task_job: The local task job associated with running the
        associated task instance.
    :type local_task_job: airflow.jobs.LocalTaskJob
    """
    task_instance = local_task_job.task_instance
    self._task_instance = task_instance
    # Pass task instance context into log handlers to setup the logger.
    self.set_logger_contexts(task_instance)

    # A user set on the task instance wins; otherwise fall back to the
    # configured default impersonation user (None when that lookup fails).
    impersonated_user = task_instance.run_as_user
    if not impersonated_user:
        try:
            impersonated_user = conf.get('core', 'default_impersonation')
        except conf.AirflowConfigException:
            impersonated_user = None
    self.run_as_user = impersonated_user

    sudo_prefix = []
    cfg_path = None

    # Add sudo commands to change user if we need to. Needed to handle SubDagOperator
    # case using a SequentialExecutor.
    if self.run_as_user and self.run_as_user != getpass.getuser():
        self.logger.debug("Planning to run as the {} user".format(self.run_as_user))
        full_cfg = conf.as_dict(display_sensitive=True)
        cfg_subset = {
            section: full_cfg.get(section, {})
            for section in ('core', 'smtp', 'scheduler', 'webserver')
        }
        cfg_fd, cfg_path = mkstemp()

        # Give ownership of file to user; only they can read and write
        for sudo_cmd in (
            ['sudo', 'chown', self.run_as_user, cfg_path],
            ['sudo', 'chmod', '600', cfg_path],
        ):
            subprocess.call(sudo_cmd)

        with os.fdopen(cfg_fd, 'w') as cfg_file:
            json.dump(cfg_subset, cfg_file)

        sudo_prefix = ['sudo', '-H', '-u', self.run_as_user]

    self._cfg_path = cfg_path
    task_command = task_instance.command_as_list(
        raw=True,
        ignore_all_deps=local_task_job.ignore_all_deps,
        ignore_depends_on_past=local_task_job.ignore_depends_on_past,
        ignore_ti_state=local_task_job.ignore_ti_state,
        pickle_id=local_task_job.pickle_id,
        mark_success=local_task_job.mark_success,
        job_id=local_task_job.id,
        pool=local_task_job.pool,
        cfg_path=cfg_path,
    )
    self._command = sudo_prefix + task_command
    # No child process exists yet.
    self.process = None
def __init__(self, local_task_job):
    """
    Assemble the command used to run the job's task instance, wrapped in
    ``sudo`` when the task must run as a user other than the current one.

    :param local_task_job: The local task job associated with running the
        associated task instance.
    :type local_task_job: airflow.jobs.LocalTaskJob
    """
    task_instance = local_task_job.task_instance
    self._task_instance = task_instance

    # A user set on the task instance wins; otherwise fall back to the
    # configured default impersonation user (None when that lookup fails).
    impersonated_user = task_instance.run_as_user
    if not impersonated_user:
        try:
            impersonated_user = conf.get('core', 'default_impersonation')
        except conf.AirflowConfigException:
            impersonated_user = None
    self.run_as_user = impersonated_user

    sudo_prefix = []
    cfg_path = None

    # Add sudo commands to change user if we need to. Needed to handle SubDagOperator
    # case using a SequentialExecutor.
    if self.run_as_user and self.run_as_user != getpass.getuser():
        self.logger.debug("Planning to run as the {} user".format(self.run_as_user))
        full_cfg = conf.as_dict(display_sensitive=True)
        cfg_subset = {
            section: full_cfg.get(section, {})
            for section in ('core', 'smtp', 'scheduler', 'webserver')
        }
        cfg_fd, cfg_path = mkstemp()

        # Give ownership of file to user; only they can read and write
        for sudo_cmd in (
            ['sudo', 'chown', self.run_as_user, cfg_path],
            ['sudo', 'chmod', '600', cfg_path],
        ):
            subprocess.call(sudo_cmd)

        with os.fdopen(cfg_fd, 'w') as cfg_file:
            json.dump(cfg_subset, cfg_file)

        sudo_prefix = ['sudo', '-H', '-u', self.run_as_user]

    self._cfg_path = cfg_path
    task_command = task_instance.command_as_list(
        raw=True,
        ignore_all_deps=local_task_job.ignore_all_deps,
        ignore_depends_on_past=local_task_job.ignore_depends_on_past,
        ignore_ti_state=local_task_job.ignore_ti_state,
        pickle_id=local_task_job.pickle_id,
        mark_success=local_task_job.mark_success,
        job_id=local_task_job.id,
        pool=local_task_job.pool,
        cfg_path=cfg_path,
    )
    self._command = sudo_prefix + task_command
    # No child process exists yet.
    self.process = None
def tmp_configuration_copy(chmod=0o600):
    """
    Returns a path for a temporary file including a full copy of the configuration
    settings.

    :param chmod: permission bits to set on the temporary file before anything
        is written to it; ``None`` leaves the mode chosen by ``mkstemp``
    :return: a path to a temporary file
    """
    cfg_dict = conf.as_dict(display_sensitive=True, raw=True)
    temp_fd, cfg_path = mkstemp()

    with os.fdopen(temp_fd, "w") as temp_file:
        # Honour the ``chmod`` argument, which used to be accepted but
        # ignored. Set the permissions before the (sensitive) settings are
        # written. ``os.fchmod`` is missing on some platforms, where the
        # request is skipped exactly as it was before.
        if chmod is not None and hasattr(os, "fchmod"):
            os.fchmod(temp_fd, chmod)
        json.dump(cfg_dict, temp_file)

    return cfg_path
def tmp_configuration_copy(chmod=0o600):
    """
    Dump the complete configuration, sensitive values included, as JSON into
    a newly created temporary file.

    :param chmod: file mode applied to the temporary file before the
        configuration is written; skipped when ``None``
    :return: a path to a temporary file
    """
    settings_snapshot = conf.as_dict(raw=True, display_sensitive=True)
    descriptor, cfg_path = mkstemp()

    with os.fdopen(descriptor, 'w') as out_file:
        # Restrict the file before any content reaches it.
        if chmod is not None:
            os.fchmod(descriptor, chmod)
        json.dump(settings_snapshot, out_file)

    return cfg_path
def tmp_configuration_copy(chmod=0o600, include_env=True, include_cmds=True):
    """
    Dump the complete configuration, sensitive values included, as JSON into
    a newly created temporary file.

    NOTE(review): ``include_env`` and ``include_cmds`` are accepted but not
    used by this body -- confirm whether they should be forwarded to
    ``conf.as_dict``.

    :param chmod: file mode applied to the temporary file before the
        configuration is written; skipped when ``None``
    :param include_env: currently unused
    :param include_cmds: currently unused
    :return: a path to a temporary file
    """
    settings_snapshot = conf.as_dict(raw=True, display_sensitive=True)
    descriptor, cfg_path = mkstemp()

    with os.fdopen(descriptor, 'w') as out_file:
        # Set the permissions before we write anything to it.
        if chmod is not None:
            os.fchmod(descriptor, chmod)
        json.dump(settings_snapshot, out_file)

    return cfg_path
def tmp_configuration_copy():
    """
    Write the configuration sections named in ``COPY_SECTIONS`` (sensitive
    values included) as JSON into a newly created temporary file.

    A section missing from the configuration is written as an empty mapping.

    :return: a path to a temporary file
    """
    full_cfg = conf.as_dict(display_sensitive=True)
    descriptor, cfg_path = mkstemp()

    selected = {name: full_cfg.get(name, {}) for name in COPY_SECTIONS}

    with os.fdopen(descriptor, 'w') as out_file:
        json.dump(selected, out_file)

    return cfg_path
def configure_logging(log_format=LOG_FORMAT):
    """
    Reset the root logger so that it writes to stdout using ``log_format``.

    The level comes from the ``core`` / ``LOGGING_LEVEL`` setting (upper-cased)
    when a ``logging_level`` key is present in the ``core`` section; otherwise
    the current module-level ``LOGGING_LEVEL`` is reused. If applying the
    level raises ``ValueError``, a warning is logged and ``logging.INFO`` is
    applied instead.

    :param log_format: format string handed to ``logging.basicConfig``
    """
    def _apply(level):
        # Drop existing root handlers so basicConfig installs a fresh one,
        # then remember the level that was applied.
        global LOGGING_LEVEL
        logging.root.handlers = []
        logging.basicConfig(format=log_format, stream=sys.stdout, level=level)
        LOGGING_LEVEL = level

    core_settings = conf.as_dict()["core"]
    if "logging_level" not in core_settings:
        requested_level = LOGGING_LEVEL
    else:
        requested_level = conf.get('core', 'LOGGING_LEVEL').upper()

    try:
        _apply(requested_level)
    except ValueError:
        logging.warning("Logging level {} is not defined. "
                        "Use default.".format(requested_level))
        _apply(logging.INFO)
def configure_logging(log_format=LOG_FORMAT):
    """
    Reconfigure the root logger to emit to stdout with ``log_format``.

    When the ``core`` section contains a ``logging_level`` key, the
    upper-cased ``core`` / ``LOGGING_LEVEL`` value is used as the level;
    otherwise the module-level ``LOGGING_LEVEL`` is kept. A level that makes
    the configuration raise ``ValueError`` is reported with a warning and
    replaced by ``logging.INFO``.

    :param log_format: format string handed to ``logging.basicConfig``
    """
    def _install(level):
        # Clear the root handlers first so basicConfig installs a new one,
        # then record the level in the module-level global.
        global LOGGING_LEVEL
        logging.root.handlers = []
        logging.basicConfig(level=level, stream=sys.stdout, format=log_format)
        LOGGING_LEVEL = level

    has_configured_level = "logging_level" in conf.as_dict()["core"]
    chosen_level = (
        conf.get('core', 'LOGGING_LEVEL').upper()
        if has_configured_level
        else LOGGING_LEVEL
    )

    try:
        _install(chosen_level)
    except ValueError:
        logging.warning("Logging level {} is not defined. "
                        "Use default.".format(chosen_level))
        _install(logging.INFO)
def __init__(self):
    """
    Load the Kubernetes executor settings from the Airflow configuration
    into instance attributes, then run ``self._validate()``.

    Options are read from the sections named by ``self.core_section`` and
    ``self.kubernetes_section``; the ``affinity``, ``tolerations`` and
    ``kube_client_request_args`` options are parsed as JSON when non-empty.
    """
    configuration_dict = configuration.as_dict(display_sensitive=True)
    self.core_configuration = configuration_dict['core']
    self.kube_secrets = configuration_dict.get('kubernetes_secrets', {})
    self.kube_env_vars = configuration_dict.get(
        'kubernetes_environment_variables', {})
    self.env_from_configmap_ref = configuration.get(
        self.kubernetes_section, 'env_from_configmap_ref')
    self.env_from_secret_ref = configuration.get(
        self.kubernetes_section, 'env_from_secret_ref')
    self.airflow_home = settings.AIRFLOW_HOME
    self.dags_folder = configuration.get(self.core_section, 'dags_folder')
    self.parallelism = configuration.getint(self.core_section, 'parallelism')
    self.worker_container_repository = configuration.get(
        self.kubernetes_section, 'worker_container_repository')
    self.worker_container_tag = configuration.get(
        self.kubernetes_section, 'worker_container_tag')
    # Full image reference in "<repository>:<tag>" form.
    self.kube_image = '{}:{}'.format(
        self.worker_container_repository, self.worker_container_tag)
    self.kube_image_pull_policy = configuration.get(
        self.kubernetes_section, "worker_container_image_pull_policy")
    self.kube_node_selectors = configuration_dict.get(
        'kubernetes_node_selectors', {})
    self.kube_annotations = configuration_dict.get(
        'kubernetes_annotations', {})
    self.kube_labels = configuration_dict.get('kubernetes_labels', {})
    self.delete_worker_pods = conf.getboolean(
        self.kubernetes_section, 'delete_worker_pods')
    self.worker_pods_creation_batch_size = conf.getint(
        self.kubernetes_section, 'worker_pods_creation_batch_size')
    self.worker_service_account_name = conf.get(
        self.kubernetes_section, 'worker_service_account_name')
    self.image_pull_secrets = conf.get(
        self.kubernetes_section, 'image_pull_secrets')

    # NOTE: user can build the dags into the docker image directly,
    # this will set to True if so
    self.dags_in_image = conf.getboolean(
        self.kubernetes_section, 'dags_in_image')

    # Run as user for pod security context
    self.worker_run_as_user = self._get_security_context_val('run_as_user')
    self.worker_fs_group = self._get_security_context_val('fs_group')

    # NOTE: `git_repo` and `git_branch` must be specified together as a pair
    # The http URL of the git repository to clone from
    self.git_repo = conf.get(self.kubernetes_section, 'git_repo')
    # The branch of the repository to be checked out
    self.git_branch = conf.get(self.kubernetes_section, 'git_branch')
    # Optionally, the directory in the git repository containing the dags
    self.git_subpath = conf.get(self.kubernetes_section, 'git_subpath')
    # Optionally, the root directory for git operations
    self.git_sync_root = conf.get(self.kubernetes_section, 'git_sync_root')
    # Optionally, the name at which to publish the checked-out files under --root
    self.git_sync_dest = conf.get(self.kubernetes_section, 'git_sync_dest')
    # Optionally, if git_dags_folder_mount_point is set the worker will use
    # {git_dags_folder_mount_point}/{git_sync_dest}/{git_subpath} as dags_folder
    self.git_dags_folder_mount_point = conf.get(
        self.kubernetes_section, 'git_dags_folder_mount_point')

    # Optionally a user may supply a (`git_user` AND `git_password`) OR
    # (`git_ssh_key_secret_name` AND `git_ssh_key_secret_key`) for private repositories
    self.git_user = conf.get(self.kubernetes_section, 'git_user')
    self.git_password = conf.get(self.kubernetes_section, 'git_password')
    self.git_ssh_key_secret_name = conf.get(
        self.kubernetes_section, 'git_ssh_key_secret_name')
    self.git_ssh_known_hosts_configmap_name = conf.get(
        self.kubernetes_section, 'git_ssh_known_hosts_configmap_name')

    # NOTE: The user may optionally use a volume claim to mount a PV containing
    # DAGs directly
    self.dags_volume_claim = conf.get(
        self.kubernetes_section, 'dags_volume_claim')

    # This prop may optionally be set for PV Claims and is used to write logs
    self.logs_volume_claim = conf.get(
        self.kubernetes_section, 'logs_volume_claim')

    # This prop may optionally be set for PV Claims and is used to locate DAGs
    # on a SubPath
    self.dags_volume_subpath = conf.get(
        self.kubernetes_section, 'dags_volume_subpath')

    # This prop may optionally be set for PV Claims and is used to locate logs
    # on a SubPath
    self.logs_volume_subpath = conf.get(
        self.kubernetes_section, 'logs_volume_subpath')

    # Optionally, hostPath volume containing DAGs
    self.dags_volume_host = conf.get(
        self.kubernetes_section, 'dags_volume_host')

    # Optionally, write logs to a hostPath Volume
    self.logs_volume_host = conf.get(
        self.kubernetes_section, 'logs_volume_host')

    # Base log folder, read from the core section.
    self.base_log_folder = configuration.get(
        self.core_section, 'base_log_folder')

    # The Kubernetes Namespace in which the Scheduler and Webserver reside. Note
    # that if your
    # cluster has RBAC enabled, your scheduler may need service account permissions to
    # create, watch, get, and delete pods in this namespace.
    self.kube_namespace = conf.get(self.kubernetes_section, 'namespace')
    # The Kubernetes Namespace in which pods will be created by the executor. Note
    # that if your
    # cluster has RBAC enabled, your workers may need service account permissions to
    # interact with cluster components.
    # (Read from the same 'namespace' option as kube_namespace above.)
    self.executor_namespace = conf.get(self.kubernetes_section, 'namespace')
    # Task secrets managed by KubernetesExecutor.
    self.gcp_service_account_keys = conf.get(
        self.kubernetes_section, 'gcp_service_account_keys')

    # If the user is using the git-sync container to clone their repository via git,
    # allow them to specify repository, tag, and pod name for the init container.
    self.git_sync_container_repository = conf.get(
        self.kubernetes_section, 'git_sync_container_repository')
    self.git_sync_container_tag = conf.get(
        self.kubernetes_section, 'git_sync_container_tag')
    self.git_sync_container = '{}:{}'.format(
        self.git_sync_container_repository, self.git_sync_container_tag)
    self.git_sync_init_container_name = conf.get(
        self.kubernetes_section, 'git_sync_init_container_name')
    self.git_sync_run_as_user = self._get_security_context_val(
        'git_sync_run_as_user')

    # The worker pod may optionally have a valid Airflow config loaded via a
    # configmap
    self.airflow_configmap = conf.get(
        self.kubernetes_section, 'airflow_configmap')

    # An empty option value means "not configured" and is stored as None.
    affinity_json = conf.get(self.kubernetes_section, 'affinity')
    if affinity_json:
        self.kube_affinity = json.loads(affinity_json)
    else:
        self.kube_affinity = None

    tolerations_json = conf.get(self.kubernetes_section, 'tolerations')
    if tolerations_json:
        self.kube_tolerations = json.loads(tolerations_json)
    else:
        self.kube_tolerations = None

    kube_client_request_args = conf.get(
        self.kubernetes_section, 'kube_client_request_args')
    if kube_client_request_args:
        self.kube_client_request_args = json.loads(kube_client_request_args)
        # `_request_timeout` is optional: look it up with .get() so request
        # args that omit it no longer raise KeyError here.
        request_timeout = self.kube_client_request_args.get('_request_timeout')
        # JSON has no tuple type, so a non-empty list is converted to a
        # tuple (presumably the form the Kubernetes client expects).
        if request_timeout and isinstance(request_timeout, list):
            self.kube_client_request_args['_request_timeout'] = \
                tuple(request_timeout)
    else:
        self.kube_client_request_args = {}

    self._validate()
def __init__(self):
    """
    Populate the instance with Kubernetes executor settings read from the
    Airflow configuration, then run ``self._validate()``.

    Options come from the sections named by ``self.core_section`` and
    ``self.kubernetes_section``.
    """
    core = self.core_section
    kube = self.kubernetes_section

    configuration_dict = configuration.as_dict(display_sensitive=True)
    self.core_configuration = configuration_dict['core']
    self.kube_secrets = configuration_dict.get('kubernetes_secrets', {})

    self.airflow_home = configuration.get(core, 'airflow_home')
    self.dags_folder = configuration.get(core, 'dags_folder')
    self.parallelism = configuration.getint(core, 'PARALLELISM')

    self.worker_container_repository = configuration.get(
        kube, 'worker_container_repository')
    self.worker_container_tag = configuration.get(kube, 'worker_container_tag')
    self.worker_dags_folder = configuration.get(kube, 'worker_dags_folder')
    # Full image reference in "<repository>:<tag>" form.
    self.kube_image = '{}:{}'.format(
        self.worker_container_repository, self.worker_container_tag)
    self.kube_image_pull_policy = configuration.get(
        kube, "worker_container_image_pull_policy")
    self.kube_node_selectors = configuration_dict.get(
        'kubernetes_node_selectors', {})

    self.delete_worker_pods = conf.getboolean(kube, 'delete_worker_pods')
    self.worker_service_account_name = conf.get(
        kube, 'worker_service_account_name')
    self.image_pull_secrets = conf.get(kube, 'image_pull_secrets')

    # NOTE: `git_repo` and `git_branch` must be specified together as a pair
    # The http URL of the git repository to clone from
    self.git_repo = conf.get(kube, 'git_repo')
    # The branch of the repository to be checked out
    self.git_branch = conf.get(kube, 'git_branch')
    # Optionally, the directory in the git repository containing the dags
    self.git_subpath = conf.get(kube, 'git_subpath')

    # Optionally a user may supply a `git_user` and `git_password` for private
    # repositories
    self.git_user = conf.get(kube, 'git_user')
    self.git_password = conf.get(kube, 'git_password')

    # NOTE: The user may optionally use a volume claim to mount a PV containing
    # DAGs directly
    self.dags_volume_claim = conf.get(kube, 'dags_volume_claim')

    # This prop may optionally be set for PV Claims and is used to write logs
    self.logs_volume_claim = conf.get(kube, 'logs_volume_claim')

    # This prop may optionally be set for PV Claims and is used to locate DAGs
    # on a SubPath
    self.dags_volume_subpath = conf.get(kube, 'dags_volume_subpath')

    # This prop may optionally be set for PV Claims and is used to locate logs
    # on a SubPath
    self.logs_volume_subpath = conf.get(kube, 'logs_volume_subpath')

    # Base log folder, read from the core section.
    self.base_log_folder = configuration.get(core, 'base_log_folder')

    # The Kubernetes Namespace in which the Scheduler and Webserver reside. Note
    # that if your
    # cluster has RBAC enabled, your scheduler may need service account permissions to
    # create, watch, get, and delete pods in this namespace.
    self.kube_namespace = conf.get(kube, 'namespace')
    # The Kubernetes Namespace in which pods will be created by the executor. Note
    # that if your
    # cluster has RBAC enabled, your workers may need service account permissions to
    # interact with cluster components.
    self.executor_namespace = conf.get(kube, 'namespace')
    # Task secrets managed by KubernetesExecutor.
    self.gcp_service_account_keys = conf.get(kube, 'gcp_service_account_keys')

    # If the user is using the git-sync container to clone their repository via git,
    # allow them to specify repository, tag, and pod name for the init container.
    self.git_sync_container_repository = conf.get(
        kube, 'git_sync_container_repository')
    self.git_sync_container_tag = conf.get(kube, 'git_sync_container_tag')
    self.git_sync_container = '{}:{}'.format(
        self.git_sync_container_repository, self.git_sync_container_tag)
    self.git_sync_init_container_name = conf.get(
        kube, 'git_sync_init_container_name')

    # The worker pod may optionally have a valid Airflow config loaded via a
    # configmap
    self.airflow_configmap = conf.get(kube, 'airflow_configmap')

    self._validate()
def __init__(self, local_task_job):
    """
    Build the command line that runs the job's task instance, prefixing it
    with ``sudo`` when the task has to execute as a different user.

    :param local_task_job: The local task job associated with running the
        associated task instance.
    :type local_task_job: airflow.jobs.LocalTaskJob
    """
    task_instance = local_task_job.task_instance
    # Pass task instance context into log handlers to setup the logger.
    super(BaseTaskRunner, self).__init__(task_instance)
    self._task_instance = task_instance

    # A user set on the task instance wins; otherwise fall back to the
    # configured default impersonation user (None when that lookup fails).
    impersonated_user = task_instance.run_as_user
    if not impersonated_user:
        try:
            impersonated_user = conf.get('core', 'default_impersonation')
        except conf.AirflowConfigException:
            impersonated_user = None
    self.run_as_user = impersonated_user

    sudo_prefix = []
    cfg_path = None

    # Add sudo commands to change user if we need to. Needed to handle SubDagOperator
    # case using a SequentialExecutor.
    if self.run_as_user and self.run_as_user != getpass.getuser():
        self.log.debug("Planning to run as the %s user", self.run_as_user)
        full_cfg = conf.as_dict(display_sensitive=True)
        # we should probably generalize this list of sections ('hive' included)
        copied_sections = ('core', 'smtp', 'scheduler', 'webserver', 'hive')
        cfg_subset = {
            section: full_cfg.get(section, {}) for section in copied_sections
        }
        cfg_fd, cfg_path = mkstemp()

        # Give ownership of file to user; only they can read and write
        for sudo_cmd in (
            ['sudo', 'chown', self.run_as_user, cfg_path],
            ['sudo', 'chmod', '600', cfg_path],
        ):
            subprocess.call(sudo_cmd, close_fds=True)

        with os.fdopen(cfg_fd, 'w') as cfg_file:
            json.dump(cfg_subset, cfg_file)

        # propagate PYTHONPATH environment variable
        pythonpath_value = os.environ.get(PYTHONPATH_VAR, '')
        sudo_prefix = ['sudo', '-H', '-u', self.run_as_user]
        if pythonpath_value:
            sudo_prefix.append('{}={}'.format(PYTHONPATH_VAR, pythonpath_value))

    self._cfg_path = cfg_path
    task_command = task_instance.command_as_list(
        raw=True,
        pickle_id=local_task_job.pickle_id,
        mark_success=local_task_job.mark_success,
        job_id=local_task_job.id,
        pool=local_task_job.pool,
        cfg_path=cfg_path,
    )
    self._command = sudo_prefix + task_command
    # No child process exists yet.
    self.process = None
def start(self):
    """
    Build the Mesos framework description from the ``mesos`` configuration
    section, create the scheduler driver and start it.

    Creates ``self.task_queue`` / ``self.result_queue``, stores the default
    executor config in ``self.default_mesos_config`` and the started driver
    in ``self.mesos_driver``.

    :raises AirflowException: if no Mesos master is configured, or if
        authentication is enabled without a principal or a secret.
    """
    self.task_queue = Queue()
    self.result_queue = Queue()
    framework = mesos_pb2.FrameworkInfo()
    framework.user = ''

    master = configuration.conf.get('mesos', 'MASTER')
    if not master:
        self.log.error("Expecting mesos master URL for mesos executor")
        raise AirflowException("mesos.master not provided for mesos executor")

    framework.name = get_framework_name()

    task_cpu = configuration.conf.getint('mesos', 'TASK_CPU')
    task_mem = configuration.conf.getint('mesos', 'TASK_MEMORY')

    docker_image = None
    # Configuration errors when DOCKER_IMAGE_SLAVE is not present in config
    if 'DOCKER_IMAGE_SLAVE' in configuration.as_dict()['mesos']:
        docker_image = configuration.get('mesos', 'DOCKER_IMAGE_SLAVE')

    self.default_mesos_config = MesosExecutorConfig(
        image=docker_image, request_memory=task_mem, request_cpu=task_cpu)

    if configuration.conf.getboolean('mesos', 'CHECKPOINT'):
        framework.checkpoint = True

        if configuration.conf.get('mesos', 'FAILOVER_TIMEOUT'):
            # Import here to work around a circular import error
            from airflow.models.connection import Connection

            # Query the database to get the ID of the Mesos Framework, if available.
            conn_id = FRAMEWORK_CONNID_PREFIX + framework.name
            session = Session()
            try:
                connection = session.query(Connection).filter_by(
                    conn_id=conn_id).first()
                if connection is not None:
                    # Set the Framework ID to let the scheduler reconnect
                    # with running tasks.
                    framework.id.value = connection.extra
            finally:
                # The session was previously never closed; release it once
                # the lookup is done, even if the query fails.
                session.close()

            framework.failover_timeout = configuration.conf.getint(
                'mesos', 'FAILOVER_TIMEOUT'
            )
    else:
        framework.checkpoint = False

    self.log.info(
        'MesosFramework master : %s, name : %s, cpu : %s, mem : %s, checkpoint : %s',
        master, framework.name,
        str(task_cpu), str(task_mem), str(framework.checkpoint)
    )

    implicit_acknowledgements = 1
    scheduler = AirflowMesosScheduler(self.task_queue, self.result_queue)

    if configuration.conf.getboolean('mesos', 'AUTHENTICATE'):
        principal = configuration.conf.get('mesos', 'DEFAULT_PRINCIPAL')
        if not principal:
            self.log.error("Expecting authentication principal in the environment")
            raise AirflowException(
                "mesos.default_principal not provided in authenticated mode")
        secret = configuration.conf.get('mesos', 'DEFAULT_SECRET')
        if not secret:
            self.log.error("Expecting authentication secret in the environment")
            raise AirflowException(
                "mesos.default_secret not provided in authenticated mode")

        credential = mesos_pb2.Credential()
        credential.principal = principal
        credential.secret = secret

        framework.principal = credential.principal

        driver = mesos.native.MesosSchedulerDriver(
            scheduler,
            framework,
            master,
            implicit_acknowledgements,
            credential)
    else:
        framework.principal = 'Airflow'
        driver = mesos.native.MesosSchedulerDriver(
            scheduler,
            framework,
            master,
            implicit_acknowledgements)

    self.mesos_driver = driver
    self.mesos_driver.start()
def __init__(self):
    """
    Populate the instance with Kubernetes executor settings read from the
    Airflow configuration, then run ``self._validate()``.

    Options come from the sections named by ``self.core_section`` and
    ``self.kubernetes_section``; the ``affinity`` and ``tolerations`` options
    are parsed as JSON when non-empty and stored as ``None`` otherwise.
    """
    core = self.core_section
    kube = self.kubernetes_section

    configuration_dict = configuration.as_dict(display_sensitive=True)
    self.core_configuration = configuration_dict['core']
    self.kube_secrets = configuration_dict.get('kubernetes_secrets', {})
    self.kube_env_vars = configuration_dict.get(
        'kubernetes_environment_variables', {})

    self.airflow_home = configuration.get(core, 'airflow_home')
    self.dags_folder = configuration.get(core, 'dags_folder')
    self.parallelism = configuration.getint(core, 'PARALLELISM')

    self.worker_container_repository = configuration.get(
        kube, 'worker_container_repository')
    self.worker_container_tag = configuration.get(kube, 'worker_container_tag')
    # Full image reference in "<repository>:<tag>" form.
    self.kube_image = '{}:{}'.format(
        self.worker_container_repository, self.worker_container_tag)
    self.kube_image_pull_policy = configuration.get(
        kube, "worker_container_image_pull_policy")
    self.kube_node_selectors = configuration_dict.get(
        'kubernetes_node_selectors', {})
    self.kube_annotations = configuration_dict.get(
        'kubernetes_annotations', {})

    self.delete_worker_pods = conf.getboolean(kube, 'delete_worker_pods')
    self.worker_pods_creation_batch_size = conf.getint(
        kube, 'worker_pods_creation_batch_size')
    self.worker_service_account_name = conf.get(
        kube, 'worker_service_account_name')
    self.image_pull_secrets = conf.get(kube, 'image_pull_secrets')

    # NOTE: user can build the dags into the docker image directly,
    # this will set to True if so
    self.dags_in_image = conf.getboolean(kube, 'dags_in_image')

    # NOTE: `git_repo` and `git_branch` must be specified together as a pair
    # The http URL of the git repository to clone from
    self.git_repo = conf.get(kube, 'git_repo')
    # The branch of the repository to be checked out
    self.git_branch = conf.get(kube, 'git_branch')
    # Optionally, the directory in the git repository containing the dags
    self.git_subpath = conf.get(kube, 'git_subpath')
    # Optionally, the root directory for git operations
    self.git_sync_root = conf.get(kube, 'git_sync_root')
    # Optionally, the name at which to publish the checked-out files under --root
    self.git_sync_dest = conf.get(kube, 'git_sync_dest')
    # Optionally, if git_dags_folder_mount_point is set the worker will use
    # {git_dags_folder_mount_point}/{git_sync_dest}/{git_subpath} as dags_folder
    self.git_dags_folder_mount_point = conf.get(
        kube, 'git_dags_folder_mount_point')

    # Optionally a user may supply a (`git_user` AND `git_password`) OR
    # (`git_ssh_key_secret_name` AND `git_ssh_key_secret_key`) for private repositories
    self.git_user = conf.get(kube, 'git_user')
    self.git_password = conf.get(kube, 'git_password')
    self.git_ssh_key_secret_name = conf.get(kube, 'git_ssh_key_secret_name')
    self.git_ssh_known_hosts_configmap_name = conf.get(
        kube, 'git_ssh_known_hosts_configmap_name')

    # NOTE: The user may optionally use a volume claim to mount a PV containing
    # DAGs directly
    self.dags_volume_claim = conf.get(kube, 'dags_volume_claim')

    # This prop may optionally be set for PV Claims and is used to write logs
    self.logs_volume_claim = conf.get(kube, 'logs_volume_claim')

    # This prop may optionally be set for PV Claims and is used to locate DAGs
    # on a SubPath
    self.dags_volume_subpath = conf.get(kube, 'dags_volume_subpath')

    # This prop may optionally be set for PV Claims and is used to locate logs
    # on a SubPath
    self.logs_volume_subpath = conf.get(kube, 'logs_volume_subpath')

    # Optionally, hostPath volume containing DAGs
    self.dags_volume_host = conf.get(kube, 'dags_volume_host')

    # Optionally, write logs to a hostPath Volume
    self.logs_volume_host = conf.get(kube, 'logs_volume_host')

    # Base log folder, read from the core section.
    self.base_log_folder = configuration.get(core, 'base_log_folder')

    # The Kubernetes Namespace in which the Scheduler and Webserver reside. Note
    # that if your
    # cluster has RBAC enabled, your scheduler may need service account permissions to
    # create, watch, get, and delete pods in this namespace.
    self.kube_namespace = conf.get(kube, 'namespace')
    # The Kubernetes Namespace in which pods will be created by the executor. Note
    # that if your
    # cluster has RBAC enabled, your workers may need service account permissions to
    # interact with cluster components.
    self.executor_namespace = conf.get(kube, 'namespace')
    # Task secrets managed by KubernetesExecutor.
    self.gcp_service_account_keys = conf.get(kube, 'gcp_service_account_keys')

    # If the user is using the git-sync container to clone their repository via git,
    # allow them to specify repository, tag, and pod name for the init container.
    self.git_sync_container_repository = conf.get(
        kube, 'git_sync_container_repository')
    self.git_sync_container_tag = conf.get(kube, 'git_sync_container_tag')
    self.git_sync_container = '{}:{}'.format(
        self.git_sync_container_repository, self.git_sync_container_tag)
    self.git_sync_init_container_name = conf.get(
        kube, 'git_sync_init_container_name')

    # The worker pod may optionally have a valid Airflow config loaded via a
    # configmap
    self.airflow_configmap = conf.get(kube, 'airflow_configmap')

    # An empty option value means "not configured" and is stored as None.
    affinity_json = conf.get(kube, 'affinity')
    self.kube_affinity = json.loads(affinity_json) if affinity_json else None

    tolerations_json = conf.get(kube, 'tolerations')
    self.kube_tolerations = (
        json.loads(tolerations_json) if tolerations_json else None
    )

    self._validate()