import logging
import time

from docker import APIClient, errors


class Docker:
    def __init__(self, base_url=None):
        self.handle = None
        self.connect_docker_daemon(base_url)

    def connect_docker_daemon(self, base_url=None):
        """
        Connect to a local/remote docker host daemon.
        :return: Return the docker operation handle for the local host
        """
        if base_url is None:
            base_url = 'unix:///var/run/docker.sock'
        try:
            self.handle = APIClient(base_url=base_url)
        except errors.APIError as e:
            print(e)
            logging.error(str(e))

    def login_registry(self, login_user, login_pass, registry_srv=None):
        """
        Log into a docker registry server.
        :param login_user: str: user name for the registry login
        :param login_pass: str: password for the registry login
        :param registry_srv: str: URI of the registry server
        :return: result of the login attempt for that registry
        """
        login_status = self.handle.login(username=login_user,
                                         password=login_pass,
                                         registry=registry_srv)
        return login_status

    def get_docker_info(self):
        """
        Get docker information.
        :return: DICT string
        """
        return self.handle.info()

    def get_image_list(self):
        """
        Get the list of all existing images.
        :return: DICT string for all of the images
        """
        return self.handle.images()

    def public_image_search(self, keyword):
        """
        Search for an image on the public/logged-in registry.
        :return: DICT string of search results
        """
        return self.handle.search(keyword)

    # TODO: get docker events implementation
    # def get_docker_events(self, since, until, filters, decode):
    #     """
    #     get running docker service events
    #     :return: DICT of service events
    #     """
    #     return self.handle.events()

    def get_disk_utils(self):
        """
        Get disk utilization for docker images.
        :return: DICT of disk utilization
        """
        return self.handle.df()

    def pull_image(self, name, tag=None, repo=None):
        """
        Pull an image from a repository by repo/name:tag.
        :param repo: String of repository (registry) name
        :param name: String of image name
        :param tag: String of tag name
        :return: DICT response
        """
        if tag is None:
            tag = "latest"
        try:
            if repo is None:
                return self.handle.pull(name, tag=tag)
            else:
                return self.handle.pull(repo + "/" + name, tag)
        except errors.NotFound:
            return {'message': 'Image Not Found', 'status': 'failed'}

    def inspect_image(self, image_id):
        """
        Inspect an image.
        :param image_id: String of docker image ID
        :return: DICT of inspection results
        """
        # TODO: will support image_id and "repo/name:tag" later
        return self.handle.inspect_image(image_id)

    def remove_image(self, image_id, force_remove=False):
        """
        Remove the specified image by image ID.
        :param image_id: String of docker image ID
        :param force_remove: True or False
        :return: DICT of result
        """
        return self.handle.remove_image(image_id, force=force_remove)

    def tag_image(self, image, repository, force=False, tag=None):
        """
        Tag an image into a new repository.
        :param image: string of the image ID to be tagged
        :param repository: string of the new repository the image will be tagged into
        :param tag: string of the new tag
        :param force: True or False
        :return: Boolean result of the tag operation
        """
        return self.handle.tag(image, repository, tag, force=force)

    def push_image(self, repository, tag=None, stream=False, auth_config=None):
        """
        Push an image to a repository.
        :param repository: String for the image to be pushed. Image ID or Repo/Name:tag
        :param tag: Tag for the pushed image; keep None if the tag should not change.
        :param stream: False by default; when True, stream the output as a blocking generator
        :param auth_config: override the credentials obtained from login()
        :return: Result String, or Generator when stream=True
        """
        if auth_config is None:
            return self.handle.push(repository, tag, stream=stream)
        else:
            return self.handle.push(repository, tag, stream=stream, auth_config=auth_config)

    def save_image(self, image_name, save_path, tarball_name=None):
        """
        Save the specified image to a tarball.
        :param image_name: string of image ID or "repository/image:tag"
        :param save_path: string of path
        :param tarball_name: string of tarball name; defaults to "<image_name>_<timestamp>.tar"
        :return: return status
        """
        if tarball_name is None:
            tarball_name = image_name + "_" + str(time.time()).split('.')[0] + ".tar"
        try:
            img = self.handle.get_image(image_name)
            # image chunks are bytes, so the tarball has to be opened in binary mode
            with open(save_path + '/' + tarball_name, 'wb') as f:
                for chunk in img:
                    f.write(chunk)
            return {
                "message": "Image {} saved at {}".format(image_name,
                                                         save_path + "/" + tarball_name),
                "status": "succeed"
            }
        except Exception as e:
            return {"message": str(e), "status": "failed"}

    def load_image(self, tarball_name, repository, tag=None, changes=None):
        """
        Load an image from a local path or URL tarball.
        :param tarball_name: string of the full path of the tarball image
        :param repository: string of the full image name to assign, 'repo/name'
        :param tag: string tag for the imported image; if None, the original image tag is kept
        :return: return
        """
        if repository is None or str(repository).strip() == "":
            repository = None
        if tag is None or str(tag).strip() == "":
            tag = None
        if changes is None or str(changes).strip() == "":
            changes = None
        return self.handle.import_image(tarball_name, repository=repository,
                                        tag=tag, changes=changes)

    def get_containers(self, all=False):
        """
        Get the list of containers.
        :param all: False by default, which only shows running containers;
                    True also includes stopped/exited ones.
        :return: return the dict of containers.
        """
        # TODO: 'filters' support will be added later.
        return self.handle.containers(all=all)

    def new_container(self, args):
        """
        Create a container according to the passed-in parameters.
        :param args: parameters dict
        :return: return the new container ID
        """
        result = self.handle.create_container(
            image=args.get('image'),
            command=args.get('command'),
            hostname=args.get('hostname'),
            user=args.get('user'),
            detach=False if args.get('detach') is None else args.get('detach'),
            stdin_open=False if args.get('stdin_open') is None else args.get('stdin_open'),
            tty=False if args.get('tty') is None else args.get('tty'),
            ports=args.get('ports'),
            environment=args.get('environment'),
            volumes=args.get('volumes'),
            network_disabled=False if args.get('network_disabled') is None else args.get('network_disabled'),
            name=args.get('name'),
            entrypoint=args.get('entrypoint'),
            working_dir=args.get('working_dir'),
            domainname=args.get('domainname'),
            host_config=args.get('host_config'),
            mac_address=args.get('mac_address'),
            labels=args.get('labels'),
            stop_signal=args.get('stop_signal'),
            networking_config=args.get('networking_config'),
            healthcheck=args.get('healthcheck'),
            stop_timeout=args.get('stop_timeout'),
            runtime=args.get('runtime'))
        return result

    def gen_host_conf(self, args):
        host_config = self.handle.create_host_config(
            # automatically remove the container after it exits BOOL
            auto_remove=False if args.get('auto_remove') is None else args.get('auto_remove'),
            # volume binds DICT or LIST
            binds=args.get('binds'),
            # Block IO weight, relative device weight in the form of:
            # [{"Path": "device_path", "Weight": weight}] LIST
            blkio_weight_device=args.get('blkio_weight_device'),
            # Block IO weight, relative weight. Accepts a weight value between 10 and 1000 INT
            blkio_weight=args.get('blkio_weight'),
            # Add kernel capabilities, e.g. ['SYS_ADMIN', 'MKNOD'] STR or LIST
            cap_add=args.get('cap_add'),
            # Drop kernel capabilities STR or LIST
            cap_drop=args.get('cap_drop'),
            # The length of a CPU period in microseconds INT
            cpu_period=args.get('cpu_period'),
            # Microseconds of CPU time that the container can get in a CPU period INT
            cpu_quota=args.get('cpu_quota'),
            # CPU shares (relative weight) INT
            cpu_shares=args.get('cpu_shares'),
            # CPUs in which to allow execution (0-3, 0,1) STR
            cpuset_cpus=args.get('cpuset_cpus'),
            # Memory nodes (MEMs) in which to allow execution (0-3, 0,1).
            # Only effective on NUMA systems STR
            cpuset_mems=args.get('cpuset_mems'),
            # A list of cgroup rules to apply to the container LIST
            device_cgroup_rules=args.get('device_cgroup_rules'),
            # Limit read rate (bytes per sec) from a device in the form of:
            # [{"Path": "device_path", "Rate": rate}]
            device_read_bps=args.get('device_read_bps'),
            # Limit read rate (IOPS) from a device
            device_read_iops=args.get('device_read_iops'),
            # Limit write rate (bytes per sec) to a device
            device_write_bps=args.get('device_write_bps'),
            # Limit write rate (IOPS) to a device
            device_write_iops=args.get('device_write_iops'),
            # Expose host devices to the container, as a list of strings in the form
            # <path_on_host>:<path_in_container>:<cgroup_permissions> LIST
            # e.g. /dev/sda:/dev/xvda:rwm allows the container read-write access to the
            # host's /dev/sda via a node named /dev/xvda inside the container
            devices=args.get('devices'),
            # Set custom DNS servers LIST
            dns=args.get('dns'),
            # Additional options to be added to the container's resolv.conf file LIST
            dns_opt=args.get('dns_opt'),
            # DNS search domains LIST
            dns_search=args.get('dns_search'),
            # Additional hostnames to resolve inside the container,
            # as a mapping of hostname to IP address DICT
            extra_hosts=args.get('extra_hosts'),
            # List of additional group names and/or IDs that the container process will run as LIST
            group_add=args.get('group_add'),
            # Run an init inside the container that forwards signals and reaps processes BOOL
            init=False if args.get('init') is None else args.get('init'),
            # Path to the docker-init binary
            init_path=args.get('init_path'),
            # Set the IPC mode for the container STR
            ipc_mode=args.get('ipc_mode'),
            # Isolation technology to use. Default is None
            isolation=args.get('isolation'),
            # Either a dictionary mapping name to alias,
            # or a list of (name, alias) tuples DICT or LIST of TUPLES
            links=args.get('links'),
            # Logging configuration, as a dictionary with keys:
            #   type: the logging driver name
            #   config: a dictionary of configuration for the logging driver
            log_config=args.get('log_config'),
            # LXC config DICT
            lxc_conf=args.get('lxc_conf'),
            # Memory limit. Accepts float values, which represent the memory limit of the created
            # container in bytes, or a string with a unit identification char (10000b, 10000K,
            # 128m, 1g). If a string is specified without a unit character, bytes are assumed
            # FLOAT or STR
            mem_limit=args.get('mem_limit'),
            # Tune a container's memory swappiness behavior. Accepts a number between 0 and 100 INT
            mem_swappiness=args.get('mem_swappiness'),
            # Maximum amount of memory + swap a container is allowed to consume STR or INT
            memswap_limit=args.get('memswap_limit'),
            # Specification for mounts to be added to the container.
            # More powerful alternative to binds.
            # Each item in the list is expected to be a docker.types.Mount object LIST
            mounts=args.get('mounts'),
            # Network mode: STR
            #   bridge: create a new network stack for the container on the bridge network
            #   none: no networking for this container
            #   container:<name|id>: reuse another container's network stack
            #   host: use the host network stack
            network_mode=args.get('network_mode'),
            # Whether to disable the OOM killer BOOL
            oom_kill_disable=True if args.get('oom_kill_disable') is None else args.get('oom_kill_disable'),
            # An integer value containing the score given to the container
            # in order to tune OOM killer preference INT
            oom_score_adj=args.get('oom_score_adj'),
            # If set to 'host', use the host PID namespace inside the container STR
            pid_mode='host' if args.get('pid_mode') is None else args.get('pid_mode'),
            # Tune a container's pids limit. Set -1 for unlimited INT
            pids_limit=-1 if args.get('pids_limit') is None else args.get('pids_limit'),
            # Port bindings between host and container
            port_bindings=args.get('port_bindings'),
            # Give extended privileges to this container BOOL
            privileged=False if args.get('privileged') is None else args.get('privileged'),
            # Publish all ports to the host BOOL
            publish_all_ports=False if args.get('publish_all_ports') is None else args.get('publish_all_ports'),
            # Mount the container's root filesystem as read only BOOL
            read_only=False if args.get('read_only') is None else args.get('read_only'),
            # Restart policy DICT
            #   Name: one of 'on-failure' or 'always'
            #   MaximumRetryCount: number of times to restart the container on failure
            restart_policy=args.get('restart_policy'),
            # A list of string values to customize labels for MLS systems such as SELinux LIST
            security_opt=args.get('security_opt'),
            # Size of /dev/shm (e.g. 1G) STR or INT
            shm_size=args.get('shm_size'),
            # Storage driver options per container as a key-value mapping DICT
            storage_opt=args.get('storage_opt'),
            # Kernel parameters to set in the container DICT
            sysctls=args.get('sysctls'),
            # Temporary filesystems to mount, as a dictionary mapping a path inside the
            # container to options for that path,
            # e.g. {'/mnt/vol1': '', '/mnt/vol2': 'size=3G,uid=1000'}
            tmpfs=args.get('tmpfs'),
            # Ulimits to set inside the container, as a list of dicts
            ulimits=args.get('ulimits'),
            # Sets the user namespace mode for the container when the user namespace
            # remapping option is enabled. Supported values are: host STR
            userns_mode=args.get('userns_mode'),
            # List of container names or IDs to get volumes from LIST
            volumes_from=args.get('volumes_from'),
            # Runtime to use with this container
            runtime=args.get('runtime'))
        return host_config

    def gen_net_conf(self, args):
        """
        Generate the networking config for creating a container.
        :param args: parameters for creating the network
        :return: dictionary of a networking configuration
        """
        # Ref: http://docker-py.readthedocs.io/en/stable/api.html#docker.api.container.ContainerApiMixin.create_networking_config
        network_dict = self.handle.create_networking_config(
            {args['network_name']: self.gen_ep_conf(args)})
        return network_dict

    def gen_ep_conf(self, args):
        """
        Create an endpoint parameters dictionary for create_networking_config.
        :param args: pass-in parameters with the endpoint information
        :return: Endpoint dictionary
        """
        # Ref: http://docker-py.readthedocs.io/en/stable/api.html#docker.api.container.ContainerApiMixin.create_endpoint_config
        endpoint_dict = self.handle.create_endpoint_config(
            aliases=args['aliases'],
            links=args['links'],
            ipv4_address=args['ipv4_address'],
            ipv6_address=args['ipv6_address'],
            link_local_ips=args['link_local_ips'])
        return endpoint_dict

    def start_container(self, container_id):
        """
        Start a created container by ID.
        :param container_id: string of container ID or Name
        :return: dict of status
        """
        return self.handle.start(container_id)

    def stop_container(self, container_id):
        """
        Stop a running container by ID or Name.
        :param container_id: String of container ID or Name
        :return: Dict of return status
        """
        return self.handle.stop(container_id)

    def restart_container(self, container_id):
        """
        Restart a container by container ID or Name.
        :param container_id: string of container ID or Name
        :return: dict of status
        """
        return self.handle.restart(container_id)

    def remove_container(self, container_id):
        """
        Remove a stopped container by ID or Name.
        :param container_id: String of container ID or Name
        :return: DICT of status
        """
        return self.handle.remove_container(container_id)

    def list_mapping_ports(self, container_id):
        """
        Show all of the host -> container port mappings.
        :param container_id: String of container Name or ID
        :return: dict of the port mapping table
        """
        return self.handle.port(container_id)

    def commit_to_image(self, args):
        """
        Commit a changed container to an image.
        :param args: args['container_id']: container ID or Name
        :return: dict of status
        """
        return self.handle.commit(container=args.get('container_id'),
                                  repository=args.get('repo_name'),
                                  tag=args.get('tag_name'),
                                  message=args.get('message'),
                                  author=args.get('author'),
                                  changes=args.get('changes'),
                                  conf=args.get('conf'))

    def pull_container_log(self, args):
        """
        Pull the logs of a running container.
        :param args: args['container_id']: container ID or Name
        :return: return a list of log lines
        """
        return self.handle.logs(args['container_id']).decode('utf-8').split('\n')

    def attach_container(self, container_id):
        # The 'attach' API also allows multiple parameters; this version only implements one.
        # https://docker-py.readthedocs.io/en/stable/containers.html?highlight=exec#docker.models.containers.Container.attach
        return self.handle.attach(container_id)

    def exec_container(self, args):
        # More parameters may be exposed later; in this version only the 'cmd' parameter is
        # passed through, all other parameters keep their default values.
        # https://docker-py.readthedocs.io/en/stable/containers.html?highlight=exec#docker.models.containers.Container.exec_run
        return self.handle.exec_run(args['cmd'])

    def container_top(self, args):
        return self.handle.top(args['container_id'])

    def container_res_usage(self, args):
        # The 'stats' method returns a generator; use next(gen) to get data
        return self.handle.stats(args['container_id'])

    def container_info(self, args):
        return self.handle.inspect_container(args['container_id'])
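
# Example usage (a minimal sketch, not part of the class above): exercises the wrapper against a
# local Docker daemon. The image name, container name, and command below are illustrative only.
if __name__ == '__main__':
    docker_client = Docker()                      # defaults to unix:///var/run/docker.sock
    print(docker_client.get_docker_info().get('ServerVersion'))

    docker_client.pull_image('alpine')            # pulls alpine:latest
    container = docker_client.new_container({'image': 'alpine:latest',
                                             'command': 'echo hello-from-docker',
                                             'name': 'docker_wrapper_demo'})
    docker_client.start_container(container['Id'])
    time.sleep(1)                                 # give the short-lived container a moment to finish
    print(docker_client.pull_container_log({'container_id': container['Id']}))
    docker_client.stop_container(container['Id'])
    docker_client.remove_container(container['Id'])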
# Module-level imports for the operator below (assumed to follow the Airflow 1.10-style layout).
import ast
import json
from typing import Dict, Iterable, List, Union

from docker import APIClient, tls

from airflow.exceptions import AirflowException
from airflow.hooks.docker_hook import DockerHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.utils.file import TemporaryDirectory


class DockerOperator(BaseOperator):
    """
    Execute a command inside a docker container.

    A temporary directory is created on the host and mounted into a container to allow storing
    files that together exceed the default disk size of 10GB in a container. The path to the
    mounted directory can be accessed via the environment variable ``AIRFLOW_TMP_DIR``.

    If a login to a private registry is required prior to pulling the image, a Docker connection
    needs to be configured in Airflow and the connection ID be provided with the parameter
    ``docker_conn_id``.

    :param image: Docker image from which to create the container.
        If image tag is omitted, "latest" will be used.
    :type image: str
    :param api_version: Remote API version. Set to ``auto`` to automatically
        detect the server's version.
    :type api_version: str
    :param command: Command to be run in the container. (templated)
    :type command: str or list
    :param cpus: Number of CPUs to assign to the container.
        This value gets multiplied with 1024. See
        https://docs.docker.com/engine/reference/run/#cpu-share-constraint
    :type cpus: float
    :param docker_url: URL of the host running the docker daemon.
        Default is unix://var/run/docker.sock
    :type docker_url: str
    :param environment: Environment variables to set in the container. (templated)
    :type environment: dict
    :param force_pull: Pull the docker image on every run. Default is False.
    :type force_pull: bool
    :param mem_limit: Maximum amount of memory the container can use.
        Either a float value, which represents the limit in bytes,
        or a string like ``128m`` or ``1g``.
    :type mem_limit: float or str
    :param host_tmp_dir: Specify the location of the temporary directory on the host which will
        be mapped to tmp_dir. If not provided, defaults to the standard system temp directory.
    :type host_tmp_dir: str
    :param network_mode: Network mode for the container.
    :type network_mode: str
    :param tls_ca_cert: Path to a PEM-encoded certificate authority
        to secure the docker connection.
    :type tls_ca_cert: str
    :param tls_client_cert: Path to the PEM-encoded certificate
        used to authenticate the docker client.
    :type tls_client_cert: str
    :param tls_client_key: Path to the PEM-encoded key used to authenticate the docker client.
    :type tls_client_key: str
    :param tls_hostname: Hostname to match against
        the docker server certificate, or False to disable the check.
    :type tls_hostname: str or bool
    :param tls_ssl_version: Version of SSL to use when communicating with the docker daemon.
    :type tls_ssl_version: str
    :param tmp_dir: Mount point inside the container to a temporary directory created on the host
        by the operator. The path is also made available via the environment variable
        ``AIRFLOW_TMP_DIR`` inside the container.
    :type tmp_dir: str
    :param user: Default user inside the docker container.
    :type user: int or str
    :param volumes: List of volumes to mount into the container, e.g.
        ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
    :type volumes: list
    :param working_dir: Working directory to set on the container
        (equivalent to the -w switch of the docker client)
    :type working_dir: str
    :param xcom_all: Push all the stdout or just the last line.
        The default is False (last line).
    :type xcom_all: bool
    :param docker_conn_id: ID of the Airflow connection to use
    :type docker_conn_id: str
    :param dns: Docker custom DNS servers
    :type dns: list[str]
    :param dns_search: Docker custom DNS search domains
    :type dns_search: list[str]
    :param auto_remove: Auto-removal of the container on daemon side when the
        container's process exits. The default is False.
    :type auto_remove: bool
    :param shm_size: Size of ``/dev/shm`` in bytes. The size must be
        greater than 0. If omitted, uses the system default.
    :type shm_size: int
    """
    template_fields = ('command', 'environment',)
    template_ext = ('.sh', '.bash',)

    @apply_defaults
    def __init__(
            self,
            image: str,
            api_version: str = None,
            command: Union[str, List[str]] = None,
            cpus: float = 1.0,
            docker_url: str = 'unix://var/run/docker.sock',
            environment: Dict = None,
            force_pull: bool = False,
            mem_limit: Union[float, str] = None,
            host_tmp_dir: str = None,
            network_mode: str = None,
            tls_ca_cert: str = None,
            tls_client_cert: str = None,
            tls_client_key: str = None,
            tls_hostname: Union[str, bool] = None,
            tls_ssl_version: str = None,
            tmp_dir: str = '/tmp/airflow',
            user: Union[str, int] = None,
            volumes: Iterable[str] = None,
            working_dir: str = None,
            xcom_all: bool = False,
            docker_conn_id: str = None,
            dns: List[str] = None,
            dns_search: List[str] = None,
            auto_remove: bool = False,
            shm_size: int = None,
            *args,
            **kwargs) -> None:

        super().__init__(*args, **kwargs)
        self.api_version = api_version
        self.auto_remove = auto_remove
        self.command = command
        self.cpus = cpus
        self.dns = dns
        self.dns_search = dns_search
        self.docker_url = docker_url
        self.environment = environment or {}
        self.force_pull = force_pull
        self.image = image
        self.mem_limit = mem_limit
        self.host_tmp_dir = host_tmp_dir
        self.network_mode = network_mode
        self.tls_ca_cert = tls_ca_cert
        self.tls_client_cert = tls_client_cert
        self.tls_client_key = tls_client_key
        self.tls_hostname = tls_hostname
        self.tls_ssl_version = tls_ssl_version
        self.tmp_dir = tmp_dir
        self.user = user
        self.volumes = volumes or []
        self.working_dir = working_dir
        self.xcom_all = xcom_all
        self.docker_conn_id = docker_conn_id
        self.shm_size = shm_size
        if kwargs.get('xcom_push') is not None:
            raise AirflowException(
                "'xcom_push' was deprecated, use 'BaseOperator.do_xcom_push' instead")

        self.cli = None
        self.container = None

    def get_hook(self):
        return DockerHook(
            docker_conn_id=self.docker_conn_id,
            base_url=self.docker_url,
            version=self.api_version,
            tls=self.__get_tls_config())

    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config)

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = json.loads(l.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir)
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.attach(container=self.container['Id'],
                                        stdout=True,
                                        stderr=True,
                                        stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')

    def get_command(self):
        if isinstance(self.command, str) and self.command.strip().find('[') == 0:
            commands = ast.literal_eval(self.command)
        else:
            commands = self.command
        return commands

    def on_kill(self):
        if self.cli is not None:
            self.log.info('Stopping docker container')
            self.cli.stop(self.container['Id'])

    def __get_tls_config(self):
        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(
                ca_cert=self.tls_ca_cert,
                client_cert=(self.tls_client_cert, self.tls_client_key),
                verify=True,
                ssl_version=self.tls_ssl_version,
                assert_hostname=self.tls_hostname)
            self.docker_url = self.docker_url.replace('tcp://', 'https://')
        return tls_config
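
# Example usage (a minimal sketch, not part of the operator module): a DAG with a single
# DockerOperator task. The dag_id, image, command, and schedule below are illustrative only;
# in practice this definition would live in its own file under the dags/ folder.
from datetime import datetime

from airflow import DAG

with DAG(dag_id='docker_operator_demo',
         start_date=datetime(2019, 1, 1),
         schedule_interval=None) as dag:
    print_date = DockerOperator(
        task_id='print_date',
        image='alpine:latest',
        command='date',                            # templated; a list such as ['date'] also works
        docker_url='unix://var/run/docker.sock',   # local daemon; use tcp://host:2376 plus TLS for remote
        auto_remove=True,                          # remove the container when its process exits
        do_xcom_push=False)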