class TestSystem(unittest.TestCase):
    podman = None  # initialized podman configuration for tests
    service = None  # podman service instance
    topContainerId = ""

    def setUp(self):
        super().setUp()
        self.client = APIClient(base_url="tcp://127.0.0.1:8080", timeout=15)

        TestSystem.podman.restore_image_from_cache(self.client)
        TestSystem.topContainerId = common.run_top_container(self.client)

    def tearDown(self):
        common.remove_all_containers(self.client)
        common.remove_all_images(self.client)
        self.client.close()
        return super().tearDown()

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        TestSystem.podman = Podman()
        TestSystem.service = TestSystem.podman.open(
            "system", "service", "tcp:127.0.0.1:8080", "--time=0"
        )
        # give the service some time to be ready...
        time.sleep(2)

        returncode = TestSystem.service.poll()
        if returncode is not None:
            raise subprocess.CalledProcessError(returncode, "podman system service")

    @classmethod
    def tearDownClass(cls):
        TestSystem.service.terminate()
        stdout, stderr = TestSystem.service.communicate(timeout=0.5)
        if stdout:
            sys.stdout.write("\nImages Service Stdout:\n" + stdout.decode("utf-8"))
        if stderr:
            sys.stderr.write("\nImages Service Stderr:\n" + stderr.decode("utf-8"))

        TestSystem.podman.tear_down()
        return super().tearDownClass()

    def test_Info(self):
        self.assertIsNotNone(self.client.info())

    def test_info_container_details(self):
        info = self.client.info()
        self.assertEqual(info["Containers"], 1)
        self.client.create_container(image=constant.ALPINE)
        info = self.client.info()
        self.assertEqual(info["Containers"], 2)

    def test_version(self):
        self.assertIsNotNone(self.client.version())
def CreateContainer():
    # FIXME check if existing
    print("Pulling docker image, first run should take long")
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        cli.pull('regit/suri-buildbot')
        cli.create_container(
            name='suri-buildbot',
            image='regit/suri-buildbot',
            ports=[8010, 22],
            volumes=['/data/oisf', '/data/buildbot/master/master.cfg'])
    else:
        cli = DockerClient()
        cli.images.pull('regit/suri-buildbot')
        suri_src_dir = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
        print("Using base src dir: " + suri_src_dir)
        cli.containers.create(
            'regit/suri-buildbot',
            name='suri-buildbot',
            ports={'8010/tcp': 8010, '22/tcp': None},
            volumes={
                suri_src_dir: {'bind': '/data/oisf', 'mode': 'ro'},
                os.path.join(suri_src_dir, 'qa', 'docker', 'buildbot.cfg'):
                    {'bind': '/data/buildbot/master/master.cfg', 'mode': 'ro'},
            },
            detach=True)
    sys.exit(0)
def postgres_container(docker: pydocker.APIClient) -> None:
    '''
    Use docker to spin up a postgres container for the duration of the
    testing session. Kill it as soon as all tests are run.

    DB actions persist across the entirety of the testing session.
    '''
    warnings.filterwarnings('ignore', category=DeprecationWarning)

    # pull image from docker
    image = 'postgres:12.1-alpine'
    docker.pull(image)

    # create the new container using the same image used by our database
    container = docker.create_container(
        image=image,
        name=f'test-postgres-{uuid.uuid4()}',
        detach=True,
    )
    docker.start(container=container['Id'])

    config = alembic.config.Config('alembic.ini')
    try:
        os.environ['DB_SUFFIX'] = '_test'
        alembic.command.upgrade(config, 'head')
        yield container
        alembic.command.downgrade(config, 'base')
    finally:
        # remove container
        docker.kill(container['Id'])
        docker.remove_container(container['Id'])
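# The session fixtures in this file receive a low-level client through a
# `docker` fixture. A minimal sketch of how that fixture could be wired up
# (the pytest registration itself is an assumption, not part of the original
# source; only the pydocker.APIClient type annotation comes from the snippets):
import docker as pydocker
import pytest

@pytest.fixture(scope="session")
def docker() -> pydocker.APIClient:
    # version="auto" negotiates the API version with the local daemon
    client = pydocker.APIClient(version="auto")
    try:
        yield client
    finally:
        client.close()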
def run_command_in_container(container_image, command):
    client = APIClient()

    # Create the container
    container = client.create_container(
        image=container_image,
        command=('/bin/sh -c "sleep {time}; exit {exit}"'.format(
            time=60,
            exit=42,
        )),
        detach=True,
        user='******',
    )

    # Start the container
    container_id = container.get('Id')
    client.start(container=container_id)

    # Execute the given command inside the running container
    exec_cmd = client.exec_create(
        container=container_id,
        cmd=f"/bin/sh -c '{command}'",
        stdout=True,
        stderr=True,
    )
    cmd_output = client.exec_start(exec_id=exec_cmd['Id'], stream=False)
    cmd_output = cmd_output.decode('utf-8', 'replace').strip()

    # Kill container
    client.kill(container_id)
    return cmd_output
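# Hypothetical usage of run_command_in_container; the image name and command
# are illustrative assumptions, not part of the original source:
output = run_command_in_container('alpine:3.12', 'cat /etc/os-release')
print(output)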
def postgres_container(docker: pydocker.APIClient) -> None:
    """
    Use docker to spin up a postgres container for the duration of the
    testing session. Kill it as soon as all tests are run.

    DB actions persist across the entirety of the testing session.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # pull image from docker
    image = "kartoza/postgis:13.0"
    docker.pull(image)

    # create the new container using
    # the same image used by our database
    container = docker.create_container(
        image=image,
        name=f"test-postgres-{uuid.uuid4()}",
        detach=True,
    )
    docker.start(container=container["Id"])

    config = alembic.config.Config("alembic.ini")
    try:
        os.environ["DB_SUFFIX"] = "_test"
        alembic.command.upgrade(config, "head")
        yield container
        alembic.command.downgrade(config, "base")
    finally:
        # remove container
        docker.kill(container["Id"])
        docker.remove_container(container["Id"])
def testing_minio(docker: pydocker.APIClient) -> None:
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    minio_image = "minio/minio:RELEASE.2020-08-18T19-41-00Z"
    docker.pull(minio_image)

    minio_container = docker.create_container(
        detach=True,
        image=minio_image,
        name=f"test-minio-{uuid.uuid4()}",
        environment={
            "MINIO_ACCESS_KEY": "admin",
            "MINIO_SECRET_KEY": "password",
        },
        ports=[9000],
        command="minio server /data/dir",
        volumes="data",
        host_config=docker.create_host_config(port_bindings={9000: 9002}),
    )
    docker.start(container=minio_container["Id"])
    os.environ["OBJECT_STORAGE_PORT"] = "9002"

    try:
        yield minio_container
    finally:
        docker.kill(minio_container["Id"])
        docker.remove_container(minio_container["Id"])
def container_peripherelAccess(self, **kwargs):
    """
    - API creates container and also provides peripheral access.
    - API is equivalent to create container with host configurations
      added to it.
    - Response
    """
    host_config = {}
    # image = kwargs['image']
    # network_disabled = kwargs['network_disabled']
    # host_config = {'devices': '/sys/class/leds:/:rwm'}
    # print(image, host_config)
    invoke_clientAPI = APIClient(base_url='unix://var/run/docker.sock',
                                 version='auto')
    containerID = invoke_clientAPI.create_container(
        'ubuntu',
        stdin_open=True,
        command=['/bin/bash'],
        host_config=invoke_clientAPI.create_host_config(
            devices=['/dev/sda:rwm']))
    # containerID = invoke_clientAPI.create_container(image)
    return containerID
def postgres_server(docker: libdocker.APIClient) -> None:
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    if not USE_LOCAL_DB:  # pragma: no cover
        pull_image(docker, POSTGRES_DOCKER_IMAGE)

        container = docker.create_container(
            image=POSTGRES_DOCKER_IMAGE,
            name="test-postgres-{}".format(uuid.uuid4()),
            detach=True,
        )
        docker.start(container=container["Id"])

        inspection = docker.inspect_container(container["Id"])
        host = inspection["NetworkSettings"]["IPAddress"]

        dsn = f"postgres://*****:*****@{host}/postgres"

        try:
            ping_postgres(dsn)
            environ["DB_CONNECTION"] = dsn
            yield container
        finally:
            docker.kill(container["Id"])
            docker.remove_container(container["Id"])
    else:  # pragma: no cover
        yield
        return
def create_tarball(docker_image=None, docker_tag=None):
    d = APIClient(base_url='unix://var/run/docker.sock')
    d.pull(docker_image, docker_tag)
    container = d.create_container(docker_image)
    resp = d.export(container=container['Id'])
    tarfd, tarball = mkstemp(prefix='/tmp/', suffix='.tar')
    # reuse the descriptor returned by mkstemp instead of leaking it
    with os.fdopen(tarfd, 'wb') as f:
        for chunk in resp:
            f.write(chunk)
    d.remove_container(container['Id'])
    return tarball
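# Hypothetical usage of create_tarball; the image and tag are illustrative:
tarball_path = create_tarball(docker_image='alpine', docker_tag='3.12')
print('exported filesystem at', tarball_path)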
class DockerRunner:
    def __init__(self):
        self.docker_client = None

    def run(self, img, name, options):
        if self.docker_client is None:
            self.docker_client = APIClient(version='auto')

        ports = []
        host_config = None

        # Prepare port configuration
        if options.ports is not None and len(options.ports) > 0:
            for port_number in options.ports:
                ports.append(port_number)
            host_config = self.docker_client.create_host_config(
                port_bindings={p: p for p in ports})

        # Launch docker container
        container = self.docker_client.create_container(
            img,
            name=name,
            ports=ports,
            host_config=host_config,
        )
        self.docker_client.start(container=container.get('Id'))
        self.container = container
        logger.info('Starting container {}'.format(container))

    def logs(self, *args, **kwargs):
        if self.docker_client is None:
            self.docker_client = APIClient(version='auto')
        # seems like we are hitting bug
        # https://github.com/docker/docker-py/issues/300
        log_stream = self.docker_client.logs(self.container.get('Id'),
                                             stream=True, follow=True)
        for line in log_stream:
            logger.info(line)

    def cancel(self, name):
        if self.docker_client is None:
            self.docker_client = APIClient(version='auto')
        self.docker_client.kill(self.container.get('Id'))
        self.docker_client.remove_container(self.container.get('Id'))
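# A minimal usage sketch for DockerRunner; the Namespace stands in for the
# options object (its ports attribute is the only field the class reads), and
# the image/name values are illustrative assumptions:
from argparse import Namespace

runner = DockerRunner()
runner.run('nginx:alpine', name='demo-nginx', options=Namespace(ports=[8080]))
runner.logs()   # streams container logs through the module logger
runner.cancel('demo-nginx')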
def replicateContainer(containerID, orig_url, dest_url):
    orig_client = APIClient(base_url=orig_url, tls=False)
    dest_client = APIClient(base_url=dest_url, tls=False)

    image = orig_client.get_image(containerID)
    data = None
    for chunk in image:
        if data is None:
            data = chunk
        else:
            data += chunk

    dest_client.load_image(data)
    container = dest_client.create_container(containerID, name=containerID)
def redis_server(docker: dockerlib.APIClient):
    docker.pull(REDIS_DOCKER_IMAGE)
    container = docker.create_container(
        image=REDIS_DOCKER_IMAGE,
        name=f"test-redis-{uuid.uuid4()}",
        detach=True,
        host_config=docker.create_host_config(
            port_bindings={6379: EXPOSED_PORT}),
    )
    docker.start(container=container["Id"])
    yield
    docker.kill(container["Id"])
    docker.remove_container(container["Id"])
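# REDIS_DOCKER_IMAGE and EXPOSED_PORT are module-level constants this fixture
# expects; plausible values (assumptions, not from the original source):
REDIS_DOCKER_IMAGE = "redis:6.0-alpine"
EXPOSED_PORT = 16379  # host port bound to the container's 6379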
def run(args: Namespace) -> None:
    """Run an analysis framework environment"""
    cli = APIClient(base_url=args.docker_socket)
    binds = {  # type: Dict[str, Dict[str, str]]
        '/dev/random': {'bind': '/dev/random'},
        '/dev/urandom': {'bind': '/dev/urandom'}
    }
    if args.dynamic_mount:
        field_name = os.path.realpath(
            resource_filename('son_analyze.cli', '../../..'))
        new_entry = {field_name: {'bind': '/son-analyze', 'mode': 'rw'}}
        binds.update(new_entry)
    host_config = cli.create_host_config(
        port_bindings={8888: args.jupiter_port}, binds=binds)
    container = cli.create_container(image=_IMAGE_TAG + ':latest',
                                     labels=['com.sonata.analyze'],
                                     ports=[8888],
                                     host_config=host_config)
    container_id = container.get('Id')
    cli.start(container=container_id)

    def cleanup():
        """Remove the container"""
        cli.remove_container(container=container_id, force=True)

    def signal_term_handler(unused1, unused2):  # noqa pylint: disable=unused-argument
        """Catch signal to clean the containers"""
        print('Interruption detected, stopping environment')
        cleanup()
        sys.exit(1)

    signal.signal(signal.SIGTERM, signal_term_handler)
    signal.signal(signal.SIGINT, signal_term_handler)
    print('Browse http://localhost:{} \n'
          'Type Ctrl-C to exit'.format(args.jupiter_port))
    exit_code = cli.wait(container=container_id)
    cleanup()
    sys.exit(exit_code)
def start_containers(client: docker.APIClient):
    configs = tables('docker').data
    images = ['ubuntu', 'alpine', 'nginx']
    ports_delta = 1
    for image in images:
        base_config = {
            "image": image,
            "command": "sleep 1d",
            "detach": True}
        for conf in configs:
            if conf.startswith('vol'):
                if conf == 'vol1' and image != 'alpine':
                    container = client.create_container(
                        host_config=client.create_host_config(binds=configs[conf]),
                        image=image,
                        command=COMMAND,
                        detach=True)
                else:
                    container = client.create_container(
                        host_config=client.create_host_config(binds=configs[conf]),
                        **base_config)
            elif conf.startswith('ports'):
                ports = {}
                for p in range(configs[conf]):
                    ports.update({9980 + ports_delta: 9980 + ports_delta})
                    ports.update({str(9981 + ports_delta) + '/udp': 9985 + ports_delta})
                    ports_delta += 1
                container = client.create_container(
                    host_config=client.create_host_config(port_bindings=ports),
                    ports=[*ports],
                    **base_config)
            elif conf.startswith('labels'):
                container = client.create_container(
                    labels=configs[conf],
                    **base_config)
            elif conf == 'privileged':
                container = client.create_container(
                    host_config=client.create_host_config(privileged=configs[conf]),
                    **base_config)
            else:
                entry_config = copy.copy(base_config)
                entry_config.pop('command')
                container = client.create_container(
                    entrypoint=configs[conf],
                    **entry_config)
            client.start(container)
def testing_postgres(docker: pydocker.APIClient) -> None:
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    postgres_image = "postgres:12.1-alpine"
    docker.pull(postgres_image)

    db_container = docker.create_container(
        image=postgres_image,
        name=f"test-postgres-{uuid.uuid4()}",
        detach=True,
    )
    docker.start(container=db_container["Id"])

    config = alembic.config.Config("alembic.ini")
    os.environ["DB_SUFFIX"] = "_test"
    try:
        alembic.command.upgrade(config, "head")
        yield db_container
        alembic.command.downgrade(config, "base")
    finally:
        docker.kill(db_container["Id"])
        docker.remove_container(db_container["Id"])
def __add_node():
    docker_client = APIClient(base_url=TwinHub.DOCKER_CLIENT_URI)
    responses = docker_client.build(
        dockerfile=TwinHub.DOCKERFILE_PATH,
        path=TwinHub.TWIN_DOCKER_SOURCES_DIR,
        encoding=TwinHub.DEFAULT_DOCKERFILE_ENCODING,
        rm=True,
        tag=TwinHub.NODE_IMAGE_TAG)
    for msg in responses:
        print(msg)

    # Creating network if it does not exist.
    network = docker_client.networks(names=[TwinHub.NETWORK_NAME])
    if not network:
        docker_client.create_network(TwinHub.NETWORK_NAME, driver="bridge")

    # Creating new container
    TwinHub.NODE_COUNTER += 1
    node_name = 'twin_node_{}'.format(TwinHub.NODE_COUNTER)
    container = docker_client.create_container(TwinHub.NODE_IMAGE_TAG,
                                               name=node_name,
                                               ports=[5000],
                                               tty=True,
                                               stdin_open=True,
                                               detach=True,
                                               hostname=node_name)
    docker_client.start(container['Id'])
    docker_client.connect_container_to_network(container=node_name,
                                               net_id=TwinHub.NETWORK_NAME)
    return "<html><h1>Hello world</h1></html>"
class DockerClient:
    def __init__(self, url, name):
        self.url = url
        self.name = name
        self.cli = APIClient(base_url=url)
        self.bwb_instance_id = socket.gethostname()
        self.bwbMounts = {}
        self.findVolumeMappings()

    def getClient(self):
        return self.cli

    def getName(self):
        return self.name

    def getUrl(self):
        return self.url

    def images(self):
        return self.cli.images(all=True)

    def has_image(self, name, version="latest"):
        if not name:
            return False
        repoTag = name + ':' + version
        conId = subprocess.check_output(['docker', 'images', '-q', repoTag])
        if conId:
            return True
        return False

    def remove_image(self, id, force=False):
        self.cli.remove_image(id, force=force)

    def pull_image(self, id):
        self.cli.pull(id)

    def containers(self, all=True):
        return self.cli.containers(all=all)

    """
    volumes is a dict mapping host directory to container directory
    {
        "/Users/host/directory": "path/to/container/directory"
    }
    commands is a list of bash commands to run on container
    ["pwd", "touch newfile.txt"]
    """
    def create_container_cli(self, name, volumes=None, commands=None,
                             environment=None, hostVolumes=None,
                             consoleProc=None, exportGraphics=False):
        # skips DockerPy and creates the command line equivalent
        volumeMappings = ''
        for container_dir, host_dir in hostVolumes.items():
            volumeMappings = volumeMappings + "-v {}:{} ".format(
                self.to_best_host_directory(host_dir), container_dir)
        envs = ''
        for env, var in environment.items():
            # strip whitespace
            env = env.strip()
            # strip quotes if present
            if env[0] == env[-1] and env.startswith(("'", '"')):
                env = env[1:-1]
            envs = envs + "-e {}={} ".format(env, var)
        # create container
        consoleProc.cidFile = '/tmp/' + str(datetime.datetime.now().date()) + \
            '_' + str(datetime.datetime.now().time()).replace(':', '.')
        dockerBaseCmd = 'docker run -i --rm '
        if exportGraphics:
            dockerBaseCmd += '-e DISPLAY=:1 -v /tmp/.X11-unix:/tmp/.X11-unix '
        dockerCmd = dockerBaseCmd + ' --init --cidfile={} {} {} {} {}'.format(
            consoleProc.cidFile, volumeMappings, envs, name, commands)
        sys.stderr.write('Docker command is\n{}\n'.format(dockerCmd))
        consoleProc.state = 'running'
        consoleProc.process.start('/bin/bash', ['-c', dockerCmd])

    def create_container(self, name, volumes=None, commands=None,
                         environment=None, hostVolumes=None):
        # hostVolumes is a dict with keys being the container volumes
        # TODO should we use Image ID instead of Image Name?
        host_config = None
        if not (hostVolumes is None):
            binds = []
            for container_dir, host_dir in hostVolumes.items():
                binds.append(self.to_best_host_directory(host_dir) + ":" + container_dir)
            host_config = self.cli.create_host_config(binds=binds)
            volumes = list(hostVolumes.keys())
        elif type(volumes) is dict:
            # this is backwards - it is possible to have the same host directory
            # mapped to multiple containers but not the other way
            # keep this so as not to break early widgets
            binds = []
            for host_dir, container_dir in volumes.items():
                binds.append(self.to_best_host_directory(host_dir) + ":" + container_dir)
            host_config = self.cli.create_host_config(binds=binds)
            volumes = list(volumes.values())
        if type(commands) is list:
            commands = "bash -c \"" + ' && '.join(commands) + "\""
        return self.cli.create_container(image=name, volumes=volumes,
                                         command=commands,
                                         environment=environment,
                                         stdin_open=True,
                                         host_config=host_config)

    def start_container(self, id):
        return self.cli.start(id)

    def container_running(self, id):
        for container in self.containers(all=False):
            if container['Id'] == id:
                return True
        return False

    def remove_container(self, id, force=False):
        self.cli.remove_container(id, force=force)

    def stop_container(self, id):
        self.cli.stop(id)

    def pause_container(self, id):
        self.cli.pause(id)

    def unpause_container(self, id):
        self.cli.unpause(id)

    def version(self):
        return self.cli.version()

    def info(self):
        return self.cli.info()

    def volumes(self):
        return self.cli.volumes()['Volumes']

    def remove_volume(self, name):
        self.cli.remove_volume(name)

    def findVolumeMappings(self):
        for c in self.cli.containers():
            container_id = c['Id']
            if len(container_id) < 12:
                continue
            if container_id[:12] == self.bwb_instance_id:
                for m in c['Mounts']:
                    if not ('/var/run' in m['Source']):
                        self.bwbMounts[m['Source']] = m['Destination']

    def to_best_host_directory(self, path, returnNone=False):
        if self.bwbMounts == {}:
            return path
        bestPath = None
        for source, dest in self.bwbMounts.items():
            absPath = self.to_host_directory(path, source, dest)
            if absPath is not None:
                if bestPath is None:
                    bestPath = absPath
                elif len(absPath) < len(bestPath):
                    bestPath = absPath
        if bestPath is None:
            if returnNone:
                return None
            return path
        return bestPath

    def to_host_directory(self, path, source, dest):
        cleanDestination = os.path.normpath(dest)
        cleanPath = os.path.normpath(path)
        cleanSource = os.path.normpath(source)
        # check if it is already relative to host path
        if cleanSource in cleanPath:
            return path
        # if the path is not mapping from host, return None
        if cleanDestination not in cleanPath:
            return None
        abspath = os.path.normpath(str.join(os.sep, (
            cleanSource,
            path[path.find(cleanDestination) + len(cleanDestination):])))
        return abspath
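# A minimal usage sketch for the DockerClient wrapper above; the socket URL,
# display name, image, and paths are illustrative assumptions:
client = DockerClient('unix:///var/run/docker.sock', 'local')
if client.has_image('biodepot/bwb'):
    container = client.create_container(
        'biodepot/bwb',
        volumes={'/Users/host/data': '/data'},
        commands=['pwd', 'ls /data'])
    client.start_container(container['Id'])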
def isolate(pset, filename):
    notebook = join(HOST_PATH, filename)
    ref_notebook = join(HOST_PATH, 'notebooks', pset + '.ipynb')
    result = join('/tmp', basename(filename) + '.txt')
    tests_module = join(HOST_PATH, 'nlabot', pset + '_tests.py')
    data = join(HOST_PATH, 'notebooks', 'data')
    open(result, 'w').close()

    if not exists(ref_notebook):
        return 5, 'finished grading submission #%d due to wrong pset number.'

    cli = APIClient()
    container = cli.create_container(
        image='nlabot_cell',
        command=['imprison', '-o', 'result.txt', pset, 'notebook.ipynb'],
        volumes=['/nlabot/notebook.ipynb'],
        network_disabled=True,
        host_config=cli.create_host_config(
            binds={
                notebook: {
                    'bind': '/nlabot/notebook.ipynb',
                    'mode': 'rw',
                },
                result: {
                    'bind': '/nlabot/result.txt',
                    'mode': 'rw',
                },
                ref_notebook: {
                    'bind': '/nlabot/data/reference.ipynb',
                    'mode': 'ro',
                },
                tests_module: {
                    'bind': '/nlabot/tests.py',
                    'mode': 'rw',
                },
                data: {
                    'bind': '/nlabot/data',
                    'mode': 'ro',
                },
                # TODO: put these settings into settings.py
            },
            cpu_period=100000,
            cpu_quota=100000,
            mem_limit='5g'))
    logging.info('container id is %s', container['Id'])

    try:
        cli.start(container)
    except Exception:
        logging.error('failed to start container.', exc_info=True)
        # TODO: send alert
        return 1, 'finished grading submission #%d due to container failure.'

    try:
        retcode = cli.wait(container, 1200)  # 20 minutes to grade
    except ConnectionError:  # FIXME
        retcode = -1

    logging.info('retcode is %d', retcode)
    logging.info('%s', cli.logs(container).decode('utf8'))

    if retcode == -1:
        return 2, 'finished grading submission #%d due to timeout.'
    elif retcode != 0:
        return 3, 'finished grading submission #%d due to some internal ' \
                  'problem.'

    with open(result) as f:
        content = f.read()
    if content == '':
        logging.warning('file is empty.')
        return 4, 'finished grading submission #%d due to a problem in ' \
                  'the notebook.'
    try:
        return 0, loads(content)
    except JSONDecodeError:
        logging.error('could not decode json', exc_info=True)
        return 3, 'finished grading submission #%d due to some internal ' \
                  'problem.'
class DockerClient:
    def __init__(self, url, name):
        self.url = url
        self.name = name
        self.cli = APIClient(base_url=url)

    def getClient(self):
        return self.cli

    def getName(self):
        return self.name

    def getUrl(self):
        return self.url

    def images(self):
        return self.cli.images(all=True)

    def has_image(self, name, version):
        repo_tag = name + ':' + version
        for image in self.cli.images():
            if not image['RepoTags']:
                continue  # DK fix: NoneType is not iterable
            elif repo_tag in image['RepoTags']:
                return True
        return False

    def remove_image(self, id, force=False):
        self.cli.remove_image(id, force=force)

    def containers(self, all=True):
        return self.cli.containers(all=all)

    """
    volumes is a dict mapping host directory to container directory
    {
        "/Users/host/directory": "path/to/container/directory"
    }
    commands is a list of bash commands to run on container
    ["pwd", "touch newfile.txt"]
    """
    def create_container(self, name, volumes=None, commands=None):
        # TODO should we use Image ID instead of Image Name?
        host_config = None
        if type(volumes) is dict:
            binds = []
            for host_dir, container_dir in volumes.items():
                binds.append(self.to_host_directory(host_dir) + ":" + container_dir)
            host_config = self.cli.create_host_config(binds=binds)
            volumes = list(volumes.values())
        if type(commands) is list:
            commands = "bash -c \"" + ' && '.join(commands) + "\""
        return self.cli.create_container(image=name, volumes=volumes,
                                         command=commands, stdin_open=True,
                                         host_config=host_config)

    def start_container(self, id):
        return self.cli.start(id)

    def container_running(self, id):
        for container in self.containers(all=False):
            if container['Id'] == id:
                return True
        return False

    def remove_container(self, id, force=False):
        self.cli.remove_container(id, force=force)

    def stop_container(self, id):
        self.cli.stop(id)

    def pause_container(self, id):
        self.cli.pause(id)

    def unpause_container(self, id):
        self.cli.unpause(id)

    def version(self):
        return self.cli.version()

    def info(self):
        return self.cli.info()

    def volumes(self):
        return self.cli.volumes()['Volumes']

    def remove_volume(self, name):
        self.cli.remove_volume(name)

    '''
    Convert path of container back to host path
    '''
    def to_host_directory(self, path):
        source = ''
        destination = ''
        # locate BwB container
        for c in self.cli.containers():
            if c['Image'] == 'biodepot/bwb':
                # found BwB container, locate source and destination
                for m in c['Mounts']:
                    if 'docker.sock' in m['Source']:
                        continue
                    source = m['Source']
                    destination = m['Destination']

        if source == '' or destination == '':
            return path

        destination = os.path.join(destination, '')
        # if the path is not mapping from host, nothing will be done
        if destination not in path:
            return path

        abspath = os.path.join(
            source, path[path.find(destination) + len(destination):])
        return abspath
class DockerOperator(BaseOperator):
    """
    Execute a command inside a docker container.

    A temporary directory is created on the host and mounted into a
    container to allow storing files that together exceed the default
    disk size of 10GB in a container. The path to the mounted directory
    can be accessed via the environment variable ``AIRFLOW_TMP_DIR``.

    If a login to a private registry is required prior to pulling the
    image, a Docker connection needs to be configured in Airflow and
    the connection ID be provided with the parameter ``docker_conn_id``.

    :param image: Docker image from which to create the container.
        If image tag is omitted, "latest" will be used.
    :type image: str
    :param api_version: Remote API version. Set to ``auto`` to automatically
        detect the server's version.
    :type api_version: str
    :param auto_remove: Auto-removal of the container on daemon side when the
        container's process exits. The default is False.
    :type auto_remove: bool
    :param command: Command to be run in the container. (templated)
    :type command: str or list
    :param cpus: Number of CPUs to assign to the container.
        This value gets multiplied with 1024. See
        https://docs.docker.com/engine/reference/run/#cpu-share-constraint
    :type cpus: float
    :param dns: Docker custom DNS servers
    :type dns: list[str]
    :param dns_search: Docker custom DNS search domain
    :type dns_search: list[str]
    :param docker_url: URL of the host running the docker daemon.
        Default is unix://var/run/docker.sock
    :type docker_url: str
    :param environment: Environment variables to set in the container. (templated)
    :type environment: dict
    :param force_pull: Pull the docker image on every run. Default is False.
    :type force_pull: bool
    :param mem_limit: Maximum amount of memory the container can use.
        Either a float value, which represents the limit in bytes,
        or a string like ``128m`` or ``1g``.
    :type mem_limit: float or str
    :param network_mode: Network mode for the container.
    :type network_mode: str
    :param tls_ca_cert: Path to a PEM-encoded certificate authority
        to secure the docker connection.
    :type tls_ca_cert: str
    :param tls_client_cert: Path to the PEM-encoded certificate
        used to authenticate the docker client.
    :type tls_client_cert: str
    :param tls_client_key: Path to the PEM-encoded key used to authenticate
        the docker client.
    :type tls_client_key: str
    :param tls_hostname: Hostname to match against the docker server
        certificate or False to disable the check.
    :type tls_hostname: str or bool
    :param tls_ssl_version: Version of SSL to use when communicating with
        the docker daemon.
    :type tls_ssl_version: str
    :param tmp_dir: Mount point inside the container to a temporary directory
        created on the host by the operator. The path is also made available
        via the environment variable ``AIRFLOW_TMP_DIR`` inside the container.
    :type tmp_dir: str
    :param user: Default user inside the docker container.
    :type user: int or str
    :param volumes: List of volumes to mount into the container, e.g.
        ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
    :type volumes: list
    :param working_dir: Working directory to set on the container
        (equivalent to the -w switch of the docker client)
    :type working_dir: str
    :param xcom_all: Push all the stdout or just the last line.
        The default is False (last line).
    :type xcom_all: bool
    :param docker_conn_id: ID of the Airflow connection to use
    :type docker_conn_id: str
    :param shm_size: Size of ``/dev/shm`` in bytes. The size must be
        greater than 0. If omitted uses system default.
    :type shm_size: int
    """
    template_fields = ('command', 'environment',)
    template_ext = ('.sh', '.bash',)

    @apply_defaults
    def __init__(
            self,
            image,
            api_version=None,
            command=None,
            cpus=1.0,
            docker_url='unix://var/run/docker.sock',
            environment=None,
            force_pull=False,
            mem_limit=None,
            network_mode=None,
            tls_ca_cert=None,
            tls_client_cert=None,
            tls_client_key=None,
            tls_hostname=None,
            tls_ssl_version=None,
            tmp_dir='/tmp/airflow',
            user=None,
            volumes=None,
            working_dir=None,
            xcom_all=False,
            docker_conn_id=None,
            dns=None,
            dns_search=None,
            auto_remove=False,
            shm_size=None,
            *args,
            **kwargs):
        super().__init__(*args, **kwargs)
        self.api_version = api_version
        self.auto_remove = auto_remove
        self.command = command
        self.cpus = cpus
        self.dns = dns
        self.dns_search = dns_search
        self.docker_url = docker_url
        self.environment = environment or {}
        self.force_pull = force_pull
        self.image = image
        self.mem_limit = mem_limit
        self.network_mode = network_mode
        self.tls_ca_cert = tls_ca_cert
        self.tls_client_cert = tls_client_cert
        self.tls_client_key = tls_client_key
        self.tls_hostname = tls_hostname
        self.tls_ssl_version = tls_ssl_version
        self.tmp_dir = tmp_dir
        self.user = user
        self.volumes = volumes or []
        self.working_dir = working_dir
        self.xcom_all = xcom_all
        self.docker_conn_id = docker_conn_id
        self.shm_size = shm_size
        if kwargs.get('xcom_push') is not None:
            raise AirflowException(
                "'xcom_push' was deprecated, use 'BaseOperator.do_xcom_push' instead")

        self.cli = None
        self.container = None

    def get_hook(self):
        return DockerHook(
            docker_conn_id=self.docker_conn_id,
            base_url=self.docker_url,
            version=self.api_version,
            tls=self.__get_tls_config()
        )

    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = json.loads(l.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')

    def get_command(self):
        if self.command is not None and self.command.strip().find('[') == 0:
            commands = ast.literal_eval(self.command)
        else:
            commands = self.command
        return commands

    def on_kill(self):
        if self.cli is not None:
            self.log.info('Stopping docker container')
            self.cli.stop(self.container['Id'])

    def __get_tls_config(self):
        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(
                ca_cert=self.tls_ca_cert,
                client_cert=(self.tls_client_cert, self.tls_client_key),
                verify=True,
                ssl_version=self.tls_ssl_version,
                assert_hostname=self.tls_hostname
            )
            self.docker_url = self.docker_url.replace('tcp://', 'https://')
        return tls_config
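# A minimal usage sketch for the DockerOperator above inside an Airflow DAG;
# the dag_id, start date, image, and command are illustrative assumptions:
from datetime import datetime
from airflow import DAG

with DAG('docker_demo', start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    task = DockerOperator(
        task_id='print_hello',
        image='alpine:3.12',
        command='echo hello',
        auto_remove=True,
    )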
class DockerOperator(BaseOperator):
    """
    Execute a command inside a docker container.

    A temporary directory is created on the host and mounted into a
    container to allow storing files that together exceed the default
    disk size of 10GB in a container. The path to the mounted directory
    can be accessed via the environment variable ``AIRFLOW_TMP_DIR``.

    If a login to a private registry is required prior to pulling the
    image, a Docker connection needs to be configured in Airflow and
    the connection ID be provided with the parameter ``docker_conn_id``.

    :param image: Docker image from which to create the container.
    :type image: str
    :param api_version: Remote API version. Set to ``auto`` to automatically
        detect the server's version.
    :type api_version: str
    :param command: Command to be run in the container. (templated)
    :type command: str or list
    :param cpus: Number of CPUs to assign to the container.
        This value gets multiplied with 1024. See
        https://docs.docker.com/engine/reference/run/#cpu-share-constraint
    :type cpus: float
    :param docker_url: URL of the host running the docker daemon.
        Default is unix://var/run/docker.sock
    :type docker_url: str
    :param environment: Environment variables to set in the container. (templated)
    :type environment: dict
    :param force_pull: Pull the docker image on every run. Default is False.
    :type force_pull: bool
    :param mem_limit: Maximum amount of memory the container can use.
        Either a float value, which represents the limit in bytes,
        or a string like ``128m`` or ``1g``.
    :type mem_limit: float or str
    :param network_mode: Network mode for the container.
    :type network_mode: str
    :param tls_ca_cert: Path to a PEM-encoded certificate authority
        to secure the docker connection.
    :type tls_ca_cert: str
    :param tls_client_cert: Path to the PEM-encoded certificate
        used to authenticate the docker client.
    :type tls_client_cert: str
    :param tls_client_key: Path to the PEM-encoded key used to authenticate
        the docker client.
    :type tls_client_key: str
    :param tls_hostname: Hostname to match against the docker server
        certificate or False to disable the check.
    :type tls_hostname: str or bool
    :param tls_ssl_version: Version of SSL to use when communicating with
        the docker daemon.
    :type tls_ssl_version: str
    :param tmp_dir: Mount point inside the container to a temporary directory
        created on the host by the operator. The path is also made available
        via the environment variable ``AIRFLOW_TMP_DIR`` inside the container.
    :type tmp_dir: str
    :param user: Default user inside the docker container.
    :type user: int or str
    :param volumes: List of volumes to mount into the container, e.g.
        ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
    :type volumes: list
    :param working_dir: Working directory to set on the container
        (equivalent to the -w switch of the docker client)
    :type working_dir: str
    :param xcom_push: Whether stdout will be pushed to the next step using XCom.
        The default is False.
    :type xcom_push: bool
    :param xcom_all: Push all the stdout or just the last line.
        The default is False (last line).
    :type xcom_all: bool
    :param docker_conn_id: ID of the Airflow connection to use
    :type docker_conn_id: str
    """
    template_fields = ('command', 'environment',)
    template_ext = ('.sh', '.bash',)

    @apply_defaults
    def __init__(self,
                 image,
                 api_version=None,
                 command=None,
                 cpus=1.0,
                 docker_url='unix://var/run/docker.sock',
                 environment=None,
                 force_pull=False,
                 mem_limit=None,
                 network_mode=None,
                 tls_ca_cert=None,
                 tls_client_cert=None,
                 tls_client_key=None,
                 tls_hostname=None,
                 tls_ssl_version=None,
                 tmp_dir='/tmp/airflow',
                 user=None,
                 volumes=None,
                 working_dir=None,
                 xcom_push=False,
                 xcom_all=False,
                 docker_conn_id=None,
                 *args,
                 **kwargs):
        super(DockerOperator, self).__init__(*args, **kwargs)
        self.api_version = api_version
        self.command = command
        self.cpus = cpus
        self.docker_url = docker_url
        self.environment = environment or {}
        self.force_pull = force_pull
        self.image = image
        self.mem_limit = mem_limit
        self.network_mode = network_mode
        self.tls_ca_cert = tls_ca_cert
        self.tls_client_cert = tls_client_cert
        self.tls_client_key = tls_client_key
        self.tls_hostname = tls_hostname
        self.tls_ssl_version = tls_ssl_version
        self.tmp_dir = tmp_dir
        self.user = user
        self.volumes = volumes or []
        self.working_dir = working_dir
        self.xcom_push_flag = xcom_push
        self.xcom_all = xcom_all
        self.docker_conn_id = docker_conn_id
        self.shm_size = kwargs.get('shm_size')

        self.cli = None
        self.container = None

    def get_hook(self):
        return DockerHook(docker_conn_id=self.docker_conn_id,
                          base_url=self.docker_url,
                          version=self.api_version,
                          tls=self.__get_tls_config())

    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(base_url=self.docker_url,
                                 version=self.api_version,
                                 tls=tls_config)

        if ':' not in self.image:
            image = self.image + ':latest'
        else:
            image = self.image

        if self.force_pull or len(self.cli.images(name=image)) == 0:
            self.log.info('Pulling docker image %s', image)
            for l in self.cli.pull(image, stream=True):
                output = json.loads(l.decode('utf-8'))
                self.log.info("%s", output['status'])

        cpu_shares = int(round(self.cpus * 1024))

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                cpu_shares=cpu_shares,
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size),
                image=image,
                mem_limit=self.mem_limit,
                user=self.user,
                working_dir=self.working_dir)
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            exit_code = self.cli.wait(self.container['Id'])
            if exit_code != 0:
                raise AirflowException('docker container failed')

            if self.xcom_push_flag:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else str(line)

    def get_command(self):
        if self.command is not None and self.command.strip().find('[') == 0:
            commands = ast.literal_eval(self.command)
        else:
            commands = self.command
        return commands

    def on_kill(self):
        if self.cli is not None:
            self.log.info('Stopping docker container')
            self.cli.stop(self.container['Id'])

    def __get_tls_config(self):
        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(ca_cert=self.tls_ca_cert,
                                       client_cert=(self.tls_client_cert,
                                                    self.tls_client_key),
                                       verify=True,
                                       ssl_version=self.tls_ssl_version,
                                       assert_hostname=self.tls_hostname)
            self.docker_url = self.docker_url.replace('tcp://', 'https://')
        return tls_config
from docker import APIClient as DockerClient
from docker.errors import APIError
# Jenkins doesn't have our tools which results in import errors
# pylint: disable=import-error
from openshift_tools.monitoring.metric_sender import MetricSender

ZBX_KEY = "docker.container.dns.resolution"
ZBX_KEY_TIMEOUT = "docker.container.dns.resolution.timeout"

if __name__ == "__main__":
    cli = DockerClient(version='auto',
                       base_url='unix://var/run/docker.sock',
                       timeout=120)

    container_id = os.environ['container_uuid']
    image = cli.inspect_container(container_id)['Image']

    container = cli.create_container(image, command='getent hosts redhat.com')
    cli.start(container=container.get('Id'))
    exit_code = cli.wait(container)

    for i in range(0, 3):
        try:
            cli.remove_container(container.get('Id'))
            break
        except APIError:
            print("Error while cleaning up container.")
            time.sleep(5)

    container = cli.create_container(
        image, command='timeout 0.2s getent hosts redhat.com')
    cli.start(container=container.get('Id'))
class Docker:
    def __init__(self, base_url=None):
        self.handle = None
        self.connect_docker_daemon(base_url)

    def connect_docker_daemon(self, base_url=None):
        """
        This method is used to connect to a local/remote docker host daemon
        :return: Return the docker operation handle for local host
        """
        if base_url is None:
            base_url = 'unix:///var/run/docker.sock'
        try:
            self.handle = APIClient(base_url=base_url)
        except errors.APIError as e:
            print(e)
            logging.error(str(e))

    def login_registry(self, login_user, login_pass, registry_srv=None):
        """
        This method is used to log in to a docker registry server.
        :param login_user: str: user name for login registry
        :param login_pass: str: password for login registry
        :param registry_srv: str: uri for registry server address
        :return: result of login status for that registry
        """
        login_status = self.handle.login(username=login_user,
                                         password=login_pass,
                                         registry=registry_srv)
        return login_status

    def get_docker_info(self):
        """
        Get docker information
        :return: DICT string
        """
        return self.handle.info()

    def get_image_list(self):
        """
        Get the list of all existing images
        :return: DICT string for all of the images
        """
        return self.handle.images()

    def public_image_search(self, keyword):
        """
        get the result of searching for an image on the public/logged-in registry
        :return: DICT string of search result
        """
        return self.handle.search(keyword)

    # TODO: get docker events implementation
    # def get_docker_events(self, since, until, filters, decode):
    #     """
    #     get running docker service events
    #     :return: DICT for service events
    #     """
    #     return self.handle.event()

    def get_disk_utils(self):
        """
        get disk utilization for docker images
        :return: DICT of disk utilization
        """
        return self.handle.df()

    def pull_image(self, name, tag=None, repo=None):
        """
        pull image from repository by repo/name:tag
        :param repo: String of repository(registry) name
        :param name: String of image name
        :param tag: String of tag name
        :return: DICT response
        """
        if tag is None:
            tag = "latest"
        try:
            if repo is None:
                return self.handle.pull(name, tag=tag)
            else:
                return self.handle.pull(repo + "/" + name, tag)
        except errors.NotFound:
            return {'message': 'Image Not Found', 'status': 'failed'}

    def inspect_image(self, image_id):
        """
        inspect an image
        :param image_id: String of docker image ID
        :return: DICT of inspecting results
        """
        # TODO: will support image_id and "repo/name:tag" later
        return self.handle.inspect_image(image_id)

    def remove_image(self, image_id, force_remove=False):
        """
        remove the specified image by image id
        :param image_id: String of Docker Image
        :param force_remove: True or False
        :return: DICT of result
        """
        return self.handle.remove_image(image_id, force=force_remove)

    def tag_image(self, image, repository, force=False, tag=None):
        """
        tag an image into a new repository
        :param image: string of image id which is to be tagged
        :param repository: string of new repository which image will be tagged into
        :param tag: String new tag
        :param force: True or False
        :return: Boolean result of tag
        """
        return self.handle.tag(image, repository, tag, force=force)

    def push_image(self, repository, tag=None, stream=False, auth_config=None):
        """
        push image to a repository
        :param repository: String for image to be pushed. Image ID or Repo/Name:tag
        :param tag: Tag for pushed image, if you don't need to change the tag, keep None.
        :param stream: by default False; stream the output as a blocking generator
        :param auth_config: override the credentials for login()
        :return: Result String or Generator (when stream=True)
        """
        if auth_config is None:
            return self.handle.push(repository, tag, stream=stream)
        else:
            # push here as well; the original mistakenly called pull
            return self.handle.push(repository, tag, stream=stream,
                                    auth_config=auth_config)

    def save_image(self, image_name, save_path, tarball_name=None):
        """
        save specified image to a tarball
        :param image_name: string of Image ID or "repository/image:tag"
        :param save_path: string of path
        :param tarball_name: string of tarball name. If not specified it will
            use image_name_datetime.tar
        :return: return status
        """
        if tarball_name is None:
            tarball_name = image_name + "_" + str(
                time.time()).split('.')[0] + ".tar"
        try:
            img = self.handle.get_image(image_name)
            # open in binary mode; the exported image is a byte stream
            with open(save_path + '/' + tarball_name, 'wb') as f:
                for chunk in img:
                    f.write(chunk)
            return {
                "message": "Image {} saved at {}".format(
                    image_name, save_path + "/" + tarball_name),
                "status": "succeed"
            }
        except Exception as e:
            return {"message": str(e), "status": "failed"}

    def load_image(self, tarball_name, repository, tag=None, changes=None):
        """
        load image from a local path or url of a tarball image
        :param tarball_name: string of full path of tarball image
        :param repository: string of full name of the image to be assigned, 'repo/name'
        :param tag: string tag of the imported image. If set to None, the tag
            will follow the original image tag
        :return: result of the import
        """
        if repository is None or str(repository).strip() == "":
            repository = None
        if tag is None or str(tag).strip() == "":
            tag = None
        if changes is None or str(changes).strip() == "":
            changes = None
        return self.handle.import_image(tarball_name, repository=repository,
                                        tag=tag, changes=changes)

    def get_containers(self, all=False):
        """
        get list of containers.
        :param all: by default is 'False'; it only shows the running containers,
            otherwise it shows all containers including the stopped/exited ones.
        :return: return the dict of containers.
        """
        # TODO: 'filter' function will be added later.
        return self.handle.containers(all=all)

    def new_container(self, args):
        """
        create container according to the passed-in parameters
        :param args: parameters dict
        :return: return new container id
        """
        result = self.handle.create_container(
            image=args.get('image'),
            command=args.get('command'),
            hostname=args.get('hostname'),
            user=args.get('user'),
            detach=False if args.get('detach') is None else args.get('detach'),
            stdin_open=False if args.get('stdin_open') is None else args.get('stdin_open'),
            tty=False if args.get('tty') is None else args.get('tty'),
            ports=args.get('ports'),
            environment=args.get('environment'),
            volumes=args.get('volumes'),
            network_disabled=False if args.get('network_disabled') is None
            else args.get('network_disabled'),
            name=args.get('name'),
            entrypoint=args.get('entrypoint'),
            working_dir=args.get('working_dir'),
            domainname=args.get('domainname'),
            host_config=args.get('host_config'),
            mac_address=args.get('mac_address'),
            labels=args.get('labels'),
            stop_signal=args.get('stop_signal'),
            networking_config=args.get('networking_config'),
            healthcheck=args.get('healthcheck'),
            stop_timeout=args.get('stop_timeout'),
            runtime=args.get('runtime'))
        return result

    def gen_host_conf(self, args):
        host_config = self.handle.create_host_config(
            # auto remove the container after it exited BOOL
            auto_remove=False if args.get('auto_remove') is None else args.get('auto_remove'),
            # volume binds
            binds=args.get('binds'),
            # BlockIO weight relative device weight in the form of:
            # [{"Path": "device_path", "Weight": weight}] DICT
            blkio_weight_device=args.get('blkio_weight_device'),
            # Block IO weight, relative weight. Accepts a weight value
            # between 10 and 1000 INT
            blkio_weight=args.get('blkio_weight'),
            # Add kernel capabilities, e.g. ['SYS_ADMIN', 'MKNOD'] STR or LIST
            cap_add=args.get('cap_add'),
            # Drop kernel capabilities STR or LIST
            cap_drop=args.get('cap_drop'),
            # The length of a CPU period in microseconds INT
            cpu_period=args.get('cpu_period'),
            # Microseconds of CPU time that the container can get in a CPU period INT
            cpu_quota=args.get('cpu_quota'),
            # CPU shares (relative weight) INT
            cpu_shares=args.get('cpu_shares'),
            # CPUs in which to allow execution (0-3, 0, 1) STR
            cpuset_cpus=args.get('cpuset_cpus'),
            # Memory nodes (MEMs) in which to allow execution (0-3, 0,1).
            # Only effective on NUMA systems
            cpuset_mems=args.get('cpuset_mems'),
            # A list of cgroup rules to apply to the container LIST
            device_cgroup_rules=args.get('device_cgroup_rules'),
            # Limit read rate (bytes per sec) from a device in the form of:
            # [{"Path": "device_path", "Rate": rate}]
            device_read_bps=args.get('device_read_bps'),
            # Limit read rate (IOPS) from a device
            device_read_iops=args.get('device_read_iops'),
            # Limit write rate (bytes per sec) to a device
            device_write_bps=args.get('device_write_bps'),
            # Limit write rate (IOPS) to a device
            device_write_iops=args.get('device_write_iops'),
            # Expose host devices to the container, as a list of strings in the form
            # <path_on_host>:<path_in_container>:<cgroup_permissions> LIST
            # E.g. /dev/sda:/dev/xvda:rwm allows the container to have read-write
            # access to the host's /dev/sda via a node named /dev/xvda inside
            # the container
            devices=args.get('devices'),
            # Set custom DNS servers LIST
            dns=args.get('dns'),
            # Additional options to be added to the container's resolv.conf file LIST
            dns_opt=args.get('dns_opt'),
            # DNS search domains LIST
            dns_search=args.get('dns_search'),
            # Additional hostnames to resolve inside the container,
            # as a mapping of hostname to IP address DICT
            extra_hosts=args.get('extra_hosts'),
            # List of additional group names and/or IDs that the container
            # process will run as LIST
            group_add=args.get('group_add'),
            # Run an init inside the container that forwards signals and
            # reaps processes BOOL
            init=False if args.get('init') is None else args.get('init'),
            # Path to the docker-init binary
            init_path=args.get('init_path'),
            # Set the IPC mode for the container STR
            ipc_mode=args.get('ipc_mode'),
            # Isolation technology to use. Default is None
            isolation=args.get('isolation'),
            # Either a dictionary mapping name to alias or a list of
            # (name, alias) tuples DICT or LIST of TUPLES
            links=args.get('links'),
            # logging configuration, as a dictionary with keys:
            #   type: the logging driver name
            #   config: a dictionary of configuration for the logging driver
            log_config=args.get('log_config'),
            # LXC config DICT
            lxc_conf=args.get('lxc_conf'),
            # memory limit. Accepts float values which represent the memory
            # limit of the created container in bytes or a string with a units
            # identification char (10000b, 10000K, 128m, 1g). If a string is
            # specified without a units character, bytes are assumed FLOAT or STR
            mem_limit=args.get('mem_limit'),
            # Tune a container's memory swappiness behavior.
            # Accepts a number between 0 and 100. INT
            mem_swappiness=args.get('mem_swappiness'),
            # Maximum amount of memory + swap a container is allowed to
            # consume STR or INT
            memswap_limit=args.get('memswap_limit'),
            # Specification for mounts to be added to the container.
            # More powerful alternative to binds. Each item in the list is
            # expected to be a docker.types.Mount object. LIST
            mounts=args.get('mounts'),
            # Network mode: STR
            #   bridge: Create a new network stack for the container on the
            #           bridge network
            #   none: No network for this container
            #   container:<name|id>: Reuse another container's network stack
            #   host: Use the host network stack
            network_mode=args.get('network_mode'),
            # whether to disable OOM killer BOOL
            oom_kill_disable=True if args.get('oom_kill_disable') is None
            else args.get('oom_kill_disable'),
            # An integer value containing the score given to the container
            # in order to tune OOM killer preference INT
            oom_score_adj=args.get('oom_score_adj'),
            # If set to 'host', use the host PID namespace inside the container STR
            pid_mode='host' if args.get('pid_mode') is None else args.get('pid_mode'),
            # Tune a container's pids limit. Set -1 for unlimited INT
            # (docker-py's keyword is pids_limit; the original used pid_limit)
            pids_limit=-1 if args.get('pid_limit') is None else args.get('pid_limit'),
            # binding ports for host and container
            port_bindings=args.get('port_bindings'),
            # give extended privileges to this container BOOL
            privileged=False if args.get('privileged') is None else args.get('privileged'),
            # publish all ports to the host BOOL
            publish_all_ports=False if args.get('publish_all_ports') is None
            else args.get('publish_all_ports'),
            # mount the container's root filesystem as read only BOOL
            read_only=False if args.get('read_only') is None else args.get('read_only'),
            # restart policy DICT
            #   Name: one of 'on-failure' or 'always'
            #   MaximumRetryCount: Number of times to restart the container
            #                      on failure
            restart_policy=args.get('restart_policy'),
            # A list of string values to customize labels for MLS systems
            # such as SELinux LIST
            security_opt=args.get('security_opt'),
            # Size of /dev/shm (e.g. 1G) STR or INT
            shm_size=args.get('shm_size'),
            # Storage driver options per container as a key-value mapping DICT
            storage_opt=args.get('storage_opt'),
            # kernel parameters to set in the container DICT
            sysctls=args.get('sysctls'),
            # Temporary filesystems to mount, as a dictionary mapping a path
            # inside the container to options for that path,
            # e.g. {'/mnt/vol1': '', '/mnt/vol2': 'size=3G,uid=1000'}
            tmpfs=args.get('tmpfs'),
            # ulimits to set inside the container, as a list of dicts
            ulimits=args.get('ulimits'),
            # sets the user namespace mode for the container when the user
            # namespace remapping option is enabled.
            # Supported values are: host STRING
            # (docker-py's keyword is userns_mode; the original used usens_mode)
            userns_mode=args.get('usens_mode'),
            # List of container names or IDs to get volumes from LIST
            volumes_from=args.get('volumes_from'),
            # runtime to use with this container
            runtime=args.get('runtime'))
        return host_config

    def gen_net_conf(self, args):
        """
        Generate networking config for creating a container
        :param args: parameters for creating network
        :return: dictionary of a networking configuration file
        """
        # Ref: http://docker-py.readthedocs.io/en/stable/api.html#docker.api.container.ContainerApiMixin.create_networking_config
        network_dict = self.handle.create_networking_config(
            {args['network_name']: self.gen_ep_conf(args)})
        return network_dict

    def gen_ep_conf(self, args):
        """
        This function is used to create an endpoint parameters dictionary
        for create_networking_config
        :param args: Pass-in Parameters for Endpoint information
        :return: Endpoint dictionary
        """
        # Ref: http://docker-py.readthedocs.io/en/stable/api.html#docker.api.container.ContainerApiMixin.create_endpoint_config
        endpoint_dict = self.handle.create_endpoint_config(
            aliases=args['aliases'],
            links=args['links'],
            ipv4_address=args['ipv4_address'],
            ipv6_address=args['ipv6_address'],
            link_local_ips=args['link_local_ips'])
        return endpoint_dict

    def start_container(self, container_id):
        """
        This func is for starting a created container by ID
        :param container_id: string of container ID or Name Tag
        :return: dict of status
        """
        return self.handle.start(container_id)

    def stop_container(self, container_id):
        """
        This method is for stopping a running container by ID or Name
        :param container_id: String of container ID or name
        :return: Dict of return status
        """
        return self.handle.stop(container_id)

    def restart_container(self, container_id):
        """
        This function is for restarting a container by container id or name
        :param container_id: string of container id or name
        :return: dict of status
        """
        return self.handle.restart(container_id)

    def remove_container(self, container_id):
        """
        This function is used to remove a stopped container by ID or Name
        :param container_id: String of container ID or Name
        :return: DICT of status
        """
        return self.handle.remove_container(container_id)

    def list_mapping_ports(self, container_id):
        """
        This func will show all of the mappings of host -> container ports.
        :param container_id: String of Container Name or ID
        :return: dict of ports mapping table
        """
        return self.handle.port(container_id)

    def commit_to_image(self, args):
        """
        This function is used for committing the changed container to an image
        :param args[container_id]: container id or name
        :return: dict of status
        """
        return self.handle.commit(container=args.get('container_id'),
                                  repository=args.get('repo_name'),
                                  tag=args.get('tag_name'),
                                  message=args.get('message'),
                                  author=args.get('author'),
                                  changes=args.get('changes'),
                                  conf=args.get('conf'))

    def pull_container_log(self, args):
        """
        Pull logs of a running container
        :param args: args[container_id]: container id or name
        :return: return list of log lines
        """
        return str(self.handle.logs(args['container_id'])).split('\n')

    def attach_container(self, container_id):
        # The 'attach' function also allows multiple parameters;
        # this version only implements one
        # https://docker-py.readthedocs.io/en/stable/containers.html?highlight=exec#docker.models.containers.Container.attach
        return self.handle.attach(container_id)

    def exec_container(self, args):
        # More parameters will be supported later; in this version, only the
        # 'cmd' parameter is passed into the method, other parameters keep
        # their default values.
        # https://docker-py.readthedocs.io/en/stable/containers.html?highlight=exec#docker.models.containers.Container.exec_run
        return self.handle.exec_run(args['cmd'])

    def container_top(self, args):
        return self.handle.top(args['container_id'])

    def container_res_usage(self, args):
        # Method 'stats' returns a generator. Need to use next(gen) to get data
        return self.handle.stats(args['container_id'])

    def container_info(self, args):
        return self.handle.inspect_container(args['container_id'])
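# A minimal usage sketch for the Docker wrapper class above; the image name,
# tag, and command are illustrative assumptions:
d = Docker()  # connects to unix:///var/run/docker.sock by default
d.pull_image('alpine', tag='3.12')
cid = d.new_container({'image': 'alpine:3.12', 'command': 'sleep 60'})['Id']
d.start_container(cid)
print(d.container_info({'container_id': cid})['State']['Status'])
d.stop_container(cid)
d.remove_container(cid)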
from docker import APIClient as DockerClient
from docker.errors import APIError
# Jenkins doesn't have our tools which results in import errors
# pylint: disable=import-error
from openshift_tools.monitoring.metric_sender import MetricSender

ZBX_KEY = "docker.container.dns.resolution"

if __name__ == "__main__":
    cli = DockerClient(version='auto',
                       base_url='unix://var/run/docker.sock',
                       timeout=120)

    container_id = os.environ['container_uuid']
    container = cli.create_container(
        image=cli.inspect_container(container_id)['Image'],
        command='getent hosts redhat.com')
    cli.start(container=container.get('Id'))
    exit_code = cli.wait(container)

    for i in range(0, 3):
        try:
            cli.remove_container(container.get('Id'))
            break
        except APIError:
            print("Error while cleaning up container.")
            time.sleep(5)

    ms = MetricSender()
    ms.add_metric({ZBX_KEY: exit_code})
class DockerProxy:
    """A wrapper over docker-py plus some utility methods and classes."""

    LOG_TAG = "Docker "
    shell_commands = ["source"]

    class ImageBuildException(Exception):
        def __init__(self, message=None):
            # Fixed: the original called super(...) without __init__, which raises a TypeError.
            super().__init__(
                "Something went wrong while building docker container image.\n{0}".format(message))

    def __init__(self):
        self.client = Client(base_url=Constants.DOCKER_BASE_URL)
        self.build_count = 0
        logging.basicConfig(level=logging.DEBUG)

    @staticmethod
    def get_container_volume_from_working_dir(working_directory):
        import os
        return os.path.join("/home/ubuntu/", os.path.basename(working_directory))

    def create_container(self, image_str, working_directory=None, name=None, port_bindings=None):
        """Creates a new container with elevated privileges. Returns the container ID.
        Maps the public webserver port of the container to port 8080 of localhost by default."""
        # Avoid a shared mutable default argument for port_bindings.
        if port_bindings is None:
            port_bindings = {
                Constants.DEFAULT_PUBLIC_WEBSERVER_PORT: ('127.0.0.1', 8080),
                Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT: ('127.0.0.1', 8081),
            }
        docker_image = DockerImage.from_string(image_str)
        volume_dir = DockerProxy.get_container_volume_from_working_dir(working_directory)
        if name is None:
            import uuid
            random_str = str(uuid.uuid4())
            name = constants.Constants.MolnsDockerContainerNamePrefix + random_str[:8]
        image = docker_image.image_id if docker_image.image_id is not Constants.DockerNonExistentTag \
            else docker_image.image_tag
        logging.info("Using image {0}".format(image))
        import os
        if DockerProxy._verify_directory(working_directory) is False:
            if working_directory is not None:
                raise InvalidVolumeName(
                    "\n\nMOLNs uses certain reserved names for its configuration files in the "
                    "controller environment, and unfortunately the provided name for the working "
                    "directory of the controller cannot be one of these. Please configure this "
                    "controller again with a different volume name and retry. "
                    "Here is the list of forbidden names: \n{0}".format(Constants.ForbiddenVolumeNames))
            logging.warning(DockerProxy.LOG_TAG +
                            "Unable to verify provided directory to use as volume. Volume will NOT "
                            "be created.")
            hc = self.client.create_host_config(privileged=True, port_bindings=port_bindings)
            container = self.client.create_container(
                image=image, name=name, command="/bin/bash", tty=True, detach=True,
                ports=[Constants.DEFAULT_PUBLIC_WEBSERVER_PORT, Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT],
                host_config=hc, environment={"PYTHONPATH": "/usr/local/"})
        else:
            container_mount_point = '/home/ubuntu/{0}'.format(os.path.basename(working_directory))
            hc = self.client.create_host_config(
                privileged=True, port_bindings=port_bindings,
                binds={working_directory: {'bind': container_mount_point, 'mode': 'rw'}})
            container = self.client.create_container(
                image=image, name=name, command="/bin/bash", tty=True, detach=True,
                ports=[Constants.DEFAULT_PUBLIC_WEBSERVER_PORT, Constants.DEFAULT_PRIVATE_NOTEBOOK_PORT],
                volumes=container_mount_point, host_config=hc, working_dir=volume_dir,
                environment={"PYTHONPATH": "/usr/local/"})
        container_id = container.get("Id")
        return container_id

    # noinspection PyBroadException
    @staticmethod
    def _verify_directory(working_directory):
        import os
        if working_directory is None or os.path.basename(working_directory) in Constants.ForbiddenVolumeNames:
            return False
        try:
            if not os.path.exists(working_directory):
                os.makedirs(working_directory)
            return True
        except Exception:
            return False

    def stop_containers(self, container_ids):
        """Stops the given containers."""
        for container_id in container_ids:
            self.stop_container(container_id)

    def stop_container(self, container_id):
        """Stops the container with the given ID."""
        self.client.stop(container_id)

    def container_status(self, container_id):
        """Checks whether the container with the given ID is running."""
        status = ProviderBase.STATUS_TERMINATED
        try:
            ret_val = str(self.client.inspect_container(container_id).get('State').get('Status'))
            if ret_val.startswith("running"):
                status = ProviderBase.STATUS_RUNNING
            else:
                status = ProviderBase.STATUS_STOPPED
        except NotFound:
            pass
        return status

    def start_containers(self, container_ids):
        """Starts each container in the given list of container IDs."""
        for container_id in container_ids:
            self.start_container(container_id)

    def start_container(self, container_id):
        """Starts the container with the given ID."""
        logging.info(DockerProxy.LOG_TAG + " Starting container " + container_id)
        try:
            self.client.start(container=container_id)
        except (NotFound, NullResource) as e:
            print(DockerProxy.LOG_TAG + "Something went wrong while starting container.", e)
            return False
        return True

    def execute_command(self, container_id, command):
        """Executes the given command as a shell command in the given container.
        Returns None if anything goes wrong."""
        run_command = "/bin/bash -c \"" + command + "\""
        if self.start_container(container_id) is False:
            print(DockerProxy.LOG_TAG + "Could not start container.")
            return None
        try:
            exec_instance = self.client.exec_create(container_id, run_command)
            response = self.client.exec_start(exec_instance)
            return [self.client.exec_inspect(exec_instance), response]
        except (NotFound, APIError) as e:
            print(DockerProxy.LOG_TAG + " Could not execute command.", e)
            return None

    def build_image(self, dockerfile):
        """Builds an image from the given Dockerfile object and returns the ID of the created image."""
        import uuid
        logging.info("Building image...")
        random_string = str(uuid.uuid4())
        image_tag = Constants.DOCKER_IMAGE_PREFIX + "{0}".format(random_string[:])
        last_line = ""
        try:
            for line in self.client.build(fileobj=dockerfile, rm=True, tag=image_tag):
                print(DockerProxy._decorate(line))
                if "errorDetail" in line:
                    raise DockerProxy.ImageBuildException()
                last_line = line
            # Return image ID. It's a hack around the fact that docker-py's build image
            # command doesn't return an image id.
            image_id = get_docker_image_id_from_string(str(last_line))
            logging.info("Image ID: {0}".format(image_id))
            return str(DockerImage(image_id, image_tag))
        except (DockerProxy.ImageBuildException, IndexError) as e:
            raise DockerProxy.ImageBuildException(e)

    @staticmethod
    def _decorate(some_line):
        return some_line[11:-4].rstrip()

    def image_exists(self, image_str):
        """Checks if an image with the given ID/tag exists locally."""
        docker_image = DockerImage.from_string(image_str)
        if docker_image.image_id is Constants.DockerNonExistentTag \
                and docker_image.image_tag is Constants.DockerNonExistentTag:
            raise InvalidDockerImageException("Neither image_id nor image_tag provided.")
        for image in self.client.images():
            some_id = image["Id"]
            some_tags = image["RepoTags"] or [None]
            if docker_image.image_id in \
                    some_id[:(Constants.DOCKER_PY_IMAGE_ID_PREFIX_LENGTH + Constants.DOKCER_IMAGE_ID_LENGTH)]:
                return True
            if docker_image.image_tag in some_tags:
                return True
        return False

    def terminate_containers(self, container_ids):
        """Terminates the containers with the given container IDs."""
        for container_id in container_ids:
            try:
                if self.container_status(container_id) == ProviderBase.STATUS_RUNNING:
                    self.stop_container(container_id)
                self.terminate_container(container_id)
            except NotFound:
                pass

    def terminate_container(self, container_id):
        self.client.remove_container(container_id)

    def get_mapped_ports(self, container_id):
        container_ins = self.client.inspect_container(container_id)
        mapped_ports = container_ins['HostConfig']['PortBindings']
        ret_val = []
        if mapped_ports is None:
            logging.info("No mapped ports for {0}".format(container_id))
            return ret_val
        for k, v in mapped_ports.items():
            host_port = v[0]['HostPort']
            ret_val.append(host_port)
        return ret_val

    def get_working_directory(self, container_id):
        return self.client.inspect_container(container_id)["Config"]["WorkingDir"]

    def get_home_directory(self, container_id):
        env_vars = self.client.inspect_container(container_id)["Config"]["Env"]
        home = [i for i in env_vars if i.startswith("HOME")]
        return home[0].split("=")[1]

    def put_archive(self, container_id, tar_file_bytes, target_path_in_container):
        """Copies and unpacks a given tarfile in the container at the specified location.
        The location must exist in the container."""
        if self.start_container(container_id) is False:
            raise Exception("Could not start container.")
        # Prepend file path with /home/ubuntu/. TODO Should be refined.
        if not target_path_in_container.startswith("/home/ubuntu/"):
            import os
            target_path_in_container = os.path.join("/home/ubuntu/", target_path_in_container)
        logging.info("target path in container: {0}".format(target_path_in_container))
        if not self.client.put_archive(container_id, target_path_in_container, tar_file_bytes):
            logging.error(DockerProxy.LOG_TAG + "Failed to copy.")

    def get_container_ip_address(self, container_id):
        """Returns the IP address of the given container."""
        self.start_container(container_id)
        while True:
            # Re-inspect on each iteration; the original inspected once before the
            # loop and therefore could spin forever on a stale empty address.
            ins = self.client.inspect_container(container_id)
            ip_address = str(ins.get("NetworkSettings").get("IPAddress"))
            if ip_address == "":
                time.sleep(3)
                continue
            if ip_address.startswith("1"):
                break
        return ip_address
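# Hypothetical driver for the DockerProxy wrapper above; the image string and
# working directory are illustrative, and the Constants/ProviderBase modules
# the class relies on are assumed to be importable in this context.
proxy = DockerProxy()
cid = proxy.create_container("ubuntu:latest", working_directory="/tmp/molns_demo")
if proxy.start_container(cid):
    result = proxy.execute_command(cid, "echo hello")  # [exec_inspect dict, output]
    if result is not None:
        print(result[1])
proxy.stop_container(cid)
proxy.terminate_container(cid)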
class DockerOperator(BaseOperator): """ Execute a command inside a docker container. A temporary directory is created on the host and mounted into a container to allow storing files that together exceed the default disk size of 10GB in a container. The path to the mounted directory can be accessed via the environment variable ``AIRFLOW_TMP_DIR``. If a login to a private registry is required prior to pulling the image, a Docker connection needs to be configured in Airflow and the connection ID be provided with the parameter ``docker_conn_id``. :param image: Docker image from which to create the container. If image tag is omitted, "latest" will be used. :type image: str :param api_version: Remote API version. Set to ``auto`` to automatically detect the server's version. :type api_version: str :param auto_remove: Auto-removal of the container on daemon side when the container's process exits. The default is False. :type auto_remove: bool :param command: Command to be run in the container. (templated) :type command: str or list :param cpus: Number of CPUs to assign to the container. This value gets multiplied with 1024. See https://docs.docker.com/engine/reference/run/#cpu-share-constraint :type cpus: float :param dns: Docker custom DNS servers :type dns: list of strings :param dns_search: Docker custom DNS search domain :type dns_search: list of strings :param docker_url: URL of the host running the docker daemon. Default is unix://var/run/docker.sock :type docker_url: str :param environment: Environment variables to set in the container. (templated) :type environment: dict :param force_pull: Pull the docker image on every run. Default is False. :type force_pull: bool :param mem_limit: Maximum amount of memory the container can use. Either a float value, which represents the limit in bytes, or a string like ``128m`` or ``1g``. :type mem_limit: float or str :param network_mode: Network mode for the container. :type network_mode: str :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection. :type tls_ca_cert: str :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client. :type tls_client_cert: str :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client. :type tls_client_key: str :param tls_hostname: Hostname to match against the docker server certificate or False to disable the check. :type tls_hostname: str or bool :param tls_ssl_version: Version of SSL to use when communicating with docker daemon. :type tls_ssl_version: str :param tmp_dir: Mount point inside the container to a temporary directory created on the host by the operator. The path is also made available via the environment variable ``AIRFLOW_TMP_DIR`` inside the container. :type tmp_dir: str :param user: Default user inside the docker container. :type user: int or str :param volumes: List of volumes to mount into the container, e.g. ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``. :param working_dir: Working directory to set on the container (equivalent to the -w switch of the docker client) :type working_dir: str :param xcom_push: Whether stdout will be pushed to the next step using XCom. The default is False. :type xcom_push: bool :param xcom_all: Push all the stdout or just the last line. The default is False (last line). :type xcom_all: bool :param docker_conn_id: ID of the Airflow connection to use :type docker_conn_id: str :param shm_size: Size of ``/dev/shm`` in bytes. The size must be greater than 0. 
If omitted uses system default. :type shm_size: int """ template_fields = ("command", "environment") template_ext = (".sh", ".bash") @apply_defaults def __init__(self, image, api_version=None, command=None, cpus=1.0, docker_url="unix://var/run/docker.sock", environment=None, force_pull=False, mem_limit=None, network_mode=None, tls_ca_cert=None, tls_client_cert=None, tls_client_key=None, tls_hostname=None, tls_ssl_version=None, tmp_dir="/tmp/airflow", user=None, volumes=None, working_dir=None, xcom_push=False, xcom_all=False, docker_conn_id=None, dns=None, dns_search=None, auto_remove=False, shm_size=None, *args, **kwargs): super(DockerOperator, self).__init__(*args, **kwargs) self.api_version = api_version self.auto_remove = auto_remove self.command = command self.cpus = cpus self.dns = dns self.dns_search = dns_search self.docker_url = docker_url self.environment = environment or {} self.force_pull = force_pull self.image = image self.mem_limit = mem_limit self.network_mode = network_mode self.tls_ca_cert = tls_ca_cert self.tls_client_cert = tls_client_cert self.tls_client_key = tls_client_key self.tls_hostname = tls_hostname self.tls_ssl_version = tls_ssl_version self.tmp_dir = tmp_dir self.user = user self.volumes = volumes or [] self.working_dir = working_dir self.xcom_push_flag = xcom_push self.xcom_all = xcom_all self.docker_conn_id = docker_conn_id self.shm_size = shm_size self.cli = None self.container = None def get_hook(self): return DockerHook( docker_conn_id=self.docker_conn_id, base_url=self.docker_url, version=self.api_version, tls=self.__get_tls_config(), ) def execute(self, context): self.log.info("Starting docker container from image %s", self.image) tls_config = self.__get_tls_config() if self.docker_conn_id: self.cli = self.get_hook().get_conn() else: self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config) if self.force_pull or len(self.cli.images(name=self.image)) == 0: for l in self.cli.pull(self.image, stream=True): try: output = json.loads(l.decode("utf-8").strip()) if "status" in output: self.log.info("%s", output["status"]) except Exception: self.log.info("Failed to parse docker pull status") # with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir: # self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir # self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir)) self.container = self.cli.create_container( command=self.get_command(), environment=self.environment, host_config=self.cli.create_host_config( auto_remove=self.auto_remove, binds=self.volumes, network_mode=self.network_mode, shm_size=self.shm_size, dns=self.dns, dns_search=self.dns_search, cpu_shares=int(round(self.cpus * 1024)), mem_limit=self.mem_limit, ), image=self.image, user=self.user, working_dir=self.working_dir, ) self.cli.start(self.container["Id"]) log_metric("docker id", self.container["Id"]) line = "" for line in self.cli.logs(container=self.container["Id"], stream=True): line = line.strip() if hasattr(line, "decode"): line = line.decode("utf-8") self.log.info(line) result = self.cli.wait(self.container["Id"]) log_metric("docker status code", result["StatusCode"]) if result["StatusCode"] != 0: raise AirflowException("docker container failed: " + repr(result)) if self.xcom_push_flag: return (self.cli.logs(container=self.container["Id"]) if self.xcom_all else str(line)) def get_command(self): if self.command is not None and self.command.strip().find("[") == 0: commands = ast.literal_eval(self.command) else: commands = self.command return commands def 
on_kill(self): if self.cli is not None: self.log.info("Stopping docker container") self.cli.stop(self.container["Id"]) def __get_tls_config(self): tls_config = None if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key: tls_config = tls.TLSConfig( ca_cert=self.tls_ca_cert, client_cert=(self.tls_client_cert, self.tls_client_key), verify=True, ssl_version=self.tls_ssl_version, assert_hostname=self.tls_hostname, ) self.docker_url = self.docker_url.replace("tcp://", "https://") return tls_config
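# Illustrative Airflow wiring for the operator above. The DAG id, schedule,
# image, and paths are assumptions; only the constructor arguments mirror the
# parameters documented in the class docstring.
from datetime import datetime
from airflow import DAG

with DAG(dag_id="docker_operator_demo", start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    print_date = DockerOperator(
        task_id="print_date",
        image="alpine:3.12",
        command='date "+%Y-%m-%d"',
        volumes=["/tmp/demo_data:/data:ro"],  # host:container[:mode]
        auto_remove=True,
    )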
class TestContainers(unittest.TestCase): podman = None # initialized podman configuration for tests service = None # podman service instance topContainerId = "" def setUp(self): super().setUp() self.client = APIClient(base_url="tcp://127.0.0.1:8080", timeout=15) TestContainers.podman.restore_image_from_cache(self.client) TestContainers.topContainerId = common.run_top_container(self.client) self.assertIsNotNone(TestContainers.topContainerId) def tearDown(self): common.remove_all_containers(self.client) common.remove_all_images(self.client) self.client.close() return super().tearDown() @classmethod def setUpClass(cls): super().setUpClass() TestContainers.podman = Podman() TestContainers.service = TestContainers.podman.open( "system", "service", "tcp:127.0.0.1:8080", "--time=0" ) # give the service some time to be ready... time.sleep(2) rc = TestContainers.service.poll() if rc is not None: raise subprocess.CalledProcessError(rc, "podman system service") @classmethod def tearDownClass(cls): TestContainers.service.terminate() stdout, stderr = TestContainers.service.communicate(timeout=0.5) if stdout: sys.stdout.write("\nContainers Service Stdout:\n" + stdout.decode("utf-8")) if stderr: sys.stderr.write("\nContainers Service Stderr:\n" + stderr.decode("utf-8")) TestContainers.podman.tear_down() return super().tearDownClass() def test_inspect_container(self): # Inspect bogus container with self.assertRaises(errors.NotFound) as error: self.client.inspect_container("dummy") self.assertEqual(error.exception.response.status_code, 404) # Inspect valid container by Id container = self.client.inspect_container(TestContainers.topContainerId) self.assertIn("top", container["Name"]) # Inspect valid container by name container = self.client.inspect_container("top") self.assertIn(TestContainers.topContainerId, container["Id"]) def test_create_container(self): # Run a container with detach mode container = self.client.create_container(image="alpine", detach=True) self.assertEqual(len(container), 2) def test_start_container(self): # Start bogus container with self.assertRaises(errors.NotFound) as error: self.client.start("dummy") self.assertEqual(error.exception.response.status_code, 404) # Podman docs say it should give a 304 but it returns no response # # Starting an already started container should return 304 # response = self.client.start(container=TestContainers.topContainerId) # self.assertEqual(error.exception.response.status_code, 304) # Create a new container and validate the count self.client.create_container(image=constant.ALPINE, name="container2") containers = self.client.containers(quiet=True, all=True) self.assertEqual(len(containers), 2) def test_stop_container(self): # Stop bogus container with self.assertRaises(errors.NotFound) as error: self.client.stop("dummy") self.assertEqual(error.exception.response.status_code, 404) # Validate the container state container = self.client.inspect_container("top") self.assertEqual(container["State"]["Status"], "running") # Stop a running container and validate the state self.client.stop(TestContainers.topContainerId) container = self.client.inspect_container("top") self.assertIn( container["State"]["Status"], "stopped exited", ) def test_restart_container(self): # Restart bogus container with self.assertRaises(errors.NotFound) as error: self.client.restart("dummy") self.assertEqual(error.exception.response.status_code, 404) # Validate the container state self.client.stop(TestContainers.topContainerId) container = self.client.inspect_container("top") 
self.assertEqual(container["State"]["Status"], "stopped") # Restart the stopped container and validate the state self.client.restart(TestContainers.topContainerId) container = self.client.inspect_container("top") self.assertEqual(container["State"]["Status"], "running") def test_remove_container(self): # Remove bogus container with self.assertRaises(errors.NotFound) as error: self.client.remove_container("dummy") self.assertEqual(error.exception.response.status_code, 404) # Remove container by ID with force self.client.remove_container(TestContainers.topContainerId, force=True) containers = self.client.containers() self.assertEqual(len(containers), 0) def test_remove_container_without_force(self): # Validate current container count containers = self.client.containers() self.assertEqual(len(containers), 1) # Removing a running container should throw an error with self.assertRaises(errors.APIError) as error: self.client.remove_container(TestContainers.topContainerId) self.assertEqual(error.exception.response.status_code, 500) # Stop the container and remove it by ID without force self.client.stop(TestContainers.topContainerId) self.client.remove_container(TestContainers.topContainerId) containers = self.client.containers() self.assertEqual(len(containers), 0) def test_pause_container(self): # Pause bogus container with self.assertRaises(errors.NotFound) as error: self.client.pause("dummy") self.assertEqual(error.exception.response.status_code, 404) # Validate the container state container = self.client.inspect_container("top") self.assertEqual(container["State"]["Status"], "running") # Pause a running container and validate the state self.client.pause(container["Id"]) container = self.client.inspect_container("top") self.assertEqual(container["State"]["Status"], "paused") def test_pause_stopped_container(self): # Stop the container self.client.stop(TestContainers.topContainerId) # Pausing an exited container should throw an error with self.assertRaises(errors.APIError) as error: self.client.pause(TestContainers.topContainerId) self.assertEqual(error.exception.response.status_code, 500) def test_unpause_container(self): # Unpause bogus container with self.assertRaises(errors.NotFound) as error: self.client.unpause("dummy") self.assertEqual(error.exception.response.status_code, 404) # Validate the container state self.client.pause(TestContainers.topContainerId) container = self.client.inspect_container("top") self.assertEqual(container["State"]["Status"], "paused") # Unpause the paused container and validate the state self.client.unpause(TestContainers.topContainerId) container = self.client.inspect_container("top") self.assertEqual(container["State"]["Status"], "running") def test_list_container(self): # Add container and validate the count self.client.create_container(image="alpine", detach=True) containers = self.client.containers(all=True) self.assertEqual(len(containers), 2) def test_filters(self): self.skipTest("TODO Endpoint does not yet support filters") # List container with filter by id filters = {"id": TestContainers.topContainerId} ctnrs = self.client.containers(all=True, filters=filters) self.assertEqual(len(ctnrs), 1) # List container with filter by name filters = {"name": "top"} ctnrs = self.client.containers(all=True, filters=filters) self.assertEqual(len(ctnrs), 1) def test_rename_container(self): # Rename bogus container with self.assertRaises(errors.APIError) as error: self.client.rename(container="dummy", name="newname") self.assertEqual(error.exception.response.status_code, 404)
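# A minimal runner sketch for the test class above, assuming it lives in a
# module executed directly and that a podman binary is available on PATH:
if __name__ == "__main__":
    unittest.main()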
class DagsterDockerOperator(DockerOperator): """Dagster operator for Apache Airflow. Wraps a modified DockerOperator incorporating https://github.com/apache/airflow/pull/4315. Unlike the standard DockerOperator, this operator also supports initializing the Docker client using docker.from_env, so it isn't necessary to explicitly set docker_url, tls_config, or api_version. Incorporates https://github.com/apache/airflow/pull/4315/ and an implementation of https://issues.apache.org/jira/browse/AIRFLOW-3825. Parameters: host_tmp_dir (str): Specify the location of the temporary directory on the host which will be mapped to tmp_dir. If not provided defaults to using the standard system temp directory. """ # py2 compat # pylint: disable=keyword-arg-before-vararg def __init__(self, dagster_operator_parameters, *args): kwargs = dagster_operator_parameters.op_kwargs tmp_dir = kwargs.pop("tmp_dir", DOCKER_TEMPDIR) host_tmp_dir = kwargs.pop("host_tmp_dir", seven.get_system_temp_directory()) self.host_tmp_dir = host_tmp_dir run_config = dagster_operator_parameters.run_config if "filesystem" in run_config["storage"]: if ("config" in (run_config["storage"].get("filesystem", {}) or {}) and "base_dir" in ((run_config["storage"].get( "filesystem", {}) or {}).get("config", {}) or {}) and run_config["storage"]["filesystem"]["config"]["base_dir"] != tmp_dir): warnings.warn( "Found base_dir '{base_dir}' set in filesystem storage config, which was not " "the tmp_dir we expected ('{tmp_dir}', mounting host_tmp_dir " "'{host_tmp_dir}' from the host). We assume you know what you are doing, but " "if you are having trouble executing containerized workloads, this may be the " "issue".format( base_dir=run_config["storage"]["filesystem"]["config"] ["base_dir"], tmp_dir=tmp_dir, host_tmp_dir=host_tmp_dir, )) else: run_config["storage"]["filesystem"] = dict( run_config["storage"]["filesystem"] or {}, **{ "config": dict(((run_config["storage"].get("filesystem", {}) or {}).get("config", {}) or {}), **{"base_dir": tmp_dir}) }) self.docker_conn_id_set = kwargs.get("docker_conn_id") is not None self.run_config = run_config self.pipeline_name = dagster_operator_parameters.pipeline_name self.pipeline_snapshot = dagster_operator_parameters.pipeline_snapshot self.execution_plan_snapshot = dagster_operator_parameters.execution_plan_snapshot self.parent_pipeline_snapshot = dagster_operator_parameters.parent_pipeline_snapshot self.mode = dagster_operator_parameters.mode self.step_keys = dagster_operator_parameters.step_keys self.recon_repo = dagster_operator_parameters.recon_repo self._run_id = None self.instance_ref = dagster_operator_parameters.instance_ref check.invariant(self.instance_ref) self.instance = DagsterInstance.from_ref(self.instance_ref) # These shenanigans are so we can override DockerOperator.get_hook in order to configure # a docker client using docker.from_env, rather than messing with the logic of # DockerOperator.execute if not self.docker_conn_id_set: try: from_env().version() except Exception: # pylint: disable=broad-except pass else: kwargs["docker_conn_id"] = True if "environment" not in kwargs: kwargs["environment"] = get_aws_environment() super(DagsterDockerOperator, self).__init__( task_id=dagster_operator_parameters.task_id, dag=dagster_operator_parameters.dag, tmp_dir=tmp_dir, host_tmp_dir=host_tmp_dir, xcom_push=True, # We do this because log lines won't necessarily be emitted in order (!) 
-- so we can't # just check the last log line to see if it's JSON. xcom_all=True, *args, **kwargs) @contextmanager def get_host_tmp_dir(self): yield self.host_tmp_dir def execute_raw(self, context): """Modified only to use the get_host_tmp_dir helper.""" self.log.info("Starting docker container from image %s", self.image) tls_config = self.__get_tls_config() if self.docker_conn_id: self.cli = self.get_hook().get_conn() else: self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config) if self.force_pull or len(self.cli.images(name=self.image)) == 0: self.log.info("Pulling docker image %s", self.image) for l in self.cli.pull(self.image, stream=True): output = seven.json.loads(l.decode("utf-8").strip()) if "status" in output: self.log.info("%s", output["status"]) with self.get_host_tmp_dir() as host_tmp_dir: self.environment["AIRFLOW_TMP_DIR"] = self.tmp_dir self.volumes.append("{0}:{1}".format(host_tmp_dir, self.tmp_dir)) self.container = self.cli.create_container( command=self.get_docker_command(context.get("ts")), environment=self.environment, host_config=self.cli.create_host_config( auto_remove=self.auto_remove, binds=self.volumes, network_mode=self.network_mode, shm_size=self.shm_size, dns=self.dns, dns_search=self.dns_search, cpu_shares=int(round(self.cpus * 1024)), mem_limit=self.mem_limit, ), image=self.image, user=self.user, working_dir=self.working_dir, ) self.cli.start(self.container["Id"]) res = [] line = "" for new_line in self.cli.logs(container=self.container["Id"], stream=True, stdout=True, stderr=False): line = new_line.strip() if hasattr(line, "decode"): line = line.decode("utf-8") self.log.info(line) res.append(line) result = self.cli.wait(self.container["Id"]) if result["StatusCode"] != 0: raise AirflowException( "docker container failed with result: {result} and logs: {logs}" .format(result=repr(result), logs="\n".join(res))) if self.xcom_push_flag: # Try to avoid any kind of race condition? return res if self.xcom_all else str(line) # This is a class-private name on DockerOperator for no good reason -- # all that the status quo does is inhibit extension of the class. 
# See https://issues.apache.org/jira/browse/AIRFLOW-3880 def __get_tls_config(self): # pylint: disable=no-member return super(DagsterDockerOperator, self)._DockerOperator__get_tls_config() @property def run_id(self): if self._run_id is None: return "" else: return self._run_id def query(self, airflow_ts): check.opt_str_param(airflow_ts, "airflow_ts") recon_pipeline = self.recon_repo.get_reconstructable_pipeline( self.pipeline_name) input_json = serialize_dagster_namedtuple( ExecuteStepArgs( pipeline_origin=recon_pipeline.get_origin(), pipeline_run_id=self.run_id, instance_ref=self.instance_ref, mode=self.mode, step_keys_to_execute=self.step_keys, run_config=self.run_config, retries_dict={}, )) command = "dagster api execute_step_with_structured_logs {}".format( json.dumps(input_json)) self.log.info("Executing: {command}\n".format(command=command)) return command def get_docker_command(self, airflow_ts): """Deliberately renamed from get_command to avoid shadowing the method of the base class""" check.opt_str_param(airflow_ts, "airflow_ts") if self.command is not None and self.command.strip().find("[") == 0: commands = ast.literal_eval(self.command) elif self.command is not None: commands = self.command else: commands = self.query(airflow_ts) return commands def get_hook(self): if self.docker_conn_id_set: return super(DagsterDockerOperator, self).get_hook() class _DummyHook(object): def get_conn(self): return from_env().api return _DummyHook() def execute(self, context): if "run_id" in self.params: self._run_id = self.params["run_id"] elif "dag_run" in context and context["dag_run"] is not None: self._run_id = context["dag_run"].run_id try: tags = { AIRFLOW_EXECUTION_DATE_STR: context.get("ts") } if "ts" in context else {} self.instance.register_managed_run( pipeline_name=self.pipeline_name, run_id=self.run_id, run_config=self.run_config, mode=self.mode, solids_to_execute=None, step_keys_to_execute=None, tags=tags, root_run_id=None, parent_run_id=None, pipeline_snapshot=self.pipeline_snapshot, execution_plan_snapshot=self.execution_plan_snapshot, parent_pipeline_snapshot=self.parent_pipeline_snapshot, ) res = self.execute_raw(context) self.log.info("Finished executing container.") if not res: raise AirflowException("Missing query response") try: events = [ deserialize_json_to_dagster_namedtuple(line) for line in res if line ] except Exception: # pylint: disable=broad-except raise AirflowException( "Could not parse response {response}".format( response=repr(res))) check_events_for_failures(events) check_events_for_skips(events) return events finally: self._run_id = None
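# Background for the name-mangling workaround used above: a double-underscore
# method __m defined on class Base is stored as _Base__m, which is why the
# subclasses here must spell out _DockerOperator__get_tls_config. A small
# self-contained demonstration:
class Base:
    def __secret(self):
        return "mangled"

class Child(Base):
    def reveal(self):
        # the mangled name must be written explicitly from outside Base
        return self._Base__secret()

assert Child().reveal() == "mangled"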
def create_containers(args):
    """
    - API to create containers.
    - API is equivalent to the docker run command.
    - Response if image is found locally:
      {u'Id': u'67d1f4f5fb5e667f03e55e3b794fe34b95304d0cec584459ca0f84fa3c0681e1', u'Warnings': None}
    - If image is not found locally, then it is pulled from docker hub and the response is a json string.
    - Returns the container ID if successful or None if unsuccessful.
    """
    print("In create container")
    # Coerce incoming argument types before handing them to docker-py.
    if 'detach' in args:
        args['detach'] = bool(args['detach'])
    if 'stdin_open' in args:
        args['stdin_open'] = bool(args['stdin_open'])
    if 'name' in args:
        args['name'] = str(args['name'])
    if 'image' in args:
        args['image'] = str(args['image'])
    if 'tty' in args:
        args['tty'] = bool(args['tty'])
    if 'network_disabled' in args:
        args['network_disabled'] = bool(args['network_disabled'])
    invoke_clientAPI = APIClient(base_url='unix://var/run/docker.sock', version='auto')
    try:
        containerID = invoke_clientAPI.create_container(**args)
        print('container ID: ', containerID)
        print('Created container on the first try; starting it now')
        invoke_clientAPI.start(container=containerID.get('Id'))
    except Exception as e:
        print('Create failed; pulling the image first:', e)
        try:
            for line in invoke_clientAPI.pull(args['image'], stream=True):
                print(json.dumps(json.loads(line), indent=4))
            containerID = invoke_clientAPI.create_container(**args)
            print('Created container, but had to pull the image')
        except HTTPError:
            containerID = None
    print(containerID)
    return containerID
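# Hypothetical call into create_containers() above; the keys mirror docker-py's
# create_container kwargs, and the image/name values are illustrative only.
response = create_containers({
    'image': 'alpine',
    'name': 'demo-container',
    'detach': True,
    'tty': True,
})
if response is not None:
    print(response.get('Id'))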
class ModifiedDockerOperator(DockerOperator): """ModifiedDockerOperator supports host temporary directories on OSX. Incorporates https://github.com/apache/airflow/pull/4315/ and an implementation of https://issues.apache.org/jira/browse/AIRFLOW-3825. :param host_tmp_dir: Specify the location of the temporary directory on the host which will be mapped to tmp_dir. If not provided defaults to using the standard system temp directory. :type host_tmp_dir: str """ def __init__(self, host_tmp_dir='/tmp', **kwargs): self.host_tmp_dir = host_tmp_dir kwargs['xcom_push'] = True super(ModifiedDockerOperator, self).__init__(**kwargs) @contextmanager def get_host_tmp_dir(self): '''Abstracts the tempdir context manager so that this can be overridden.''' with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as tmp_dir: yield tmp_dir def execute(self, context): '''Modified only to use the get_host_tmp_dir helper.''' self.log.info('Starting docker container from image %s', self.image) tls_config = self.__get_tls_config() if self.docker_conn_id: self.cli = self.get_hook().get_conn() else: self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config) if self.force_pull or len(self.cli.images(name=self.image)) == 0: self.log.info('Pulling docker image %s', self.image) for l in self.cli.pull(self.image, stream=True): output = seven.json.loads(l.decode('utf-8').strip()) if 'status' in output: self.log.info("%s", output['status']) with self.get_host_tmp_dir() as host_tmp_dir: self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir)) self.container = self.cli.create_container( command=self.get_command(), environment=self.environment, host_config=self.cli.create_host_config( auto_remove=self.auto_remove, binds=self.volumes, network_mode=self.network_mode, shm_size=self.shm_size, dns=self.dns, dns_search=self.dns_search, cpu_shares=int(round(self.cpus * 1024)), mem_limit=self.mem_limit, ), image=self.image, user=self.user, working_dir=self.working_dir, ) self.cli.start(self.container['Id']) res = [] line = '' for new_line in self.cli.logs(container=self.container['Id'], stream=True): line = new_line.strip() if hasattr(line, 'decode'): line = line.decode('utf-8') self.log.info(line) res.append(line) result = self.cli.wait(self.container['Id']) if result['StatusCode'] != 0: raise AirflowException( 'docker container failed with result: {result} and logs: {logs}'.format( result=repr(result), logs='\n'.join(res) ) ) if self.xcom_push_flag: # Try to avoid any kind of race condition? return res if self.xcom_all else str(line) # This is a class-private name on DockerOperator for no good reason -- # all that the status quo does is inhibit extension of the class. # See https://issues.apache.org/jira/browse/AIRFLOW-3880 def __get_tls_config(self): # pylint: disable=no-member return super(ModifiedDockerOperator, self)._DockerOperator__get_tls_config()
class DockerOperator(BaseOperator): """ Execute a command inside a docker container. A temporary directory is created on the host and mounted into a container to allow storing files that together exceed the default disk size of 10GB in a container. The path to the mounted directory can be accessed via the environment variable ``AIRFLOW_TMP_DIR``. :param image: Docker image from which to create the container. :type image: str :param api_version: Remote API version. :type api_version: str :param command: Command to be run in the container. :type command: str or list :param cpus: Number of CPUs to assign to the container. This value gets multiplied with 1024. See https://docs.docker.com/engine/reference/run/#cpu-share-constraint :type cpus: float :param docker_url: URL of the host running the docker daemon. :type docker_url: str :param environment: Environment variables to set in the container. :type environment: dict :param force_pull: Pull the docker image on every run. :type force_pull: bool :param mem_limit: Maximum amount of memory the container can use. Either a float value, which represents the limit in bytes, or a string like ``128m`` or ``1g``. :type mem_limit: float or str :param network_mode: Network mode for the container. :type network_mode: str :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection. :type tls_ca_cert: str :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client. :type tls_client_cert: str :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client. :type tls_client_key: str :param tls_hostname: Hostname to match against the docker server certificate or False to disable the check. :type tls_hostname: str or bool :param tls_ssl_version: Version of SSL to use when communicating with docker daemon. :type tls_ssl_version: str :param tmp_dir: Mount point inside the container to a temporary directory created on the host by the operator. The path is also made available via the environment variable ``AIRFLOW_TMP_DIR`` inside the container. :type tmp_dir: str :param user: Default user inside the docker container. :type user: int or str :param volumes: List of volumes to mount into the container, e.g. ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``. :param working_dir: Working directory to set on the container (equivalent to the -w switch of the docker client) :type working_dir: str :param xcom_push: Whether stdout will be pushed to the next step using XCom. The default is False. :type xcom_push: bool :param xcom_all: Push all the stdout or just the last line. The default is False (last line). 
:type xcom_all: bool :param auto_remove: Automatically remove the container when it exits :type auto_remove: bool """ template_fields = ('command',) template_ext = ('.sh', '.bash',) @apply_defaults def __init__( self, image, api_version=None, command=None, cpus=1.0, docker_url='unix://var/run/docker.sock', environment=None, force_pull=False, mem_limit=None, network_mode=None, tls_ca_cert=None, tls_client_cert=None, tls_client_key=None, tls_hostname=None, tls_ssl_version=None, tmp_dir='/tmp/airflow', user=None, volumes=None, working_dir=None, xcom_push=False, xcom_all=False, auto_remove=False, *args, **kwargs): super(DockerOperator, self).__init__(*args, **kwargs) self.api_version = api_version self.command = command self.cpus = cpus self.docker_url = docker_url self.environment = environment or {} self.force_pull = force_pull self.image = image self.mem_limit = mem_limit self.network_mode = network_mode self.tls_ca_cert = tls_ca_cert self.tls_client_cert = tls_client_cert self.tls_client_key = tls_client_key self.tls_hostname = tls_hostname self.tls_ssl_version = tls_ssl_version self.tmp_dir = tmp_dir self.user = user self.volumes = volumes or [] self.working_dir = working_dir self.xcom_push_flag = xcom_push self.xcom_all = xcom_all self.auto_remove = auto_remove self.cli = None self.container = None def execute(self, context): self.log.info('Starting docker container from image %s', self.image) tls_config = None if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key: tls_config = tls.TLSConfig( ca_cert=self.tls_ca_cert, client_cert=(self.tls_client_cert, self.tls_client_key), verify=True, ssl_version=self.tls_ssl_version, assert_hostname=self.tls_hostname ) self.docker_url = self.docker_url.replace('tcp://', 'https://') self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config) if ':' not in self.image: image = self.image + ':latest' else: image = self.image if self.force_pull or len(self.cli.images(name=image)) == 0: self.log.info('Pulling docker image %s', image) for l in self.cli.pull(image, stream=True): output = json.loads(l.decode('utf-8')) self.log.info("%s", output['status']) cpu_shares = int(round(self.cpus * 1024)) with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir: self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir)) self.container = self.cli.create_container( command=self.get_command(), cpu_shares=cpu_shares, environment=self.environment, host_config=self.cli.create_host_config( binds=self.volumes, network_mode=self.network_mode, auto_remove=self.auto_remove), image=image, mem_limit=self.mem_limit, user=self.user, working_dir=self.working_dir ) self.cli.start(self.container['Id']) line = '' for line in self.cli.logs(container=self.container['Id'], stream=True): line = line.strip() if hasattr(line, 'decode'): line = line.decode('utf-8') self.log.info(line) exit_code = self.cli.wait(self.container['Id']) if exit_code != 0: raise AirflowException('docker container failed') if self.xcom_push_flag: return self.cli.logs(container=self.container['Id']) if self.xcom_all else str(line) def get_command(self): if self.command is not None and self.command.strip().find('[') == 0: commands = ast.literal_eval(self.command) else: commands = self.command return commands def on_kill(self): if self.cli is not None: self.log.info('Stopping docker container') self.cli.stop(self.container['Id'])
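# The get_command() logic above in isolation: a templated command that looks
# like a Python list literal (leading '[') is parsed with ast.literal_eval,
# while anything else is passed through as a shell string. A small sketch:
import ast

def parse_command(command):
    if command is not None and command.strip().find('[') == 0:
        return ast.literal_eval(command)
    return command

assert parse_command('["echo", "hi"]') == ["echo", "hi"]
assert parse_command('echo hi') == 'echo hi'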
class BaseDockerOperator(object): """ Execute a command inside a docker container. A temporary directory is created on the host and mounted into a container to allow storing files that together exceed the default disk size of 10GB in a container. The path to the mounted directory can be accessed via the environment variable ``AIRFLOW_TMP_DIR``. If a login to a private registry is required prior to pulling the image, a Docker connection needs to be configured in Airflow and the connection ID be provided with the parameter ``docker_conn_id``. :param image: Docker image from which to create the container. If image tag is omitted, "latest" will be used. :type image: str :param api_version: Remote API version. Set to ``auto`` to automatically detect the server's version. :type api_version: str :param auto_remove: Auto-removal of the container on daemon side when the container's process exits. The default is True. :type auto_remove: bool :param command: Command to be run in the container. (templated) :type command: str or list :param cpus: Number of CPUs to assign to the container. This value gets multiplied with 1024. See https://docs.docker.com/engine/reference/run/#cpu-share-constraint :type cpus: float :param dns: Docker custom DNS servers :type dns: list of strings :param dns_search: Docker custom DNS search domain :type dns_search: list of strings :param docker_url: URL of the host running the docker daemon. Default is unix://var/run/docker.sock :type docker_url: str :param environment: Environment variables to set in the container. (templated) :type environment: dict :param force_pull: Pull the docker image on every run. Default is True. :type force_pull: bool :param mem_limit: Maximum amount of memory the container can use. Either a float value, which represents the limit in bytes, or a string like ``128m`` or ``1g``. :type mem_limit: float or str :param network_mode: Network mode for the container. :type network_mode: str :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection. :type tls_ca_cert: str :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client. :type tls_client_cert: str :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client. :type tls_client_key: str :param tls_hostname: Hostname to match against the docker server certificate or False to disable the check. :type tls_hostname: str or bool :param tls_ssl_version: Version of SSL to use when communicating with docker daemon. :type tls_ssl_version: str :param tmp_dir: Mount point inside the container to a temporary directory created on the host by the operator. The path is also made available via the environment variable ``AIRFLOW_TMP_DIR`` inside the container. :type tmp_dir: str :param user: Default user inside the docker container. :type user: int or str :param volumes: List of volumes to mount into the container, e.g. ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``. :param working_dir: Working directory to set on the container (equivalent to the -w switch of the docker client) :type working_dir: str :param xcom_push: Whether stdout will be pushed to the next step using XCom. The default is False. :type xcom_push: bool :param xcom_all: Push all the stdout or just the last line. The default is False (last line). :type xcom_all: bool :param docker_conn_id: ID of the Airflow connection to use :type docker_conn_id: str :param shm_size: Size of ``/dev/shm`` in bytes. The size must be greater than 0. 
If omitted uses system default. :type shm_size: int :param provide_context: If True, make a serialized form of the context available. :type provide_context: bool :param environment_preset: The name of the environment-preset to pull from the config. If omitted defaults to the "default" key, see `EnvironmentPresetExtension`. :type environment_preset: string """ template_fields = ("command", "environment", "extra_kwargs") template_ext = (".sh", ".bash") known_extra_kwargs = set() @apply_defaults def __init__( self, image, api_version=None, entrypoint=None, command=None, cpus=1.0, docker_url="unix://var/run/docker.sock", environment=None, force_pull=get_boolean_default("force_pull", True), mem_limit=None, network_mode=get_default("network_mode", None), tls_ca_cert=None, tls_client_cert=None, tls_client_key=None, tls_hostname=None, tls_ssl_version=None, tmp_dir="/tmp/airflow", user=None, volumes=None, working_dir=None, xcom_push=False, xcom_all=False, docker_conn_id=None, dns=None, dns_search=None, auto_remove=get_boolean_default("auto_remove", True), shm_size=None, provide_context=False, *args, **kwargs ): self.extra_kwargs = { known_key: kwargs.pop(known_key) for known_key in self.known_extra_kwargs # This conditional is critical since we can not know # here what a "default" value should look like. if known_key in kwargs } super(BaseDockerOperator, self).__init__(*args, **kwargs) self.api_version = api_version self.auto_remove = auto_remove self.command = command self.entrypoint = entrypoint self.cpus = cpus self.dns = dns self.dns_search = dns_search self.docker_url = docker_url self.environment = environment or {} self.force_pull = force_pull self.image = image self.mem_limit = mem_limit self.network_mode = network_mode self.tls_ca_cert = tls_ca_cert self.tls_client_cert = tls_client_cert self.tls_client_key = tls_client_key self.tls_hostname = tls_hostname self.tls_ssl_version = tls_ssl_version self.tmp_dir = tmp_dir self.user = user self.volumes = volumes or [] self.working_dir = working_dir self.xcom_push_flag = xcom_push self.xcom_all = xcom_all self.docker_conn_id = docker_conn_id self.shm_size = shm_size self.provide_context = provide_context self.cli = None self.container = None self._host_client = None # Shim for attaching a test client def get_hook(self): return DockerHook( docker_conn_id=self.docker_conn_id, base_url=self.docker_url, version=self.api_version, tls=self.__get_tls_config(), ) def _execute(self, context): self.log.info("Starting docker container from image %s", self.image) tls_config = self.__get_tls_config() if self.docker_conn_id: self.cli = self.get_hook().get_conn() else: self.cli = APIClient( base_url=self.docker_url, version=self.api_version, tls=tls_config ) if self.force_pull or len(self.cli.images(name=self.image)) == 0: self.log.info("Pulling docker image %s", self.image) for l in self.cli.pull(self.image, stream=True): output = json.loads(l.decode("utf-8").strip()) if "status" in output: self.log.info("%s", output["status"]) with TemporaryDirectory( prefix="airflowtmp", dir=self.host_tmp_base_dir ) as host_tmp_dir: self.environment["AIRFLOW_TMP_DIR"] = self.tmp_dir additional_volumes = ["{0}:{1}".format(host_tmp_dir, self.tmp_dir)] # Hook for creating mounted meta directories self.prepare_host_tmp_dir(context, host_tmp_dir) self.prepare_environment(context, host_tmp_dir) if self.provide_context: self.write_context(context, host_tmp_dir) self.container = self.cli.create_container( command=self.get_command(), entrypoint=self.entrypoint, 
environment=self.environment, host_config=self.cli.create_host_config( auto_remove=self.auto_remove, binds=self.volumes + additional_volumes, network_mode=self.network_mode, shm_size=self.shm_size, dns=self.dns, dns_search=self.dns_search, cpu_shares=int(round(self.cpus * 1024)), mem_limit=self.mem_limit, ), image=self.image, user=self.user, working_dir=self.working_dir, ) self.cli.start(self.container["Id"]) line = "" for line in self.cli.logs(container=self.container["Id"], stream=True): line = line.strip() if hasattr(line, "decode"): line = line.decode("utf-8") self.log.info(line) result = self.cli.wait(self.container["Id"]) if result["StatusCode"] != 0: raise AirflowException("docker container failed: " + repr(result)) # Move the in-container xcom-pushes into airflow. result = self.host_client.get_xcom_push_data(host_tmp_dir) for row in result: self.xcom_push(context, key=row["key"], value=row["value"]) if self.xcom_push_flag: return ( self.cli.logs(container=self.container["Id"]) if self.xcom_all else str(line) ) return self.do_meta_operation(context, host_tmp_dir) def get_command(self): if self.command is not None and self.command.strip().find("[") == 0: commands = ast.literal_eval(self.command) else: commands = self.command return commands def on_kill(self): if self.cli is not None: self.log.info("Stopping docker container") self.cli.stop(self.container["Id"]) def __get_tls_config(self): tls_config = None if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key: tls_config = tls.TLSConfig( ca_cert=self.tls_ca_cert, client_cert=(self.tls_client_cert, self.tls_client_key), verify=True, ssl_version=self.tls_ssl_version, assert_hostname=self.tls_hostname, ) self.docker_url = self.docker_url.replace("tcp://", "https://") return tls_config def do_meta_operation(self, context, host_tmp_dir): pass def prepare_environment(self, context, host_tmp_dir): delegate_to_extensions(self, "post_prepare_environment", context, host_tmp_dir) def prepare_host_tmp_dir(self, context, host_tmp_dir): self.host_client.make_meta_dir(host_tmp_dir) host_meta_dir = airflow_docker_helper.get_host_meta_path(host_tmp_dir) self.log.info("Making host meta dir: {}".format(host_meta_dir)) def write_context(self, context, host_tmp_dir): self.host_client.write_context(context, host_tmp_dir) @property def host_tmp_base_dir(self): try: return conf.get("worker", "host_temporary_directory") except AirflowConfigException: return DEFAULT_HOST_TEMPORARY_DIRECTORY def host_meta_dir(self, context, host_tmp_dir): return airflow_docker_helper.get_host_meta_path(host_tmp_dir) @property def host_client(self): return self._host_client or airflow_docker_helper.host @staticmethod def get_config(): return get_config()
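# Illustrative usage sketch (an assumption, not part of the source above):
# BaseDockerOperator does its work in _execute(), so a concrete operator
# supplies the Airflow BaseOperator machinery and delegates to it. The
# subclass name, DAG id, image, and command below are made up for the example.
from datetime import datetime

from airflow import DAG
from airflow.models import BaseOperator


class ExampleDockerOperator(BaseDockerOperator, BaseOperator):
    """Minimal concrete operator over the BaseDockerOperator mixin."""

    def execute(self, context):
        return self._execute(context)


with DAG("docker_example", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    run_in_container = ExampleDockerOperator(
        task_id="run_in_container",
        image="python:3.8-slim",
        # A stringified list is parsed back into a list by get_command().
        command='["python", "-c", "print(\'hello\')"]',
        environment={"GREETING": "hello"},
        volumes=["/host/data:/data:ro"],
        xcom_push=True,  # return the last log line via XCom
    )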
class DockerConfigurableOperator(DockerOperator):
    """
    Modified from
    https://github.com/apache/incubator-airflow/blob/1.8.2/airflow/operators/docker_operator.py
    so that container and host arguments can be injected before the
    container is run.
    """  # noqa

    def __init__(self, container_args=None, host_args=None, *args, **kwargs):
        self.container_args = container_args or {}
        self.host_args = host_args or {}
        super().__init__(*args, **kwargs)

    # This needs to be updated whenever we update to a new version of airflow!
    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self._DockerOperator__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = Client(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config,
            )

        if ':' not in self.image:
            image = self.image + ':latest'
        else:
            image = self.image

        if self.force_pull or len(self.cli.images(name=image)) == 0:
            self.log.info('Pulling docker image %s', image)
            for output_line in self.cli.pull(image, stream=True):
                output = json.loads(output_line.decode('utf-8'))
                self.log.info("%s", output['status'])

        cpu_shares = int(round(self.cpus * 1024))

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            host_args = {
                'binds': self.volumes,
                'cpu_shares': cpu_shares,
                'mem_limit': self.mem_limit,
                'network_mode': self.network_mode,
            }
            host_args.update(self.host_args)

            container_args = {
                'command': self.get_command(),
                'environment': self.environment,
                'host_config': self.cli.create_host_config(**host_args),
                'image': image,
                'user': self.user,
                'working_dir': self.working_dir,
            }
            container_args.update(self.container_args)

            self.container = self.cli.create_container(**container_args)

            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            exit_code = self.cli.wait(self.container['Id'])['StatusCode']
            if exit_code != 0:
                raise AirflowException('docker container failed')

            if self.xcom_push_flag:
                return (
                    self.cli.logs(container=self.container['Id'])
                    if self.xcom_all
                    else str(line)
                )
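# Illustrative usage sketch (an assumption, not from the source above): the
# point of DockerConfigurableOperator is that extra keyword arguments are
# merged into cli.create_container() and cli.create_host_config() before the
# container runs. `ports` and `port_bindings` are standard docker-py
# arguments; the task id and image are made up for the example, and this is
# assumed to run inside a `with DAG(...)` block.
serve_on_8080 = DockerConfigurableOperator(
    task_id="serve_on_8080",
    image="nginx:alpine",
    container_args={
        # Merged into cli.create_container(**container_args)
        "ports": [8080],
    },
    host_args={
        # Merged into cli.create_host_config(**host_args)
        "port_bindings": {8080: 8080},
    },
)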