def get_server_containers(server: Server, client: docker.APIClient) -> tp.List[dict]:
    """Describe every container listed by ``client.containers()``.

    Each returned dict holds the container's command, id, image, labels,
    first name, network mode, ports, privileged flag, ``server.id`` and
    volumes. Labels are sorted by name, ports by (destination, source) and
    volumes by destination.
    """
    described = []
    for raw in client.containers():
        command = filter_printable(raw['Command'])
        container_id = raw['Id']
        image = raw['Image']

        labels = [
            {'containerId': container_id, 'name': label_name, 'value': label_value}
            for label_name, label_value in raw['Labels'].items()
        ]
        labels.sort(key=lambda label: label['name'])

        container_name = raw['Names'][0]
        network = raw['HostConfig']['NetworkMode']

        ports = []
        for port in raw['Ports']:
            ports.append({
                'destination': str(port['PrivatePort']),
                'hostIp': port.get('IP'),
                'protocol': port['Type'],
                'source': str(port['PublicPort']) if 'PublicPort' in port else None,
            })
        ports.sort(key=lambda entry: (str(entry['destination']), str(entry['source'])))

        # The listing does not carry the privileged flag, so it is read from
        # a per-container inspect call.
        privileged = client.inspect_container(container_id)['HostConfig']['Privileged']
        server_id = server.id

        volumes = [
            {
                'containerId': container_id,
                'destination': filter_printable(mount['Destination']),
                'source': filter_printable(mount['Source']),
            }
            for mount in raw['Mounts']
        ]
        volumes.sort(key=lambda volume: volume['destination'])

        described.append({
            'command': command,
            'containerId': container_id,
            'image': image,
            'labels': labels,
            'name': container_name,
            'network': network,
            'ports': ports,
            'privileged': privileged,
            'serverId': server_id,
            'volumes': volumes,
        })
    return described
def docker_abs_net_io(container_id):
    """
    Network traffic of all network interfaces within the container.

    Runs ``ifconfig`` inside the container and sums every ``RX bytes:`` /
    ``TX bytes:`` counter found in its output.

    :param container_id: The full ID of the docker container.
    :type container_id: ``str``
    :return: Returns the absolute network I/O till container startup, in bytes. The return dict also contains the
        system time. ``NET_in`` / ``NET_out`` are ``None`` when no matching counter was found.
    :rtype: ``dict``
    """
    c = APIClient()
    command = c.exec_create(container_id, 'ifconfig')
    ifconfig = c.exec_start(command['Id'])
    # Timestamp taken right after the command returned, scaled to nanoseconds.
    sys_time = int(time.time() * 1000000000)
    output = str(ifconfig)

    def _sum_counters(pattern):
        # Sum all captured byte counters; None signals "nothing matched".
        matches = re.findall(pattern, output)
        if not matches:
            return None
        return sum(int(number) for number in matches)

    # Raw strings: '\d' in a plain literal is an invalid escape sequence.
    in_bytes = _sum_counters(r'RX bytes:(\d+)')
    out_bytes = _sum_counters(r'TX bytes:(\d+)')

    return {'NET_in': in_bytes, 'NET_out': out_bytes, 'NET_systime': sys_time}
def docker_client(environment, version=None, tls_config=None, host=None,
                  tls_version=None):
    """
    Build a docker-py ``APIClient`` from environment-derived settings.

    Keyword arguments come from ``kwargs_from_env``; truthy ``host``,
    ``tls_config`` and ``version`` values override them. The timeout is
    taken from ``COMPOSE_HTTP_TIMEOUT`` when set, else ``HTTP_TIMEOUT``.
    """
    try:
        kwargs = kwargs_from_env(environment=environment, ssl_version=tls_version)
    except TLSParameterError:
        raise UserError(
            "TLS configuration is invalid - make sure your DOCKER_TLS_VERIFY "
            "and DOCKER_CERT_PATH are set correctly.\n"
            "You might need to run `eval \"$(docker-machine env default)\"`")

    # Explicit arguments win over whatever the environment provided.
    overrides = (('base_url', host), ('tls', tls_config), ('version', version))
    for key, value in overrides:
        if value:
            kwargs[key] = value

    timeout = environment.get('COMPOSE_HTTP_TIMEOUT')
    kwargs['timeout'] = int(timeout) if timeout else HTTP_TIMEOUT
    kwargs['user_agent'] = generate_user_agent()

    client = APIClient(**kwargs)
    # Keep the base URL exactly as configured, alongside the client.
    client._original_base_url = kwargs.get('base_url')
    return client
def _clean_network(self):
    """Remove every network named ``<project_name>_default`` on the Docker host."""
    client = Client(base_url=self._docker_host, version="auto", timeout=10)
    default_name = "%s_default" % self._project_name
    matching = client.networks(names=[default_name])
    # Collect all ids before removing anything.
    network_ids = [entry["Id"] for entry in matching]
    for network_id in network_ids:
        client.remove_network(network_id)
        LOG.debug("Remove network id {}".format(network_id))
def cleanup_host(worker_api, timeout=5):
    """ Clean up a container host when the user removes the host

    Removes the per-consensus-plugin cluster networks that exist on the host.

    :param worker_api: Docker daemon url, must start with ``tcp://``
    :param timeout: timeout to wait
    :return: True on success, False on an invalid url or any exception
    """
    if not (worker_api and worker_api.startswith("tcp://")):
        logger.error("Invalid worker_api={}".format(worker_api))
        return False

    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        existing = [net["Name"] for net in client.networks()]
        for cs_type in CONSENSUS_PLUGINS_FABRIC_V1:
            net_name = CLUSTER_NETWORK + "_{}".format(cs_type)
            if net_name not in existing:
                logger.warning("Network {} not exists!".format(net_name))
                continue
            logger.debug("Remove network {}".format(net_name))
            client.remove_network(net_name)
    except Exception as e:
        logger.error("Exception happens!")
        logger.error(e)
        return False
    return True
def detect_daemon_type(worker_api, timeout=5):
    """ Try to detect the daemon type

    Only wait for timeout seconds.

    :param worker_api: Docker daemon url, expected as ``tcp://host:port``
    :param timeout: Time to wait for the response
    :return: ``WORKER_TYPES[1]`` when the server version starts with
        ``swarm`` or the daemon reports a non-empty swarm cluster id,
        ``WORKER_TYPES[0]`` otherwise; ``None`` for an invalid url, an empty
        server version or any error while talking to the daemon
    """
    if not worker_api or not worker_api.startswith("tcp://"):
        return None
    segs = worker_api.split(":")
    if len(segs) != 3:
        # Logging uses %-style lazy formatting: the placeholder is required,
        # otherwise the url is dropped and logging reports a format error.
        logger.error("Invalid daemon url = %s", worker_api)
        return None
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        info = client.info()
        server_version = info['ServerVersion']
        if not server_version:
            logger.warning("info().ServerVersion cannot be empty")
            return None
        if server_version.startswith('swarm'):
            return WORKER_TYPES[1]
        try:
            if info['Swarm']['Cluster']['ID'] != '':
                return WORKER_TYPES[1]
        except Exception as e:
            # Missing swarm info is not an error; fall back to the first type.
            logger.debug(e)
        return WORKER_TYPES[0]
    except Exception as e:
        logger.error(e)
        return None
def StopContainer():
    """Stop the 'suri-buildbot' container, then exit the process with status 0.

    Uses the low-level ``Client`` when ``GOT_DOCKERPY_API < 2`` and the
    ``DockerClient`` container API otherwise.
    """
    use_legacy_client = GOT_DOCKERPY_API < 2
    if use_legacy_client:
        Client().stop('suri-buildbot')
    else:
        DockerClient().containers.get('suri-buildbot').stop()
    sys.exit(0)
def StartContainer():
    """Start the 'suri-buildbot' container, then exit the process with status 0.

    When ``GOT_DOCKERPY_API < 2`` the low-level ``Client`` is used and the
    port bindings plus the read-only source and buildbot-config bind mounts
    are passed to ``start()``. Otherwise the container is looked up through
    ``DockerClient`` and started as it was created.
    """
    # Parent of the directory that contains this script.
    suri_src_dir = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    # Parenthesised single-argument form: same output under the Python 2
    # print statement and the Python 3 print function.
    print("Using base src dir: " + suri_src_dir)
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        cli.start('suri-buildbot',
                  port_bindings={8010: 8010, 22: None},
                  binds={
                      suri_src_dir: {
                          'bind': '/data/oisf',
                          'ro': True},
                      os.path.join(suri_src_dir, 'qa', 'docker', 'buildbot.cfg'): {
                          'bind': '/data/buildbot/master/master.cfg',
                          'ro': True}})
    else:
        cli = DockerClient()
        cli.containers.get('suri-buildbot').start()
    sys.exit(0)
def modify_random_containers(client: docker.APIClient, amount: int, action: str = 'stop') -> tp.List[dict]:
    """Stop or delete up to ``amount`` randomly chosen containers.

    :param client: client whose ``containers()`` listing is sampled.
    :param amount: number of containers to pick; capped at the number of
        containers the client lists.
    :param action: ``'stop'`` stops each pick, ``'delete'`` force-removes
        it; any other value only selects the container.
    :return: the chosen container dicts, in the order they were picked.
    """
    server_containers = client.containers()
    stopped_containers = []
    # Cap the count: random.choice() raises IndexError on an empty list, which
    # used to happen as soon as ``amount`` exceeded the available containers.
    picks = min(amount, len(server_containers))
    for _ in range(picks):
        container = random.choice(server_containers)
        if action == 'delete':
            client.remove_container(container, force=True)
        elif action == 'stop':
            client.stop(container)
        stopped_containers.append(container)
        # Drop the pick so the same container is never chosen twice.
        server_containers.remove(container)
    return stopped_containers
def docker_container_id(container_name):
    """
    Look up a container by name and return its ID if it is running.

    :param container_name: The full name of the docker container.
    :type container_name: ``str``
    :return: The ``Id`` reported by ``inspect_container`` when the container's state is running, otherwise None.
    :rtype: ``str`` or ``None``
    """
    detail = APIClient().inspect_container(container_name)
    if not detail["State"]["Running"]:
        return None
    return detail['Id']
def execute(self, context):
    """Run ``self.image`` in a container and log its output line by line.

    The image is pulled first when ``force_pull`` is set or no local image
    matches. A temporary host directory is bind-mounted at ``self.tmp_dir``
    for the lifetime of the run.

    :param context: task context (not used by this method).
    :return: when ``xcom_push_flag`` is set, the full container log if
        ``xcom_all`` is set, else the last logged line; otherwise ``None``.
    :raises AirflowException: if the container exits with a non-zero code.
    """
    logging.info('Starting docker container from image ' + self.image)

    tls_config = None
    if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
        tls_config = tls.TLSConfig(
            ca_cert=self.tls_ca_cert,
            client_cert=(self.tls_client_cert, self.tls_client_key),
            verify=True,
            ssl_version=self.tls_ssl_version,
            assert_hostname=self.tls_hostname
        )
        # With TLS configured the daemon is addressed over https.
        self.docker_url = self.docker_url.replace('tcp://', 'https://')

    self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)

    # An image reference without a tag is pinned to ':latest'.
    if ':' not in self.image:
        image = self.image + ':latest'
    else:
        image = self.image

    if self.force_pull or len(self.cli.images(name=image)) == 0:
        logging.info('Pulling docker image ' + image)
        for l in self.cli.pull(image, stream=True):
            output = json.loads(l.decode('utf-8'))
            # Not every message in the pull stream carries a 'status' key;
            # indexing unconditionally raised KeyError on those.
            if 'status' in output:
                logging.info("{}".format(output['status']))

    cpu_shares = int(round(self.cpus * 1024))

    with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
        self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
        self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

        self.container = self.cli.create_container(
            command=self.get_command(),
            cpu_shares=cpu_shares,
            environment=self.environment,
            host_config=self.cli.create_host_config(
                binds=self.volumes,
                network_mode=self.network_mode,
                auto_remove=self.auto_remove),
            image=image,
            mem_limit=self.mem_limit,
            user=self.user
        )
        self.cli.start(self.container['Id'])

        # ``line`` keeps the last log line for the single-line XCom result.
        line = ''
        for line in self.cli.logs(container=self.container['Id'], stream=True):
            line = line.strip()
            if hasattr(line, 'decode'):
                line = line.decode('utf-8')
            logging.info(line)

        exit_code = self.cli.wait(self.container['Id'])
        if exit_code != 0:
            raise AirflowException('docker container failed')

        if self.xcom_push_flag:
            return self.cli.logs(container=self.container['Id']) if self.xcom_all else str(line)
def __init__(self,
             build_job,
             repo_path,
             from_image,
             copy_code=True,
             build_steps=None,
             env_vars=None,
             dockerfile_name='Dockerfile'):
    """Set up the image build state for ``build_job``.

    The image name comes from ``get_image_name`` and the tag is the job
    uuid hex. ``build_path`` is ``repo_path`` without its last ``/``
    segment, and the Dockerfile path is placed inside it.
    """
    self.build_job = build_job
    self.job_uuid = build_job.uuid.hex
    self.job_name = build_job.unique_name
    self.from_image = from_image
    self.image_name = get_image_name(self.build_job)
    self.image_tag = self.job_uuid

    path_parts = repo_path.split('/')
    self.folder_name = path_parts[-1]
    self.repo_path = repo_path
    self.copy_code = copy_code
    self.build_path = '/'.join(path_parts[:-1])

    self.build_steps = get_list(build_steps)
    self.env_vars = get_list(env_vars)
    self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
    self.polyaxon_requirements_path = self._get_requirements_path()
    self.polyaxon_setup_path = self._get_setup_path()

    self.docker = APIClient(version='auto')
    self.registry_host = None
    self.docker_url = None
def __init__(self,
             repo_path,
             from_image,
             image_name,
             image_tag,
             copy_code=True,
             in_tmp_repo=True,
             build_steps=None,
             env_vars=None,
             dockerfile_name='Dockerfile'):
    """Set up the image build state for the repository at ``repo_path``.

    When both ``in_tmp_repo`` and ``copy_code`` are truthy the build runs
    from the path returned by ``create_tmp_repo()``; otherwise it runs from
    ``repo_path``. ``build_path`` is that path without its last ``/``
    segment.
    """
    # This will help create a unique tmp folder for dockerizer in case of concurrent jobs
    self.uuid = uuid.uuid4().hex
    self.from_image = from_image
    self.image_name = image_name
    self.image_tag = image_tag
    self.repo_path = repo_path
    self.folder_name = repo_path.split('/')[-1]
    self.copy_code = copy_code
    self.in_tmp_repo = in_tmp_repo

    use_tmp_repo = in_tmp_repo and copy_code
    self.build_repo_path = self.create_tmp_repo() if use_tmp_repo else self.repo_path

    parent_parts = self.build_repo_path.split('/')[:-1]
    self.build_path = '/'.join(parent_parts)
    self.build_steps = get_list(build_steps)
    self.env_vars = get_list(env_vars)
    self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
    self.polyaxon_requirements_path = self._get_requirements_path()
    self.polyaxon_setup_path = self._get_setup_path()

    self.docker = APIClient(version='auto')
    self.registry_host = None
    self.docker_url = None
def execute(self, context):
    """Run ``self.image`` in a container and log its output line by line.

    The client comes from the configured hook when ``docker_conn_id`` is
    set, otherwise it is built from ``docker_url``. The image is pulled
    first when ``force_pull`` is set or no local image matches. A temporary
    host directory is bind-mounted at ``self.tmp_dir`` for the run.

    :param context: task context (not used by this method).
    :return: when ``xcom_push_flag`` is set, the full container log if
        ``xcom_all`` is set, else the last logged line; otherwise ``None``.
    :raises AirflowException: if the container exits with a non-zero code.
    """
    self.log.info('Starting docker container from image %s', self.image)

    tls_config = self.__get_tls_config()

    if self.docker_conn_id:
        self.cli = self.get_hook().get_conn()
    else:
        self.cli = APIClient(
            base_url=self.docker_url,
            version=self.api_version,
            tls=tls_config
        )

    # An image reference without a tag is pinned to ':latest'.
    if ':' not in self.image:
        image = self.image + ':latest'
    else:
        image = self.image

    if self.force_pull or len(self.cli.images(name=image)) == 0:
        self.log.info('Pulling docker image %s', image)
        for l in self.cli.pull(image, stream=True):
            output = json.loads(l.decode('utf-8'))
            # Not every message in the pull stream carries a 'status' key;
            # indexing unconditionally raised KeyError on those.
            if 'status' in output:
                self.log.info("%s", output['status'])

    cpu_shares = int(round(self.cpus * 1024))

    with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
        self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
        self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

        self.container = self.cli.create_container(
            command=self.get_command(),
            cpu_shares=cpu_shares,
            environment=self.environment,
            host_config=self.cli.create_host_config(
                binds=self.volumes,
                network_mode=self.network_mode,
                shm_size=self.shm_size),
            image=image,
            mem_limit=self.mem_limit,
            user=self.user,
            working_dir=self.working_dir
        )
        self.cli.start(self.container['Id'])

        # ``line`` keeps the last log line for the single-line XCom result.
        line = ''
        for line in self.cli.logs(container=self.container['Id'], stream=True):
            line = line.strip()
            if hasattr(line, 'decode'):
                line = line.decode('utf-8')
            self.log.info(line)

        exit_code = self.cli.wait(self.container['Id'])
        if exit_code != 0:
            raise AirflowException('docker container failed')

        if self.xcom_push_flag:
            return self.cli.logs(container=self.container['Id']) \
                if self.xcom_all else str(line)
def _clean_chaincode_images(worker_api, name_prefix, timeout=5):
    """ Force-remove images whose first repo tag starts with the prefix

    Images without repo tags are skipped.

    :param worker_api: Docker daemon url
    :param name_prefix: image name prefix
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("clean chaincode images with prefix={}".format(name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)

    id_removes = []
    for image in client.images():
        tags = image['RepoTags']
        if tags and tags[0].startswith(name_prefix):
            id_removes.append(image['Id'])

    logger.debug("chaincode image id to removes=" + ", ".join(id_removes))
    for image_id in id_removes:
        client.remove_image(image_id, force=True)
def __init__(self, config=None):
    """Create the client from a config mapping or a path to a JSON config.

    :param config: ``None`` to use ``self.guess_config()``, a ``str`` path
        to a JSON file (``~`` is expanded), or an already loaded mapping.
        The config must contain exactly one entry under ``'clouds'``,
        keyed by the endpoint url and holding the ``accesskey`` /
        ``secretkey`` credentials.
    :raises RuntimeError: if ``'clouds'`` does not hold exactly one entry.
    """
    if config is None:
        config = self.guess_config()
    if isinstance(config, str):
        # Close the file deterministically instead of leaking the handle.
        with open(os.path.expanduser(config)) as config_file:
            self.config = json.load(config_file)
    else:
        self.config = config

    clouds = list(self.config['clouds'].items())
    if len(clouds) != 1:
        raise RuntimeError("supports only one cloud in config")

    url, self.creds = clouds[0]
    url = urlparse(url)
    base_url = "https://" + url.netloc
    DockerClient.__init__(self, base_url, tls=True)

    # Requests are signed with the first label of the host name passed as
    # the third AWS4Auth argument and 'hyper' as the fourth.
    self.auth = AWS4Auth(self.creds['accesskey'], self.creds['secretkey'],
                         url.netloc.split(".")[0], 'hyper')
    self._version = "1.23"
def execute(self, context):
    """Run ``self.image`` in a container and log its output line by line.

    The client comes from the configured hook when ``docker_conn_id`` is
    set, otherwise it is built from ``docker_url``. The image is pulled
    when ``force_pull`` is set or no local image matches.

    Returns the full container log (``xcom_all``) or the UTF-8 encoded last
    line when ``do_xcom_push`` is set, otherwise ``None``. Raises
    ``AirflowException`` when the wait result has a non-zero ``StatusCode``.
    """
    self.log.info('Starting docker container from image %s', self.image)

    tls_config = self.__get_tls_config()

    if not self.docker_conn_id:
        self.cli = APIClient(
            base_url=self.docker_url,
            version=self.api_version,
            tls=tls_config
        )
    else:
        self.cli = self.get_hook().get_conn()

    needs_pull = self.force_pull or not self.cli.images(name=self.image)
    if needs_pull:
        self.log.info('Pulling docker image %s', self.image)
        for chunk in self.cli.pull(self.image, stream=True):
            progress = json.loads(chunk.decode('utf-8').strip())
            if 'status' in progress:
                self.log.info("%s", progress['status'])

    with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
        self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
        self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

        command = self.get_command()
        host_config = self.cli.create_host_config(
            auto_remove=self.auto_remove,
            binds=self.volumes,
            network_mode=self.network_mode,
            shm_size=self.shm_size,
            dns=self.dns,
            dns_search=self.dns_search,
            cpu_shares=int(round(self.cpus * 1024)),
            mem_limit=self.mem_limit)
        self.container = self.cli.create_container(
            command=command,
            environment=self.environment,
            host_config=host_config,
            image=self.image,
            user=self.user,
            working_dir=self.working_dir
        )
        self.cli.start(self.container['Id'])

        # ``line`` ends up holding the last log line (or '' if none).
        line = ''
        for line in self.cli.logs(container=self.container['Id'], stream=True):
            line = line.strip()
            if hasattr(line, 'decode'):
                line = line.decode('utf-8')
            self.log.info(line)

        result = self.cli.wait(self.container['Id'])
        if result['StatusCode'] != 0:
            raise AirflowException('docker container failed: ' + repr(result))

        # The full log is only fetched when it is actually wanted, because
        # that call is expensive.
        if not self.do_xcom_push:
            return None
        if self.xcom_all:
            return self.cli.logs(container=self.container['Id'])
        return line.encode('utf-8')
def _clean_project_networks(worker_api, name_prefix, timeout=5):
    """ Clean cluster node networks

    Every network the daemon returns for the name ``<name_prefix>_default``
    is removed.

    :param worker_api: Docker daemon url
    :param name_prefix: project name prefix of the network
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("Clean project networks, worker_api={}, prefix={}".format(
        worker_api, name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)
    default_name = "%s_default" % name_prefix
    matching = client.networks(names=[default_name])
    # Collect all ids before removing anything.
    network_ids = [entry['Id'] for entry in matching]
    for network_id in network_ids:
        client.remove_network(network_id)
        logger.debug("Remove network id {}".format(network_id))
def start_containers(worker_api, name_prefix, timeout=5):
    """Start containers with given prefix

    A container matches when the last ``/``-separated part of its first
    name starts with ``name_prefix``. Stopped containers are included in
    the search.

    :param worker_api: Docker daemon url
    :param name_prefix: container name prefix
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("Get containers, worker_api={}, prefix={}".format(
        worker_api, name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)

    matching_ids = []
    for entry in client.containers(all=True):
        short_name = entry['Names'][0].split("/")[-1]
        if short_name.startswith(name_prefix):
            matching_ids.append(entry['Id'])

    logger.info(matching_ids)
    for container_id in matching_ids:
        client.start(container_id)
def get_swarm_node_ip(swarm_url, container_name, timeout=5):
    """ Detect the host ip where the given container is located

    The ip is read from the first host binding of the container's
    ``5000/tcp`` port.

    :param swarm_url: Swarm cluster api url
    :param container_name: The container name
    :param timeout: Time to wait for the response
    :return: host ip, or an empty string on any error
    """
    logger.debug("Detect container={} with swarm_url={}".format(
        container_name, swarm_url))
    try:
        client = Client(base_url=swarm_url, version="auto", timeout=timeout)
        details = client.inspect_container(container_name)
        port_bindings = details['NetworkSettings']['Ports']['5000/tcp']
        first_binding = port_bindings[0]
        return first_binding['HostIp']
    except Exception as e:
        logger.error("Exception happens when detect container host!")
        logger.error(e)
        return ''
def _clean_exited_containers(worker_api):
    """ Clean those containers with exited status

    This is dangerous, as it may delete temporary containers.
    Only trigger this when no one else uses the system.

    A failure to remove one container is logged and does not stop the
    removal of the remaining ones.

    :param worker_api: Docker daemon url
    :return: None
    """
    logger.debug("Clean exited containers")
    client = Client(base_url=worker_api, version="auto")
    containers = client.containers(quiet=True, all=True,
                                   filters={"status": "exited"})
    id_removes = [e['Id'] for e in containers]
    for container_id in id_removes:
        # Logging arguments are interpolated %-style; a '{}' placeholder
        # combined with an argument is a formatting error.
        logger.debug("exited container to remove, id=%s", container_id)
        try:
            client.remove_container(container_id)
        except Exception as e:
            logger.error("Exception in clean_exited_containers {}".format(e))
def _clean_project_containers(worker_api, name_prefix, timeout=5):
    """ Force-remove all containers whose name starts with the prefix

    A container matches when the last ``/``-separated part of its first
    name starts with ``name_prefix``. Stopped containers are included.

    :param worker_api: Docker daemon url
    :param name_prefix: container name prefix
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("Clean project containers, worker_api={}, prefix={}".format(
        worker_api, name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)

    # Collect every matching id first, then remove.
    id_removes = []
    for entry in client.containers(all=True):
        short_name = entry['Names'][0].split("/")[-1]
        if short_name.startswith(name_prefix):
            id_removes.append(entry['Id'])

    for container_id in id_removes:
        client.remove_container(container_id, force=True)
        logger.debug("Remove container {}".format(container_id))
def docker_client(environment, version=None, tls_config=None, host=None,
                  tls_version=None):
    """
    Build a docker-py ``APIClient`` from environment-derived settings.

    Keyword arguments come from ``kwargs_from_env``; truthy ``host``,
    ``tls_config`` and ``version`` values override them. The timeout is
    taken from ``COMPOSE_HTTP_TIMEOUT`` when set, else ``HTTP_TIMEOUT``.
    """
    try:
        kwargs = kwargs_from_env(environment=environment, ssl_version=tls_version)
    except TLSParameterError:
        raise UserError(
            "TLS configuration is invalid - make sure your DOCKER_TLS_VERIFY "
            "and DOCKER_CERT_PATH are set correctly.\n"
            "You might need to run `eval \"$(docker-machine env default)\"`")

    # Explicit arguments win over whatever the environment provided.
    overrides = (('base_url', host), ('tls', tls_config), ('version', version))
    for key, value in overrides:
        if value:
            kwargs[key] = value

    timeout = environment.get('COMPOSE_HTTP_TIMEOUT')
    kwargs['timeout'] = int(timeout) if timeout else HTTP_TIMEOUT
    kwargs['user_agent'] = generate_user_agent()

    # Workaround for
    # https://pyinstaller.readthedocs.io/en/v3.3.1/runtime-information.html#ld-library-path-libpath-considerations
    if 'LD_LIBRARY_PATH_ORIG' in environment:
        original_library_path = environment.get('LD_LIBRARY_PATH_ORIG')
        kwargs['credstore_env'] = {
            'LD_LIBRARY_PATH': original_library_path,
        }

    client = APIClient(**kwargs)
    # Keep the base URL exactly as configured, alongside the client.
    client._original_base_url = kwargs.get('base_url')
    return client
def check_daemon(worker_api, timeout=5):
    """ Check if the daemon is active

    Only wait for timeout seconds.

    :param worker_api: Docker daemon url, expected as ``tcp://host:port``
    :param timeout: Time to wait for the response
    :return: True for active, False for inactive, for an invalid url or
        when pinging the daemon raises
    """
    if not worker_api or not worker_api.startswith("tcp://"):
        logger.warning("invalid workder_api={}".format(worker_api))
        return False
    segs = worker_api.split(":")
    if len(segs) != 3:
        # Logging uses %-style lazy formatting: the placeholder is required,
        # otherwise the url is dropped and logging reports a format error.
        logger.error("Invalid workder api = %s", worker_api)
        return False
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        ping_result = client.ping()
        logger.debug("ping_result={}".format(ping_result))
        # Both the string 'OK' and the boolean True count as a success.
        return ping_result == 'OK' or ping_result is True
    except Exception as e:
        logger.error("Exception in check_daemon {}".format(e))
        return False
def CreateContainer():
    """Pull 'regit/suri-buildbot' and create the 'suri-buildbot' container.

    When ``GOT_DOCKERPY_API < 2`` the low-level ``Client`` is used and only
    ports and volume mount points are declared at creation time. Otherwise
    the container is created through ``DockerClient`` with the port
    mappings and the read-only source/config volumes. The process exits
    with status 0 afterwards.
    """
    # FIXME check if existing
    # Parenthesised single-argument prints behave the same under the
    # Python 2 print statement and the Python 3 print function.
    print("Pulling docking image, first run should take long")
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        cli.pull('regit/suri-buildbot')
        cli.create_container(name='suri-buildbot',
                             image='regit/suri-buildbot',
                             ports=[8010, 22],
                             volumes=['/data/oisf', '/data/buildbot/master/master.cfg'])
    else:
        cli = DockerClient()
        cli.images.pull('regit/suri-buildbot')
        # Parent of the directory that contains this script.
        suri_src_dir = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
        print("Using base src dir: " + suri_src_dir)
        cli.containers.create(
            'regit/suri-buildbot',
            name='suri-buildbot',
            ports={'8010/tcp': 8010, '22/tcp': None},
            volumes={
                suri_src_dir: {
                    'bind': '/data/oisf',
                    'mode': 'ro'},
                os.path.join(suri_src_dir, 'qa', 'docker', 'buildbot.cfg'): {
                    'bind': '/data/buildbot/master/master.cfg',
                    'mode': 'ro'}},
            detach=True)
    sys.exit(0)
def RmContainer():
    """Remove the 'suri-buildbot' container and its image, then exit with 0.

    When ``GOT_DOCKERPY_API < 2`` both removals are best-effort: a failure
    is reported on stdout and the next step still runs. With the
    ``DockerClient`` API errors propagate to the caller.
    """
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        # ``except Exception`` keeps the best-effort behaviour without also
        # swallowing KeyboardInterrupt / SystemExit like a bare except does.
        try:
            cli.remove_container('suri-buildbot')
        except Exception:
            print("Unable to remove suri-buildbot container")
        try:
            cli.remove_image('regit/suri-buildbot:latest')
        except Exception:
            print("Unable to remove suri-buildbot images")
    else:
        cli = DockerClient()
        cli.containers.get('suri-buildbot').remove()
        cli.images.remove('regit/suri-buildbot:latest')
    sys.exit(0)
def reset_container_host(host_type, worker_api, timeout=15):
    """ Reset a container host and set it up again

    Force-removes every container, removes untagged images, then calls
    ``setup_container_host``. ``timeout`` applies to the Docker client used
    for the cleanup; it is not forwarded to ``setup_container_host``.

    :param host_type: Type of host: single or swarm
    :param worker_api: Docker daemon url
    :param timeout: Time to wait for the response
    :return: False if the cleanup raised, otherwise the result of
        ``setup_container_host``
    """
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        containers = client.containers(quiet=True, all=True)
        logger.debug(containers)
        for c in containers:
            client.remove_container(c['Id'], force=True)
        logger.debug("cleaning all containers")
    except Exception as e:
        logger.error("Exception happens when reset host!")
        logger.error(e)
        return False

    try:
        images = client.images(all=True)
        logger.debug(images)
        for i in images:
            # Untagged images are reported either as ['<none>:<none>'] or
            # with no RepoTags at all; indexing the latter used to raise and
            # abort the whole reset through the outer handler.
            tags = i.get("RepoTags")
            if tags and tags[0] != "<none>:<none>":
                continue
            logger.debug(i)
            try:
                client.remove_image(i['Id'])
            except Exception as e:
                # Best effort: an image that cannot be removed is skipped.
                logger.error(e)
                continue
        logger.debug("cleaning <none> images")
    except Exception as e:
        logger.error("Exception happens when reset host!")
        logger.error(e)
        return False

    return setup_container_host(host_type=host_type, worker_api=worker_api)
def setup_container_host(host_type, worker_api, timeout=5):
    """ Setup a container host for deploying cluster on it

    Makes sure one cluster network per consensus plugin exists, creating
    missing ones with the ``bridge`` driver for ``WORKER_TYPES[0]`` hosts
    and the ``overlay`` driver for ``WORKER_TYPES[1]`` hosts.

    :param host_type: Docker host type
    :param worker_api: Docker daemon url, must start with ``tcp://``
    :param timeout: timeout to wait
    :return: True or False
    """
    if not (worker_api and worker_api.startswith("tcp://")):
        logger.error("Invalid worker_api={}".format(worker_api))
        return False
    if host_type not in WORKER_TYPES:
        logger.error("Invalid host_type={}".format(host_type))
        return False

    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        existing = [net["Name"] for net in client.networks()]
        for cs_type in CONSENSUS_PLUGINS_FABRIC_V1:
            net_name = CLUSTER_NETWORK + "_{}".format(cs_type)
            if net_name in existing:
                logger.warning("Network {} already exists, use it!".format(
                    net_name))
                continue
            if host_type == WORKER_TYPES[0]:  # single
                client.create_network(net_name, driver='bridge')
            elif host_type == WORKER_TYPES[1]:  # swarm
                client.create_network(net_name, driver='overlay')
            else:
                logger.error("No-supported host_type={}".format(host_type))
                return False
    except Exception as e:
        logger.error("Exception happens!")
        logger.error(e)
        return False
    return True
def start_containers(client: docker.APIClient):
    """Create and start one container per (image, config entry) pair.

    For each of the images ``ubuntu``, ``alpine`` and ``nginx`` every entry
    of ``tables('docker').data`` is turned into a container. The entry name
    decides which option is exercised: ``vol*`` binds, ``ports*`` port
    bindings, ``labels*`` labels, ``privileged`` the privileged flag, and
    anything else an entrypoint.

    :param client: client used to create and start the containers.
    """
    configs = tables('docker').data
    images = ['ubuntu', 'alpine', 'nginx']
    # Running offset added to the base port numbers below.
    ports_delta = 1
    for image in images:
        base_config = {
            "image": image,
            "command": "sleep 1d",
            "detach": True}
        for conf in configs:
            if conf.startswith('vol'):
                # 'vol1' on non-alpine images runs COMMAND instead of the
                # default sleep command.
                if conf == 'vol1' and image != 'alpine':
                    container = client.create_container(
                        host_config=client.create_host_config(binds=configs[conf]),
                        image=image, command=COMMAND, detach=True)
                else:
                    container = client.create_container(
                        host_config=client.create_host_config(binds=configs[conf]),
                        **base_config)
            elif conf.startswith('ports'):
                ports = {}
                # configs[conf] is used as a count: each iteration adds one
                # TCP and one UDP mapping.
                # NOTE(review): the offset is presumably bumped once per
                # iteration so mappings do not collide -- confirm.
                for p in range(configs[conf]):
                    ports.update({9980 + ports_delta: 9980 + ports_delta})
                    ports.update({str(9981 + ports_delta) + '/udp': 9985 + ports_delta})
                    ports_delta += 1
                container = client.create_container(
                    host_config=client.create_host_config(port_bindings=ports),
                    ports=[*ports], **base_config)
            elif conf.startswith('labels'):
                container = client.create_container(
                    labels=configs[conf],
                    **base_config)
            elif conf == 'privileged':
                container = client.create_container(
                    host_config=client.create_host_config(privileged=configs[conf]),
                    **base_config)
            else:
                # Entrypoint entries run without the default command.
                entry_config = copy.copy(base_config)
                entry_config.pop('command')
                container = client.create_container(
                    entrypoint=configs[conf],
                    **entry_config)
            client.start(container)
class ModifiedDockerOperator(DockerOperator):
    """DockerOperator variant with a configurable host temporary directory.

    Incorporates https://github.com/apache/airflow/pull/4315/
    and an implementation of https://issues.apache.org/jira/browse/AIRFLOW-3825.

    :param host_tmp_dir: Directory on the host in which the temporary
        directory mapped to tmp_dir is created. Defaults to ``/tmp``.
    :type host_tmp_dir: str
    """

    def __init__(self, host_tmp_dir='/tmp', **kwargs):
        self.host_tmp_dir = host_tmp_dir
        # XCom pushing is always switched on for this operator.
        kwargs['xcom_push'] = True
        super(ModifiedDockerOperator, self).__init__(**kwargs)

    @contextmanager
    def get_host_tmp_dir(self):
        '''Yield a temporary directory under ``host_tmp_dir``; overridable.'''
        with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as tmp_dir:
            yield tmp_dir

    def execute(self, context):
        '''Run the container, using get_host_tmp_dir for the host temp dir.'''
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if not self.docker_conn_id:
            self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)
        else:
            self.cli = self.get_hook().get_conn()

        needs_pull = self.force_pull or not self.cli.images(name=self.image)
        if needs_pull:
            self.log.info('Pulling docker image %s', self.image)
            for chunk in self.cli.pull(self.image, stream=True):
                progress = json.loads(chunk.decode('utf-8').strip())
                if 'status' in progress:
                    self.log.info("%s", progress['status'])

        with self.get_host_tmp_dir() as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            command = self.get_command()
            host_config = self.cli.create_host_config(
                auto_remove=self.auto_remove,
                binds=self.volumes,
                network_mode=self.network_mode,
                shm_size=self.shm_size,
                dns=self.dns,
                dns_search=self.dns_search,
                cpu_shares=int(round(self.cpus * 1024)),
                mem_limit=self.mem_limit,
            )
            self.container = self.cli.create_container(
                command=command,
                environment=self.environment,
                host_config=host_config,
                image=self.image,
                user=self.user,
                working_dir=self.working_dir,
            )
            self.cli.start(self.container['Id'])

            # Every logged line is kept so the result can be built without
            # asking the daemon for the logs a second time.
            res = []
            line = ''
            for new_line in self.cli.logs(container=self.container['Id'], stream=True):
                line = new_line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)
                res.append(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            if self.xcom_push_flag:
                # Try to avoid any kind of race condition?
                if self.xcom_all:
                    return '\n'.join(res) + '\n'
                return str(line)

    # This is a class-private name on DockerOperator for no good reason --
    # all that the status quo does is inhibit extension of the class.
    # See https://issues.apache.org/jira/browse/AIRFLOW-3880
    def __get_tls_config(self):
        # pylint: disable=no-member
        return super(ModifiedDockerOperator, self)._DockerOperator__get_tls_config()
def docker():
    """Return a new ``APIClient`` created with ``version='auto'``."""
    client = APIClient(version='auto')
    return client
def setup_class(cls):
    """Build the image and start a controller and a server container.

    Both containers run ``IMAGE_NAME`` on the freshly created bridge network
    ``NETWORK_NAME`` with all exposed ports published. The host ports mapped
    to container ports 9500 and 15672 are stored on ``cls``, and the method
    returns only after ``wait_for_response`` has returned for the 9500
    mapping of both containers.
    """
    # Build the image from scratch
    cls.client = APIClient(base_url="unix://var/run/docker.sock")
    response = cls.client.build(path=REPO_DIR, dockerfile=os.path.join(
        "dockerfiles", "Dockerfile"), tag=IMAGE_NAME, rm=True, nocache=True, pull=True, stream=False)
    # Echo the build output.
    for line in response:
        print line,
    # Create a network for both the controller + server to run in
    cls.client.create_network(NETWORK_NAME, driver="bridge", attachable=True)
    networking_config = cls.client.create_networking_config(
        {NETWORK_NAME: cls.client.create_endpoint_config()})
    # Start the controller
    print 'creating controller'
    # The same host config is reused for the server container below.
    host_config = cls.client.create_host_config(auto_remove=True, publish_all_ports=True)
    container = cls.client.create_container(
        image=IMAGE_NAME, command="controller", host_config=host_config, networking_config=networking_config)
    cls.client.start(container["Id"])
    TestEventgenOrchestration.controller_id = container["Id"]
    print container["Id"]
    # Inspect after start to read the published host ports.
    cls.controller_container = cls.client.inspect_container(
        container["Id"])
    cls.controller_eventgen_webport = cls.controller_container[
        "NetworkSettings"]["Ports"]["9500/tcp"][0]["HostPort"]
    cls.controller_rabbitmq_webport = cls.controller_container[
        "NetworkSettings"]["Ports"]["15672/tcp"][0]["HostPort"]
    # Start the server
    print 'creating server'
    # The AMQP host is the first 12 characters of the controller's id --
    # presumably the controller's hostname on the shared network; confirm.
    container = cls.client.create_container(
        image=IMAGE_NAME, command="server", environment=[
            "EVENTGEN_AMQP_HOST={}".format(
                cls.controller_container["Id"][:12])
        ], host_config=host_config, networking_config=networking_config)
    cls.client.start(container["Id"])
    TestEventgenOrchestration.server_id = container["Id"]
    print container["Id"]
    cls.server_container = cls.client.inspect_container(container["Id"])
    cls.server_eventgen_webport = cls.server_container["NetworkSettings"][
        "Ports"]["9500/tcp"][0]["HostPort"]
    cls.server_rabbitmq_webport = cls.server_container["NetworkSettings"][
        "Ports"]["15672/tcp"][0]["HostPort"]
    # Wait for the controller to be available
    wait_for_response("http://127.0.0.1:{}".format(
        cls.controller_eventgen_webport))
    # Wait for the server to be available
    wait_for_response("http://127.0.0.1:{}".format(
        cls.server_eventgen_webport))
def __init__(self):
    """Create the Docker client, the status filter and the configuration."""
    socket_url = 'unix://var/run/docker.sock'
    self.client = APIClient(socket_url)
    # Container statuses this object filters on.
    self.filtered_statuses = (
        'running',
        'restarting',
        'paused',
        'exited',
    )
    self.config = Config()
class DockerOperator(BaseOperator):
    """
    Execute a command inside a docker container.

    A temporary directory is created on the host and
    mounted into a container to allow storing files
    that together exceed the default disk size of 10GB in a container.
    The path to the mounted directory can be accessed
    via the environment variable ``AIRFLOW_TMP_DIR``.

    If a login to a private registry is required prior to pulling the image, a
    Docker connection needs to be configured in Airflow and the connection ID
    be provided with the parameter ``docker_conn_id``.

    :param image: Docker image from which to create the container.
        If image tag is omitted, "latest" will be used.
    :type image: str
    :param api_version: Remote API version. Set to ``auto`` to automatically
        detect the server's version.
    :type api_version: str
    :param command: Command to be run in the container. (templated)
    :type command: str or list
    :param container_name: Name of the container.
    :type container_name: str
    :param cpus: Number of CPUs to assign to the container.
        This value gets multiplied with 1024. See
        https://docs.docker.com/engine/reference/run/#cpu-share-constraint
    :type cpus: float
    :param docker_url: URL of the host running the docker daemon.
        Default is unix://var/run/docker.sock
    :type docker_url: str
    :param environment: Environment variables to set in the container. (templated)
    :type environment: dict
    :param force_pull: Pull the docker image on every run. Default is False.
    :type force_pull: bool
    :param mem_limit: Maximum amount of memory the container can use.
        Either a float value, which represents the limit in bytes,
        or a string like ``128m`` or ``1g``.
    :type mem_limit: float or str
    :param host_tmp_dir: Specify the location of the temporary directory on the host which will
        be mapped to tmp_dir. If not provided defaults to using the standard system temp directory.
    :type host_tmp_dir: str
    :param network_mode: Network mode for the container.
    :type network_mode: str
    :param tls_ca_cert: Path to a PEM-encoded certificate authority
        to secure the docker connection.
    :type tls_ca_cert: str
    :param tls_client_cert: Path to the PEM-encoded certificate
        used to authenticate docker client.
    :type tls_client_cert: str
    :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client.
    :type tls_client_key: str
    :param tls_hostname: Hostname to match against
        the docker server certificate or False to disable the check.
    :type tls_hostname: str or bool
    :param tls_ssl_version: Version of SSL to use when communicating with docker daemon.
    :type tls_ssl_version: str
    :param tmp_dir: Mount point inside the container to
        a temporary directory created on the host by the operator.
        The path is also made available via the environment variable
        ``AIRFLOW_TMP_DIR`` inside the container.
    :type tmp_dir: str
    :param user: Default user inside the docker container.
    :type user: int or str
    :param volumes: List of volumes to mount into the container, e.g.
        ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
    :type volumes: list
    :param working_dir: Working directory to
        set on the container (equivalent to the -w switch the docker client)
    :type working_dir: str
    :param xcom_all: Push all the stdout or just the last line.
        The default is False (last line).
    :type xcom_all: bool
    :param docker_conn_id: ID of the Airflow connection to use
    :type docker_conn_id: str
    :param dns: Docker custom DNS servers
    :type dns: list[str]
    :param dns_search: Docker custom DNS search domain
    :type dns_search: list[str]
    :param auto_remove: Auto-removal of the container on daemon side when the
        container's process exits.
        The default is False.
    :type auto_remove: bool
    :param shm_size: Size of ``/dev/shm`` in bytes. The size must be
        greater than 0. If omitted uses system default.
    :type shm_size: int
    """
    template_fields = ('command', 'environment',)
    template_ext = ('.sh', '.bash',)

    @apply_defaults
    def __init__(
            self,
            image: str,
            api_version: str = None,
            command: Union[str, List[str]] = None,
            container_name: str = None,
            cpus: float = 1.0,
            docker_url: str = 'unix://var/run/docker.sock',
            environment: Dict = None,
            force_pull: bool = False,
            mem_limit: Union[float, str] = None,
            host_tmp_dir: str = None,
            network_mode: str = None,
            tls_ca_cert: str = None,
            tls_client_cert: str = None,
            tls_client_key: str = None,
            tls_hostname: Union[str, bool] = None,
            tls_ssl_version: str = None,
            tmp_dir: str = '/tmp/airflow',
            user: Union[str, int] = None,
            volumes: Iterable[str] = None,
            working_dir: str = None,
            xcom_all: bool = False,
            docker_conn_id: str = None,
            dns: List[str] = None,
            dns_search: List[str] = None,
            auto_remove: bool = False,
            shm_size: int = None,
            *args,
            **kwargs) -> None:

        super().__init__(*args, **kwargs)
        self.api_version = api_version
        self.auto_remove = auto_remove
        self.command = command
        self.container_name = container_name
        self.cpus = cpus
        self.dns = dns
        self.dns_search = dns_search
        self.docker_url = docker_url
        self.environment = environment or {}
        self.force_pull = force_pull
        self.image = image
        self.mem_limit = mem_limit
        self.host_tmp_dir = host_tmp_dir
        self.network_mode = network_mode
        self.tls_ca_cert = tls_ca_cert
        self.tls_client_cert = tls_client_cert
        self.tls_client_key = tls_client_key
        self.tls_hostname = tls_hostname
        self.tls_ssl_version = tls_ssl_version
        self.tmp_dir = tmp_dir
        self.user = user
        self.volumes = volumes or []
        self.working_dir = working_dir
        self.xcom_all = xcom_all
        self.docker_conn_id = docker_conn_id
        self.shm_size = shm_size
        if kwargs.get('xcom_push') is not None:
            raise AirflowException("'xcom_push' was deprecated, use 'BaseOperator.do_xcom_push' instead")

        # Both are populated by execute(); on_kill() relies on them.
        self.cli = None
        self.container = None

    def get_hook(self):
        """
        Build a ``DockerHook`` from this operator's connection settings.

        :return: The hook for ``docker_conn_id``.
        """
        # Resolve TLS first: doing so may rewrite ``docker_url`` from tcp://
        # to https://, and the hook must receive the rewritten URL.
        tls_config = self.__get_tls_config()
        return DockerHook(
            docker_conn_id=self.docker_conn_id,
            base_url=self.docker_url,
            version=self.api_version,
            tls=tls_config
        )

    def execute(self, context):
        """
        Pull the image if needed, run the container and wait for it to exit.

        :param context: Task context (unused here).
        :return: When ``do_xcom_push`` is set, all container logs if
            ``xcom_all`` is true, otherwise the last attached output line
            encoded as UTF-8. ``None`` when ``do_xcom_push`` is not set.
        :raises AirflowException: If the container exits with a non-zero
            status code.
        """
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for chunk in self.cli.pull(self.image, stream=True):
                output = json.loads(chunk.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            # Build the bind list per run instead of appending to
            # self.volumes: the temp dir is different on every execution, so
            # mutating the attribute would pile up stale binds on retries
            # (and fail outright when ``volumes`` was given as a tuple).
            binds = list(self.volumes)
            binds.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                name=self.container_name,
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=binds,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            # ``line`` keeps the last output line for the xcom return below.
            line = ''
            for line in self.cli.attach(container=self.container['Id'],
                                        stdout=True,
                                        stderr=True,
                                        stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')

    def get_command(self):
        """
        Return the command to run, parsing list-like strings.

        A string command whose stripped form starts with ``[`` is evaluated
        with ``ast.literal_eval``; anything else is returned as given.
        """
        if isinstance(self.command, str) and self.command.strip().find('[') == 0:
            commands = ast.literal_eval(self.command)
        else:
            commands = self.command
        return commands

    def on_kill(self):
        """Stop the running container, if one was created."""
        # The task can be killed before the container exists (e.g. while the
        # image is still being pulled), so both handles must be checked.
        if self.cli is not None and self.container is not None:
            self.log.info('Stopping docker container')
            self.cli.stop(self.container['Id'])

    def __get_tls_config(self):
        """
        Build the TLS configuration when CA cert, client cert and key are set.

        As a side effect, ``docker_url`` is switched from ``tcp://`` to
        ``https://`` when TLS is configured.

        :return: A ``tls.TLSConfig`` or ``None`` when TLS is not configured.
        """
        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(
                ca_cert=self.tls_ca_cert,
                client_cert=(self.tls_client_cert, self.tls_client_key),
                verify=True,
                ssl_version=self.tls_ssl_version,
                assert_hostname=self.tls_hostname
            )
            self.docker_url = self.docker_url.replace('tcp://', 'https://')
        return tls_config
class Portal(object):
    """
    Run a packaged command inside a Docker container.

    The command's ``spec.toml`` describes its image, arguments and the files
    to copy into and out of the container. SIGINT and SIGTERM stop and remove
    the running container.
    """

    def __init__(self):
        self._docker_client = APIClient()
        self._kill_now = False
        self._container_id = None
        self._std_in = None
        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)

    def _cleanup(self):
        """Flag shutdown, then stop and force-remove the container if any."""
        self._kill_now = True
        if self._container_id is not None:
            self._docker_client.stop(self._container_id)
            self._docker_client.remove_container(self._container_id, v=True, force=True)

    def _exit_gracefully(self, signum, frame):
        """Signal handler: clean up the container."""
        self._cleanup()

    # Bad code to capture whether stdin is set or not
    def _capture_stdin(self):
        """Read all of stdin into ``_std_in`` when data is ready or stdin is not a TTY."""
        if select.select([sys.stdin, ], [], [], 0.0)[0] or not sys.stdin.isatty():
            self._std_in = sys.stdin.buffer.read()

    def _download_docker_image(self, command, docker_spec):
        """
        Make sure the command's image exists locally and return its metadata.

        When ``docker_spec['image']`` is ``'Dockerfile'`` the image is named
        ``portal/<command>`` and built from the packaged Dockerfile if
        missing; otherwise the named image is pulled if missing.

        :return: The result of ``inspect_image`` for the image.
        """
        build_locally = docker_spec['image'] == 'Dockerfile'
        if build_locally:
            docker_image_name = "portal/" + command
        else:
            docker_image_name = docker_spec['image']

        try:
            return self._docker_client.inspect_image(docker_image_name)
        except ImageNotFound:
            pass

        if build_locally:
            dockerfile = pkgutil.get_data(
                __name__, "commands/%s/Dockerfile" % command
            ).decode('utf-8')
            f = BytesIO(dockerfile.encode('utf-8'))
            for progress_dict in self._docker_client.build(fileobj=f, quiet=True,
                                                           tag=docker_image_name,
                                                           decode=True, rm=True):
                print(progress_dict)
        else:
            print('Pulling Docker Image...')
            for progress_dict in self._docker_client.pull(docker_spec['image'],
                                                          stream=True, decode=True):
                print(progress_dict['status'])
                if 'progress' in progress_dict:
                    print(progress_dict['progress'])
        return self._docker_client.inspect_image(docker_image_name)

    def _parse_args(self, spec_data, argv):
        """
        Parse ``argv`` against the spec and store each value in the spec.

        :return: ``(spec_data, cmd_args)`` where ``cmd_args`` is the
            positional argument list extended with the pass-through
            variables of the spec.
        """
        parser = generate_argparse(spec_data['command'], spec_data['arguments'])
        cmd_options = vars(parser.parse_args(argv))
        cmd_args = cmd_options['cmdargs']
        for argkey in spec_data['arguments'].keys():
            argument = spec_data['arguments'][argkey]
            if argument['shorthand'] == '*':
                # '*' is the positional argument: it takes the first
                # positional value, and file arguments are rewritten to their
                # location inside the container's working directory.
                if len(cmd_args) > 0:
                    argument['value'] = cmd_args[0]
                    if 'File' in argument['docker']:
                        cmd_args = [os.path.join(spec_data['docker']['working_dir'],
                                                 cmd_options['cmdargs'][0])]
                continue
            argument['value'] = cmd_options[argument['shorthand']]
        cmd_args += merge_passthrough_vars(spec_data)
        return spec_data, cmd_args

    def _validate_spec(self, spec_data):
        """Placeholder validation; currently performs no checks."""
        for _, vargs in spec_data['arguments'].items():
            if vargs['argType'] == 'path' and vargs['docker'] == 'volumeBinding':
                # Check if path exists
                # if (not os.path.isfile(vargs['value'])): #TODO: Fix!
                #     print('Error: Path %s does not exist!' % vargs['value'])
                #     exit(101)
                pass

    def _create_container(self, cinfo, attach_stdin):
        """Create (but do not start) the container described by ``cinfo``."""
        host_config = self._docker_client.create_host_config(
            port_bindings=cinfo.port_bindings,
            binds=cinfo.vol_bindings
        )
        return self._docker_client.create_container(
            cinfo.container_id,
            command=cinfo.command,
            ports=cinfo.ports,
            environment=cinfo.environment_vars,
            stdin_open=attach_stdin,
            volumes=cinfo.volumes,
            # tty=True,
            host_config=host_config
        )

    @staticmethod
    def _creation_warnings(docker_container):
        """Return the warnings of a create-container response as a list.

        The ``Warnings`` entry may be missing or ``None``; both are treated
        as "no warnings".
        """
        return list(docker_container.get('Warnings') or [])

    def _copy_artefacts_to_container(self, container_id, command_spec):
        """
        Copy the spec's input files and input env files into the container.

        Input files go to the spec's working directory; env files are read
        from the user's home directory and copied to ``/root``.

        :return: Always ``True``; a missing file is only reported on stdout.
        """
        def copy_file(input_path, input_name, output_path):
            if not os.path.isfile(input_path):
                print("Could not find file %s " % input_path)
                return False
            tar_name = str(uuid.uuid4()) + '.tar'
            try:
                with tarfile.open(tar_name, mode='w') as tf:
                    tf.add(input_path, arcname=input_name)
                with open(tar_name, 'rb') as tar_file:
                    data = tar_file.read()
                self._docker_client.put_archive(container_id, output_path, data)
            finally:
                # Never leave the temporary archive behind, even on error.
                if os.path.exists(tar_name):
                    os.remove(tar_name)

        for file in get_input_files(command_spec):
            copy_file(file['value'], file['value'], command_spec['docker']['working_dir'])
        home = str(Path.home())
        for file in get_input_env_files(command_spec):
            copy_file(os.path.join(home, file['name']), file['name'], '/root')
        # NOTE(review): copy_file failures are not propagated, so callers
        # always see True - confirm whether a missing input should abort.
        return True

    def _copy_artefacts_from_container(self, container_id, command_spec):
        """
        Copy the spec's output files and output env files out of the
        container and extract them into the current directory.
        """
        def copy_file(input_file):
            tar_name = str(uuid.uuid4()) + '.tar'
            try:
                bits, _ = self._docker_client.get_archive(container_id, input_file)
                with open(tar_name, 'wb') as f:
                    for chunk in bits:
                        f.write(chunk)
                # NOTE(review): extractall() trusts the member paths of an
                # archive produced inside the container.
                with tarfile.open(tar_name) as tar:
                    tar.extractall()
            finally:
                if os.path.exists(tar_name):
                    os.remove(tar_name)

        for file in get_output_files(command_spec):
            copy_file(os.path.join(command_spec['docker']['working_dir'], file['value']))
        for file in get_output_env_files(command_spec):
            copy_file(os.path.join('/root/', file['name']))

    def run_command(self, command, argv):
        """
        Run ``command`` with ``argv`` in a container and stream its output.

        :return: ``0`` on completion, ``101`` when the command spec is not
            found, the container reports creation warnings, or the input
            artefacts cannot be copied.
        """
        command_spec = None
        try:
            spec_data = pkgutil.get_data(
                __name__, "commands/%s/spec.toml" % command
            ).decode('utf-8')
            command_spec = toml.loads(spec_data)
        except FileNotFoundError:
            print('Command not found')
            return 101
        self._capture_stdin()
        command_spec, cmd_argv = self._parse_args(command_spec, argv)
        self._validate_spec(command_spec)
        image_info = self._download_docker_image(command, command_spec['docker'])
        cinfo = construct_container(image_info, cmd_argv, command_spec)
        docker_container = self._create_container(cinfo, (self._std_in is not None))

        warnings = self._creation_warnings(docker_container)
        if warnings:
            print('Could not start container. Warnings: %s' % ' '.join(warnings))
            return 101

        self._container_id = docker_container.get('Id')
        print('Process created in container: %s' % self._container_id)
        if not self._copy_artefacts_to_container(self._container_id, command_spec):
            self._cleanup()
            return 101

        if self._std_in is not None:
            # Feed the captured stdin through the attach socket before the
            # container is started.
            s = self._docker_client.attach_socket(self._container_id,
                                                  params={'stdin': 1, 'stream': 1})
            os.write(s.fileno(), self._std_in)
            # s._sock.sendall(self._std_in)
            s.close()

        self._docker_client.start(container=self._container_id)
        for log in self._docker_client.logs(
                container=self._container_id, stream=True, follow=True):
            sys.stdout.buffer.write(log)
        self._docker_client.wait(container=self._container_id)
        self._copy_artefacts_from_container(self._container_id, command_spec)
        self._docker_client.remove_container(container=self._container_id)
        return 0
class DockerNode(CommonNode):
    """
    An instance of this class will create a detached Docker container.

    This node binds the ``shared_dir_mount`` directory of the container to a
    local path in the host system defined in ``self.shared_dir``.

    :param str identifier: Node unique identifier in the topology being built.
    :param str image: The image to run on this node, in the
     form ``repository:tag``.
    :param str registry: Docker registry to pull image from.
    :param str command: The command to run when the container is brought up.
    :param str binds: Directories to bind for this container separated by a
     ``;`` in the form:

     ::

        '/tmp:/tmp;/dev/log:/dev/log;/sys/fs/cgroup:/sys/fs/cgroup'

    :param str network_mode: Network mode for this container.
    :param str hostname: Container hostname.
    :param environment: Environment variables to pass to the
     container. They can be set as a list of strings in the following format:

     ::

        ['environment_variable=value']

     or as a dictionary in the following format:

     ::

        {'environment_variable': 'value'}

    :type environment: list or dict
    :param bool privileged: Run container in privileged mode or not.
    :param bool tty: Whether to allocate a TTY or not to the process.
    :param str shared_dir_base: Base path in the host where the shared
     directory will be created. The shared directory will always have the
     name of the container inside this directory.
    :param str shared_dir_mount: Mount point of the shared directory in the
     container.
    :param dict create_host_config_kwargs: Extra kwargs arguments to pass to
     docker-py's ``create_host_config()`` low-level API call.
    :param dict create_container_kwargs: Extra kwargs arguments to pass to
     docker-py's ``create_container()`` low-level API call.

    Read only public attributes:

    :var str image: Name of the Docker image being used by this node.
     Same as the ``image`` keyword argument.
    :var str container_id: Unique container identifier assigned by the Docker
     daemon in the form of a hash.
    :var str container_name: Unique container name assigned by the framework in
     the form ``{identifier}_{pid}_{timestamp}``.
    :var str shared_dir: Share directory in the host for this container.
     Always ``{shared_dir_base}/{container_name}``.
    :var str shared_dir_mount: Directory inside the container where the
     ``shared_dir`` is mounted. Same as the ``shared_dir_mount`` keyword
     argument.

    .. automethod:: _get_network_config
    """

    @abstractmethod
    def __init__(
            self, identifier,
            image='ubuntu:latest', registry=None, command='bash',
            binds=None, network_mode='none', hostname=None,
            environment=None, privileged=True, tty=True,
            shared_dir_base='/tmp/topology/docker/',
            shared_dir_mount='/var/topology',
            create_host_config_kwargs=None,
            create_container_kwargs=None,
            **kwargs):

        super(DockerNode, self).__init__(identifier, **kwargs)

        self._pid = None
        self._image = image
        self._registry = registry
        self._command = command
        self._hostname = hostname
        self._environment = environment
        self._client = APIClient(version='auto')

        self._container_name = '{identifier}_{pid}_{timestamp}'.format(
            identifier=identifier, pid=getpid(),
            timestamp=datetime.now().isoformat().replace(':', '-'))
        self._shared_dir_base = shared_dir_base
        self._shared_dir_mount = shared_dir_mount
        self._shared_dir = join(shared_dir_base, self._container_name)

        self._create_host_config_kwargs = create_host_config_kwargs or {}
        self._create_container_kwargs = create_container_kwargs or {}

        # Autopull docker image if necessary
        self._autopull()

        # Create shared directory
        ensure_dir(self._shared_dir)

        # Add binded directories
        container_binds = [
            '{}:{}'.format(self._shared_dir, self._shared_dir_mount)
        ]
        if binds is not None:
            container_binds.extend(binds.split(';'))

        # Create host config
        create_host_config_call = {
            'privileged': privileged,
            'network_mode': network_mode,
            'binds': container_binds,
            'init': True
        }
        # Caller-supplied kwargs take precedence over the defaults above.
        create_host_config_call.update(self._create_host_config_kwargs)

        self._host_config = self._client.create_host_config(
            **create_host_config_call)

        # Create container
        create_container_call = {
            'image': self._image,
            'command': self._command,
            'name': self._container_name,
            'detach': True,
            'tty': tty,
            'hostname': self._hostname,
            'host_config': self._host_config,
            'environment': self._environment,
        }
        create_container_call.update(self._create_container_kwargs)

        self._container_id = self._client.create_container(
            **create_container_call)['Id']

    @property
    def image(self):
        return self._image

    @property
    def container_id(self):
        return self._container_id

    @property
    def container_name(self):
        return self._container_name

    @property
    def shared_dir(self):
        return self._shared_dir

    @property
    def shared_dir_mount(self):
        return self._shared_dir_mount

    def _get_network_config(self):
        """
        Defines the network configuration for nodes of this type.

        This method should be overridden when implementing a new node type to
        return a dictionary with its network configuration by setting the
        following components:

        'mapping'
            This is a dictionary of dictionaries, each parent-level key
            defines one network category, and each category *must* have these
            three keys: **netns**, **managed_by**, and **prefix**, and *can*
            (optionally) have a **connect_to** key).

            'netns'
                Specifies the network namespace (inside the docker container)
                where all the ports belonging to this category will be moved
                after their creation. If set to None, then the ports will
                remain in the container's default network namespace.

            'managed_by'
                Specifies who will manage different aspects of this network
                category depending on its value (which can be either
                **docker** or **platform**).

                'docker'
                    This network category will represent a network created by
                    docker (identical to using the docker network create
                    command) and will be visible to docker (right now all
                    docker-managed networks are created using docker's
                    'bridge' built-in network plugin, this will likely change
                    in the near future).

                'platform'
                    This network category will represent ports created by the
                    Docker Platform Engine and is invisible to docker.

            'prefix'
                Defines a prefix that will be used when a port/interface is
                moved into a namespace, its value can be set to '' (empty
                string) if no prefix is needed. In cases where the parent
                network category doesn't have a netns (i.e. 'netns' is set to
                None) this value will be ignored.

            'connect_to'
                Specifies a Docker network this category will be connected
                to, if this network doesn't exist it will be created. If set
                to None, this category will be connected to a uniquely named
                Docker network that will be created by the platform.

        'default_category'
            Every port that didn't explicitly set its category (using the
            "category" attribute in the SZN definition) will be set to this
            category.

        This is an example of a network configuration dictionary as expected
        to be returned by this function::

            {
                'default_category': 'front_panel',
                'mapping': {
                    'oobm': {
                        'netns': 'oobmns',
                        'managed_by': 'docker',
                        'connect_to': 'oobm',
                        'prefix': ''
                    },
                    'back_panel': {
                        'netns': None,
                        'managed_by': 'docker',
                        'prefix': ''
                    },
                    'front_panel': {
                        'netns': 'front',
                        'managed_by': 'platform',
                        'prefix': 'f_'
                    }
                }
            }

        :returns: The dictionary defining the network configuration.
        :rtype: dict
        """
        return {
            'default_category': 'front_panel',
            'mapping': {
                'oobm': {
                    'netns': None,
                    'managed_by': 'docker',
                    'prefix': ''
                },
                'front_panel': {
                    'netns': 'front_panel',
                    'managed_by': 'platform',
                    'prefix': ''
                }
            }
        }

    def _autopull(self):
        """
        Autopulls the docker image of the node, if necessary.

        Nothing is done when the image name is already among the local
        repository tags. Otherwise the image is pulled (from ``registry``
        when one was given) and re-tagged with the requested name if the
        pulled name differs.

        :raises Exception: If the pull reports an error or tagging fails.
        """
        # Search for image in available images
        for tags in [img['RepoTags'] for img in self._client.images()]:
            # Docker py can return repo tags as None
            if tags and self._image in tags:
                return

        # Determine image parts
        registry = self._registry
        image = self._image
        tag = 'latest'

        # Only the text after the last ':' can be a tag, and a tag never
        # contains '/'. This keeps names such as 'host:5000/repo' (registry
        # port, no tag) and 'host:5000/repo:tag' from being mis-split.
        name, separator, candidate = self._image.rpartition(':')
        if separator and '/' not in candidate:
            image, tag = name, candidate

        # Pull image
        pull_uri = image
        if registry:
            pull_uri = '{}/{}'.format(registry, image)
        pull_name = '{}:{}'.format(pull_uri, tag)

        log.info('Trying to pull image {} ...'.format(pull_name))

        # Only the last status line of the pull stream is inspected.
        last = ''
        for line in self._client.pull(pull_uri, tag=tag, stream=True):
            last = line
        status = loads(last.decode('utf8'))

        log.debug('Pulling result :: {}'.format(status))

        if 'error' in status:
            raise Exception(status['error'])

        # Retag if required
        if pull_name != self._image:
            if not self._client.tag(pull_name, image, tag):
                raise Exception(
                    'Error when tagging image {} with tag {}:{}'.format(
                        pull_name, image, tag))

            log.info('Tagged image {} with tag {}:{}'.format(
                pull_name, image, tag))

    def _docker_exec(self, command):
        """
        Execute a command inside the docker.

        :param str command: The command to execute.
        :return: The decoded output of the ``docker exec`` call.
        :rtype: str
        """
        log.debug('[{}]._docker_exec(\'{}\') ::'.format(
            self._container_id, command))

        response = check_output(
            shsplit('docker exec {container_id} {command}'.format(
                container_id=self._container_id, command=command.strip()))
        ).decode('utf8')

        log.debug(response)
        return response

    def _get_services_address(self):
        """
        Get the service address of the node using Docker's inspect mechanism
        to grab OOBM interface address.

        :return: The address (IP or FQDN) of the services interface (oobm).
        :rtype: str
        """
        network_name = self._container_name + '_oobm'
        address = self._client.inspect_container(
            self.container_id
        )['NetworkSettings']['Networks'][network_name]['IPAddress']
        return address

    def notify_add_biport(self, node, biport):
        """
        Get notified that a new biport was added to this engine node.

        :param node: The specification node that spawn this engine node.
        :type node: pynml.nml.Node
        :param biport: The specification bidirectional port added.
        :type biport: pynml.nml.BidirectionalPort
        :rtype: str
        :return: The assigned interface name of the port.
        """
        network_config = self._get_network_config()

        category = biport.metadata.get('category',
                                       network_config['default_category'])
        category_config = network_config['mapping'][category]

        if category_config['managed_by'] == 'docker':
            netname = category_config.get(
                'connect_to',
                '{}_{}'.format(self._container_name, category))
            return get_iface_name(self, netname)
        else:
            return biport.metadata.get('label', biport.identifier)

    def notify_add_bilink(self, nodeport, bilink):
        """
        Get notified that a new bilink was added to a port of this engine node.

        :param nodeport: A tuple with the specification node and port being
         linked.
        :type nodeport: (pynml.nml.Node, pynml.nml.BidirectionalPort)
        :param bilink: The specification bidirectional link added.
        :type bilink: pynml.nml.BidirectionalLink
        """

    def notify_post_build(self):
        """
        Get notified that the post build stage of the topology build was
        reached.
        """
        # Log container data
        image_data = self._client.inspect_image(image=self._image)
        # RepoTags can be present but None, so ``.get`` with a default is
        # not enough to guarantee an iterable.
        repo_tags = image_data.get('RepoTags') or []
        log.info('Started container {}:\n'
                 ' Image name: {}\n'
                 ' Image id: {}\n'
                 ' Image creation date: {}\n'
                 ' Image tags: {}'.format(
                     self._container_name,
                     self._image,
                     image_data.get('Id', '????'),
                     image_data.get('Created', '????'),
                     ', '.join(repo_tags)))
        container_data = self._client.inspect_container(
            container=self._container_id)
        log.debug(container_data)

    def start(self):
        """
        Start the docker node and record the PID of its main process.
        """
        self._client.start(self._container_id)
        self._pid = self._client.inspect_container(
            self._container_id)['State']['Pid']

    def stop(self):
        """
        Request container to stop, wait for it and remove it.
        """
        self._client.stop(self._container_id)
        self._client.wait(self._container_id)
        self._client.remove_container(self._container_id)

    def disable(self):
        """
        Disable the node.

        In Docker implementation this pauses the container.
        """
        # Ports are brought down while the container can still execute
        # commands, i.e. before it is paused.
        for portlbl in self.ports:
            self.set_port_state(portlbl, False)
        self._client.pause(self._container_id)

    def enable(self):
        """
        Enable the node.

        In Docker implementation this unpauses the container.
        """
        self._client.unpause(self._container_id)
        for portlbl in self.ports:
            self.set_port_state(portlbl, True)

    def set_port_state(self, portlbl, state):
        """
        Set the given port label to the given state.

        :param str portlbl: The label of the port.
        :param bool state: True for up, False for down.
        """
        iface = self.ports[portlbl]
        state = 'up' if state else 'down'

        # ``iface`` and ``state`` are substituted from the local namespace.
        command = ('ip netns exec front_panel '
                   'ip link set dev {iface} {state}'.format(**locals()))
        self._docker_exec(command)
def __init__(
        self, identifier,
        image='ubuntu:latest', registry=None, command='bash',
        binds=None, network_mode='none', hostname=None,
        environment=None, privileged=True, tty=True,
        shared_dir_base='/tmp/topology/docker/',
        shared_dir_mount='/var/topology',
        create_host_config_kwargs=None,
        create_container_kwargs=None,
        **kwargs):
    """
    Create (without starting) the Docker container backing this node.

    The image is pulled if needed, the shared directory is created on the
    host and bound to ``shared_dir_mount``, and the container is created
    with the resulting host configuration. Entries in
    ``create_host_config_kwargs`` and ``create_container_kwargs`` override
    the defaults computed here.
    """
    super(DockerNode, self).__init__(identifier, **kwargs)

    # Plain state taken from the arguments.
    self._pid = None
    self._image = image
    self._registry = registry
    self._command = command
    self._hostname = hostname
    self._environment = environment

    self._client = APIClient(version='auto')

    # Unique name: identifier, current PID and a ':'-free ISO timestamp.
    pid = getpid()
    stamp = datetime.now().isoformat().replace(':', '-')
    self._container_name = '{identifier}_{pid}_{timestamp}'.format(
        identifier=identifier, pid=pid, timestamp=stamp)

    self._shared_dir_base = shared_dir_base
    self._shared_dir_mount = shared_dir_mount
    self._shared_dir = join(shared_dir_base, self._container_name)

    self._create_host_config_kwargs = (
        create_host_config_kwargs if create_host_config_kwargs else {})
    self._create_container_kwargs = (
        create_container_kwargs if create_container_kwargs else {})

    # Image first, then the host directory that will be mounted.
    self._autopull()
    ensure_dir(self._shared_dir)

    # The shared directory bind always comes first, user binds after it.
    shared_bind = '{}:{}'.format(self._shared_dir, self._shared_dir_mount)
    extra_binds = binds.split(';') if binds is not None else []
    container_binds = [shared_bind] + extra_binds

    host_options = dict(
        privileged=privileged,
        network_mode=network_mode,
        binds=container_binds,
        init=True,
    )
    host_options.update(self._create_host_config_kwargs)
    self._host_config = self._client.create_host_config(**host_options)

    container_options = dict(
        image=self._image,
        command=self._command,
        name=self._container_name,
        detach=True,
        tty=tty,
        hostname=self._hostname,
        host_config=self._host_config,
        environment=self._environment,
    )
    container_options.update(self._create_container_kwargs)

    created = self._client.create_container(**container_options)
    self._container_id = created['Id']
def run_top_container(client: APIClient):
    """Create and start a detached container named "top" that runs ``top``.

    Returns the ID of the created container.
    """
    created = client.create_container(
        constant.ALPINE,
        command="top",
        detach=True,
        tty=True,
        name="top",
    )
    container_id = created.get("Id")
    client.start(container_id)
    return container_id
def remove_all_images(client: APIClient):
    """Force-remove every image returned by ``client.images(quiet=True)``."""
    image_ids = client.images(quiet=True)
    for image_id in image_ids:
        client.remove_image(image_id, force=True)
class DockerHelper:
    """
    Thin wrapper around the Docker API client used to collect container
    parameters and metrics, with an optional cache of inspect results.
    """

    def __init__(self, config):
        super().__init__()
        # Guards ``__check_in_progress`` so overlapping periodic checks are
        # detected.
        self.__padlock = threading.Lock()
        self.__check_in_progress = False
        self.__config = config
        self.__client = APIClient(base_url=config.docker_socket, timeout=config.docker_req_timeout_sec)
        # container id -> result of inspect_container
        self.__params_cache = {}
        self.last_check_containers_run_end_timestamp = datetime.datetime.min
        self.last_check_containers_run_start_timestamp = datetime.datetime.min
        self.last_check_containers_run_time = datetime.timedelta.min
        self.last_periodic_run_ok = False

    def check_container(self, container_id, remove_from_cache=False):
        """
        Fetch params and metrics for one container.

        :param container_id: ID of the container to check.
        :param remove_from_cache: Drop any cached params for the container
            before fetching.
        :return: A ``Container`` built from the fetched data, or ``None``
            when the container is not found or fetching fails.
        """
        try:
            if remove_from_cache:
                self.remove_from_cache(container_id)

            if not self.__config.disable_params:
                params = self.get_params(container_id)
            else:
                params = {}
            if not self.__config.disable_metrics:
                logger.debug("[{0}] Starting to fetch metrics for {1}".format(threading.current_thread().name,
                                                                              container_id))
                metrics = self.__client.stats(container=container_id, decode=True, stream=False)
            else:
                metrics = {}
            logger.debug("[{0}] Fetched data for container {1}".format(threading.current_thread().name, container_id))
        except NotFound as e:
            logger.warning("Container {0} not found - {1}.".format(container_id, e))
            return None
        except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
            logger.error("Communication error when fetching info about container {0}: {1}".format(container_id, e))
            return None
        except Exception as e:
            logger.error("Unexpected error when fetching info about container {0}: {1}".format(container_id, e))
            return None
        return Container(container_id, params, metrics, 0)

    def check_containers(self):
        """
        Generator yielding a ``Container`` for every container that could be
        checked.

        Nothing is yielded when a previous check is still in progress or
        when the container list cannot be fetched. The ``last_*`` run
        attributes are updated once iteration completes.
        """
        with self.__padlock:
            if self.__check_in_progress:
                logger.warning("[{0}] Previous check did not yet complete, consider increasing CHECK_INTERVAL_S"
                               .format(threading.current_thread().name))
                return
            self.__check_in_progress = True
        logger.debug("Periodic check start: connecting to get the list of containers")
        self.last_check_containers_run_start_timestamp = datetime.datetime.utcnow()
        try:
            containers = self.__client.containers(quiet=True)
            logger.debug("[{0}] Fetched containers list from docker daemon".format(threading.current_thread().name))
        except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
            logger.error("Timeout while trying to get list of containers from docker: {0}".format(e))
            with self.__padlock:
                self.__check_in_progress = False
            self.last_periodic_run_ok = False
            return
        except Exception as e:
            logger.error("Unexpected error while trying to get list of containers from docker: {0}".format(e))
            with self.__padlock:
                self.__check_in_progress = False
            self.last_periodic_run_ok = False
            return
        ids = [container['Id'] for container in containers]
        for container_id in ids:
            container = self.check_container(container_id)
            if container is None:
                continue
            yield container
        logger.debug("Containers checked")
        if self.__config.cache_params:
            logger.debug("Purging cache")
            self.purge_cache(ids)
        self.last_periodic_run_ok = True
        self.last_check_containers_run_end_timestamp = datetime.datetime.utcnow()
        self.last_check_containers_run_time = self.last_check_containers_run_end_timestamp \
            - self.last_check_containers_run_start_timestamp
        logger.debug("Periodic check done")
        with self.__padlock:
            self.__check_in_progress = False

    def get_params(self, container_id):
        """
        Return the inspect data of a container, using the cache if enabled.

        :return: The inspect result; ``None`` when the container is not
            found; ``{}`` on communication or unexpected errors.
        """
        if self.__config.cache_params and container_id in self.__params_cache:
            logger.debug("Returning cached params for container {0}".format(container_id))
            return self.__params_cache[container_id]

        logger.debug("[{0}] Starting to fetch params for {1}".format(threading.current_thread().name, container_id))
        try:
            params = self.__client.inspect_container(container_id)
        except NotFound as e:
            logger.warning("Container {0} not found - {1}.".format(container_id, e))
            return None
        except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
            logger.error("Communication error when fetching params for container {0}: {1}".format(container_id, e))
            return {}
        except Exception as e:
            logger.error("Unexpected error when fetching params for container {0}: {1}".format(container_id, e))
            return {}
        logger.debug("[{0}] Params fetched for {1}".format(threading.current_thread().name, container_id))
        if not self.__config.cache_params:
            return params

        logger.debug("[{0}] Storing params of {1} in cache".format(threading.current_thread().name, container_id))
        self.__params_cache[container_id] = params
        return params

    def purge_cache(self, running_container_ids):
        """Drop cached params of every container not in ``running_container_ids``."""
        running = set(running_container_ids)
        stale = [cid for cid in self.__params_cache if cid not in running]
        for cid in stale:
            self.__params_cache.pop(cid, None)

    def remove_from_cache(self, container_id):
        """Drop the cached params of one container, if present."""
        self.__params_cache.pop(container_id, None)

    def get_events_observable(self):
        """
        Subscribe to the Docker event stream, retrying every 5 seconds until
        the subscription succeeds.

        :return: The object returned by ``APIClient.events(decode=True)``.
        """
        while True:
            try:
                # Return only on success; any failure falls through to the
                # sleep below and the subscription is attempted again.
                return self.__client.events(decode=True)
            except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
                logger.error("Communication error when subscribing for container events, retrying in 5s: {0}".format(e))
            except Exception as e:
                logger.error("Unexpected error when subscribing for container events, retrying in 5s: {0}".format(e))
            time.sleep(5)

    def kill_container(self, container):
        """Stop the given container; errors are logged, not raised."""
        try:
            self.__client.stop(container.params['Id'])
        except (ReadTimeout, ProtocolError) as e:
            logger.error("Communication error when stopping container {0}: {1}".format(container.cid, e))
        except Exception as e:
            logger.error("Unexpected error when stopping container {0}: {1}".format(container.cid, e))
def get_conn(self) -> APIClient:
    """Create a Docker ``APIClient`` from the stored settings and log it in.

    The client is built from the instance's base URL, API version and TLS
    settings, then handed to ``self.__login`` before being returned.

    :return: the newly created client.
    """
    connection_kwargs = {
        'base_url': self.__base_url,
        'version': self.__version,
        'tls': self.__tls,
    }
    api_client = APIClient(**connection_kwargs)
    self.__login(api_client)
    return api_client
class DockerBuilder(BuilderInterface):
    """A builder using the local Docker client"""

    def __init__(self,
                 repository,
                 image_name=DEFAULT_IMAGE_NAME,
                 image_tag=None,
                 base_image=None,
                 dockerfile_path=None):
        """Store the build settings and compute the full image name.

        :param repository: repository passed to ``utils.get_image_full_name``.
        :param image_name: name of the image to build.
        :param image_tag: tag of the image; when ``None`` a tag is obtained
            from ``utils.get_unique_tag()``.
        :param base_image: base image forwarded to ``write_dockerfile``.
        :param dockerfile_path: path forwarded to ``write_dockerfile``.
        """
        self.repository = repository
        self.image_name = image_name
        self.base_image = base_image
        self.dockerfile_path = dockerfile_path

        if image_tag is None:
            self.image_tag = utils.get_unique_tag()
        else:
            self.image_tag = image_tag
        self.full_image_name = utils.get_image_full_name(
            self.repository,
            self.image_name,
            self.image_tag)
        # Created lazily in execute().
        self.docker_client = None

    def generate_pod_spec(self):
        """return a V1PodSpec initialized with the proper container"""
        return client.V1PodSpec(containers=[
            client.V1Container(
                name='model',
                image=self.full_image_name,
            )
        ], restart_policy='Never')

    def execute(self):
        """Write the Dockerfile, then build and publish the image."""
        write_dockerfile(dockerfile_path=self.dockerfile_path,
                         base_image=self.base_image)
        self.docker_client = APIClient(version='auto')
        self.build()
        self.publish()

    def build(self):
        """Build the image from the current directory and log the output."""
        # Logger.warn is a deprecated alias; Logger.warning is the supported
        # spelling and emits the same record.
        logger.warning('Building docker image {}...'.format(self.full_image_name))
        bld = self.docker_client.build(path='.',
                                       tag=self.full_image_name,
                                       encoding='utf-8')
        for line in bld:
            self._process_stream(line)

    def publish(self):
        """Push the image and log the output."""
        logger.warning('Publishing image {}...'.format(self.full_image_name))
        for line in self.docker_client.push(self.full_image_name, stream=True):
            self._process_stream(line)

    def _process_stream(self, line):
        """Decode one chunk of Docker output and log each JSON message in it.

        :param line: raw ``bytes`` chunk; it may hold several
            newline-separated JSON documents.
        :raises Exception: when a message carries a non-empty ``error`` field.
        """
        raw = line.decode('utf-8').strip()
        for ln in raw.split('\n'):
            # An empty chunk or blank separator line is not a decode failure;
            # skip it instead of logging a misleading warning.
            if not ln.strip():
                continue
            try:
                ljson = json.loads(ln)
            except json.JSONDecodeError:
                logger.warning('JSON decode error: {}'.format(ln))
                continue

            if ljson.get('error'):
                msg = str(ljson.get('error', ljson))
                logger.error('Build failed: ' + msg)
                raise Exception('Image build failed: ' + msg)

            if ljson.get('stream'):
                msg = 'Build output: {}'.format(ljson['stream'].strip())
            elif ljson.get('status'):
                msg = 'Push output: {} {}'.format(
                    ljson['status'],
                    ljson.get('progress'))
            elif ljson.get('aux'):
                msg = 'Push finished: {}'.format(ljson.get('aux'))
            else:
                msg = str(ljson)
            logger.info(msg)
class DockerBuilder(object):
    """Render a Dockerfile for a build job, build the image and push it."""

    LATEST_IMAGE_TAG = 'latest'
    WORKDIR = '/code'

    def __init__(self,
                 build_job,
                 repo_path,
                 from_image,
                 copy_code=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        """Derive image names and filesystem paths from the build job.

        :param build_job: job object; its ``uuid.hex`` becomes the image tag
            and its ``unique_name`` is used when publishing logs.
        :param repo_path: path of the checked-out repository. Its last
            component is the folder name and its parent is the build context.
        :param from_image: base image passed to the Dockerfile template.
        :param copy_code: passed to the Dockerfile template.
        :param build_steps: extra build steps, normalised with ``get_list``.
        :param env_vars: environment variables, normalised with ``get_list``.
        :param dockerfile_name: file name of the Dockerfile written into the
            build context.
        """
        self.build_job = build_job
        self.job_uuid = build_job.uuid.hex
        self.job_name = build_job.unique_name
        self.from_image = from_image
        self.image_name = get_image_name(self.build_job)
        self.image_tag = self.job_uuid
        self.folder_name = repo_path.split('/')[-1]
        self.repo_path = repo_path
        self.copy_code = copy_code
        self.build_path = '/'.join(self.repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None

    def get_tagged_image(self):
        """Return the tagged image name for this build job."""
        return get_tagged_image(self.build_job)

    def check_image(self):
        """Return the daemon's image listing for the tagged image."""
        return self.docker.images(self.get_tagged_image())

    def clean(self):
        """Delete the generated Dockerfile."""
        delete_path(self.dockerfile_path)

    def login_internal_registry(self):
        """Log in to the registry configured in settings.

        A ``DockerException`` is logged and swallowed (best effort).
        """
        try:
            self.docker.login(username=settings.REGISTRY_USER,
                              password=settings.REGISTRY_PASSWORD,
                              registry=settings.REGISTRY_HOST,
                              reauth=True)
        except DockerException as e:
            _logger.exception('Failed to connect to registry %s\n', e)

    def login_private_registries(self):
        """Log in to every registry listed in ``settings.PRIVATE_REGISTRIES``."""
        if not settings.PRIVATE_REGISTRIES:
            return

        for registry in settings.PRIVATE_REGISTRIES:
            self.docker.login(username=registry.user,
                              password=registry.password,
                              registry=registry.host,
                              reauth=True)

    @staticmethod
    def _prepare_log_lines(log_line):
        """Turn one raw chunk of Docker output into readable log lines.

        :param log_line: ``bytes`` chunk, possibly holding several
            newline-separated JSON documents.
        :return: list of formatted strings, one per document.
        :raises DockerBuilderError: when a document has a non-empty ``error``.
        """
        raw = log_line.decode('utf-8').strip()
        raw_lines = raw.split('\n')
        log_lines = []
        for raw_line in raw_lines:
            try:
                json_line = json.loads(raw_line)

                if json_line.get('error'):
                    raise DockerBuilderError(str(json_line.get('error', json_line)))
                else:
                    if json_line.get('stream'):
                        log_lines.append('Build: {}'.format(json_line['stream'].strip()))
                    elif json_line.get('status'):
                        log_lines.append('Push: {} {}'.format(
                            json_line['status'],
                            json_line.get('progress')
                        ))
                    elif json_line.get('aux'):
                        log_lines.append('Push finished: {}'.format(json_line.get('aux')))
                    else:
                        log_lines.append(str(json_line))
            except json.JSONDecodeError:
                log_lines.append('JSON decode error: {}'.format(raw_line))
        return log_lines

    def _handle_logs(self, log_lines):
        """Publish a batch of log lines for this build job."""
        publisher.publish_build_job_log(
            log_lines=log_lines,
            job_uuid=self.job_uuid,
            job_name=self.job_name
        )

    def _handle_log_stream(self, stream):
        """Consume a Docker output stream, publishing its lines in batches.

        :param stream: iterable of raw ``bytes`` chunks.
        :return: ``True`` when the stream was consumed without error,
            ``False`` when a build/API error was caught.
        """
        log_lines = []
        last_emit_time = time.time()
        try:
            for log_line in stream:
                log_lines += self._prepare_log_lines(log_line)
                # One chunk can add several lines, so the buffer may jump
                # past MESSAGES_COUNT; `>=` keeps the batch size bounded where
                # `==` could miss the threshold.
                publish_cond = (
                    len(log_lines) >= publisher.MESSAGES_COUNT or
                    (log_lines and time.time() - last_emit_time > publisher.MESSAGES_TIMEOUT)
                )
                if publish_cond:
                    self._handle_logs(log_lines)
                    log_lines = []
                    last_emit_time = time.time()
            if log_lines:
                self._handle_logs(log_lines)
        except (BuildError, APIError, DockerBuilderError) as e:
            # Publish a list like every other call site, and include the
            # lines buffered before the failure so that context is not lost.
            self._handle_logs(log_lines + ['Build Error {}'.format(e)])
            return False

        return True

    def _get_requirements_path(self):
        """Return the requirements file path relative to the build context.

        ``polyaxon_requirements.txt`` is preferred over ``requirements.txt``.
        Returns ``None`` when neither exists in the repository.
        """
        def get_requirements(requirements_file):
            requirements_path = os.path.join(self.repo_path, requirements_file)
            if os.path.isfile(requirements_path):
                return os.path.join(self.folder_name, requirements_file)

        requirements = get_requirements('polyaxon_requirements.txt')
        if requirements:
            return requirements

        requirements = get_requirements('requirements.txt')
        if requirements:
            return requirements
        return None

    def _get_setup_path(self):
        """Return the setup script path relative to the build context.

        ``polyaxon_setup.sh`` is preferred over ``setup.sh``. A script that is
        found is made executable for its owner. Returns ``None`` when neither
        exists in the repository.
        """
        def get_setup(setup_file):
            setup_file_path = os.path.join(self.repo_path, setup_file)
            has_setup = os.path.isfile(setup_file_path)
            if has_setup:
                st = os.stat(setup_file_path)
                os.chmod(setup_file_path, st.st_mode | stat.S_IEXEC)
                return os.path.join(self.folder_name, setup_file)

        setup_file = get_setup('polyaxon_setup.sh')
        if setup_file:
            return setup_file

        setup_file = get_setup('setup.sh')
        if setup_file:
            return setup_file
        return None

    def render(self):
        """Render the Dockerfile template with this builder's settings."""
        docker_template = jinja2.Template(POLYAXON_DOCKER_TEMPLATE)
        return docker_template.render(
            from_image=self.from_image,
            polyaxon_requirements_path=self.polyaxon_requirements_path,
            polyaxon_setup_path=self.polyaxon_setup_path,
            build_steps=self.build_steps,
            env_vars=self.env_vars,
            folder_name=self.folder_name,
            workdir=self.WORKDIR,
            nvidia_bin=settings.MOUNT_PATHS_NVIDIA.get('bin'),
            copy_code=self.copy_code
        )

    def build(self, nocache=False, memory_limit=None):
        """Write the Dockerfile and build the image.

        :param nocache: forwarded to the Docker build call.
        :param memory_limit: optional memory limit for the build container.
        :return: result of ``_handle_log_stream`` (``True`` on success).
        """
        _logger.debug('Starting build in `%s`', self.repo_path)
        # Checkout to the correct commit
        if self.image_tag != self.LATEST_IMAGE_TAG:
            git.checkout_commit(repo_path=self.repo_path, commit=self.image_tag)

        limits = {
            # Always disable memory swap for building, since mostly
            # nothing good can come of that.
            'memswap': -1
        }
        if memory_limit:
            limits['memory'] = memory_limit

        # Create DockerFile
        with open(self.dockerfile_path, 'w') as dockerfile:
            rendered_dockerfile = self.render()
            celery_app.send_task(
                SchedulerCeleryTasks.BUILD_JOBS_SET_DOCKERFILE,
                kwargs={'build_job_uuid': self.job_uuid,
                        'dockerfile': rendered_dockerfile})
            dockerfile.write(rendered_dockerfile)

        stream = self.docker.build(
            path=self.build_path,
            tag=self.get_tagged_image(),
            forcerm=True,
            rm=True,
            pull=True,
            nocache=nocache,
            container_limits=limits)
        return self._handle_log_stream(stream=stream)

    def push(self):
        """Push the built image; return ``True`` on success."""
        stream = self.docker.push(self.image_name, tag=self.image_tag, stream=True)
        return self._handle_log_stream(stream=stream)
def execute(self):
    """Write the Dockerfile, create a Docker client, then build and publish.

    The client is created with ``version='auto'`` and stored on
    ``self.docker_client`` before ``build()`` and ``publish()`` run.
    """
    target_path = self.dockerfile_path
    parent_image = self.base_image
    write_dockerfile(dockerfile_path=target_path, base_image=parent_image)

    self.docker_client = APIClient(version='auto')

    self.build()
    self.publish()
class DockerOperator(BaseOperator):
    """
    Execute a command inside a docker container.

    A temporary directory is created on the host and
    mounted into a container to allow storing files
    that together exceed the default disk size of 10GB in a container.
    The path to the mounted directory can be accessed
    via the environment variable ``AIRFLOW_TMP_DIR``.

    If a login to a private registry is required prior to pulling the image, a
    Docker connection needs to be configured in Airflow and the connection ID
    be provided with the parameter ``docker_conn_id``.

    :param image: Docker image from which to create the container.
        If image tag is omitted, "latest" will be used.
    :type image: str
    :param api_version: Remote API version. Set to ``auto`` to automatically
        detect the server's version.
    :type api_version: str
    :param auto_remove: Auto-removal of the container on daemon side when the
        container's process exits.
        The default is False.
    :type auto_remove: bool
    :param command: Command to be run in the container. (templated)
    :type command: str or list
    :param cpus: Number of CPUs to assign to the container.
        This value gets multiplied with 1024. See
        https://docs.docker.com/engine/reference/run/#cpu-share-constraint
    :type cpus: float
    :param dns: Docker custom DNS servers
    :type dns: list[str]
    :param dns_search: Docker custom DNS search domain
    :type dns_search: list[str]
    :param docker_url: URL of the host running the docker daemon.
        Default is unix://var/run/docker.sock
    :type docker_url: str
    :param environment: Environment variables to set in the container. (templated)
    :type environment: dict
    :param force_pull: Pull the docker image on every run. Default is False.
    :type force_pull: bool
    :param mem_limit: Maximum amount of memory the container can use.
        Either a float value, which represents the limit in bytes,
        or a string like ``128m`` or ``1g``.
    :type mem_limit: float or str
    :param network_mode: Network mode for the container.
    :type network_mode: str
    :param tls_ca_cert: Path to a PEM-encoded certificate authority
        to secure the docker connection.
    :type tls_ca_cert: str
    :param tls_client_cert: Path to the PEM-encoded certificate
        used to authenticate docker client.
    :type tls_client_cert: str
    :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client.
    :type tls_client_key: str
    :param tls_hostname: Hostname to match against
        the docker server certificate or False to disable the check.
    :type tls_hostname: str or bool
    :param tls_ssl_version: Version of SSL to use when communicating with docker daemon.
    :type tls_ssl_version: str
    :param tmp_dir: Mount point inside the container to
        a temporary directory created on the host by the operator.
        The path is also made available via the environment variable
        ``AIRFLOW_TMP_DIR`` inside the container.
    :type tmp_dir: str
    :param user: Default user inside the docker container.
    :type user: int or str
    :param volumes: List of volumes to mount into the container, e.g.
        ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
    :type volumes: list
    :param working_dir: Working directory to
        set on the container (equivalent to the -w switch the docker client)
    :type working_dir: str
    :param xcom_all: Push all the stdout or just the last line.
        The default is False (last line).
    :type xcom_all: bool
    :param docker_conn_id: ID of the Airflow connection to use
    :type docker_conn_id: str
    :param shm_size: Size of ``/dev/shm`` in bytes. The size must be
        greater than 0. If omitted uses system default.
    :type shm_size: int
    """
    template_fields = ('command', 'environment',)
    template_ext = ('.sh', '.bash',)

    @apply_defaults
    def __init__(
            self,
            image,
            api_version=None,
            command=None,
            cpus=1.0,
            docker_url='unix://var/run/docker.sock',
            environment=None,
            force_pull=False,
            mem_limit=None,
            network_mode=None,
            tls_ca_cert=None,
            tls_client_cert=None,
            tls_client_key=None,
            tls_hostname=None,
            tls_ssl_version=None,
            tmp_dir='/tmp/airflow',
            user=None,
            volumes=None,
            working_dir=None,
            xcom_all=False,
            docker_conn_id=None,
            dns=None,
            dns_search=None,
            auto_remove=False,
            shm_size=None,
            *args,
            **kwargs):

        super().__init__(*args, **kwargs)
        self.api_version = api_version
        self.auto_remove = auto_remove
        self.command = command
        self.cpus = cpus
        self.dns = dns
        self.dns_search = dns_search
        self.docker_url = docker_url
        self.environment = environment or {}
        self.force_pull = force_pull
        self.image = image
        self.mem_limit = mem_limit
        self.network_mode = network_mode
        self.tls_ca_cert = tls_ca_cert
        self.tls_client_cert = tls_client_cert
        self.tls_client_key = tls_client_key
        self.tls_hostname = tls_hostname
        self.tls_ssl_version = tls_ssl_version
        self.tmp_dir = tmp_dir
        self.user = user
        self.volumes = volumes or []
        self.working_dir = working_dir
        self.xcom_all = xcom_all
        self.docker_conn_id = docker_conn_id
        self.shm_size = shm_size
        if kwargs.get('xcom_push') is not None:
            raise AirflowException("'xcom_push' was deprecated, use 'BaseOperator.do_xcom_push' instead")

        # Both are populated by execute().
        self.cli = None
        self.container = None

    def get_hook(self):
        """Return a ``DockerHook`` configured from this operator's settings."""
        return DockerHook(
            docker_conn_id=self.docker_conn_id,
            base_url=self.docker_url,
            version=self.api_version,
            tls=self.__get_tls_config()
        )

    def execute(self, context):
        """Pull the image if needed, run the container and stream its logs.

        :param context: task context supplied by the caller (unused here).
        :return: when ``do_xcom_push`` is set, either the full container logs
            (``xcom_all``) or the last log line encoded as UTF-8; otherwise
            ``None``.
        :raises AirflowException: when the container exits with a non-zero
            status code.
        """
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if self.force_pull or not self.cli.images(name=self.image):
            self.log.info('Pulling docker image %s', self.image)
            for chunk in self.cli.pull(self.image, stream=True):
                output = json.loads(chunk.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            # Keeps the last line seen so it can be returned for XCom.
            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')

    def get_command(self):
        """Return the command to run in the container.

        A string that starts with ``[`` is parsed as a Python literal with
        ``ast.literal_eval``. Any other value, including a list or ``None``,
        is returned unchanged.
        """
        # `command` is documented as "str or list"; only a string has
        # .strip(), so a list must bypass the literal-parsing branch.
        if isinstance(self.command, str) and self.command.strip().startswith('['):
            commands = ast.literal_eval(self.command)
        else:
            commands = self.command
        return commands

    def on_kill(self):
        """Stop the running container, if one has been created."""
        # The client can exist before create_container() has returned (for
        # example while pulling); there is nothing to stop in that window.
        if self.cli is not None and self.container is not None:
            self.log.info('Stopping docker container')
            self.cli.stop(self.container['Id'])

    def __get_tls_config(self):
        """Build a ``TLSConfig`` when CA cert, client cert and key are all set.

        As a side effect, ``self.docker_url`` has ``tcp://`` replaced with
        ``https://`` when TLS is configured.

        :return: the TLS configuration, or ``None`` when TLS is not configured.
        """
        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(
                ca_cert=self.tls_ca_cert,
                client_cert=(self.tls_client_cert, self.tls_client_key),
                verify=True,
                ssl_version=self.tls_ssl_version,
                assert_hostname=self.tls_hostname
            )
            self.docker_url = self.docker_url.replace('tcp://', 'https://')
        return tls_config
class DockerCli:
    """Wrapper around the Docker API client for containers of known services.

    A container is reported only when the name part of its image matches a
    service returned by ``self.config.get_service_by_name``.
    """

    def __init__(self):
        self.client = APIClient('unix://var/run/docker.sock')
        # One daemon query is issued per status listed here.
        self.filtered_statuses = ('running', 'restarting', 'paused', 'exited')
        self.config = Config()

    @staticmethod
    def _published_port(ports, service_port):
        """Return the host port published for ``service_port``, or ``None``.

        :param ports: the ``Ports`` list of a container entry.
        :param service_port: the container-side port of the service.
        """
        for port in ports:
            # Compare by value: `is` on ints only works for small cached
            # values and fails for typical port numbers.
            if port['PrivatePort'] == service_port:
                public_port = port.get('PublicPort')
                if public_port is not None:
                    return public_port
        return None

    def _get_containers(self, filters=None):
        """Yield a summary dict for every container of a configured service.

        :param filters: optional Docker filters applied on top of the status
            filter; the mapping passed in is not modified.
        """
        base_filters = dict(filters) if filters else {}
        for status in self.filtered_statuses:
            status_filters = dict(base_filters, status=status)
            for container in self.client.containers(
                    all=True,
                    filters=status_filters
            ):
                img_name, _, img_version = container['Image'].partition(':')
                service = self.config.get_service_by_name(img_name)
                if not service:
                    continue
                yield {
                    'created': container['Created'],
                    'id': container['Id'],
                    'image': img_name,
                    # Always present, so consumers never hit a missing key
                    # for a container without port entries.
                    'port': self._published_port(
                        container['Ports'], service['port']),
                    'state': container['State'],
                    'status': container['Status'],
                    'version': img_version,
                }

    def get_all_containers(self):
        """Return the summaries of all containers of configured services."""
        return [container for container in self._get_containers() if container]

    def get_container(self, by_id):
        """Return the summary of the container with the given id.

        :raises NotFoundContainerException: when no such container is found.
        """
        for container in self._get_containers({'id': by_id}):
            return container
        raise NotFoundContainerException(
            'Container was not found: {}'.format(by_id)
        )

    def create_container(self, image):
        """Create and start a container for a configured service image.

        :param image: image name (without tag) of a configured service.
        :return: the summary of the started container.
        :raises NotFoundImageException: when no service matches ``image``.
        """
        service = self.config.get_service_by_name(image)
        if service:
            container = self.client.create_container(
                image='{0}:{1}'.format(image, service['version']),
                ports=[service['port']],
                detach=True,
                host_config=self.client.create_host_config(
                    port_bindings={service['port']: None}
                )
            )
            self.client.start(container=container['Id'])
            return self.get_container(container['Id'])
        raise NotFoundImageException('Image was not found: {}'.format(image))

    def remove_container(self, by_id):
        """Force-remove a container together with its volumes.

        :return: ``{'status': 'OK'}`` on success.
        :raises NotFoundContainerException: when the daemon reports the
            container as not found.
        """
        try:
            self.client.remove_container(
                container=by_id,
                force=True,
                v=True
            )
        except errors.NotFound as e:
            raise NotFoundContainerException(e) from e
        return {'status': 'OK'}
def execute(self, context):
    """Run the configured image in a container and stream its output.

    Steps: obtain a Docker client (through the hook when a connection id is
    set, otherwise directly), pull the image when forced or not present
    locally, create and start the container with a host temporary directory
    mounted at ``self.tmp_dir``, log everything the container writes, then
    wait for it to exit.

    :param context: task context supplied by the caller (unused here).
    :return: when ``do_xcom_push`` is set, the full container logs if
        ``xcom_all`` is set, else the last output line encoded as UTF-8;
        ``None`` when ``do_xcom_push`` is not set.
    :raises AirflowException: when the container exits with a non-zero
        status code.
    """
    self.log.info('Starting docker container from image %s', self.image)

    tls_config = self.__get_tls_config()

    if not self.docker_conn_id:
        self.cli = APIClient(
            base_url=self.docker_url,
            version=self.api_version,
            tls=tls_config
        )
    else:
        self.cli = self.get_hook().get_conn()

    must_pull = self.force_pull or len(self.cli.images(name=self.image)) == 0
    if must_pull:
        self.log.info('Pulling docker image %s', self.image)
        for raw_chunk in self.cli.pull(self.image, stream=True):
            progress = json.loads(raw_chunk.decode('utf-8').strip())
            if 'status' in progress:
                self.log.info("%s", progress['status'])

    with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as host_tmp_dir:
        self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
        self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

        # Resolve the command first, matching the original evaluation order.
        container_command = self.get_command()
        host_config = self.cli.create_host_config(
            auto_remove=self.auto_remove,
            binds=self.volumes,
            network_mode=self.network_mode,
            shm_size=self.shm_size,
            dns=self.dns,
            dns_search=self.dns_search,
            cpu_shares=int(round(self.cpus * 1024)),
            mem_limit=self.mem_limit)
        self.container = self.cli.create_container(
            command=container_command,
            name=self.container_name,
            environment=self.environment,
            host_config=host_config,
            image=self.image,
            user=self.user,
            working_dir=self.working_dir
        )
        self.cli.start(self.container['Id'])

        # Remembers the most recent output line for the XCom return value.
        last_line = ''
        output_stream = self.cli.attach(container=self.container['Id'],
                                        stdout=True,
                                        stderr=True,
                                        stream=True)
        for chunk in output_stream:
            last_line = chunk.strip()
            if hasattr(last_line, 'decode'):
                last_line = last_line.decode('utf-8')
            self.log.info(last_line)

        result = self.cli.wait(self.container['Id'])
        if result['StatusCode'] != 0:
            raise AirflowException('docker container failed: ' + repr(result))

        if not self.do_xcom_push:
            return None
        # Fetching the complete logs is the expensive path; only do it when
        # all of stdout was requested.
        if self.xcom_all:
            return self.cli.logs(container=self.container['Id'])
        return last_line.encode('utf-8')
def cleanup():
    """Remove every local image matching ``TEST_ORG + "/*"``.

    A client is created from the environment with automatic API version
    detection, the matching images are listed once, and each listed entry is
    passed to ``remove_image``.
    """
    docker_client = Client(version='auto', **kwargs_from_env())
    image_pattern = TEST_ORG + "/*"
    for test_image in docker_client.images(image_pattern):
        docker_client.remove_image(test_image)
def docker():
    """Return a Docker client suited to the current environment.

    When the ``DOCKER_MACHINE_IP`` environment variable is set, the client is
    created with ``from_env(assert_hostname=False)``; otherwise an
    ``APIClient`` with automatic API version detection is returned.
    """
    if os.environ.get('DOCKER_MACHINE_IP') is None:
        return APIClient(version='auto')
    return from_env(assert_hostname=False)
class HubIO: """:class:`HubIO` provides the way to interact with Jina Hub registry. You can use it with CLI to package a directory into a Jina Hub image and publish it to the world. Examples: - :command:`jina hub build my_pod/` build the image - :command:`jina hub build my_pod/ --push` build the image and push to the public registry - :command:`jina hub pull jinahub/pod.dummy_mwu_encoder:0.0.6` to download the image """ def __init__(self, args: 'argparse.Namespace'): """Create a new HubIO. :param args: arguments """ self.logger = JinaLogger(self.__class__.__name__, **vars(args)) self.args = args self._load_docker_client() def _load_docker_client(self): with ImportExtensions( required=False, help_text= 'missing "docker" dependency, available CLIs limited to "jina hub [list, new]"' 'to enable full CLI, please do pip install "jina[docker]"', ): import docker from docker import APIClient, DockerClient self._client: DockerClient = docker.from_env() # low-level client self._raw_client = APIClient(base_url='unix://var/run/docker.sock') def new(self, no_input: bool = False) -> None: """ Create a new executor using cookiecutter template. 
:param no_input: Argument to avoid prompting dialogue (just to be used for testing) """ with ImportExtensions(required=True): from cookiecutter.main import cookiecutter import click # part of cookiecutter cookiecutter_template = self.args.template if self.args.type == 'app': cookiecutter_template = 'https://github.com/jina-ai/cookiecutter-jina.git' elif self.args.type == 'pod': cookiecutter_template = ( 'https://github.com/jina-ai/cookiecutter-jina-hub.git') try: cookiecutter( template=cookiecutter_template, overwrite_if_exists=self.args.overwrite, output_dir=self.args.output_dir, no_input=no_input, ) except click.exceptions.Abort: self.logger.info('nothing is created, bye!') def login(self) -> None: """Login using Github Device flow to allow push access to Jina Hub Registry.""" import requests with resource_stream('jina', '/'.join( ('resources', 'hubapi.yml'))) as fp: hubapi_yml = JAML.load(fp) client_id = hubapi_yml['github']['client_id'] scope = hubapi_yml['github']['scope'] device_code_url = hubapi_yml['github']['device_code_url'] access_token_url = hubapi_yml['github']['access_token_url'] grant_type = hubapi_yml['github']['grant_type'] login_max_retry = hubapi_yml['github']['login_max_retry'] headers = {'Accept': 'application/json'} code_request_body = {'client_id': client_id, 'scope': scope} try: self.logger.info( 'Jina Hub login will use Github Device to generate one time token' ) response = requests.post(url=device_code_url, headers=headers, data=code_request_body) if response.status_code != requests.codes.ok: self.logger.error( 'cannot reach github server. please make sure you\'re connected to internet' ) code_response = response.json() device_code = code_response['device_code'] user_code = code_response['user_code'] verification_uri = code_response['verification_uri'] try: self.logger.info( f'You should see a "Device Activation" page open in your browser. 
' f'If not, please go to {colored(verification_uri, "cyan", attrs=["underline"])}' ) self.logger.info( 'Please follow the steps:\n\n' f'1. Enter the following code to that page: {colored(user_code, "cyan", attrs=["bold"])}\n' '2. Click "Continue"\n' '3. Come back to this terminal\n') # allowing sometime for the user to view the message time.sleep(0.5) webbrowser.open(verification_uri, new=2) except: pass # intentional pass, browser support isn't cross-platform access_request_body = { 'client_id': client_id, 'device_code': device_code, 'grant_type': grant_type, } for _ in range(login_max_retry): access_token_response = requests.post( url=access_token_url, headers=headers, data=access_request_body).json() if access_token_response.get('error', None) == 'authorization_pending': self.logger.warning('still waiting for authorization') countdown( 10, reason=colored('re-fetch access token', 'cyan', attrs=['bold', 'reverse']), ) elif 'access_token' in access_token_response: token = { 'access_token': access_token_response['access_token'] } with open(credentials_file(), 'w') as cf: JAML.dump(token, cf) self.logger.success(f'successfully logged in!') break else: self.logger.error(f'max retries {login_max_retry} reached') except KeyError as exp: self.logger.error(f'can not read the key in response: {exp}') def list(self) -> Optional[List[Dict[str, Any]]]: """List all hub images given a filter specified by CLI. :return: list of dictionaries of images """ if self.args.local_only: return _list_local(self.logger) else: return _list( logger=self.logger, image_name=self.args.name, image_kind=self.args.kind, image_type=self.args.type, image_keywords=self.args.keywords, ) def push( self, name: Optional[str] = None, build_result: Optional[Dict] = None, ) -> None: """Push image to Jina Hub. 
:param name: name of image :param build_result: dictionary containing the build summary :return: None """ name = name or self.args.name try: # check if image exists # fail if it does if (self.args.no_overwrite and build_result and self._image_version_exists( build_result['manifest_info']['name'], build_result['manifest_info']['version'], jina_version, )): raise ImageAlreadyExists( f'Image with name {name} already exists. Will NOT overwrite.' ) else: self.logger.debug( f'Image with name {name} does not exist. Pushing now...') self._push_docker_hub(name) if not build_result: file_path = get_summary_path(name) if os.path.isfile(file_path): with open(file_path) as f: build_result = json.load(f) else: self.logger.error( f'can not find the build summary file.' f'please use "jina hub build" to build the image first ' f'before pushing.') return if build_result: if build_result.get('is_build_success', False): _register_to_mongodb(logger=self.logger, summary=build_result) if build_result.get('details', None) and build_result.get( 'build_history', None): self._write_slack_message( build_result, build_result['details'], build_result['build_history'], ) except Exception as e: self.logger.error(f'Error when trying to push image {name}: {e!r}') if isinstance(e, ImageAlreadyExists): raise e def _push_docker_hub(self, name: Optional[str] = None) -> None: """Push to Docker Hub. 
:param name: name of image """ check_registry(self.args.registry, name, self.args.repository) self._check_docker_image(name) self._docker_login() with ProgressBar(task_name=f'pushing {name}', batch_unit='') as t: for line in self._client.images.push(name, stream=True, decode=True): t.update(1) self.logger.debug(line) self.logger.success(f'🎉 {name} is now published!') share_link = f'https://api.jina.ai/hub/?jh={urllib.parse.quote_plus(name)}' try: webbrowser.open(share_link, new=2) except: # pass intentionally, dont want to bother users on opening browser failure pass finally: self.logger.info( f'Check out the usage {colored(share_link, "cyan", attrs=["underline"])} and share it with others!' ) def pull(self) -> None: """Pull docker image.""" check_registry(self.args.registry, self.args.name, self.args.repository) try: self._docker_login() with TimeContext(f'pulling {self.args.name}', self.logger): image = self._client.images.pull(self.args.name) if isinstance(image, list): image = image[0] image_tag = image.tags[0] if image.tags else '' self.logger.success( f'🎉 pulled {image_tag} ({image.short_id}) uncompressed size: {get_readable_size(image.attrs["Size"])}' ) except Exception as ex: self.logger.error( f'can not pull image {self.args.name} from {self.args.registry} due to {ex!r}' ) def _check_docker_image(self, name: str) -> None: # check local image image = self._client.images.get(name) for r in _allowed: if f'{_label_prefix}{r}' not in image.labels.keys(): self.logger.warning( f'{r} is missing in your docker image labels, you may want to check it' ) try: image.labels['ai.jina.hub.jina_version'] = jina_version label_info = ( f'{self.args.repository}/' + '{type}.{kind}.{name}:{version}-{jina_version}'.format( **{ k.replace(_label_prefix, ''): v for k, v in image.labels.items() })) safe_name = safe_url_name(label_info) if name != safe_name: raise ValueError( f'image {name} does not match with label info in the image. 
name should be {safe_name}' ) except KeyError as e: self.logger.error( f'missing key in the label of the image {repr(e)}') raise self.logger.info( f'✅ {name} is a valid Jina Hub image, ready to publish') def _docker_login(self) -> None: """Log-in to Docker.""" from docker.errors import APIError if not (self.args.username and self.args.password): self.args.username, self.args.password = _fetch_docker_auth( logger=self.logger) try: self._client.login( username=self.args.username, password=self.args.password, registry=self.args.registry, ) self.logger.debug(f'successfully logged in to docker hub') except APIError: raise DockerLoginFailed( f'invalid credentials passed. docker login failed') def build(self) -> Dict: """ Perform a build of the Docker image. :return: dictionary with information on image (manifest) """ if self.args.dry_run: result = self.dry_run() else: is_build_success, is_push_success = True, False _logs = [] _except_strs = [] _excepts = [] with TimeContext(f'building {colored(self.args.path, "green")}', self.logger) as tc: try: _check_result = self._check_completeness() self._freeze_jina_version() _dockerfile = os.path.basename(_check_result['Dockerfile']) _labels = { _label_prefix + k: str(v) for k, v in self.manifest.items() } streamer = self._raw_client.build( decode=True, path=self.args.path, tag=self.tag, pull=self.args.pull, dockerfile=_dockerfile, labels=_labels, rm=True, ) for chunk in streamer: if 'stream' in chunk: for line in chunk['stream'].splitlines(): if is_error_message(line): self.logger.critical(line) _except_strs.append(line) elif 'warning' in line.lower(): self.logger.warning(line) else: self.logger.info(line) _logs.append(line) except Exception as ex: # if pytest fails it should end up here as well is_build_success = False ex = HubBuilderBuildError(ex) _except_strs.append(repr(ex)) _excepts.append(ex) if is_build_success: # compile it again, but this time don't show the log image, log = self._client.images.build( 
path=self.args.path, tag=self.tag, pull=self.args.pull, dockerfile=_dockerfile, labels=_labels, rm=True, ) # success _details = { 'inspect': self._raw_client.inspect_image(image.tags[0]), 'tag': image.tags[0], 'hash': image.short_id, 'size': get_readable_size(image.attrs['Size']), } self.logger.success( '🎉 built {tag} ({hash}) uncompressed size: {size}'. format_map(_details)) else: self.logger.error( f'can not build the image, please double check the log') _details = {} if is_build_success: if self.args.test_uses: p_names = [] try: is_build_success = False p_names, failed_test_levels = HubIO._test_build( image, self.args.test_level, self.config_yaml_path, self.args.timeout_ready, self.args.daemon, self.logger, ) if any(test_level in failed_test_levels for test_level in [ BuildTestLevel.POD_DOCKER, BuildTestLevel.FLOW, ]): is_build_success = False self.logger.error( f'build unsuccessful, failed at {str(failed_test_levels)} level' ) else: is_build_success = True self.logger.warning( f'Build successful. Tests failed at : {str(failed_test_levels)} levels. 
' f'This could be due to the fact that the executor has non-installed external dependencies' ) except Exception as ex: self.logger.error( f'something wrong while testing the build: {ex!r}') ex = HubBuilderTestError(ex) _except_strs.append(repr(ex)) _excepts.append(ex) finally: if self.args.daemon: try: for p in p_names: self._raw_client.stop(p) except: pass # suppress on purpose self._raw_client.prune_containers() info, env_info = get_full_version() _host_info = { 'jina': info, 'jina_envs': env_info, 'docker': self._raw_client.info(), 'build_args': vars(self.args), } _build_history = { 'time': get_now_timestamp(), 'host_info': _host_info if is_build_success and self.args.host_info else '', 'duration': tc.readable_duration, 'logs': _logs, 'exception': _except_strs, } if self.args.prune_images: self.logger.info('deleting unused images') self._raw_client.prune_images() # since db tracks `version` & `jina_version` on the top level, let's get rid of them in `manifest` if is_build_success: _version = self.manifest['version'] self.manifest.pop('version', None) self.manifest.pop('jina_version', None) else: _version = '0.0.1' result = { 'name': self.executor_name if is_build_success else '', 'version': _version, 'jina_version': jina_version, 'path': self.args.path, 'manifest_info': self.manifest if is_build_success else '', 'details': _details, 'is_build_success': is_build_success, 'build_history': _build_history, } # only successful build (NOT dry run) writes the summary to disk if result['is_build_success']: self._write_summary_to_file(summary=result) if self.args.push: self.push(image.tags[0], result) if not result['is_build_success'] and self.args.raise_error: # remove the very verbose build log when throw error if 'build_history' in result: result['build_history'].pop('logs', None) raise HubBuilderError(_excepts) return result @staticmethod def _test_build( image, # type docker image object test_level: 'BuildTestLevel', config_yaml_path: str, timeout_ready: int, 
daemon_arg: bool, logger: 'JinaLogger', ): p_names = [] failed_levels = [] logger.info(f'run tests using test level {test_level}') # test uses at executor level if test_level >= BuildTestLevel.EXECUTOR: logger.info( f'test to initialize an executor from yaml configuration: {config_yaml_path}' ) try: with BaseExecutor.load_config(config_yaml_path): pass logger.info(f'successfully tested to initialize an executor') except: logger.error(f'failed to initialize an executor') failed_levels.append(BuildTestLevel.EXECUTOR) # test uses at Pod level (no docker) if test_level >= BuildTestLevel.POD_NONDOCKER: logger.info( f'test to initialize a pod from yaml configuration: {config_yaml_path}' ) try: with Pod(set_pod_parser().parse_args([ '--uses', config_yaml_path, '--timeout-ready', str(timeout_ready), ])): pass logger.info( f'successfully tested to initialize a pod from yaml configuration' ) except: logger.error(f'failed to initialize a pod') failed_levels.append(BuildTestLevel.POD_NONDOCKER) # test uses at Pod level (with docker) if test_level >= BuildTestLevel.POD_DOCKER: p_name = random_name() logger.info( f'test to initialize a pod via docker image {image.tags[0]} named {p_name}' ) try: with Pod(set_pod_parser().parse_args([ '--uses', f'docker://{image.tags[0]}', '--name', p_name, '--timeout-ready', str(timeout_ready), ] + ['--daemon'] if daemon_arg else [])): pass p_names.append(p_name) logger.info( f'successfully tested to initialize a pod via docker') except: logger.error(f'failed to initialize a pod via docker image') failed_levels.append(BuildTestLevel.POD_DOCKER) # test uses at Flow level if test_level >= BuildTestLevel.FLOW: p_name = random_name() logger.info( f'test to build a flow from docker image {image.tags[0]} named {p_name} ' f'with daemon={daemon_arg} and timeout_ready={timeout_ready}') try: with Flow().add( name=p_name, uses=f'docker://{image.tags[0]}', daemon=daemon_arg, timeout_ready=timeout_ready, ): pass p_names.append(p_name) logger.info( 
'successfully tested to build a flow from docker image') except: logger.error(f'failed to build a flow from docker image') failed_levels.append(BuildTestLevel.FLOW) return p_names, failed_levels def dry_run(self) -> Dict: """ Perform a dry-run. :return: a dict with the manifest info. """ try: s = self._check_completeness() s['is_build_success'] = True except Exception as ex: s = {'is_build_success': False, 'exception': str(ex)} return s def _write_summary_to_file(self, summary: Dict) -> None: file_path = get_summary_path(f'{summary["name"]}:{summary["version"]}') with open(file_path, 'w+') as f: json.dump(summary, f) self.logger.debug(f'stored the summary from build to {file_path}') def _freeze_jina_version(self) -> None: import pkg_resources requirements_path = get_exist_path(self.args.path, 'requirements.txt') if requirements_path and os.path.exists(requirements_path): new_requirements = [] update = False with open(requirements_path, 'r') as fp: requirements = pkg_resources.parse_requirements(fp) for req in requirements: if 'jina' in str(req): update = True self.logger.info( f'Freezing jina version to {jina_version}') new_requirements.append(f'jina=={jina_version}') else: new_requirements.append(str(req)) if update: with open(requirements_path, 'w') as fp: fp.write('\n'.join(new_requirements)) def _check_completeness(self) -> Dict: dockerfile_path = get_exist_path(self.args.path, self.args.file) manifest_path = get_exist_path(self.args.path, 'manifest.yml') self.config_yaml_path = get_exist_path(self.args.path, 'config.yml') readme_path = get_exist_path(self.args.path, 'README.md') requirements_path = get_exist_path(self.args.path, 'requirements.txt') yaml_glob = set(glob.glob(os.path.join(self.args.path, '*.yml'))) yaml_glob.difference_update({manifest_path, self.config_yaml_path}) if not self.config_yaml_path: self.config_yaml_path = yaml_glob.pop() py_glob = glob.glob(os.path.join(self.args.path, '*.py')) test_glob = glob.glob(os.path.join(self.args.path, 
'tests/test_*.py')) completeness = { 'Dockerfile': dockerfile_path, 'manifest.yml': manifest_path, 'config.yml': self.config_yaml_path, 'README.md': readme_path, 'requirements.txt': requirements_path, '*.yml': yaml_glob, '*.py': py_glob, 'tests': test_glob, } self.logger.info(f'completeness check\n' + '\n'.join( f'{colored("✓", "green") if v else colored("✗", "red"):>4} {k:<20} {v}' for k, v in completeness.items()) + '\n') if not (completeness['Dockerfile'] and completeness['manifest.yml']): self.logger.critical( 'Dockerfile or manifest.yml is not given, can not build') raise FileNotFoundError( 'Dockerfile or manifest.yml is not given, can not build') self.manifest = self._read_manifest(manifest_path) self.manifest['jina_version'] = jina_version self.executor_name = safe_url_name( f'{self.args.repository}/' + f'{self.manifest["type"]}.{self.manifest["kind"]}.{self.manifest["name"]}' ) self.tag = self.executor_name + f':{self.manifest["version"]}-{jina_version}' return completeness def _read_manifest(self, path: str, validate: bool = True) -> Dict: with resource_stream( 'jina', '/'.join( ('resources', 'hub-builder', 'manifest.yml'))) as fp: tmp = JAML.load( fp ) # do not expand variables at here, i.e. 
DO NOT USE expand_dict(yaml.load(fp)) with open(path) as fp: tmp.update(JAML.load(fp)) if validate: self._validate_manifest(tmp) return tmp def _validate_manifest(self, manifest: Dict) -> None: required = {'name', 'type', 'version'} # check the required field in manifest for r in required: if r not in manifest: raise ValueError( f'{r} is missing in the manifest.yaml, it is required') # check if all fields are there for r in _allowed: if r not in manifest: self.logger.warning( f'{r} is missing in your manifest.yml, you may want to check it' ) # check name check_name(manifest['name']) # check_image_type check_image_type(manifest['type']) # check version number check_version(manifest['version']) # check version number check_license(manifest['license']) # check platform if not isinstance(manifest['platform'], list): manifest['platform'] = list(manifest['platform']) check_platform(manifest['platform']) # replace all chars in value to safe chars for k, v in manifest.items(): if v and isinstance(v, str): manifest[k] = remove_control_characters(v) # show manifest key-values for k, v in manifest.items(): self.logger.debug(f'{k}: {v}') def _write_slack_message(self, *args): def _expand_fn(v): if isinstance(v, str): for d in args: try: v = v.format(**d) except KeyError: pass return v if 'JINAHUB_SLACK_WEBHOOK' in os.environ: with resource_stream( 'jina', '/'.join(('resources', 'hub-builder-success', 'slack-jinahub.json')), ) as fp: tmp = expand_dict(json.load(fp), _expand_fn, resolve_cycle_ref=False) req = urllib.request.Request( os.environ['JINAHUB_SLACK_WEBHOOK']) req.add_header('Content-Type', 'application/json; charset=utf-8') jdb = json.dumps(tmp).encode('utf-8') # needs to be bytes req.add_header('Content-Length', str(len(jdb))) with urllib.request.urlopen(req, jdb) as f: res = f.read() self.logger.info(f'push to Slack: {res}') # alias of "new" in cli create = new init = new def _image_version_exists(self, name, module_version, req_jina_version): manifests = 
_list(self.logger, name) # check if matching module version and jina version exists if manifests: matching = [ m for m in manifests if m['version'] == module_version and 'jina_version' in m.keys() and m['jina_version'] == req_jina_version ] return len(matching) > 0 return False
def __init__(self):
    """Create the low-level Docker API client used by this object.

    The client is built with ``version='auto'`` and kept on
    ``self.docker_client``.
    """
    api_client = APIClient(version='auto')
    self.docker_client = api_client
def check_image(build_job):
    """Query the Docker daemon for images matching a build job's tagged image.

    :param build_job: object handed to ``get_tagged_image`` to obtain the
        image name to look up.
    :return: the result of ``APIClient.images`` for that name.
    """
    # Imported lazily so that the docker package is only needed when called.
    from docker import APIClient

    api_client = APIClient(version='auto')
    tagged_image = get_tagged_image(build_job)
    return api_client.images(tagged_image)
def remove_all_containers(client: APIClient):
    """Force-remove every container returned by ``client.containers(quiet=True)``.

    :param client: Docker API client used both to list and to remove containers.

    NOTE(review): the listing is requested without ``all=True``; presumably
    only running containers are returned -- confirm against the docker-py docs.
    """
    listed = client.containers(quiet=True)
    for entry in listed:
        # Each listed entry is passed through unchanged to the removal call.
        client.remove_container(entry, force=True)
from containers import main
from docker import APIClient
import os, appinit

# Module-level client talking to the local Docker daemon socket.
client = APIClient(base_url="unix://var/run/docker.sock")


def run(service, force=False):
    """Build the ``<docker-prefix>-base:latest`` image and echo the build output.

    The build context and Dockerfile are taken from the ``docker/base``
    directory located next to the installed ``appinit`` package.

    :param service: not used by this function; kept so existing callers
        continue to work.
    :param force: when true the image is built with ``nocache`` enabled.
    :raises RuntimeError: if the build stream reports an ``error`` entry.
        Previously such entries were silently ignored and the function
        printed the "Done" message even though the build had failed.
    """
    path = os.path.dirname(appinit.__file__)
    prefix = main.settings.get_variable("docker-prefix")
    image_name = '%s-base:latest' % prefix

    print("Building base image.")
    kwargs = {
        'nocache': force,
        'decode': True,
        'forcerm': True,
        'path': path + "/docker/base/",
        'dockerfile': path + "/docker/base/Dockerfile",
        'rm': True,
        'tag': image_name,
    }

    for line in client.build(**kwargs):
        if "stream" in line:
            print(line['stream'])
        elif "error" in line:
            # Surface build failures instead of reporting success below.
            raise RuntimeError(
                "Building base image %s failed: %s" % (image_name, line['error']))

    print("Done -- building base image.")
def build(self):
    """Build the image through a local Docker client and optionally publish it.

    A fresh ``APIClient`` is stored on ``self.docker_client`` before
    ``self._build()`` runs; ``self.publish()`` is called afterwards only
    when ``self.push`` is truthy.
    """
    logging.info("Building image using docker")
    self.docker_client = APIClient(version='auto')
    self._build()
    if not self.push:
        return
    self.publish()
class DockerBuilder(BaseBuilder):
    """A builder using the local Docker client"""

    def __init__(self,
                 registry=None,
                 image_name=constants.DEFAULT_IMAGE_NAME,
                 base_image=constants.DEFAULT_BASE_IMAGE,
                 preprocessor=None,
                 push=True,
                 dockerfile_path=None):
        """Initialise the builder.

        :param registry: forwarded to ``BaseBuilder``.
        :param image_name: forwarded to ``BaseBuilder``.
        :param base_image: forwarded to ``BaseBuilder``.
        :param preprocessor: forwarded to ``BaseBuilder``; used by ``_build``
            to obtain the command, the requirements check and the build context.
        :param push: forwarded to ``BaseBuilder``; ``build`` publishes the
            image when ``self.push`` is truthy.
        :param dockerfile_path: path handed to ``dockerfile.write_dockerfile``
            by ``_build``.
        """
        super().__init__(
            registry=registry,
            image_name=image_name,
            push=push,
            base_image=base_image,
            preprocessor=preprocessor,
        )
        # ``_build`` reads ``self.dockerfile_path`` but the argument was never
        # stored, so a caller-supplied path was silently dropped. Keep any value
        # the base class may have set when no explicit path was given.
        if dockerfile_path is not None or not hasattr(self, 'dockerfile_path'):
            self.dockerfile_path = dockerfile_path

    def build(self):
        """Create the Docker client, build the image and publish it if requested."""
        logging.info("Building image using docker")
        self.docker_client = APIClient(version='auto')
        self._build()
        if self.push:
            self.publish()

    def _build(self):
        """Write the Dockerfile, assemble the build context and run the build.

        Sets ``self.image_tag`` from the hash of the build context and feeds
        every chunk of build output to ``_process_stream``.
        """
        docker_command = self.preprocessor.get_command()
        logger.warning("Docker command: {}".format(docker_command))
        if not docker_command:
            logger.warning(
                "Not setting a command for the output docker image.")
        install_reqs_before_copy = self.preprocessor.is_requirements_txt_file_present()
        dockerfile_path = dockerfile.write_dockerfile(
            docker_command=docker_command,
            dockerfile_path=self.dockerfile_path,
            path_prefix=self.preprocessor.path_prefix,
            base_image=self.base_image,
            install_reqs_before_copy=install_reqs_before_copy)
        # Register the generated file under the name 'Dockerfile' in the context.
        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'
        context_file, context_hash = self.preprocessor.context_tar_gz()
        self.image_tag = self.full_image_name(context_hash)
        # Logger.warn is a deprecated alias; use warning().
        logger.warning('Building docker image {}...'.format(self.image_tag))
        with open(context_file, 'rb') as fileobj:
            bld = self.docker_client.build(path='.',
                                           custom_context=True,
                                           fileobj=fileobj,
                                           tag=self.image_tag,
                                           encoding='utf-8')
            for line in bld:
                self._process_stream(line)

    def publish(self):
        """Push ``self.image_tag`` and log the streamed push output."""
        logger.warning('Publishing image {}...'.format(self.image_tag))
        for line in self.docker_client.push(self.image_tag, stream=True):
            self._process_stream(line)

    def _process_stream(self, line):
        """Decode one chunk of build/push output and log each JSON entry.

        :param line: bytes chunk; it may contain several newline-separated
            JSON documents.
        :raises Exception: when an entry carries a truthy ``error`` field.

        Entries that are not valid JSON are logged as warnings and skipped.
        """
        raw = line.decode('utf-8').strip()
        lns = raw.split('\n')
        for ln in lns:
            try:
                ljson = json.loads(ln)
                if ljson.get('error'):
                    msg = str(ljson.get('error', ljson))
                    logger.error('Build failed: ' + msg)
                    raise Exception('Image build failed: ' + msg)
                else:
                    if ljson.get('stream'):
                        msg = 'Build output: {}'.format(
                            ljson['stream'].strip())
                    elif ljson.get('status'):
                        msg = 'Push output: {} {}'.format(
                            ljson['status'], ljson.get('progress'))
                    elif ljson.get('aux'):
                        msg = 'Push finished: {}'.format(ljson.get('aux'))
                    else:
                        msg = str(ljson)
                    logger.info(msg)
            except json.JSONDecodeError:
                logger.warning('JSON decode error: {}'.format(ln))
def execute_raw(self, context):
    """Run the configured image in a container and collect its stdout lines.

    Modified only to use the get_host_tmp_dir helper.

    :param context: Airflow context; only ``context.get("ts")`` is read here.
    :return: ``None`` unless ``self.xcom_push_flag`` is set; otherwise all
        collected log lines when ``self.xcom_all`` is set, else the last
        line as a string.
    :raises AirflowException: when the container exits with a non-zero status.
    """
    self.log.info("Starting docker container from image %s", self.image)

    tls_config = self.__get_tls_config()

    # Prefer the hook's connection when a docker connection id is configured.
    if self.docker_conn_id:
        self.cli = self.get_hook().get_conn()
    else:
        self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)

    # Pull when forced, or when no local image matches the name.
    if self.force_pull or not self.cli.images(name=self.image):
        self.log.info("Pulling docker image %s", self.image)
        for raw_chunk in self.cli.pull(self.image, stream=True):
            progress = seven.json.loads(raw_chunk.decode("utf-8").strip())
            if "status" in progress:
                self.log.info("%s", progress["status"])

    with self.get_host_tmp_dir() as host_tmp_dir:
        self.environment["AIRFLOW_TMP_DIR"] = self.tmp_dir
        bind_spec = "{0}:{1}".format(host_tmp_dir, self.tmp_dir)
        self.volumes.append(bind_spec)

        # The command is resolved before the host config is assembled.
        container_command = self.get_docker_command(context.get("ts"))
        host_config = self.cli.create_host_config(
            auto_remove=self.auto_remove,
            binds=self.volumes,
            network_mode=self.network_mode,
            shm_size=self.shm_size,
            dns=self.dns,
            dns_search=self.dns_search,
            cpu_shares=int(round(self.cpus * 1024)),
            mem_limit=self.mem_limit,
        )
        self.container = self.cli.create_container(
            command=container_command,
            environment=self.environment,
            host_config=host_config,
            image=self.image,
            user=self.user,
            working_dir=self.working_dir,
        )
        container_id = self.container["Id"]
        self.cli.start(container_id)

        collected = []
        last_line = ""
        log_stream = self.cli.logs(
            container=self.container["Id"], stream=True, stdout=True, stderr=False
        )
        for chunk in log_stream:
            last_line = chunk.strip()
            # Chunks may arrive as bytes; normalise to text before logging.
            if hasattr(last_line, "decode"):
                last_line = last_line.decode("utf-8")
            self.log.info(last_line)
            collected.append(last_line)

        result = self.cli.wait(self.container["Id"])
        if result["StatusCode"] != 0:
            raise AirflowException(
                "docker container failed with result: {result} and logs: {logs}".format(
                    result=repr(result), logs="\n".join(collected)
                )
            )

        if not self.xcom_push_flag:
            return None
        # Try to avoid any kind of race condition?
        if self.xcom_all:
            return collected
        return str(last_line)
from invoke import task from docker import APIClient import os from invoke_tools import lxc, system, vcs cli = APIClient(base_url='unix://var/run/docker.sock', timeout=600, version="auto") def __check_branch(): if os.getenv("TRAVIS_PULL_REQUEST") != "false": exit("This is a PR, so not deploying.") if os.getenv("TRAVIS_BRANCH") == "master": return "production" elif os.getenv("TRAVIS_BRANCH") == "develop": return "beta" else: exit("Not master or develop, so not deploying.") @task def build_dev_image(ctx): """ Builds development image to run tests on """ git = vcs.Git() version = git.get_version() lxc.Docker.build(
class DagsterDockerOperator(DockerOperator):
    """Dagster operator for Apache Airflow.

    Wraps a modified DockerOperator incorporating https://github.com/apache/airflow/pull/4315.

    Unlike the standard DockerOperator, this operator also supports config using
    docker.from_env, so if a Docker client can be initialized that way it isn't necessary to
    explicitly set docker_url, tls_config, or api_version.

    Incorporates https://github.com/apache/airflow/pull/4315/ and an implementation of
    https://issues.apache.org/jira/browse/AIRFLOW-3825.

    Parameters:
    host_tmp_dir (str): Specify the location of the temporary directory on the host which will
        be mapped to tmp_dir. If not provided defaults to using the standard system temp directory.
    """

    def __init__(self, dagster_operator_parameters, *args):
        """Configure the operator from a bundle of Dagster operator parameters.

        ``tmp_dir`` and ``host_tmp_dir`` are popped from the parameters' ``op_kwargs``; the
        remaining entries are forwarded to ``DockerOperator.__init__`` as keyword arguments.
        """
        kwargs = dagster_operator_parameters.op_kwargs
        tmp_dir = kwargs.pop("tmp_dir", DOCKER_TEMPDIR)
        host_tmp_dir = kwargs.pop("host_tmp_dir", seven.get_system_temp_directory())
        self.host_tmp_dir = host_tmp_dir

        run_config = dagster_operator_parameters.run_config
        if "filesystem" in run_config["intermediate_storage"]:
            # Warn when an explicit base_dir differs from tmp_dir; otherwise point the
            # filesystem storage config at tmp_dir.
            if (
                "config" in (run_config["intermediate_storage"].get("filesystem", {}) or {})
                and "base_dir"
                in (
                    (run_config["intermediate_storage"].get("filesystem", {}) or {}).get(
                        "config", {}
                    )
                    or {}
                )
                and run_config["intermediate_storage"]["filesystem"]["config"]["base_dir"]
                != tmp_dir
            ):
                warnings.warn(
                    "Found base_dir '{base_dir}' set in filesystem storage config, which was not "
                    "the tmp_dir we expected ('{tmp_dir}', mounting host_tmp_dir "
                    "'{host_tmp_dir}' from the host). We assume you know what you are doing, but "
                    "if you are having trouble executing containerized workloads, this may be the "
                    "issue".format(
                        base_dir=run_config["intermediate_storage"]["filesystem"]["config"][
                            "base_dir"
                        ],
                        tmp_dir=tmp_dir,
                        host_tmp_dir=host_tmp_dir,
                    )
                )
            else:
                run_config["intermediate_storage"]["filesystem"] = dict(
                    run_config["intermediate_storage"]["filesystem"] or {},
                    **{
                        "config": dict(
                            (
                                (
                                    run_config["intermediate_storage"].get("filesystem", {}) or {}
                                ).get("config", {})
                                or {}
                            ),
                            **{"base_dir": tmp_dir},
                        )
                    },
                )

        # Remember whether the caller supplied a connection id before kwargs is altered below.
        self.docker_conn_id_set = kwargs.get("docker_conn_id") is not None
        self.run_config = run_config
        self.pipeline_name = dagster_operator_parameters.pipeline_name
        self.pipeline_snapshot = dagster_operator_parameters.pipeline_snapshot
        self.execution_plan_snapshot = dagster_operator_parameters.execution_plan_snapshot
        self.parent_pipeline_snapshot = dagster_operator_parameters.parent_pipeline_snapshot
        self.mode = dagster_operator_parameters.mode
        self.step_keys = dagster_operator_parameters.step_keys
        self.recon_repo = dagster_operator_parameters.recon_repo
        self._run_id = None

        self.instance_ref = dagster_operator_parameters.instance_ref
        check.invariant(self.instance_ref)
        self.instance = DagsterInstance.from_ref(self.instance_ref)

        # These shenanigans are so we can override DockerOperator.get_hook in order to configure
        # a docker client using docker.from_env, rather than messing with the logic of
        # DockerOperator.execute
        if not self.docker_conn_id_set:
            try:
                from_env().version()
            except Exception:  # pylint: disable=broad-except
                pass
            else:
                # A truthy placeholder makes execute_raw take the get_hook() branch.
                kwargs["docker_conn_id"] = True

        if "environment" not in kwargs:
            kwargs["environment"] = get_aws_environment()

        super(DagsterDockerOperator, self).__init__(
            task_id=dagster_operator_parameters.task_id,
            dag=dagster_operator_parameters.dag,
            tmp_dir=tmp_dir,
            host_tmp_dir=host_tmp_dir,
            xcom_push=True,
            # We do this because log lines won't necessarily be emitted in order (!) -- so we can't
            # just check the last log line to see if it's JSON.
            xcom_all=True,
            *args,
            **kwargs,
        )

    @contextmanager
    def get_host_tmp_dir(self):
        """Context manager that yields the configured ``host_tmp_dir``."""
        yield self.host_tmp_dir

    def execute_raw(self, context):
        """Modified only to use the get_host_tmp_dir helper."""
        self.log.info("Starting docker container from image %s", self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)

        # Pull when forced, or when no local image matches the name.
        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info("Pulling docker image %s", self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = seven.json.loads(l.decode("utf-8").strip())
                if "status" in output:
                    self.log.info("%s", output["status"])

        with self.get_host_tmp_dir() as host_tmp_dir:
            self.environment["AIRFLOW_TMP_DIR"] = self.tmp_dir
            self.volumes.append("{0}:{1}".format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_docker_command(context.get("ts")),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit,
                ),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir,
            )
            self.cli.start(self.container["Id"])

            # Collect every stdout line; `line` keeps the most recent one.
            res = []
            line = ""
            for new_line in self.cli.logs(
                container=self.container["Id"], stream=True, stdout=True, stderr=False
            ):
                line = new_line.strip()
                if hasattr(line, "decode"):
                    line = line.decode("utf-8")
                self.log.info(line)
                res.append(line)

            result = self.cli.wait(self.container["Id"])
            if result["StatusCode"] != 0:
                raise AirflowException(
                    "docker container failed with result: {result} and logs: {logs}".format(
                        result=repr(result), logs="\n".join(res)
                    )
                )

            if self.xcom_push_flag:
                # Try to avoid any kind of race condition?
                return res if self.xcom_all else str(line)

    # This is a class-private name on DockerOperator for no good reason --
    # all that the status quo does is inhibit extension of the class.
    # See https://issues.apache.org/jira/browse/AIRFLOW-3880
    def __get_tls_config(self):
        """Delegate to the name-mangled private method of ``DockerOperator``."""
        # pylint: disable=no-member
        return super(DagsterDockerOperator, self)._DockerOperator__get_tls_config()

    @property
    def run_id(self):
        """Current run id, or an empty string when none has been set."""
        if self._run_id is None:
            return ""
        else:
            return self._run_id

    def query(self, airflow_ts):
        """Build the ``dagster api execute_step`` command line for this operator's steps."""
        check.opt_str_param(airflow_ts, "airflow_ts")

        recon_pipeline = self.recon_repo.get_reconstructable_pipeline(self.pipeline_name)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=recon_pipeline.get_python_origin(),
                pipeline_run_id=self.run_id,
                instance_ref=self.instance_ref,
                step_keys_to_execute=self.step_keys,
            )
        )

        command = "dagster api execute_step {}".format(json.dumps(input_json))
        self.log.info("Executing: {command}\n".format(command=command))
        return command

    def get_docker_command(self, airflow_ts):
        """Deliberately renamed from get_command to avoid shadowing the method of the base class"""
        check.opt_str_param(airflow_ts, "airflow_ts")

        # A command whose stripped text starts with "[" is parsed as a Python literal;
        # any other explicit command is used as is; with no command the Dagster query is used.
        if self.command is not None and self.command.strip().find("[") == 0:
            commands = ast.literal_eval(self.command)
        elif self.command is not None:
            commands = self.command
        else:
            commands = self.query(airflow_ts)
        return commands

    def get_hook(self):
        """Return the base class hook, or a stand-in whose connection comes from ``from_env``."""
        if self.docker_conn_id_set:
            return super(DagsterDockerOperator, self).get_hook()

        class _DummyHook:
            def get_conn(self):
                return from_env().api

        return _DummyHook()

    def execute(self, context):
        """Register the run with the Dagster instance, run the container and parse its events.

        Raises ``AirflowException`` when the container produced no output or the output cannot
        be deserialized, and ``AirflowSkipException`` when the only event is a skip event.
        """
        if "run_id" in self.params:
            self._run_id = self.params["run_id"]
        elif "dag_run" in context and context["dag_run"] is not None:
            self._run_id = context["dag_run"].run_id

        try:
            tags = {AIRFLOW_EXECUTION_DATE_STR: context.get("ts")} if "ts" in context else {}

            self.instance.register_managed_run(
                pipeline_name=self.pipeline_name,
                run_id=self.run_id,
                run_config=self.run_config,
                mode=self.mode,
                solids_to_execute=None,
                step_keys_to_execute=None,
                tags=tags,
                root_run_id=None,
                parent_run_id=None,
                pipeline_snapshot=self.pipeline_snapshot,
                execution_plan_snapshot=self.execution_plan_snapshot,
                parent_pipeline_snapshot=self.parent_pipeline_snapshot,
            )

            res = self.execute_raw(context)
            self.log.info("Finished executing container.")

            if not res:
                raise AirflowException("Missing query response")

            try:
                # Empty log lines are skipped before deserialization.
                events = [deserialize_json_to_dagster_namedtuple(line) for line in res if line]

            except Exception:  # pylint: disable=broad-except
                raise AirflowException(
                    "Could not parse response {response}".format(response=repr(res))
                )

            if len(events) == 1 and isinstance(events[0], StepExecutionSkipped):
                raise AirflowSkipException(
                    "Dagster emitted skip event, skipping execution in Airflow"
                )

            check_events_for_failures(events)
            check_events_for_skips(events)

            return events

        finally:
            # The run id is only valid for the duration of a single execute() call.
            self._run_id = None
class HubIO: """ :class:`HubIO` provides the way to interact with Jina Hub registry. You can use it with CLI to package a directory into a Jina Hub image and publish it to the world. Examples: - :command:`jina hub build my_pod/` build the image - :command:`jina hub build my_pod/ --push` build the image and push to the public registry - :command:`jina hub pull jinahub/pod.dummy_mwu_encoder:0.0.6` to download the image """ def __init__(self, args: 'argparse.Namespace'): self.logger = get_logger(self.__class__.__name__, **vars(args)) self.args = args try: import docker from docker import APIClient self._client = docker.from_env() # low-level client self._raw_client = APIClient(base_url='unix://var/run/docker.sock') except (ImportError, ModuleNotFoundError): self.logger.critical( 'requires "docker" dependency, please install it via "pip install jina[docker]"' ) raise def new(self) -> None: """Create a new executor using cookiecutter template """ try: from cookiecutter.main import cookiecutter except (ImportError, ModuleNotFoundError): self.logger.critical( 'requires "cookiecutter" dependency, please install it via "pip install cookiecutter"' ) raise import click cookiecutter_template = self.args.template if self.args.type == 'app': cookiecutter_template = 'https://github.com/jina-ai/cookiecutter-jina.git' elif self.args.type == 'pod': cookiecutter_template = 'https://github.com/jina-ai/cookiecutter-jina-hub.git' cookiecutter(cookiecutter_template, overwrite_if_exists=self.args.overwrite, output_dir=self.args.output_dir) try: cookiecutter(cookiecutter_template, overwrite_if_exists=self.args.overwrite, output_dir=self.args.output_dir) except click.exceptions.Abort: self.logger.info('nothing is created, bye!') def push(self, name: str = None, readme_path: str = None) -> None: """ A wrapper of docker push - Checks for the tempfile, returns without push if it cannot find - Pushes to docker hub, returns withput writing to db if it fails - Writes to the db """ name = name or 
self.args.name file_path = get_summary_path(name) if not os.path.isfile(file_path): self.logger.error(f'can not find the build summary file') return try: self._push_docker_hub(name, readme_path) except: self.logger.error('can not push to the docker hub registry') return with open(file_path) as f: result = json.load(f) if result['is_build_success']: self._write_summary_to_db(summary=result) def _push_docker_hub(self, name: str = None, readme_path: str = None) -> None: """ Helper push function """ check_registry(self.args.registry, name, _repo_prefix) self._check_docker_image(name) self.login() with ProgressBar(task_name=f'pushing {name}', batch_unit='') as t: for line in self._client.images.push(name, stream=True, decode=True): t.update(1) self.logger.debug(line) self.logger.success(f'🎉 {name} is now published!') if False and readme_path: # unfortunately Docker Hub Personal Access Tokens cannot be used as they are not supported by the API _volumes = { os.path.dirname(os.path.abspath(readme_path)): { 'bind': '/workspace' } } _env = { 'DOCKERHUB_USERNAME': self.args.username, 'DOCKERHUB_PASSWORD': self.args.password, 'DOCKERHUB_REPOSITORY': name.split(':')[0], 'README_FILEPATH': '/workspace/README.md', } self._client.containers.run('peterevans/dockerhub-description:2.1', auto_remove=True, volumes=_volumes, environment=_env) share_link = f'https://api.jina.ai/hub/?jh={urllib.parse.quote_plus(name)}' try: webbrowser.open(share_link, new=2) except: pass finally: self.logger.info( f'Check out the usage {colored(share_link, "cyan", attrs=["underline"])} and share it with others!' 
) def pull(self) -> None: """A wrapper of docker pull """ check_registry(self.args.registry, self.args.name, _repo_prefix) self.login() try: with TimeContext(f'pulling {self.args.name}', self.logger): image = self._client.images.pull(self.args.name) if isinstance(image, list): image = image[0] image_tag = image.tags[0] if image.tags else '' self.logger.success( f'🎉 pulled {image_tag} ({image.short_id}) uncompressed size: {get_readable_size(image.attrs["Size"])}' ) except: self.logger.error( f'can not pull image {self.args.name} from {self.args.registry}' ) raise def _check_docker_image(self, name: str) -> None: # check local image image = self._client.images.get(name) for r in _allowed: if f'{_label_prefix}{r}' not in image.labels.keys(): self.logger.warning( f'{r} is missing in your docker image labels, you may want to check it' ) try: if name != safe_url_name(f'{_repo_prefix}' + '{type}.{kind}.{name}:{version}'.format( **{ k.replace(_label_prefix, ''): v for k, v in image.labels.items() })): raise ValueError( f'image {name} does not match with label info in the image' ) except KeyError: self.logger.error('missing key in the label of the image') raise self.logger.info( f'✅ {name} is a valid Jina Hub image, ready to publish') def login(self) -> None: """A wrapper of docker login """ if self.args.username and self.args.password: self._client.login(username=self.args.username, password=self.args.password, registry=self.args.registry) else: raise ValueError( 'no username/password specified, docker login failed') def build(self) -> Dict: """A wrapper of docker build """ if self.args.dry_run: result = self.dry_run() else: is_build_success, is_push_success = True, False _logs = [] _excepts = [] with TimeContext(f'building {colored(self.args.path, "green")}', self.logger) as tc: try: self._check_completeness() streamer = self._raw_client.build( decode=True, path=self.args.path, tag=self.tag, pull=self.args.pull, dockerfile=self.dockerfile_path_revised, rm=True) for chunk 
in streamer: if 'stream' in chunk: for line in chunk['stream'].splitlines(): if is_error_message(line): self.logger.critical(line) _excepts.append(line) elif 'warning' in line.lower(): self.logger.warning(line) else: self.logger.info(line) _logs.append(line) except Exception as ex: # if pytest fails it should end up here as well is_build_success = False _excepts.append(str(ex)) if is_build_success: # compile it again, but this time don't show the log image, log = self._client.images.build( path=self.args.path, tag=self.tag, pull=self.args.pull, dockerfile=self.dockerfile_path_revised, rm=True) # success _details = { 'inspect': self._raw_client.inspect_image(image.tags[0]), 'tag': image.tags[0], 'hash': image.short_id, 'size': get_readable_size(image.attrs['Size']), } self.logger.success( '🎉 built {tag} ({hash}) uncompressed size: {size}'. format_map(_details)) else: self.logger.error( f'can not build the image, please double check the log') _details = {} if is_build_success: if self.args.test_uses: try: is_build_success = False from jina.flow import Flow p_name = random_name() with Flow().add(name=p_name, uses=image.tags[0], daemon=self.args.daemon): pass if self.args.daemon: self._raw_client.stop(p_name) self._raw_client.prune_containers() is_build_success = True except PeaFailToStart: self.logger.error( f'can not use it in the Flow, please check your file bundle' ) except Exception as ex: self.logger.error( f'something wrong but it is probably not your fault. 
{repr(ex)}' ) _version = self.manifest[ 'version'] if 'version' in self.manifest else '0.0.1' info, env_info = get_full_version() _host_info = { 'jina': info, 'jina_envs': env_info, 'docker': self._raw_client.info(), 'build_args': vars(self.args) } _build_history = { 'time': get_now_timestamp(), 'host_info': _host_info if is_build_success and self.args.host_info else '', 'duration': tc.readable_duration, 'logs': _logs, 'exception': _excepts } if self.args.prune_images: self.logger.info('deleting unused images') self._raw_client.prune_images() result = { 'name': getattr(self, 'canonical_name', ''), 'version': self.manifest['version'] if is_build_success and 'version' in self.manifest else '0.0.1', 'path': self.args.path, 'manifest_info': self.manifest if is_build_success else '', 'details': _details, 'is_build_success': is_build_success, 'build_history': [_build_history] } # only successful build (NOT dry run) writes the summary to disk if result['is_build_success']: self._write_summary_to_file(summary=result) if self.args.push: try: self._push_docker_hub(image.tags[0], self.readme_path) self._write_summary_to_db(summary=result) self._write_slack_message(result, _details, _build_history) except Exception as ex: self.logger.error( f'can not complete the push due to {repr(ex)}') if not result['is_build_success'] and self.args.raise_error: # remove the very verbose build log when throw error result['build_history'][0].pop('logs') raise RuntimeError(result) return result def dry_run(self) -> Dict: try: s = self._check_completeness() s['is_build_success'] = True except Exception as ex: s = {'is_build_success': False, 'exception': str(ex)} return s def _write_summary_to_db(self, summary: Dict) -> None: """ Inserts / Updates summary document in mongodb """ if not is_db_envs_set(): self.logger.warning( 'MongoDB environment vars are not set! 
bookkeeping skipped.') return build_summary = handle_dot_in_keys(document=summary) _build_query = { 'name': build_summary['name'], 'version': build_summary['version'] } _current_build_history = build_summary['build_history'] with MongoDBHandler( hostname=os.environ['JINA_DB_HOSTNAME'], username=os.environ['JINA_DB_USERNAME'], password=os.environ['JINA_DB_PASSWORD'], database_name=os.environ['JINA_DB_NAME'], collection_name=os.environ['JINA_DB_COLLECTION']) as db: existing_doc = db.find(query=_build_query) if existing_doc: build_summary['build_history'] = existing_doc[ 'build_history'] + _current_build_history _modified_count = db.replace(document=build_summary, query=_build_query) self.logger.debug( f'Updated the build + push summary in db. {_modified_count} documents modified' ) else: _inserted_id = db.insert(document=build_summary) self.logger.debug( f'Inserted the build + push summary in db with id {_inserted_id}' ) def _write_summary_to_file(self, summary: Dict) -> None: file_path = get_summary_path(f'{summary["name"]}:{summary["version"]}') with open(file_path, 'w+') as f: json.dump(summary, f) self.logger.debug(f'stored the summary from build to {file_path}') def _check_completeness(self) -> Dict: self.dockerfile_path = get_exist_path(self.args.path, 'Dockerfile') self.manifest_path = get_exist_path(self.args.path, 'manifest.yml') self.readme_path = get_exist_path(self.args.path, 'README.md') self.requirements_path = get_exist_path(self.args.path, 'requirements.txt') yaml_glob = glob.glob(os.path.join(self.args.path, '*.yml')) if yaml_glob: yaml_glob.remove(self.manifest_path) py_glob = glob.glob(os.path.join(self.args.path, '*.py')) test_glob = glob.glob(os.path.join(self.args.path, 'tests/test_*.py')) completeness = { 'Dockerfile': self.dockerfile_path, 'manifest.yml': self.manifest_path, 'README.md': self.readme_path, 'requirements.txt': self.requirements_path, '*.yml': yaml_glob, '*.py': py_glob, 'tests': test_glob } self.logger.info(f'completeness 
check\n' + '\n'.join( '%4s %-20s %s' % (colored('✓', 'green') if v else colored('✗', 'red'), k, v) for k, v in completeness.items()) + '\n') if completeness['Dockerfile'] and completeness['manifest.yml']: pass else: self.logger.critical( 'Dockerfile or manifest.yml is not given, can not build') raise FileNotFoundError( 'Dockerfile or manifest.yml is not given, can not build') self.manifest = self._read_manifest(self.manifest_path) self.dockerfile_path_revised = self._get_revised_dockerfile( self.dockerfile_path, self.manifest) self.tag = safe_url_name(f'{_repo_prefix}' + '{type}.{kind}.{name}:{version}'.format( **self.manifest)) self.canonical_name = safe_url_name(f'{_repo_prefix}' + '{type}.{kind}.{name}'.format( **self.manifest)) return completeness def _read_manifest(self, path: str, validate: bool = True) -> Dict: with resource_stream( 'jina', '/'.join( ('resources', 'hub-builder', 'manifest.yml'))) as fp: tmp = yaml.load( fp ) # do not expand variables at here, i.e. DO NOT USE expand_dict(yaml.load(fp)) with open(path) as fp: tmp.update(yaml.load(fp)) if validate: self._validate_manifest(tmp) return tmp def _validate_manifest(self, manifest: Dict) -> None: required = {'name', 'type', 'version'} # check the required field in manifest for r in required: if r not in manifest: raise ValueError( f'{r} is missing in the manifest.yaml, it is required') # check if all fields are there for r in _allowed: if r not in manifest: self.logger.warning( f'{r} is missing in your manifest.yml, you may want to check it' ) # check name check_name(manifest['name']) # check_image_type check_image_type(manifest['type']) # check version number check_version(manifest['version']) # check version number check_license(manifest['license']) # check platform if not isinstance(manifest['platform'], list): manifest['platform'] = list(manifest['platform']) check_platform(manifest['platform']) # replace all chars in value to safe chars for k, v in manifest.items(): if v and isinstance(v, str): 
manifest[k] = remove_control_characters(v) # show manifest key-values for k, v in manifest.items(): self.logger.debug(f'{k}: {v}') def _get_revised_dockerfile(self, dockerfile_path: str, manifest: Dict) -> str: # modify dockerfile revised_dockerfile = [] with open(dockerfile_path) as fp: for l in fp: revised_dockerfile.append(l) if l.startswith('FROM'): revised_dockerfile.append('LABEL ') revised_dockerfile.append(' \\ \n'.join( f'{_label_prefix}{k}="{v}"' for k, v in manifest.items())) f = tempfile.NamedTemporaryFile('w', delete=False).name with open(f, 'w', encoding='utf8') as fp: fp.writelines(revised_dockerfile) for k in revised_dockerfile: self.logger.debug(k) return f def _write_slack_message(self, *args): def _expand_fn(v): if isinstance(v, str): for d in args: try: v = v.format(**d) except KeyError: pass return v if 'JINAHUB_SLACK_WEBHOOK' in os.environ: with resource_stream( 'jina', '/'.join(('resources', 'hub-builder-success', 'slack-jinahub.json'))) as fp: tmp = expand_dict(json.load(fp), _expand_fn, resolve_cycle_ref=False) req = urllib.request.Request( os.environ['JINAHUB_SLACK_WEBHOOK']) req.add_header('Content-Type', 'application/json; charset=utf-8') jdb = json.dumps(tmp).encode('utf-8') # needs to be bytes req.add_header('Content-Length', str(len(jdb))) with urllib.request.urlopen(req, jdb) as f: res = f.read() self.logger.info(f'push to Slack: {res}') # alias of "new" in cli create = new init = new