Example #1
def get_server_containers(server: Server, client: docker.APIClient) -> tp.List[dict]:
    containers = client.containers()
    server_containers = []
    for container in containers:
        server_container = {
            'command': filter_printable(container['Command']),
            'containerId': container['Id'],
            'image': container['Image'],
            'labels': sorted([{
                'containerId': container['Id'],
                'name': l[0],
                'value': l[1]} for l in container['Labels'].items()],
                key=lambda label: label['name']),
            'name': container['Names'][0],
            'network': container['HostConfig']['NetworkMode'],
            'ports': sorted([{
                'destination': str(p['PrivatePort']),
                'hostIp': p['IP'] if 'IP' in p else None,
                'protocol': p['Type'],
                'source': str(p['PublicPort']) if 'PublicPort' in p else None} for p in container['Ports']],
                key=lambda port: (str(port['destination']), str(port['source']))),
            'privileged': client.inspect_container(container['Id'])['HostConfig']['Privileged'],
            'serverId': server.id,
            'volumes': sorted([{
                'containerId': container['Id'],
                'destination': filter_printable(v['Destination']),
                'source': filter_printable(v['Source'])} for v in container['Mounts']],
                key=lambda volume: volume['destination'])
        }
        server_containers.append(server_container)
    return server_containers
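
A hedged usage sketch for the helper above; `Server`, `filter_printable`, and the `tp` alias for `typing` are assumed from the surrounding module, and `server` stands in for whatever Server record the caller already has:

import docker

client = docker.APIClient(base_url='unix://var/run/docker.sock')
# `server` is a placeholder Server record with an `id` attribute.
for entry in get_server_containers(server, client):
    print(entry['name'], entry['image'], len(entry['ports']))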
Example #2
def docker_abs_net_io(container_id):
    """
    Network traffic of all network interfaces within the container.

    :param container_id: The full ID of the docker container.
    :type container_id: ``str``
    :return: Returns the absolute network I/O since container startup, in bytes. The return
        dict also contains the system time.
    :rtype: ``dict``
    """
    c = APIClient()
    command = c.exec_create(container_id, 'ifconfig')
    ifconfig = c.exec_start(command['Id'])
    sys_time = int(time.time() * 1000000000)

    in_bytes = 0
    m = re.findall(r'RX bytes:(\d+)', str(ifconfig))
    if m:
        for number in m:
            in_bytes += int(number)
    else:
        in_bytes = None

    out_bytes = 0
    m = re.findall(r'TX bytes:(\d+)', str(ifconfig))
    if m:
        for number in m:
            out_bytes += int(number)
    else:
        out_bytes = None

    return {'NET_in': in_bytes, 'NET_out': out_bytes, 'NET_systime': sys_time}
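
A sketch of sampling this helper twice to turn the absolute counters into a rate; `cid` is a placeholder for a running container's full ID:

import time

cid = '...'  # placeholder: full ID of a running container
first = docker_abs_net_io(cid)
time.sleep(1)
second = docker_abs_net_io(cid)
if None not in (first['NET_in'], second['NET_in']):
    elapsed_s = (second['NET_systime'] - first['NET_systime']) / 1e9
    print('RX bytes/s: {:.0f}'.format((second['NET_in'] - first['NET_in']) / elapsed_s))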
Example #3
def docker_client(environment, version=None, tls_config=None, host=None,
                  tls_version=None):
    """
    Returns a docker-py client configured using environment variables
    according to the same logic as the official Docker client.
    """
    try:
        kwargs = kwargs_from_env(environment=environment, ssl_version=tls_version)
    except TLSParameterError:
        raise UserError(
            "TLS configuration is invalid - make sure your DOCKER_TLS_VERIFY "
            "and DOCKER_CERT_PATH are set correctly.\n"
            "You might need to run `eval \"$(docker-machine env default)\"`")

    if host:
        kwargs['base_url'] = host
    if tls_config:
        kwargs['tls'] = tls_config

    if version:
        kwargs['version'] = version

    timeout = environment.get('COMPOSE_HTTP_TIMEOUT')
    if timeout:
        kwargs['timeout'] = int(timeout)
    else:
        kwargs['timeout'] = HTTP_TIMEOUT

    kwargs['user_agent'] = generate_user_agent()

    client = APIClient(**kwargs)
    client._original_base_url = kwargs.get('base_url')

    return client
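
A minimal sketch of calling this Compose-internal helper, assuming a plain mapping like `os.environ` satisfies the `environment` parameter:

import os

client = docker_client(environment=os.environ, version='auto')
print(client.version()['ApiVersion'])  # APIClient.version() returns a dict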
Example #4
    def _clean_network(self):
        client = Client(base_url=self._docker_host, version="auto", timeout=10)
        networks = client.networks(names=["%s_default" % self._project_name])
        id_removes = [e["Id"] for e in networks]
        for network_id in id_removes:
            client.remove_network(network_id)
            LOG.debug("Remove network id {}".format(network_id))
Example #5
def cleanup_host(worker_api, timeout=5):
    """
    Clean up a container host when the user removes the host

    Maybe we will remove the networks?

    :param worker_api: Docker daemon url
    :param timeout: timeout to wait
    :return: True if cleanup succeeded, False otherwise
    """
    if not worker_api or not worker_api.startswith("tcp://"):
        logger.error("Invalid worker_api={}".format(worker_api))
        return False
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        net_names = [x["Name"] for x in client.networks()]
        for cs_type in CONSENSUS_PLUGINS_FABRIC_V1:
            net_name = CLUSTER_NETWORK + "_{}".format(cs_type)
            if net_name in net_names:
                logger.debug("Remove network {}".format(net_name))
                client.remove_network(net_name)
            else:
                logger.warning("Network {} not exists!".format(net_name))
    except Exception as e:
        logger.error("Exception happens!")
        logger.error(e)
        return False
    return True
Example #6
def detect_daemon_type(worker_api, timeout=5):
    """ Try to detect the daemon type

    Only wait for timeout seconds.

    :param worker_api: Docker daemon url
    :param timeout: Time to wait for the response
    :return: host type info
    """
    if not worker_api or not worker_api.startswith("tcp://"):
        return None
    segs = worker_api.split(":")
    if len(segs) != 3:
        logger.error("Invalid daemon url = ", worker_api)
        return None
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        info = client.info()
        server_version = info['ServerVersion']
        if not server_version:
            logger.warning("info().ServerVersion cannot be empty")
            return None
        if server_version.startswith('swarm'):
            return WORKER_TYPES[1]
        try:
            if info['Swarm']['Cluster']['ID'] != '':
                return WORKER_TYPES[1]
        except Exception as e:
            logger.debug(e)
        return WORKER_TYPES[0]
    except Exception as e:
        logger.error(e)
        return None
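
A sketch of how the detected type might gate later calls; `WORKER_TYPES` is assumed to be the module's list of host types (e.g. ['docker', 'swarm']) and the URL is a placeholder:

worker_api = 'tcp://192.0.2.10:2375'  # placeholder daemon url
host_type = detect_daemon_type(worker_api)
if host_type is None:
    logger.error("Could not detect daemon type for %s", worker_api)
else:
    logger.info("worker %s runs a '%s' daemon", worker_api, host_type)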
Example #7
def StopContainer():
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        cli.stop('suri-buildbot')
    else:
        cli = DockerClient()
        cli.containers.get('suri-buildbot').stop()
    sys.exit(0)
Example #8
def StartContainer():
    suri_src_dir = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    print "Using base src dir: " + suri_src_dir
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        cli.start('suri-buildbot',
                  port_bindings={8010: 8010, 22: None},
                  binds={suri_src_dir: {'bind': '/data/oisf', 'ro': True},
                         os.path.join(suri_src_dir, 'qa', 'docker', 'buildbot.cfg'):
                             {'bind': '/data/buildbot/master/master.cfg', 'ro': True}})
    else:
        cli = DockerClient()
        cli.containers.get('suri-buildbot').start()
    sys.exit(0)
Example #9
def modify_random_containers(client: docker.APIClient, amount: int, action: str = 'stop') -> tp.List[dict]:
    server_containers = client.containers()
    stopped_containers = []
    for _ in range(amount):
        container = random.choice(server_containers)
        if action == 'delete':
            client.remove_container(container, force=True)
        elif action == 'stop':
            client.stop(container)
            stopped_containers.append(container)
        server_containers.remove(container)
    return stopped_containers
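
A hedged sketch pairing the stop/restart halves of this helper; docker-py's `start()` accepts the same container dicts that `containers()` returns:

import docker

client = docker.APIClient(base_url='unix://var/run/docker.sock')
stopped = modify_random_containers(client, amount=2, action='stop')
# ... run whatever check needs the containers down, then bring them back:
for container in stopped:
    client.start(container)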
Example #10
def docker_container_id(container_name):
    """
    Uses the container name to return the container ID.

    :param container_name: The full name of the docker container.
    :type container_name: ``str``
    :return: Returns the container ID or None if the container is not running or could not be found.
    :rtype: ``str`` or ``None``
    """
    c = APIClient()
    detail = c.inspect_container(container_name)
    if detail["State"]["Running"]:
        return detail['Id']
    return None
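
A sketch chaining this with an ID-based helper such as `docker_abs_net_io` above; note that `inspect_container` raises `docker.errors.NotFound` for unknown names, so the sketch guards for it (container name is a placeholder):

from docker.errors import NotFound

try:
    cid = docker_container_id('my-nginx')  # placeholder container name
except NotFound:
    cid = None
if cid:
    print(docker_abs_net_io(cid))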
Example #11
    def execute(self, context):
        logging.info('Starting docker container from image ' + self.image)

        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(
                    ca_cert=self.tls_ca_cert,
                    client_cert=(self.tls_client_cert, self.tls_client_key),
                    verify=True,
                    ssl_version=self.tls_ssl_version,
                    assert_hostname=self.tls_hostname
            )
            self.docker_url = self.docker_url.replace('tcp://', 'https://')

        self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)

        if ':' not in self.image:
            image = self.image + ':latest'
        else:
            image = self.image

        if self.force_pull or len(self.cli.images(name=image)) == 0:
            logging.info('Pulling docker image ' + image)
            for l in self.cli.pull(image, stream=True):
                output = json.loads(l.decode('utf-8'))
                logging.info("{}".format(output['status']))

        cpu_shares = int(round(self.cpus * 1024))

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                    command=self.get_command(),
                    cpu_shares=cpu_shares,
                    environment=self.environment,
                    host_config=self.cli.create_host_config(
                                                binds=self.volumes,
                                                network_mode=self.network_mode,
                                                auto_remove=self.auto_remove),
                    image=image,
                    mem_limit=self.mem_limit,
                    user=self.user
            )
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                logging.info(line)

            exit_code = self.cli.wait(self.container['Id'])
            if exit_code != 0:
                raise AirflowException('docker container failed')

            if self.xcom_push_flag:
                return self.cli.logs(container=self.container['Id']) if self.xcom_all else str(line)
Example #12
    def __init__(self,
                 build_job,
                 repo_path,
                 from_image,
                 copy_code=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        self.build_job = build_job
        self.job_uuid = build_job.uuid.hex
        self.job_name = build_job.unique_name
        self.from_image = from_image
        self.image_name = get_image_name(self.build_job)
        self.image_tag = self.job_uuid
        self.folder_name = repo_path.split('/')[-1]
        self.repo_path = repo_path
        self.copy_code = copy_code

        self.build_path = '/'.join(self.repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
Example #13
    def __init__(self,
                 repo_path,
                 from_image,
                 image_name,
                 image_tag,
                 copy_code=True,
                 in_tmp_repo=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        # This will help create a unique tmp folder for dockerizer in case of concurrent jobs
        self.uuid = uuid.uuid4().hex
        self.from_image = from_image
        self.image_name = image_name
        self.image_tag = image_tag
        self.repo_path = repo_path
        self.folder_name = repo_path.split('/')[-1]
        self.copy_code = copy_code
        self.in_tmp_repo = in_tmp_repo
        if in_tmp_repo and copy_code:
            self.build_repo_path = self.create_tmp_repo()
        else:
            self.build_repo_path = self.repo_path

        self.build_path = '/'.join(self.build_repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None
Example #14
    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if ':' not in self.image:
            image = self.image + ':latest'
        else:
            image = self.image

        if self.force_pull or len(self.cli.images(name=image)) == 0:
            self.log.info('Pulling docker image %s', image)
            for l in self.cli.pull(image, stream=True):
                output = json.loads(l.decode('utf-8'))
                self.log.info("%s", output['status'])

        cpu_shares = int(round(self.cpus * 1024))

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                cpu_shares=cpu_shares,
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size),
                image=image,
                mem_limit=self.mem_limit,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            exit_code = self.cli.wait(self.container['Id'])
            if exit_code != 0:
                raise AirflowException('docker container failed')

            if self.xcom_push_flag:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else str(line)
Example #15
def _clean_chaincode_images(worker_api, name_prefix, timeout=5):
    """ Clean chaincode images, whose name should have cluster id as prefix

    :param worker_api: Docker daemon url
    :param name_prefix: image name prefix
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("clean chaincode images with prefix={}".format(name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)
    images = client.images()
    id_removes = [e['Id'] for e in images if e['RepoTags'] and
                  e['RepoTags'][0].startswith(name_prefix)]
    logger.debug("chaincode image id to removes=" + ", ".join(id_removes))
    for _ in id_removes:
        client.remove_image(_, force=True)
Example #16
    def __init__(self, config=None):
        if config is None:
            config = self.guess_config()
        if isinstance(config, str):
            with open(os.path.expanduser(config)) as f:
                self.config = json.load(f)
        else:
            self.config = config
        clouds = list(self.config['clouds'].items())
        if len(clouds) != 1:
            raise RuntimeError("supports only one cloud in config")
        url, self.creds = clouds[0]
        url = urlparse(url)
        base_url = "https://" + url.netloc
        DockerClient.__init__(self, base_url, tls=True)
        self.auth = AWS4Auth(self.creds['accesskey'], self.creds['secretkey'],
                             url.netloc.split(".")[0], 'hyper')
        self._version = "1.23"
Example #17
    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = json.loads(l.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')
Example #18
def _clean_project_networks(worker_api, name_prefix, timeout=5):
    """
    Clean cluster node networks

    All networks with the name prefix will be removed.

    :param worker_api: Docker daemon url
    :param name_prefix: network name prefix
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("Clean project networks, worker_api={}, prefix={}".format(
        worker_api, name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)
    networks = client.networks(names=["%s_default" % name_prefix])
    id_removes = [e['Id'] for e in networks]
    for network_id in id_removes:
        client.remove_network(network_id)
        logger.debug("Remove network id {}".format(network_id))
Example #19
def start_containers(worker_api, name_prefix, timeout=5):
    """Start containers with given prefix

    The chaincode container usually has a name prefixed with `name_prefix-`

    :param worker_api: Docker daemon url
    :param name_prefix: container name prefix
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("Get containers, worker_api={}, prefix={}".format(
        worker_api, name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)
    containers = client.containers(all=True)
    id_cc = [e['Id'] for e in containers if
             e['Names'][0].split("/")[-1].startswith(name_prefix)]
    logger.info(id_cc)
    for _ in id_cc:
        client.start(_)
Example #20
def get_swarm_node_ip(swarm_url, container_name, timeout=5):
    """
    Detect the host IP where the given container is located in the swarm cluster

    :param swarm_url: Swarm cluster api url
    :param container_name: The container name
    :param timeout: Time to wait for the response
    :return: host ip
    """
    logger.debug("Detect container={} with swarm_url={}".format(
        container_name, swarm_url))
    try:
        client = Client(base_url=swarm_url, version="auto", timeout=timeout)
        info = client.inspect_container(container_name)
        return info['NetworkSettings']['Ports']['5000/tcp'][0]['HostIp']
    except Exception as e:
        logger.error("Exception happens when detect container host!")
        logger.error(e)
        return ''
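
A short sketch; the swarm URL and container name are placeholders:

host_ip = get_swarm_node_ip('tcp://192.0.2.10:3376', 'cluster42_peer0')
if host_ip:
    logger.info("container runs on host {}".format(host_ip))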
Example #21
def _clean_exited_containers(worker_api):
    """ Clean those containers with exited status

    This is dangerous, as it may delete temporary containers.
    Only trigger this when no one else uses the system.

    :param worker_api: Docker daemon url
    :return: None
    """
    logger.debug("Clean exited containers")
    client = Client(base_url=worker_api, version="auto")
    containers = client.containers(quiet=True, all=True,
                                   filters={"status": "exited"})
    id_removes = [e['Id'] for e in containers]
    for _ in id_removes:
        logger.debug("exited container to remove, id={}", _)
        try:
            client.remove_container(_)
        except Exception as e:
            logger.error("Exception in clean_exited_containers {}".format(e))
Example #22
def _clean_project_containers(worker_api, name_prefix, timeout=5):
    """
    Clean cluster node containers and chaincode containers

    All containers with the name prefix will be removed.

    :param worker_api: Docker daemon url
    :param name_prefix: container name prefix
    :param timeout: Time to wait for the response
    :return: None
    """
    logger.debug("Clean project containers, worker_api={}, prefix={}".format(
        worker_api, name_prefix))
    client = Client(base_url=worker_api, version="auto", timeout=timeout)
    containers = client.containers(all=True)
    id_removes = [e['Id'] for e in containers if
                  e['Names'][0].split("/")[-1].startswith(name_prefix)]
    for _ in id_removes:
        client.remove_container(_, force=True)
        logger.debug("Remove container {}".format(_))
Example #23
def docker_client(environment, version=None, tls_config=None, host=None,
                  tls_version=None):
    """
    Returns a docker-py client configured using environment variables
    according to the same logic as the official Docker client.
    """
    try:
        kwargs = kwargs_from_env(environment=environment, ssl_version=tls_version)
    except TLSParameterError:
        raise UserError(
            "TLS configuration is invalid - make sure your DOCKER_TLS_VERIFY "
            "and DOCKER_CERT_PATH are set correctly.\n"
            "You might need to run `eval \"$(docker-machine env default)\"`")

    if host:
        kwargs['base_url'] = host
    if tls_config:
        kwargs['tls'] = tls_config

    if version:
        kwargs['version'] = version

    timeout = environment.get('COMPOSE_HTTP_TIMEOUT')
    if timeout:
        kwargs['timeout'] = int(timeout)
    else:
        kwargs['timeout'] = HTTP_TIMEOUT

    kwargs['user_agent'] = generate_user_agent()

    # Workaround for
    # https://pyinstaller.readthedocs.io/en/v3.3.1/runtime-information.html#ld-library-path-libpath-considerations
    if 'LD_LIBRARY_PATH_ORIG' in environment:
        kwargs['credstore_env'] = {
            'LD_LIBRARY_PATH': environment.get('LD_LIBRARY_PATH_ORIG'),
        }

    client = APIClient(**kwargs)
    client._original_base_url = kwargs.get('base_url')

    return client
Example #24
def check_daemon(worker_api, timeout=5):
    """ Check if the daemon is active

    Only wait for timeout seconds.

    :param worker_api: Docker daemon url
    :param timeout: Time to wait for the response
    :return: True for active, False for inactive
    """
    if not worker_api or not worker_api.startswith("tcp://"):
        logger.warning("invalid workder_api={}".format(worker_api))
        return False
    segs = worker_api.split(":")
    if len(segs) != 3:
        logger.error("Invalid workder api = ", worker_api)
        return False
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        ping_result = client.ping()
        logger.debug("ping_result={}".format(ping_result))
        return ping_result == 'OK' or ping_result is True
    except Exception as e:
        logger.error("Exception in check_daemon {}".format(e))
        return False
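
These helpers compose naturally; a sketch that only attempts cleanup once the daemon answers a ping (URL is a placeholder):

worker_api = 'tcp://192.0.2.10:2375'  # placeholder daemon url
if check_daemon(worker_api):
    cleanup_host(worker_api)
else:
    logger.warning("daemon at {} not reachable, skip cleanup".format(worker_api))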
Example #25
def CreateContainer():
    # FIXME check if existing
    print "Pulling docking image, first run should take long"
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        cli.pull('regit/suri-buildbot')
        cli.create_container(name='suri-buildbot', image='regit/suri-buildbot',
                             ports=[8010, 22],
                             volumes=['/data/oisf', '/data/buildbot/master/master.cfg'])
    else:
        cli = DockerClient()
        cli.images.pull('regit/suri-buildbot')
        suri_src_dir = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
        print "Using base src dir: " + suri_src_dir
        cli.containers.create('regit/suri-buildbot',
                              name='suri-buildbot',
                              ports={'8010/tcp': 8010, '22/tcp': None},
                              volumes={suri_src_dir: {'bind': '/data/oisf', 'mode': 'ro'},
                                       os.path.join(suri_src_dir, 'qa', 'docker', 'buildbot.cfg'):
                                           {'bind': '/data/buildbot/master/master.cfg', 'mode': 'ro'}},
                              detach=True)
    sys.exit(0)
Example #26
def RmContainer():
    if GOT_DOCKERPY_API < 2:
        cli = Client()
        try:
            cli.remove_container('suri-buildbot')
        except Exception:
            print("Unable to remove suri-buildbot container")
        try:
            cli.remove_image('regit/suri-buildbot:latest')
        except Exception:
            print("Unable to remove suri-buildbot image")
    else:
        cli = DockerClient()
        cli.containers.get('suri-buildbot').remove()
        cli.images.remove('regit/suri-buildbot:latest')
    sys.exit(0)
Example #27
def reset_container_host(host_type, worker_api, timeout=15):
    """ Try to detect the daemon type

    Only wait for timeout seconds.

    :param host_type: Type of host: single or swarm
    :param worker_api: Docker daemon url
    :param timeout: Time to wait for the response
    :return: result of setup_container_host, or False on failure
    """
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        containers = client.containers(quiet=True, all=True)
        logger.debug(containers)
        for c in containers:
            client.remove_container(c['Id'], force=True)
        logger.debug("cleaning all containers")
    except Exception as e:
        logger.error("Exception happens when reset host!")
        logger.error(e)
        return False
    try:
        images = client.images(all=True)
        logger.debug(images)
        for i in images:
            if i["RepoTags"][0] == "<none>:<none>":
                logger.debug(i)
                try:
                    client.remove_image(i['Id'])
                except Exception as e:
                    logger.error(e)
                    continue
        logger.debug("cleaning <none> images")
    except Exception as e:
        logger.error("Exception happens when reset host!")
        logger.error(e)
        return False

    return setup_container_host(host_type=host_type, worker_api=worker_api)
Example #28
def setup_container_host(host_type, worker_api, timeout=5):
    """
    Setup a container host for deploying cluster on it

    :param host_type: Docker host type
    :param worker_api: Docker daemon url
    :param timeout: timeout to wait
    :return: True or False
    """
    if not worker_api or not worker_api.startswith("tcp://"):
        logger.error("Invalid worker_api={}".format(worker_api))
        return False
    if host_type not in WORKER_TYPES:
        logger.error("Invalid host_type={}".format(host_type))
        return False
    try:
        client = Client(base_url=worker_api, version="auto", timeout=timeout)
        net_names = [x["Name"] for x in client.networks()]
        for cs_type in CONSENSUS_PLUGINS_FABRIC_V1:
            net_name = CLUSTER_NETWORK + "_{}".format(cs_type)
            if net_name in net_names:
                logger.warning("Network {} already exists, use it!".format(
                    net_name))
            else:
                if host_type == WORKER_TYPES[0]:  # single
                    client.create_network(net_name, driver='bridge')
                elif host_type == WORKER_TYPES[1]:  # swarm
                    client.create_network(net_name, driver='overlay')
                else:
                    logger.error("No-supported host_type={}".format(host_type))
                    return False
    except Exception as e:
        logger.error("Exception happens!")
        logger.error(e)
        return False
    return True
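
A sketch of driving the setup for a plain (non-swarm) host, assuming `WORKER_TYPES[0]` names the single-host type as the code above suggests (URL is a placeholder):

worker_api = 'tcp://192.0.2.10:2375'  # placeholder daemon url
if setup_container_host(WORKER_TYPES[0], worker_api):
    logger.info("host networks ready")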
Example #29
def start_containers(client: docker.APIClient):
    configs = tables('docker').data
    images = ['ubuntu', 'alpine', 'nginx']
    ports_delta = 1
    for image in images:
        base_config = {
            "image": image,
            "command": "sleep 1d",
            "detach": True}
        for conf in configs:
            if conf.startswith('vol'):
                if conf == 'vol1' and image != 'alpine':
                    container = client.create_container(
                        host_config=client.create_host_config(binds=configs[conf]),
                        image=image, command=COMMAND, detach=True)
                else:
                    container = client.create_container(
                        host_config=client.create_host_config(binds=configs[conf]),
                        **base_config)

            elif conf.startswith('ports'):
                ports = {}
                for p in range(configs[conf]):
                    ports.update({9980 + ports_delta: 9980 + ports_delta})
                    ports.update({str(9981 + ports_delta) + '/udp': 9985 + ports_delta})
                    ports_delta += 1
                container = client.create_container(
                    host_config=client.create_host_config(port_bindings=ports),
                    ports=[*ports],
                    **base_config)

            elif conf.startswith('labels'):
                container = client.create_container(
                    labels=configs[conf],
                    **base_config)

            elif conf == 'privileged':
                container = client.create_container(
                    host_config=client.create_host_config(privileged=configs[conf]),
                    **base_config)
            else:
                entry_config = copy.copy(base_config)
                entry_config.pop('command')
                container = client.create_container(
                    entrypoint=configs[conf],
                    **entry_config)
            client.start(container)
Example #30
class ModifiedDockerOperator(DockerOperator):
    """ModifiedDockerOperator supports host temporary directories on OSX.

    Incorporates https://github.com/apache/airflow/pull/4315/ and an implementation of
    https://issues.apache.org/jira/browse/AIRFLOW-3825.

    :param host_tmp_dir: Specify the location of the temporary directory on the host which will
        be mapped to tmp_dir. If not provided defaults to using the standard system temp directory.
    :type host_tmp_dir: str
    """

    def __init__(self, host_tmp_dir='/tmp', **kwargs):
        self.host_tmp_dir = host_tmp_dir
        kwargs['xcom_push'] = True
        super(ModifiedDockerOperator, self).__init__(**kwargs)

    @contextmanager
    def get_host_tmp_dir(self):
        '''Abstracts the tempdir context manager so that this can be overridden.'''
        with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as tmp_dir:
            yield tmp_dir

    def execute(self, context):
        '''Modified only to use the get_host_tmp_dir helper.'''
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()
        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = json.loads(l.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with self.get_host_tmp_dir() as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit,
                ),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir,
            )
            self.cli.start(self.container['Id'])

            res = []
            line = ''
            for new_line in self.cli.logs(container=self.container['Id'], stream=True):
                line = new_line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)
                res.append(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            if self.xcom_push_flag:
                # Try to avoid any kind of race condition?
                return '\n'.join(res) + '\n' if self.xcom_all else str(line)

    # This is a class-private name on DockerOperator for no good reason --
    # all that the status quo does is inhibit extension of the class.
    # See https://issues.apache.org/jira/browse/AIRFLOW-3880
    def __get_tls_config(self):
        # pylint: disable=no-member
        return super(ModifiedDockerOperator, self)._DockerOperator__get_tls_config()
Example #31
def docker():
    return APIClient(version='auto')
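
This body reads like a pytest fixture; a hedged sketch of registering and consuming it (the decorator, fixture name, and test are assumptions, not part of the source):

import pytest
from docker import APIClient

@pytest.fixture
def docker_api():
    # hypothetical fixture wrapping the helper above
    return APIClient(version='auto')

def test_daemon_reachable(docker_api):
    assert docker_api.ping()  # truthy when the daemon answers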
Example #32
    def setup_class(cls):
        # Build the image from scratch
        cls.client = APIClient(base_url="unix://var/run/docker.sock")
        response = cls.client.build(path=REPO_DIR,
                                    dockerfile=os.path.join(
                                        "dockerfiles", "Dockerfile"),
                                    tag=IMAGE_NAME,
                                    rm=True,
                                    nocache=True,
                                    pull=True,
                                    stream=False)
        for line in response:
            print(line)
        # Create a network for both the controller + server to run in
        cls.client.create_network(NETWORK_NAME,
                                  driver="bridge",
                                  attachable=True)
        networking_config = cls.client.create_networking_config(
            {NETWORK_NAME: cls.client.create_endpoint_config()})
        # Start the controller
        print('creating controller')
        host_config = cls.client.create_host_config(auto_remove=True,
                                                    publish_all_ports=True)
        container = cls.client.create_container(
            image=IMAGE_NAME,
            command="controller",
            host_config=host_config,
            networking_config=networking_config)
        cls.client.start(container["Id"])
        TestEventgenOrchestration.controller_id = container["Id"]
        print(container["Id"])
        cls.controller_container = cls.client.inspect_container(
            container["Id"])
        cls.controller_eventgen_webport = cls.controller_container[
            "NetworkSettings"]["Ports"]["9500/tcp"][0]["HostPort"]
        cls.controller_rabbitmq_webport = cls.controller_container[
            "NetworkSettings"]["Ports"]["15672/tcp"][0]["HostPort"]
        # Start the server
        print('creating server')
        container = cls.client.create_container(
            image=IMAGE_NAME,
            command="server",
            environment=[
                "EVENTGEN_AMQP_HOST={}".format(
                    cls.controller_container["Id"][:12])
            ],
            host_config=host_config,
            networking_config=networking_config)
        cls.client.start(container["Id"])
        TestEventgenOrchestration.server_id = container["Id"]
        print(container["Id"])
        cls.server_container = cls.client.inspect_container(container["Id"])
        cls.server_eventgen_webport = cls.server_container["NetworkSettings"][
            "Ports"]["9500/tcp"][0]["HostPort"]
        cls.server_rabbitmq_webport = cls.server_container["NetworkSettings"][
            "Ports"]["15672/tcp"][0]["HostPort"]
        # Wait for the controller to be available
        wait_for_response("http://127.0.0.1:{}".format(
            cls.controller_eventgen_webport))
        # Wait for the server to be available
        wait_for_response("http://127.0.0.1:{}".format(
            cls.server_eventgen_webport))
Example #33
    def __init__(self):
        self.client = APIClient('unix://var/run/docker.sock')
        self.filtered_statuses = ('running', 'restarting', 'paused', 'exited')
        self.config = Config()
Example #34
class DockerOperator(BaseOperator):
    """
    Execute a command inside a docker container.

    A temporary directory is created on the host and
    mounted into a container to allow storing files
    that together exceed the default disk size of 10GB in a container.
    The path to the mounted directory can be accessed
    via the environment variable ``AIRFLOW_TMP_DIR``.

    If a login to a private registry is required prior to pulling the image, a
    Docker connection needs to be configured in Airflow and the connection ID
    be provided with the parameter ``docker_conn_id``.

    :param image: Docker image from which to create the container.
        If image tag is omitted, "latest" will be used.
    :type image: str
    :param api_version: Remote API version. Set to ``auto`` to automatically
        detect the server's version.
    :type api_version: str
    :param command: Command to be run in the container. (templated)
    :type command: str or list
    :param container_name: Name of the container.
    :type container_name: str
    :param cpus: Number of CPUs to assign to the container.
        This value gets multiplied with 1024. See
        https://docs.docker.com/engine/reference/run/#cpu-share-constraint
    :type cpus: float
    :param docker_url: URL of the host running the docker daemon.
        Default is unix://var/run/docker.sock
    :type docker_url: str
    :param environment: Environment variables to set in the container. (templated)
    :type environment: dict
    :param force_pull: Pull the docker image on every run. Default is False.
    :type force_pull: bool
    :param mem_limit: Maximum amount of memory the container can use.
        Either a float value, which represents the limit in bytes,
        or a string like ``128m`` or ``1g``.
    :type mem_limit: float or str
    :param host_tmp_dir: Specify the location of the temporary directory on the host which will
        be mapped to tmp_dir. If not provided defaults to using the standard system temp directory.
    :type host_tmp_dir: str
    :param network_mode: Network mode for the container.
    :type network_mode: str
    :param tls_ca_cert: Path to a PEM-encoded certificate authority
        to secure the docker connection.
    :type tls_ca_cert: str
    :param tls_client_cert: Path to the PEM-encoded certificate
        used to authenticate docker client.
    :type tls_client_cert: str
    :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client.
    :type tls_client_key: str
    :param tls_hostname: Hostname to match against
        the docker server certificate or False to disable the check.
    :type tls_hostname: str or bool
    :param tls_ssl_version: Version of SSL to use when communicating with docker daemon.
    :type tls_ssl_version: str
    :param tmp_dir: Mount point inside the container to
        a temporary directory created on the host by the operator.
        The path is also made available via the environment variable
        ``AIRFLOW_TMP_DIR`` inside the container.
    :type tmp_dir: str
    :param user: Default user inside the docker container.
    :type user: int or str
    :param volumes: List of volumes to mount into the container, e.g.
        ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
    :type volumes: list
    :param working_dir: Working directory to
        set on the container (equivalent to the -w switch of the docker client)
    :type working_dir: str
    :param xcom_all: Push all the stdout or just the last line.
        The default is False (last line).
    :type xcom_all: bool
    :param docker_conn_id: ID of the Airflow connection to use
    :type docker_conn_id: str
    :param dns: Docker custom DNS servers
    :type dns: list[str]
    :param dns_search: Docker custom DNS search domain
    :type dns_search: list[str]
    :param auto_remove: Auto-removal of the container on daemon side when the
        container's process exits.
        The default is False.
    :type auto_remove: bool
    :param shm_size: Size of ``/dev/shm`` in bytes. The size must be
        greater than 0. If omitted uses system default.
    :type shm_size: int
    """
    template_fields = ('command', 'environment',)
    template_ext = ('.sh', '.bash',)

    @apply_defaults
    def __init__(
            self,
            image: str,
            api_version: str = None,
            command: Union[str, List[str]] = None,
            container_name: str = None,
            cpus: float = 1.0,
            docker_url: str = 'unix://var/run/docker.sock',
            environment: Dict = None,
            force_pull: bool = False,
            mem_limit: Union[float, str] = None,
            host_tmp_dir: str = None,
            network_mode: str = None,
            tls_ca_cert: str = None,
            tls_client_cert: str = None,
            tls_client_key: str = None,
            tls_hostname: Union[str, bool] = None,
            tls_ssl_version: str = None,
            tmp_dir: str = '/tmp/airflow',
            user: Union[str, int] = None,
            volumes: Iterable[str] = None,
            working_dir: str = None,
            xcom_all: bool = False,
            docker_conn_id: str = None,
            dns: List[str] = None,
            dns_search: List[str] = None,
            auto_remove: bool = False,
            shm_size: int = None,
            *args,
            **kwargs) -> None:

        super().__init__(*args, **kwargs)
        self.api_version = api_version
        self.auto_remove = auto_remove
        self.command = command
        self.container_name = container_name
        self.cpus = cpus
        self.dns = dns
        self.dns_search = dns_search
        self.docker_url = docker_url
        self.environment = environment or {}
        self.force_pull = force_pull
        self.image = image
        self.mem_limit = mem_limit
        self.host_tmp_dir = host_tmp_dir
        self.network_mode = network_mode
        self.tls_ca_cert = tls_ca_cert
        self.tls_client_cert = tls_client_cert
        self.tls_client_key = tls_client_key
        self.tls_hostname = tls_hostname
        self.tls_ssl_version = tls_ssl_version
        self.tmp_dir = tmp_dir
        self.user = user
        self.volumes = volumes or []
        self.working_dir = working_dir
        self.xcom_all = xcom_all
        self.docker_conn_id = docker_conn_id
        self.shm_size = shm_size
        if kwargs.get('xcom_push') is not None:
            raise AirflowException("'xcom_push' was deprecated, use 'BaseOperator.do_xcom_push' instead")

        self.cli = None
        self.container = None

    def get_hook(self):
        return DockerHook(
            docker_conn_id=self.docker_conn_id,
            base_url=self.docker_url,
            version=self.api_version,
            tls=self.__get_tls_config()
        )

    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = json.loads(l.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                name=self.container_name,
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.attach(container=self.container['Id'],
                                        stdout=True,
                                        stderr=True,
                                        stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')

    def get_command(self):
        if isinstance(self.command, str) and self.command.strip().find('[') == 0:
            commands = ast.literal_eval(self.command)
        else:
            commands = self.command
        return commands

    def on_kill(self):
        if self.cli is not None:
            self.log.info('Stopping docker container')
            self.cli.stop(self.container['Id'])

    def __get_tls_config(self):
        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(
                ca_cert=self.tls_ca_cert,
                client_cert=(self.tls_client_cert, self.tls_client_key),
                verify=True,
                ssl_version=self.tls_ssl_version,
                assert_hostname=self.tls_hostname
            )
            self.docker_url = self.docker_url.replace('tcp://', 'https://')
        return tls_config
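
A minimal DAG sketch using the operator above; the DAG id, schedule, and image are placeholders:

from datetime import datetime
from airflow import DAG

with DAG('docker_example', start_date=datetime(2019, 1, 1), schedule_interval=None) as dag:
    print_date = DockerOperator(
        task_id='print_date',
        image='alpine:3.9',
        command='date',
        auto_remove=True,
    )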
Example #35
class Portal(object):

    def __init__(self):
        self._docker_client = APIClient()
        self._kill_now = False
        self._container_id = None
        self._std_in = None

        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)

    def _cleanup(self):
        self._kill_now = True

        if (self._container_id is not None):
            self._docker_client.stop(self._container_id)
            self._docker_client.remove_container(self._container_id, v=True, force=True)

    def _exit_gracefully(self, signum, frame):
        self._cleanup()

    # Bad code to capture whether stdin is set or not
    def _capture_stdin(self):
        if select.select([sys.stdin, ], [], [], 0.0)[0]:
            self._std_in = sys.stdin.buffer.read()
        elif not sys.stdin.isatty():
            self._std_in = sys.stdin.buffer.read()
    
    def _download_docker_image(self, command, docker_spec):
        docker_image_name = None

        if (docker_spec['image'] == 'Dockerfile'):
            docker_image_name = "portal/" + command
            try:
                image_data = self._docker_client.inspect_image(docker_image_name)
                return image_data
            except ImageNotFound:
                dockerfile = pkgutil.get_data(
                    __name__, "commands/%s/Dockerfile" %  command ).decode('utf-8')
                f = BytesIO(dockerfile.encode('utf-8'))
                for progress_dict in self._docker_client.build(fileobj=f, quiet=True, tag=docker_image_name, decode=True, rm=True):
                    print(progress_dict)
                # if ('progress' in progress_dict):
                #     print(progress_dict['progress'])
        else:
            docker_image_name = docker_spec['image']
            try:
                image_data = self._docker_client.inspect_image(docker_image_name)
                return image_data
            except ImageNotFound:
                print('Pulling Docker Image...')
                for progress_dict in self._docker_client.pull(docker_spec['image'], stream=True, decode=True):
                    print(progress_dict['status'])
                    if ('progress' in progress_dict):
                        print(progress_dict['progress'])
        return self._docker_client.inspect_image(docker_image_name)

    def _parse_args(self, spec_data, argv):
        parser = generate_argparse(spec_data['command'], spec_data['arguments'])
        cmd_options = vars(parser.parse_args(argv))
        cmd_args = cmd_options['cmdargs']
        for argkey in spec_data['arguments'].keys():
            if (spec_data['arguments'][argkey]['shorthand'] == '*'):
                if (len(cmd_args) > 0):
                    spec_data['arguments'][argkey]['value'] = cmd_args[0]
                    if ('File' in spec_data['arguments'][argkey]['docker']):
                        cmd_args = [os.path.join(spec_data['docker']['working_dir'], cmd_options['cmdargs'][0])]
                continue
            shorthand = spec_data['arguments'][argkey]['shorthand']
            spec_data['arguments'][argkey]['value'] = cmd_options[shorthand]

        cmd_args += merge_passthrough_vars(spec_data)
        return spec_data, cmd_args

    def _validate_spec(self, spec_data):
        for _, vargs in spec_data['arguments'].items():
            if (vargs['argType'] == 'path' and vargs['docker'] == 'volumeBinding'):
                # Check if path exists
                # if (not os.path.isfile(vargs['value'])): #TODO: Fix!
                #     print('Error: Path %s does not exist!' % vargs['value'])
                #     exit(101)
                pass

    def _create_container(self, cinfo, attach_stdin):
        host_config = self._docker_client.create_host_config(
            port_bindings=cinfo.port_bindings,
            binds=cinfo.vol_bindings
        )
        return self._docker_client.create_container(
            cinfo.container_id,
            command=cinfo.command,
            ports=cinfo.ports,
            environment=cinfo.environment_vars,
            stdin_open=attach_stdin,
            volumes=cinfo.volumes,
            # tty=True,
            host_config=host_config
        )

    def _copy_artefacts_to_container(self, container_id, command_spec):
        def copy_file(input_path, input_name,  output_path):
            tar_name = str(uuid.uuid4()) + '.tar'
            tf = tarfile.open(tar_name, mode='w')

            if (os.path.isfile(input_path)):
                tf.add(input_path, arcname=input_name)
            else:
                print("Could not find file %s " % input_path)
                tf.close()
                os.remove(tar_name)
                return False

            tf.close()
            with open(tar_name, 'rb') as tar_file:
                data = tar_file.read()
                self._docker_client.put_archive(container_id, output_path, data)

            os.remove(tar_name)


        for file in get_input_files(command_spec):
            copy_file(file['value'], file['value'], command_spec['docker']['working_dir'])

        home = str(Path.home())
        for file in get_input_env_files(command_spec):
            copy_file(os.path.join(home, file['name']), file['name'], '/root')
        return True


    def _copy_artefacts_from_container(self, container_id, command_spec):
        def copy_file(input_file, output_path):
            tar_name = str(uuid.uuid4()) + '.tar'
            f = open(tar_name, 'wb')
            bits, _ = self._docker_client.get_archive(
                container_id, input_file)
            for chunk in bits:
                f.write(chunk)
            f.close()

            tar = tarfile.open(tar_name)
            tar.extractall()
            tar.close()
            os.remove(tar_name)

        for file in get_output_files(command_spec):
            copy_file(os.path.join(command_spec['docker']['working_dir'], file['value']), None)

        for file in get_output_env_files(command_spec):
            copy_file(os.path.join('/root/', file['name']), None)

    def run_command(self, command, argv):

        command_spec = None
        try:
            spec_data = pkgutil.get_data(
                __name__, "commands/%s/spec.toml" %  command ).decode('utf-8')
            command_spec = toml.loads(spec_data)
        except FileNotFoundError:
            print('Command not found')
            return 101

        self._capture_stdin()

        command_spec, cmd_argv = self._parse_args(command_spec, argv)
        self._validate_spec(command_spec)
        
        image_info = self._download_docker_image(command, command_spec['docker'])
        cinfo = construct_container(image_info, cmd_argv, command_spec)

        docker_container = self._create_container(cinfo, (self._std_in is not None))
        warnings = docker_container.get('Warnings')
        if warnings:
            print('Could not start container. Warnings: %s' % ' '.join(warnings))
            return 101

        self._container_id = docker_container.get('Id')
        print('Process created in container: %s' % self._container_id)

        if (not self._copy_artefacts_to_container(self._container_id, command_spec)):
            self._cleanup()
            return 101

        
        
        # Attach stdin, if captured, before starting the container
        if self._std_in is not None:
            s = self._docker_client.attach_socket(
                self._container_id, params={'stdin': 1, 'stream': 1})
            os.write(s.fileno(), self._std_in)
            s.close()
        self._docker_client.start(container=self._container_id)

        for log in self._docker_client.logs(
            container=self._container_id, stream=True, follow=True):
            sys.stdout.buffer.write(log)
        self._docker_client.wait(container=self._container_id)

        self._copy_artefacts_from_container(self._container_id, command_spec)

        self._docker_client.remove_container(container=self._container_id)

        return 0
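
The _copy_artefacts_to_container helper above stages every upload through a tar file on disk, because put_archive only accepts tar data. A minimal in-memory variant of the same round-trip, assuming only a reachable daemon and an existing container (put_single_file is a hypothetical name, not part of the code above):

import io
import tarfile

from docker import APIClient

def put_single_file(client: APIClient, container_id: str,
                    src_path: str, dest_dir: str, arcname: str):
    # Pack the file into an in-memory tar archive; the Docker API only
    # accepts archives for uploads.
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w') as tf:
        tf.add(src_path, arcname=arcname)
    buf.seek(0)
    # put_archive extracts the archive at dest_dir inside the container.
    client.put_archive(container_id, dest_dir, buf.read())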
Example #36
0
class DockerNode(CommonNode):
    """
    An instance of this class will create a detached Docker container.

    This node binds the ``shared_dir_mount`` directory of the container to a
    local path in the host system defined in ``self.shared_dir``.

    :param str identifier: Node unique identifier in the topology being built.
    :param str image: The image to run on this node, in the
     form ``repository:tag``.
    :param str registry: Docker registry to pull image from.
    :param str command: The command to run when the container is brought up.
    :param str binds: Directories to bind for this container separated by a
     ``;`` in the form:

     ::

        '/tmp:/tmp;/dev/log:/dev/log;/sys/fs/cgroup:/sys/fs/cgroup'

    :param str network_mode: Network mode for this container.
    :param str hostname: Container hostname.
    :param environment: Environment variables to pass to the
     container. They can be set as a list of strings in the following format:

     ::

        ['environment_variable=value']

     or as a dictionary in the following format:

     ::

        {'environment_variable': 'value'}

    :type environment: list or dict
    :param bool privileged: Run container in privileged mode or not.
    :param bool tty: Whether to allocate a TTY or not to the process.
    :param str shared_dir_base: Base path in the host where the shared
     directory will be created. The shared directory will always have the name
     of the container inside this directory.
    :param str shared_dir_mount: Mount point of the shared directory in the
     container.
    :param dict create_host_config_kwargs: Extra kwargs arguments to pass to
     docker-py's ``create_host_config()`` low-level API call.
    :param dict create_container_kwargs: Extra kwargs arguments to pass to
     docker-py's ``create_container()`` low-level API call.

    Read only public attributes:

    :var str image: Name of the Docker image being used by this node.
     Same as the ``image`` keyword argument.
    :var str container_id: Unique container identifier assigned by the Docker
     daemon in the form of a hash.
    :var str container_name: Unique container name assigned by the framework in
     the form ``{identifier}_{pid}_{timestamp}``.
    :var str shared_dir: Shared directory in the host for this container.
     Always ``/tmp/topology/{container_name}``.
    :var str shared_dir_mount: Directory inside the container where the
     ``shared_dir`` is mounted. Same as the ``shared_dir_mount`` keyword
     argument.
    .. automethod:: _get_network_config
    """
    @abstractmethod
    def __init__(self,
                 identifier,
                 image='ubuntu:latest',
                 registry=None,
                 command='bash',
                 binds=None,
                 network_mode='none',
                 hostname=None,
                 environment=None,
                 privileged=True,
                 tty=True,
                 shared_dir_base='/tmp/topology/docker/',
                 shared_dir_mount='/var/topology',
                 create_host_config_kwargs=None,
                 create_container_kwargs=None,
                 **kwargs):

        super(DockerNode, self).__init__(identifier, **kwargs)

        self._pid = None
        self._image = image
        self._registry = registry
        self._command = command
        self._hostname = hostname
        self._environment = environment
        self._client = APIClient(version='auto')

        self._container_name = '{identifier}_{pid}_{timestamp}'.format(
            identifier=identifier,
            pid=getpid(),
            timestamp=datetime.now().isoformat().replace(':', '-'))
        self._shared_dir_base = shared_dir_base
        self._shared_dir_mount = shared_dir_mount
        self._shared_dir = join(shared_dir_base, self._container_name)

        self._create_host_config_kwargs = create_host_config_kwargs or {}
        self._create_container_kwargs = create_container_kwargs or {}

        # Autopull docker image if necessary
        self._autopull()

        # Create shared directory
        ensure_dir(self._shared_dir)

        # Add bound directories
        container_binds = [
            '{}:{}'.format(self._shared_dir, self._shared_dir_mount)
        ]
        if binds is not None:
            container_binds.extend(binds.split(';'))

        # Create host config
        create_host_config_call = {
            'privileged': privileged,
            'network_mode': network_mode,
            'binds': container_binds,
            'init': True
        }
        create_host_config_call.update(self._create_host_config_kwargs)

        self._host_config = self._client.create_host_config(
            **create_host_config_call)

        # Create container
        create_container_call = {
            'image': self._image,
            'command': self._command,
            'name': self._container_name,
            'detach': True,
            'tty': tty,
            'hostname': self._hostname,
            'host_config': self._host_config,
            'environment': self._environment,
        }
        create_container_call.update(self._create_container_kwargs)

        self._container_id = self._client.create_container(
            **create_container_call)['Id']

    @property
    def image(self):
        return self._image

    @property
    def container_id(self):
        return self._container_id

    @property
    def container_name(self):
        return self._container_name

    @property
    def shared_dir(self):
        return self._shared_dir

    @property
    def shared_dir_mount(self):
        return self._shared_dir_mount

    def _get_network_config(self):
        """
        Defines the network configuration for nodes of this type.

        This method should be overridden when implementing a new node type to
        return a dictionary with its network configuration by setting the
        following components:

        'mapping'
            This is a dictionary of dictionaries, each parent-level key defines
            one network category, and each category *must* have these three
            keys: **netns**, **managed_by**, and **prefix**, and *can*
            optionally have a **connect_to** key.

            'netns'
                Specifies the network namespace (inside the docker container)
                where all the ports belonging to this category will be moved
                after their creation. If set to None, then the ports will
                remain in the container's default network namespace.

            'managed_by'
                Specifies who will manage different aspects of this network
                category depending on its value (which can be either **docker**
                or **platform**).

                'docker'
                    This network category will represent a network created by
                    docker (identical to using the docker network create
                    command) and will be visible to docker (right now all
                    docker-managed networks are created using docker's 'bridge'
                    built-in network plugin, this will likely change in the
                    near future).

                'platform'
                    This network category will represent ports created by the
                    Docker Platform Engine and is invisible to docker.

            'prefix'
                Defines a prefix that will be used when a port/interface is
                moved into a namespace, its value can be set to '' (empty
                string) if no prefix is needed. In cases where the parent
                network category doesn't have a netns (i.e. 'netns' is set to
                None) this value will be ignored.

            'connect_to'
                Specifies a Docker network this category will be connected to,
                if this network doesn't exist it will be created. If set to
                None, this category will be connected to a uniquely named
                Docker network that will be created by the platform.

        'default_category'
            Every port that didn't explicitly set its category (using the
            "category" attribute in the SZN definition) will be set to this
            category.

        This is an example of a network configuration dictionary as expected to
        be returned by this function::

            {
                'default_category': 'front_panel',
                'mapping': {
                    'oobm': {
                        'netns': 'oobmns',
                        'managed_by': 'docker',
                        'connect_to': 'oobm',
                        'prefix': ''
                    },
                    'back_panel': {
                        'netns': None,
                        'managed_by': 'docker',
                        'prefix': ''
                    },
                    'front_panel': {
                        'netns': 'front',
                        'managed_by': 'platform',
                        'prefix': 'f_'
                    }
                }
            }

        :returns: The dictionary defining the network configuration.
        :rtype: dict
        """
        return {
            'default_category': 'front_panel',
            'mapping': {
                'oobm': {
                    'netns': None,
                    'managed_by': 'docker',
                    'prefix': ''
                },
                'front_panel': {
                    'netns': 'front_panel',
                    'managed_by': 'platform',
                    'prefix': ''
                }
            }
        }

    def _autopull(self):
        """
        Autopulls the docker image of the node, if necessary.
        """
        # Search for image in available images
        for tags in [img['RepoTags'] for img in self._client.images()]:
            # Docker py can return repo tags as None
            if tags and self._image in tags:
                return

        # Determine image parts
        registry = self._registry
        image = self._image
        tag = 'latest'

        if ':' in image:
            image, tag = image.split(':')

        # Pull image
        pull_uri = image
        if registry:
            pull_uri = '{}/{}'.format(registry, image)
        pull_name = '{}:{}'.format(pull_uri, tag)

        log.info('Trying to pull image {} ...'.format(pull_name))

        last = ''
        for line in self._client.pull(pull_uri, tag=tag, stream=True):
            last = line
        status = loads(last.decode('utf8'))

        log.debug('Pulling result :: {}'.format(status))

        if 'error' in status:
            raise Exception(status['error'])

        # Retag if required
        if pull_name != self._image:
            if not self._client.tag(pull_name, image, tag):
                raise Exception(
                    'Error when tagging image {} with tag {}:{}'.format(
                        pull_name, image, tag))

            log.info('Tagged image {} with tag {}:{}'.format(
                pull_name, image, tag))

    def _docker_exec(self, command):
        """
        Execute a command inside the docker.

        :param str command: The command to execute.
        """
        log.debug('[{}]._docker_exec(\'{}\') ::'.format(
            self._container_id, command))

        response = check_output(
            shsplit('docker exec {container_id} {command}'.format(
                container_id=self._container_id,
                command=command.strip()))).decode('utf8')

        log.debug(response)
        return response

    def _get_services_address(self):
        """
        Get the service address of the node using Docker's inspect mechanism
        to grab OOBM interface address.

        :return: The address (IP or FQDN) of the services interface (oobm).
        :rtype: str
        """
        network_name = self._container_name + '_oobm'
        address = self._client.inspect_container(
            self.container_id
        )['NetworkSettings']['Networks'][network_name]['IPAddress']
        return address

    def notify_add_biport(self, node, biport):
        """
        Get notified that a new biport was added to this engine node.

        :param node: The specification node that spawn this engine node.
        :type node: pynml.nml.Node
        :param biport: The specification bidirectional port added.
        :type biport: pynml.nml.BidirectionalPort
        :rtype: str
        :return: The assigned interface name of the port.
        """

        network_config = self._get_network_config()

        category = biport.metadata.get('category',
                                       network_config['default_category'])
        category_config = network_config['mapping'][category]

        if category_config['managed_by'] == 'docker':
            netname = category_config.get(
                'connect_to', '{}_{}'.format(self._container_name, category))
            return get_iface_name(self, netname)
        else:
            return biport.metadata.get('label', biport.identifier)

    def notify_add_bilink(self, nodeport, bilink):
        """
        Get notified that a new bilink was added to a port of this engine node.

        :param nodeport: A tuple with the specification node and port being
         linked.
        :type nodeport: (pynml.nml.Node, pynml.nml.BidirectionalPort)
        :param bilink: The specification bidirectional link added.
        :type bilink: pynml.nml.BidirectionalLink
        """

    def notify_post_build(self):
        """
        Get notified that the post build stage of the topology build was
        reached.
        """
        # Log container data
        image_data = self._client.inspect_image(image=self._image)
        log.info('Started container {}:\n'
                 '    Image name: {}\n'
                 '    Image id: {}\n'
                 '    Image creation date: {}\n'
                 '    Image tags: {}'.format(
                     self._container_name, self._image,
                     image_data.get('Id', '????'),
                     image_data.get('Created', '????'),
                     ', '.join(image_data.get('RepoTags', []))))
        container_data = self._client.inspect_container(
            container=self._container_id)
        log.debug(container_data)

    def start(self):
        """
        Start the docker node and configure a netns for it.
        """
        self._client.start(self._container_id)
        self._pid = self._client.inspect_container(
            self._container_id)['State']['Pid']

    def stop(self):
        """
        Request container to stop.
        """
        self._client.stop(self._container_id)
        self._client.wait(self._container_id)
        self._client.remove_container(self._container_id)

    def disable(self):
        """
        Disable the node.

        In Docker implementation this pauses the container.
        """
        for portlbl in self.ports:
            self.set_port_state(portlbl, False)
        self._client.pause(self._container_id)

    def enable(self):
        """
        Enable the node.

        In Docker implementation this unpauses the container.
        """
        self._client.unpause(self._container_id)
        for portlbl in self.ports:
            self.set_port_state(portlbl, True)

    def set_port_state(self, portlbl, state):
        """
        Set the given port label to the given state.

        :param str portlbl: The label of the port.
        :param bool state: True for up, False for down.
        """
        iface = self.ports[portlbl]
        state = 'up' if state else 'down'

        command = ('ip netns exec front_panel '
                   'ip link set dev {iface} {state}'.format(**locals()))
        self._docker_exec(command)
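
Since __init__ is marked @abstractmethod, DockerNode is meant to be subclassed; a minimal sketch of a concrete node that overrides _get_network_config follows. The SwitchNode name, image, and category layout are illustrative, not part of the framework:

class SwitchNode(DockerNode):
    """Hypothetical node: front-panel ports live in their own namespace,
    while the OOBM interface joins a docker-managed 'oobm' network."""

    def __init__(self, identifier, image='myorg/switch:latest', **kwargs):
        super(SwitchNode, self).__init__(identifier, image=image, **kwargs)

    def _get_network_config(self):
        return {
            'default_category': 'front_panel',
            'mapping': {
                'oobm': {
                    'netns': 'oobmns',
                    'managed_by': 'docker',
                    'connect_to': 'oobm',
                    'prefix': ''
                },
                'front_panel': {
                    'netns': 'front',
                    'managed_by': 'platform',
                    'prefix': 'f_'
                }
            }
        }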
Example #38
0
def run_top_container(client: APIClient):
    c = client.create_container(
        constant.ALPINE, command="top", detach=True, tty=True, name="top"
    )
    client.start(c.get("Id"))
    return c.get("Id")
Example #39
0
def remove_all_images(client: APIClient):
    for image in client.images(quiet=True):
        client.remove_image(image, force=True)
Example #40
0
class DockerHelper:
    def __init__(self, config):
        super().__init__()
        self.__padlock = threading.Lock()
        self.__check_in_progress = False
        self.__config = config
        self.__client = APIClient(base_url=config.docker_socket, timeout=config.docker_req_timeout_sec)
        self.__params_cache = {}
        self.last_check_containers_run_end_timestamp = datetime.datetime.min
        self.last_check_containers_run_start_timestamp = datetime.datetime.min
        self.last_check_containers_run_time = datetime.timedelta.min
        self.last_periodic_run_ok = False

    def check_container(self, container_id, remove_from_cache=False):
        try:
            if remove_from_cache:
                self.remove_from_cache(container_id)

            if not self.__config.disable_params:
                params = self.get_params(container_id)
            else:
                params = {}
            if not self.__config.disable_metrics:
                logger.debug("[{0}] Starting to fetch metrics for {1}".format(threading.current_thread().name,
                                                                              container_id))
                metrics = self.__client.stats(container=container_id, decode=True, stream=False)
            else:
                metrics = {}
            logger.debug("[{0}] Fetched data for container {1}".format(threading.current_thread().name, container_id))
        except NotFound as e:
            logger.warning("Container {0} not found - {1}.".format(container_id, e))
            return None
        except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
            logger.error("Communication error when fetching info about container {0}: {1}".format(container_id, e))
            return None
        except Exception as e:
            logger.error("Unexpected error when fetching info about container {0}: {1}".format(container_id, e))
            return None
        return Container(container_id, params, metrics, 0)

    def check_containers(self):
        with self.__padlock:
            if self.__check_in_progress:
                logger.warning("[{0}] Previous check did not yet complete, consider increasing CHECK_INTERVAL_S"
                               .format(threading.current_thread().name))
                return
            self.__check_in_progress = True
        logger.debug("Periodic check start: connecting to get the list of containers")
        self.last_check_containers_run_start_timestamp = datetime.datetime.utcnow()
        try:
            containers = self.__client.containers(quiet=True)
            logger.debug("[{0}] Fetched containers list from docker daemon".format(threading.current_thread().name))
        except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
            logger.error("Timeout while trying to get list of containers from docker: {0}".format(e))
            with self.__padlock:
                self.__check_in_progress = False
            self.last_periodic_run_ok = False
            return
        except Exception as e:
            logger.error("Unexpected error while trying to get list of containers from docker: {0}".format(e))
            with self.__padlock:
                self.__check_in_progress = False
            self.last_periodic_run_ok = False
            return
        ids = [container['Id'] for container in containers]
        for container_id in ids:
            container = self.check_container(container_id)
            if container is None:
                continue
            yield container
        logger.debug("Containers checked")
        if self.__config.cache_params:
            logger.debug("Purging cache")
            self.purge_cache(ids)
        self.last_periodic_run_ok = True
        self.last_check_containers_run_end_timestamp = datetime.datetime.utcnow()
        self.last_check_containers_run_time = self.last_check_containers_run_end_timestamp \
            - self.last_check_containers_run_start_timestamp
        logger.debug("Periodic check done")
        with self.__padlock:
            self.__check_in_progress = False

    def get_params(self, container_id):
        if self.__config.cache_params and container_id in self.__params_cache:
            logger.debug("Returning cached params for container {0}".format(container_id))
            return self.__params_cache[container_id]

        logger.debug("[{0}] Starting to fetch params for {1}".format(threading.current_thread().name, container_id))
        try:
            params = self.__client.inspect_container(container_id)
        except NotFound as e:
            logger.warning("Container {0} not found - {1}.".format(container_id, e))
            return None
        except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
            logger.error("Communication error when fetching params for container {0}: {1}".format(container_id, e))
            return {}
        except Exception as e:
            logger.error("Unexpected error when fetching params for container {0}: {1}".format(container_id, e))
            return {}
        logger.debug("[{0}] Params fetched for {1}".format(threading.current_thread().name, container_id))
        if not self.__config.cache_params:
            return params

        logger.debug("[{0}] Storing params of {1} in cache".format(threading.current_thread().name, container_id))
        self.__params_cache[container_id] = params
        return params

    def purge_cache(self, running_container_ids):
        diff = [c for c in self.__params_cache.keys() if c not in running_container_ids]
        for cid in diff:
            self.__params_cache.pop(cid, None)

    def remove_from_cache(self, container_id):
        self.__params_cache.pop(container_id, None)

    def get_events_observable(self):
        successful = False
        ev = None
        while not successful:
            try:
                ev = self.__client.events(decode=True)
                # Only mark success once the subscription call went through;
                # otherwise the except branches would never be retried.
                successful = True
            except (ReadTimeout, ProtocolError, JSONDecodeError) as e:
                logger.error("Communication error when subscribing for container events, retrying in 5s: {0}".format(e))
                time.sleep(5)
            except Exception as e:
                logger.error("Unexpected error when subscribing for container events, retrying in 5s: {0}".format(e))
                time.sleep(5)
        return ev

    def kill_container(self, container):
        try:
            self.__client.stop(container.params['Id'])
        except (ReadTimeout, ProtocolError) as e:
            logger.error("Communication error when stopping container {0}: {1}".format(container.cid, e))
        except Exception as e:
            logger.error("Unexpected error when stopping container {0}: {1}".format(container.cid, e))
Example #41
0
    def get_conn(self) -> APIClient:
        client = APIClient(base_url=self.__base_url,
                           version=self.__version,
                           tls=self.__tls)
        self.__login(client)
        return client
Example #42
0
class DockerBuilder(BuilderInterface):
    """A builder using the local Docker client"""
    def __init__(self,
                 repository,
                 image_name=DEFAULT_IMAGE_NAME,
                 image_tag=None,
                 base_image=None,
                 dockerfile_path=None):

        self.repository = repository
        self.image_name = image_name
        self.base_image = base_image
        self.dockerfile_path = dockerfile_path

        if image_tag is None:
            self.image_tag = utils.get_unique_tag()
        else:
            self.image_tag = image_tag
        self.full_image_name = utils.get_image_full_name(
            self.repository, self.image_name, self.image_tag)
        self.docker_client = None

    def generate_pod_spec(self):
        """return a V1PodSpec initialized with the proper container"""

        return client.V1PodSpec(
            containers=[
                client.V1Container(
                    name='model',
                    image=self.full_image_name,
                )
            ],
            restart_policy='Never')

    def execute(self):
        write_dockerfile(dockerfile_path=self.dockerfile_path,
                         base_image=self.base_image)
        self.docker_client = APIClient(version='auto')
        self.build()
        self.publish()

    def build(self):
        logger.warning('Building docker image {}...'.format(self.full_image_name))
        bld = self.docker_client.build(path='.',
                                       tag=self.full_image_name,
                                       encoding='utf-8')

        for line in bld:
            self._process_stream(line)

    def publish(self):
        logger.warning('Publishing image {}...'.format(self.full_image_name))
        for line in self.docker_client.push(self.full_image_name, stream=True):
            self._process_stream(line)

    def _process_stream(self, line):
        raw = line.decode('utf-8').strip()
        lns = raw.split('\n')
        for ln in lns:
            try:
                ljson = json.loads(ln)

                if ljson.get('error'):
                    msg = str(ljson.get('error', ljson))
                    logger.error('Build failed: ' + msg)
                    raise Exception('Image build failed: ' + msg)
                else:
                    if ljson.get('stream'):
                        msg = 'Build output: {}'.format(
                            ljson['stream'].strip())
                    elif ljson.get('status'):
                        msg = 'Push output: {} {}'.format(
                            ljson['status'], ljson.get('progress'))
                    elif ljson.get('aux'):
                        msg = 'Push finished: {}'.format(ljson.get('aux'))
                    else:
                        msg = str(ljson)
                    logger.info(msg)

            except json.JSONDecodeError:
                logger.warning('JSON decode error: {}'.format(ln))
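
For reference, the lines handed to _process_stream() are newline-separated JSON objects from the daemon; the following made-up but representative payloads exercise its branches (the error line would raise):

sample_lines = [
    b'{"stream": "Step 1/4 : FROM python:3.8\\n"}',        # build output
    b'{"status": "Pushing", "progress": "[=====>    ]"}',   # push progress
    b'{"aux": {"Digest": "sha256:0123abcd"}}',              # push result
    b'{"error": "manifest unknown"}',                       # raises Exception
]

builder = DockerBuilder(repository='registry.example.com/models')  # hypothetical
for line in sample_lines[:3]:
    builder._process_stream(line)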
Example #43
0
class DockerBuilder(object):
    LATEST_IMAGE_TAG = 'latest'
    WORKDIR = '/code'

    def __init__(self,
                 build_job,
                 repo_path,
                 from_image,
                 copy_code=True,
                 build_steps=None,
                 env_vars=None,
                 dockerfile_name='Dockerfile'):
        self.build_job = build_job
        self.job_uuid = build_job.uuid.hex
        self.job_name = build_job.unique_name
        self.from_image = from_image
        self.image_name = get_image_name(self.build_job)
        self.image_tag = self.job_uuid
        self.folder_name = repo_path.split('/')[-1]
        self.repo_path = repo_path
        self.copy_code = copy_code

        self.build_path = '/'.join(self.repo_path.split('/')[:-1])
        self.build_steps = get_list(build_steps)
        self.env_vars = get_list(env_vars)
        self.dockerfile_path = os.path.join(self.build_path, dockerfile_name)
        self.polyaxon_requirements_path = self._get_requirements_path()
        self.polyaxon_setup_path = self._get_setup_path()
        self.docker = APIClient(version='auto')
        self.registry_host = None
        self.docker_url = None

    def get_tagged_image(self):
        return get_tagged_image(self.build_job)

    def check_image(self):
        return self.docker.images(self.get_tagged_image())

    def clean(self):
        # Clean dockerfile
        delete_path(self.dockerfile_path)

    def login_internal_registry(self):
        try:
            self.docker.login(username=settings.REGISTRY_USER,
                              password=settings.REGISTRY_PASSWORD,
                              registry=settings.REGISTRY_HOST,
                              reauth=True)
        except DockerException as e:
            _logger.exception('Failed to connect to registry %s\n', e)

    def login_private_registries(self):
        if not settings.PRIVATE_REGISTRIES:
            return

        for registry in settings.PRIVATE_REGISTRIES:
            self.docker.login(username=registry.user,
                              password=registry.password,
                              registry=registry.host,
                              reauth=True)

    @staticmethod
    def _prepare_log_lines(log_line):
        raw = log_line.decode('utf-8').strip()
        raw_lines = raw.split('\n')
        log_lines = []
        for raw_line in raw_lines:
            try:
                json_line = json.loads(raw_line)

                if json_line.get('error'):
                    raise DockerBuilderError(str(json_line.get('error', json_line)))
                else:
                    if json_line.get('stream'):
                        log_lines.append('Build: {}'.format(json_line['stream'].strip()))
                    elif json_line.get('status'):
                        log_lines.append('Push: {} {}'.format(
                            json_line['status'],
                            json_line.get('progress')
                        ))
                    elif json_line.get('aux'):
                        log_lines.append('Push finished: {}'.format(json_line.get('aux')))
                    else:
                        log_lines.append(str(json_line))
            except json.JSONDecodeError:
                log_lines.append('JSON decode error: {}'.format(raw_line))
        return log_lines

    def _handle_logs(self, log_lines):
        publisher.publish_build_job_log(
            log_lines=log_lines,
            job_uuid=self.job_uuid,
            job_name=self.job_name
        )

    def _handle_log_stream(self, stream):
        log_lines = []
        last_emit_time = time.time()
        try:
            for log_line in stream:
                log_lines += self._prepare_log_lines(log_line)
                publish_cond = (
                    len(log_lines) == publisher.MESSAGES_COUNT or
                    (log_lines and time.time() - last_emit_time > publisher.MESSAGES_TIMEOUT)
                )
                if publish_cond:
                    self._handle_logs(log_lines)
                    log_lines = []
                    last_emit_time = time.time()
            if log_lines:
                self._handle_logs(log_lines)
        except (BuildError, APIError, DockerBuilderError) as e:
            self._handle_logs(['Build Error {}'.format(e)])
            return False

        return True

    def _get_requirements_path(self):
        def get_requirements(requirements_file):
            requirements_path = os.path.join(self.repo_path, requirements_file)
            if os.path.isfile(requirements_path):
                return os.path.join(self.folder_name, requirements_file)

        requirements = get_requirements('polyaxon_requirements.txt')
        if requirements:
            return requirements

        requirements = get_requirements('requirements.txt')
        if requirements:
            return requirements
        return None

    def _get_setup_path(self):
        def get_setup(setup_file):
            setup_file_path = os.path.join(self.repo_path, setup_file)
            has_setup = os.path.isfile(setup_file_path)
            if has_setup:
                st = os.stat(setup_file_path)
                os.chmod(setup_file_path, st.st_mode | stat.S_IEXEC)
                return os.path.join(self.folder_name, setup_file)

        setup_file = get_setup('polyaxon_setup.sh')
        if setup_file:
            return setup_file

        setup_file = get_setup('setup.sh')
        if setup_file:
            return setup_file
        return None

    def render(self):
        docker_template = jinja2.Template(POLYAXON_DOCKER_TEMPLATE)
        return docker_template.render(
            from_image=self.from_image,
            polyaxon_requirements_path=self.polyaxon_requirements_path,
            polyaxon_setup_path=self.polyaxon_setup_path,
            build_steps=self.build_steps,
            env_vars=self.env_vars,
            folder_name=self.folder_name,
            workdir=self.WORKDIR,
            nvidia_bin=settings.MOUNT_PATHS_NVIDIA.get('bin'),
            copy_code=self.copy_code
        )

    def build(self, nocache=False, memory_limit=None):
        _logger.debug('Starting build in `%s`', self.repo_path)
        # Checkout to the correct commit
        if self.image_tag != self.LATEST_IMAGE_TAG:
            git.checkout_commit(repo_path=self.repo_path, commit=self.image_tag)

        limits = {
            # Always disable memory swap for building, since mostly
            # nothing good can come of that.
            'memswap': -1
        }
        if memory_limit:
            limits['memory'] = memory_limit

        # Create DockerFile
        with open(self.dockerfile_path, 'w') as dockerfile:
            rendered_dockerfile = self.render()
            celery_app.send_task(
                SchedulerCeleryTasks.BUILD_JOBS_SET_DOCKERFILE,
                kwargs={'build_job_uuid': self.job_uuid, 'dockerfile': rendered_dockerfile})
            dockerfile.write(rendered_dockerfile)

        stream = self.docker.build(
            path=self.build_path,
            tag=self.get_tagged_image(),
            forcerm=True,
            rm=True,
            pull=True,
            nocache=nocache,
            container_limits=limits)
        return self._handle_log_stream(stream=stream)

    def push(self):
        stream = self.docker.push(self.image_name, tag=self.image_tag, stream=True)
        return self._handle_log_stream(stream=stream)
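
Both build() and push() funnel their stream through _handle_log_stream() and return its boolean, so a caller can short-circuit the push on a failed build. A sketch, assuming a fully configured build_job and a checked-out repo at a hypothetical path:

builder = DockerBuilder(build_job=build_job,          # assumed to exist
                        repo_path='/repos/project',   # hypothetical path
                        from_image='python:3.8')
builder.login_internal_registry()
if builder.build(nocache=False, memory_limit='2g') and builder.push():
    _logger.info('Image %s pushed', builder.get_tagged_image())
builder.clean()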
Example #44
0
class DockerOperator(BaseOperator):
    """
    Execute a command inside a docker container.

    A temporary directory is created on the host and
    mounted into a container to allow storing files
    that together exceed the default disk size of 10GB in a container.
    The path to the mounted directory can be accessed
    via the environment variable ``AIRFLOW_TMP_DIR``.

    If a login to a private registry is required prior to pulling the image, a
    Docker connection needs to be configured in Airflow and the connection ID
    be provided with the parameter ``docker_conn_id``.

    :param image: Docker image from which to create the container.
        If image tag is omitted, "latest" will be used.
    :type image: str
    :param api_version: Remote API version. Set to ``auto`` to automatically
        detect the server's version.
    :type api_version: str
    :param auto_remove: Auto-removal of the container on daemon side when the
        container's process exits.
        The default is False.
    :type auto_remove: bool
    :param command: Command to be run in the container. (templated)
    :type command: str or list
    :param cpus: Number of CPUs to assign to the container.
        This value gets multiplied with 1024. See
        https://docs.docker.com/engine/reference/run/#cpu-share-constraint
    :type cpus: float
    :param dns: Docker custom DNS servers
    :type dns: list[str]
    :param dns_search: Docker custom DNS search domain
    :type dns_search: list[str]
    :param docker_url: URL of the host running the docker daemon.
        Default is unix://var/run/docker.sock
    :type docker_url: str
    :param environment: Environment variables to set in the container. (templated)
    :type environment: dict
    :param force_pull: Pull the docker image on every run. Default is False.
    :type force_pull: bool
    :param mem_limit: Maximum amount of memory the container can use.
        Either a float value, which represents the limit in bytes,
        or a string like ``128m`` or ``1g``.
    :type mem_limit: float or str
    :param network_mode: Network mode for the container.
    :type network_mode: str
    :param tls_ca_cert: Path to a PEM-encoded certificate authority
        to secure the docker connection.
    :type tls_ca_cert: str
    :param tls_client_cert: Path to the PEM-encoded certificate
        used to authenticate docker client.
    :type tls_client_cert: str
    :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client.
    :type tls_client_key: str
    :param tls_hostname: Hostname to match against
        the docker server certificate or False to disable the check.
    :type tls_hostname: str or bool
    :param tls_ssl_version: Version of SSL to use when communicating with docker daemon.
    :type tls_ssl_version: str
    :param tmp_dir: Mount point inside the container to
        a temporary directory created on the host by the operator.
        The path is also made available via the environment variable
        ``AIRFLOW_TMP_DIR`` inside the container.
    :type tmp_dir: str
    :param user: Default user inside the docker container.
    :type user: int or str
    :param volumes: List of volumes to mount into the container, e.g.
        ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
    :type volumes: list
    :param working_dir: Working directory to
        set on the container (equivalent to the -w switch of the docker client)
    :type working_dir: str
    :param xcom_all: Push all the stdout or just the last line.
        The default is False (last line).
    :type xcom_all: bool
    :param docker_conn_id: ID of the Airflow connection to use
    :type docker_conn_id: str
    :param shm_size: Size of ``/dev/shm`` in bytes. The size must be
        greater than 0. If omitted uses system default.
    :type shm_size: int
    """
    template_fields = ('command', 'environment',)
    template_ext = ('.sh', '.bash',)

    @apply_defaults
    def __init__(
            self,
            image,
            api_version=None,
            command=None,
            cpus=1.0,
            docker_url='unix://var/run/docker.sock',
            environment=None,
            force_pull=False,
            mem_limit=None,
            network_mode=None,
            tls_ca_cert=None,
            tls_client_cert=None,
            tls_client_key=None,
            tls_hostname=None,
            tls_ssl_version=None,
            tmp_dir='/tmp/airflow',
            user=None,
            volumes=None,
            working_dir=None,
            xcom_all=False,
            docker_conn_id=None,
            dns=None,
            dns_search=None,
            auto_remove=False,
            shm_size=None,
            *args,
            **kwargs):

        super().__init__(*args, **kwargs)
        self.api_version = api_version
        self.auto_remove = auto_remove
        self.command = command
        self.cpus = cpus
        self.dns = dns
        self.dns_search = dns_search
        self.docker_url = docker_url
        self.environment = environment or {}
        self.force_pull = force_pull
        self.image = image
        self.mem_limit = mem_limit
        self.network_mode = network_mode
        self.tls_ca_cert = tls_ca_cert
        self.tls_client_cert = tls_client_cert
        self.tls_client_key = tls_client_key
        self.tls_hostname = tls_hostname
        self.tls_ssl_version = tls_ssl_version
        self.tmp_dir = tmp_dir
        self.user = user
        self.volumes = volumes or []
        self.working_dir = working_dir
        self.xcom_all = xcom_all
        self.docker_conn_id = docker_conn_id
        self.shm_size = shm_size
        if kwargs.get('xcom_push') is not None:
            raise AirflowException("'xcom_push' was deprecated, use 'BaseOperator.do_xcom_push' instead")

        self.cli = None
        self.container = None

    def get_hook(self):
        return DockerHook(
            docker_conn_id=self.docker_conn_id,
            base_url=self.docker_url,
            version=self.api_version,
            tls=self.__get_tls_config()
        )

    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = json.loads(l.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'], stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')

    def get_command(self):
        # command may be a str or a list; only parse string-encoded lists.
        if isinstance(self.command, str) and self.command.strip().startswith('['):
            commands = ast.literal_eval(self.command)
        else:
            commands = self.command
        return commands

    def on_kill(self):
        if self.cli is not None:
            self.log.info('Stopping docker container')
            self.cli.stop(self.container['Id'])

    def __get_tls_config(self):
        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(
                ca_cert=self.tls_ca_cert,
                client_cert=(self.tls_client_cert, self.tls_client_key),
                verify=True,
                ssl_version=self.tls_ssl_version,
                assert_hostname=self.tls_hostname
            )
            self.docker_url = self.docker_url.replace('tcp://', 'https://')
        return tls_config
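
A minimal sketch of wiring this operator into a DAG; the DAG parameters are illustrative and match the Airflow-1.10-era API the operator code above targets:

from datetime import datetime

from airflow import DAG

with DAG(dag_id='docker_example',
         start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    run_in_container = DockerOperator(
        task_id='run_in_container',
        image='python:3.8-slim',
        command='echo "hello from the container"',  # templated field
        environment={'MY_VAR': 'value'},            # templated field
        auto_remove=True,
        network_mode='bridge',
    )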
Example #46
0
class DockerCli:

    def __init__(self):
        self.client = APIClient('unix://var/run/docker.sock')
        self.filtered_statuses = ('running', 'restarting', 'paused', 'exited')
        self.config = Config()

    def _get_containers(self, filters=None):
        filters = filters if filters else dict()

        for status in self.filtered_statuses:
            filters.update({'status': status})

            for container in self.client.containers(
                    all=True,
                    filters=filters
            ):
                img_name, _, img_version = container['Image'].partition(':')
                service = self.config.get_service_by_name(img_name)

                if service:
                    instance = dict()
                    instance['created'] = container['Created']
                    instance['id'] = container['Id']
                    instance['image'] = img_name

                    # Compare ports with ==, not 'is'; identity checks on ints
                    # are unreliable. Stop at the first match so a later
                    # non-matching port does not reset the binding.
                    instance['port'] = None
                    for con_port in container['Ports']:
                        if service['port'] == con_port['PrivatePort']:
                            instance['port'] = con_port.get('PublicPort')
                            break

                    instance['state'] = container['State']
                    instance['status'] = container['Status']
                    instance['version'] = img_version

                    yield instance

        return

    def get_all_containers(self):
        containers = []
        for container in self._get_containers():
            if container:
                containers.append(container)

        return containers

    def get_container(self, by_id):
        for container in self._get_containers({'id': by_id}):
            return container

        raise NotFoundContainerException(
            'Container was not found: {}'.format(by_id)
        )

    def create_container(self, image):
        service = self.config.get_service_by_name(image)
        if service:
            container = self.client.create_container(
                image='{0}:{1}'.format(image, service['version']),
                ports=[service['port']],
                detach=True,
                host_config=self.client.create_host_config(
                    port_bindings={service['port']: None}
                )
            )
            self.client.start(container=container['Id'])
            return self.get_container(container['Id'])

        raise NotFoundImageException('Image was not found: {}'.format(image))

    def remove_container(self, by_id):
        try:
            self.client.remove_container(
                container=by_id,
                force=True,
                v=True
            )
        except errors.NotFound as e:
            raise NotFoundContainerException(e)

        return {'status': 'OK'}
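
In create_container() above, port_bindings={service['port']: None} asks the daemon to pick an ephemeral host port; after start(), the mapping can be read back with the low-level port() call. A sketch, assuming a hypothetical image that listens on 8080:

client = APIClient('unix://var/run/docker.sock')
container = client.create_container(
    image='myservice:1.0',  # hypothetical image listening on 8080
    ports=[8080],
    detach=True,
    host_config=client.create_host_config(port_bindings={8080: None})
)
client.start(container=container['Id'])

# port() returns the host bindings for a private port, e.g.
# [{'HostIp': '0.0.0.0', 'HostPort': '49153'}]
print(client.port(container['Id'], 8080)[0]['HostPort'])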
Example #47
0
    def execute(self, context):
        self.log.info('Starting docker container from image %s', self.image)

        tls_config = self.__get_tls_config()

        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(
                base_url=self.docker_url,
                version=self.api_version,
                tls=tls_config
            )

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info('Pulling docker image %s', self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = json.loads(l.decode('utf-8').strip())
                if 'status' in output:
                    self.log.info("%s", output['status'])

        with TemporaryDirectory(prefix='airflowtmp', dir=self.host_tmp_dir) as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                name=self.container_name,
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir
            )
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.attach(container=self.container['Id'],
                                        stdout=True,
                                        stderr=True,
                                        stream=True):
                line = line.strip()
                if hasattr(line, 'decode'):
                    line = line.decode('utf-8')
                self.log.info(line)

            result = self.cli.wait(self.container['Id'])
            if result['StatusCode'] != 0:
                raise AirflowException('docker container failed: ' + repr(result))

            # duplicated conditional logic because of expensive operation
            if self.do_xcom_push:
                return self.cli.logs(container=self.container['Id']) \
                    if self.xcom_all else line.encode('utf-8')
Example #48
0
def cleanup():
    client = Client(version='auto', **kwargs_from_env())
    test_images = client.images(TEST_ORG + "/*")
    for image in test_images:
        client.remove_image(image)
Example #49
0
def docker():
    if os.environ.get('DOCKER_MACHINE_IP') is not None:
        docker = from_env(assert_hostname=False)
    else:
        docker = APIClient(version='auto')
    return docker
Example #50
0
class HubIO:
    """:class:`HubIO` provides the way to interact with Jina Hub registry.

    You can use it with CLI to package a directory into a Jina Hub image and publish it to the world.

    Examples:
        - :command:`jina hub build my_pod/` build the image
        - :command:`jina hub build my_pod/ --push` build the image and push to the public registry
        - :command:`jina hub pull jinahub/pod.dummy_mwu_encoder:0.0.6` to download the image
    """
    def __init__(self, args: 'argparse.Namespace'):
        """Create a new HubIO.

        :param args: arguments
        """
        self.logger = JinaLogger(self.__class__.__name__, **vars(args))
        self.args = args
        self._load_docker_client()

    def _load_docker_client(self):
        with ImportExtensions(
                required=False,
                help_text='missing "docker" dependency, available CLIs limited '
                          'to "jina hub [list, new]"; to enable full CLI, '
                          'please do pip install "jina[docker]"',
        ):
            import docker
            from docker import APIClient, DockerClient

            self._client: DockerClient = docker.from_env()

            # low-level client
            self._raw_client = APIClient(base_url='unix://var/run/docker.sock')

    def new(self, no_input: bool = False) -> None:
        """
        Create a new executor using cookiecutter template.

        :param no_input: Argument to avoid prompting dialogue (just to be used for testing)
        """
        with ImportExtensions(required=True):
            from cookiecutter.main import cookiecutter
            import click  # part of cookiecutter

        cookiecutter_template = self.args.template
        if self.args.type == 'app':
            cookiecutter_template = 'https://github.com/jina-ai/cookiecutter-jina.git'
        elif self.args.type == 'pod':
            cookiecutter_template = (
                'https://github.com/jina-ai/cookiecutter-jina-hub.git')

        try:
            cookiecutter(
                template=cookiecutter_template,
                overwrite_if_exists=self.args.overwrite,
                output_dir=self.args.output_dir,
                no_input=no_input,
            )
        except click.exceptions.Abort:
            self.logger.info('nothing is created, bye!')

    def login(self) -> None:
        """Login using Github Device flow to allow push access to Jina Hub Registry."""
        import requests

        with resource_stream('jina', '/'.join(
            ('resources', 'hubapi.yml'))) as fp:
            hubapi_yml = JAML.load(fp)

        client_id = hubapi_yml['github']['client_id']
        scope = hubapi_yml['github']['scope']
        device_code_url = hubapi_yml['github']['device_code_url']
        access_token_url = hubapi_yml['github']['access_token_url']
        grant_type = hubapi_yml['github']['grant_type']
        login_max_retry = hubapi_yml['github']['login_max_retry']

        headers = {'Accept': 'application/json'}
        code_request_body = {'client_id': client_id, 'scope': scope}
        try:
            self.logger.info(
                'Jina Hub login will use Github Device to generate one time token'
            )
            response = requests.post(url=device_code_url,
                                     headers=headers,
                                     data=code_request_body)
            if response.status_code != requests.codes.ok:
                self.logger.error(
                    'cannot reach github server. please make sure you\'re connected to the internet'
                )

            code_response = response.json()
            device_code = code_response['device_code']
            user_code = code_response['user_code']
            verification_uri = code_response['verification_uri']

            try:
                self.logger.info(
                    f'You should see a "Device Activation" page open in your browser. '
                    f'If not, please go to {colored(verification_uri, "cyan", attrs=["underline"])}'
                )
                self.logger.info(
                    'Please follow the steps:\n\n'
                    f'1. Enter the following code to that page: {colored(user_code, "cyan", attrs=["bold"])}\n'
                    '2. Click "Continue"\n'
                    '3. Come back to this terminal\n')
                # allow some time for the user to view the message
                time.sleep(0.5)
                webbrowser.open(verification_uri, new=2)
            except:
                pass  # intentional pass, browser support isn't cross-platform

            access_request_body = {
                'client_id': client_id,
                'device_code': device_code,
                'grant_type': grant_type,
            }

            for _ in range(login_max_retry):
                access_token_response = requests.post(
                    url=access_token_url,
                    headers=headers,
                    data=access_request_body).json()
                if access_token_response.get('error',
                                             None) == 'authorization_pending':
                    self.logger.warning('still waiting for authorization')
                    countdown(
                        10,
                        reason=colored('re-fetch access token',
                                       'cyan',
                                       attrs=['bold', 'reverse']),
                    )
                elif 'access_token' in access_token_response:
                    token = {
                        'access_token': access_token_response['access_token']
                    }
                    with open(credentials_file(), 'w') as cf:
                        JAML.dump(token, cf)
                    self.logger.success('successfully logged in!')
                    break
            else:
                self.logger.error(f'max retries {login_max_retry} reached')

        except KeyError as exp:
            self.logger.error(f'can not read the key in response: {exp}')

    def list(self) -> Optional[List[Dict[str, Any]]]:
        """List all hub images given a filter specified by CLI.

        :return: list of dictionaries of images
        """
        if self.args.local_only:
            return _list_local(self.logger)
        else:
            return _list(
                logger=self.logger,
                image_name=self.args.name,
                image_kind=self.args.kind,
                image_type=self.args.type,
                image_keywords=self.args.keywords,
            )

    def push(
        self,
        name: Optional[str] = None,
        build_result: Optional[Dict] = None,
    ) -> None:
        """Push image to Jina Hub.

        :param name: name of image
        :param build_result: dictionary containing the build summary
        :return: None
        """
        name = name or self.args.name
        try:
            # check if image exists
            # fail if it does
            if (self.args.no_overwrite and build_result
                    and self._image_version_exists(
                        build_result['manifest_info']['name'],
                        build_result['manifest_info']['version'],
                        jina_version,
                    )):
                raise ImageAlreadyExists(
                    f'Image with name {name} already exists. Will NOT overwrite.'
                )
            else:
                self.logger.debug(
                    f'Image with name {name} does not exist. Pushing now...')

            self._push_docker_hub(name)

            if not build_result:
                file_path = get_summary_path(name)
                if os.path.isfile(file_path):
                    with open(file_path) as f:
                        build_result = json.load(f)
                else:
                    self.logger.error(
                        'can not find the build summary file. '
                        'please use "jina hub build" to build the image first '
                        'before pushing.')
                    return

            if build_result:
                if build_result.get('is_build_success', False):
                    _register_to_mongodb(logger=self.logger,
                                         summary=build_result)

                if build_result.get('details', None) and build_result.get(
                        'build_history', None):
                    self._write_slack_message(
                        build_result,
                        build_result['details'],
                        build_result['build_history'],
                    )

        except Exception as e:
            self.logger.error(f'Error when trying to push image {name}: {e!r}')
            if isinstance(e, ImageAlreadyExists):
                raise e

    def _push_docker_hub(self, name: Optional[str] = None) -> None:
        """Push to Docker Hub.

        :param name: name of image
        """
        check_registry(self.args.registry, name, self.args.repository)
        self._check_docker_image(name)
        self._docker_login()
        with ProgressBar(task_name=f'pushing {name}', batch_unit='') as t:
            for line in self._client.images.push(name,
                                                 stream=True,
                                                 decode=True):
                t.update(1)
                self.logger.debug(line)
        self.logger.success(f'🎉 {name} is now published!')

        share_link = f'https://api.jina.ai/hub/?jh={urllib.parse.quote_plus(name)}'

        try:
            webbrowser.open(share_link, new=2)
        except:
            # pass intentionally, don't want to bother users when opening the browser fails
            pass
        finally:
            self.logger.info(
                f'Check out the usage {colored(share_link, "cyan", attrs=["underline"])} and share it with others!'
            )

    def pull(self) -> None:
        """Pull docker image."""
        check_registry(self.args.registry, self.args.name,
                       self.args.repository)
        try:
            self._docker_login()
            with TimeContext(f'pulling {self.args.name}', self.logger):
                image = self._client.images.pull(self.args.name)
            if isinstance(image, list):
                image = image[0]
            image_tag = image.tags[0] if image.tags else ''
            self.logger.success(
                f'🎉 pulled {image_tag} ({image.short_id}) uncompressed size: {get_readable_size(image.attrs["Size"])}'
            )
        except Exception as ex:
            self.logger.error(
                f'can not pull image {self.args.name} from {self.args.registry} due to {ex!r}'
            )

    def _check_docker_image(self, name: str) -> None:
        # check local image
        image = self._client.images.get(name)
        for r in _allowed:
            if f'{_label_prefix}{r}' not in image.labels.keys():
                self.logger.warning(
                    f'{r} is missing in your docker image labels, you may want to check it'
                )
        try:
            image.labels['ai.jina.hub.jina_version'] = jina_version
            label_info = (
                f'{self.args.repository}/' +
                '{type}.{kind}.{name}:{version}-{jina_version}'.format(
                    **{
                        k.replace(_label_prefix, ''): v
                        for k, v in image.labels.items()
                    }))
            safe_name = safe_url_name(label_info)
            if name != safe_name:
                raise ValueError(
                    f'image {name} does not match with label info in the image. name should be {safe_name}'
                )
        except KeyError as e:
            self.logger.error(
                f'missing key in the label of the image {repr(e)}')
            raise

        self.logger.info(
            f'✅ {name} is a valid Jina Hub image, ready to publish')

    def _docker_login(self) -> None:
        """Log-in to Docker."""
        from docker.errors import APIError

        if not (self.args.username and self.args.password):
            self.args.username, self.args.password = _fetch_docker_auth(
                logger=self.logger)
        try:
            self._client.login(
                username=self.args.username,
                password=self.args.password,
                registry=self.args.registry,
            )
            self.logger.debug('successfully logged in to Docker Hub')
        except APIError:
            raise DockerLoginFailed(
                'invalid credentials passed. docker login failed')

    def build(self) -> Dict:
        """
        Perform a build of the Docker image.

        :return: dictionary with information on image (manifest)
        """
        if self.args.dry_run:
            result = self.dry_run()
        else:
            is_build_success, is_push_success = True, False
            _logs = []
            _except_strs = []
            _excepts = []

            with TimeContext(f'building {colored(self.args.path, "green")}',
                             self.logger) as tc:
                try:
                    _check_result = self._check_completeness()
                    self._freeze_jina_version()

                    _dockerfile = os.path.basename(_check_result['Dockerfile'])
                    _labels = {
                        _label_prefix + k: str(v)
                        for k, v in self.manifest.items()
                    }
                    streamer = self._raw_client.build(
                        decode=True,
                        path=self.args.path,
                        tag=self.tag,
                        pull=self.args.pull,
                        dockerfile=_dockerfile,
                        labels=_labels,
                        rm=True,
                    )

                    for chunk in streamer:
                        if 'stream' in chunk:
                            for line in chunk['stream'].splitlines():
                                if is_error_message(line):
                                    self.logger.critical(line)
                                    _except_strs.append(line)
                                elif 'warning' in line.lower():
                                    self.logger.warning(line)
                                else:
                                    self.logger.info(line)
                                _logs.append(line)
                except Exception as ex:
                    # if pytest fails it should end up here as well
                    is_build_success = False
                    ex = HubBuilderBuildError(ex)
                    _except_strs.append(repr(ex))
                    _excepts.append(ex)

            if is_build_success:
                # compile it again, but this time don't show the log
                image, log = self._client.images.build(
                    path=self.args.path,
                    tag=self.tag,
                    pull=self.args.pull,
                    dockerfile=_dockerfile,
                    labels=_labels,
                    rm=True,
                )

                # success

                _details = {
                    'inspect': self._raw_client.inspect_image(image.tags[0]),
                    'tag': image.tags[0],
                    'hash': image.short_id,
                    'size': get_readable_size(image.attrs['Size']),
                }

                self.logger.success(
                    '🎉 built {tag} ({hash}) uncompressed size: {size}'.
                    format_map(_details))

            else:
                self.logger.error(
                    f'can not build the image, please double check the log')
                _details = {}

            if is_build_success:
                if self.args.test_uses:
                    p_names = []
                    try:
                        is_build_success = False
                        p_names, failed_test_levels = HubIO._test_build(
                            image,
                            self.args.test_level,
                            self.config_yaml_path,
                            self.args.timeout_ready,
                            self.args.daemon,
                            self.logger,
                        )
                        if any(test_level in failed_test_levels
                               for test_level in [
                                   BuildTestLevel.POD_DOCKER,
                                   BuildTestLevel.FLOW,
                               ]):
                            is_build_success = False
                            self.logger.error(
                                f'build unsuccessful, failed at {str(failed_test_levels)} level'
                            )
                        else:
                            is_build_success = True
                            self.logger.warning(
                                f'Build successful. Tests failed at {failed_test_levels} levels. '
                                'This could be because the executor has external dependencies that are not installed'
                            )
                    except Exception as ex:
                        self.logger.error(
                            f'something wrong while testing the build: {ex!r}')
                        ex = HubBuilderTestError(ex)
                        _except_strs.append(repr(ex))
                        _excepts.append(ex)
                    finally:
                        if self.args.daemon:
                            try:
                                for p in p_names:
                                    self._raw_client.stop(p)
                            except:
                                pass  # suppress on purpose
                        self._raw_client.prune_containers()

                info, env_info = get_full_version()
                _host_info = {
                    'jina': info,
                    'jina_envs': env_info,
                    'docker': self._raw_client.info(),
                    'build_args': vars(self.args),
                }

            _build_history = {
                'time': get_now_timestamp(),
                'host_info': _host_info if is_build_success and self.args.host_info else '',
                'duration': tc.readable_duration,
                'logs': _logs,
                'exception': _except_strs,
            }

            if self.args.prune_images:
                self.logger.info('deleting unused images')
                self._raw_client.prune_images()

            # since db tracks `version` & `jina_version` on the top level, let's get rid of them in `manifest`
            if is_build_success:
                _version = self.manifest['version']
                self.manifest.pop('version', None)
                self.manifest.pop('jina_version', None)
            else:
                _version = '0.0.1'

            result = {
                'name': self.executor_name if is_build_success else '',
                'version': _version,
                'jina_version': jina_version,
                'path': self.args.path,
                'manifest_info': self.manifest if is_build_success else '',
                'details': _details,
                'is_build_success': is_build_success,
                'build_history': _build_history,
            }

            # only successful build (NOT dry run) writes the summary to disk
            if result['is_build_success']:
                self._write_summary_to_file(summary=result)
                if self.args.push:
                    self.push(image.tags[0], result)

        if not result['is_build_success'] and self.args.raise_error:
            # remove the very verbose build log when throw error
            if 'build_history' in result:
                result['build_history'].pop('logs', None)
            raise HubBuilderError(_excepts)

        return result

    @staticmethod
    def _test_build(
        image,  # a docker Image object
        test_level: 'BuildTestLevel',
        config_yaml_path: str,
        timeout_ready: int,
        daemon_arg: bool,
        logger: 'JinaLogger',
    ):
        p_names = []
        failed_levels = []
        logger.info(f'run tests using test level {test_level}')
        # test uses at executor level
        if test_level >= BuildTestLevel.EXECUTOR:
            logger.info(
                f'test to initialize an executor from yaml configuration: {config_yaml_path}'
            )
            try:
                with BaseExecutor.load_config(config_yaml_path):
                    pass
                logger.info(f'successfully tested to initialize an executor')
            except:
                logger.error(f'failed to initialize an executor')
                failed_levels.append(BuildTestLevel.EXECUTOR)

        # test uses at Pod level (no docker)
        if test_level >= BuildTestLevel.POD_NONDOCKER:
            logger.info(
                f'test to initialize a pod from yaml configuration: {config_yaml_path}'
            )
            try:
                with Pod(set_pod_parser().parse_args([
                        '--uses',
                        config_yaml_path,
                        '--timeout-ready',
                        str(timeout_ready),
                ])):
                    pass
                logger.info(
                    f'successfully tested to initialize a pod from yaml configuration'
                )
            except:
                logger.error(f'failed to initialize a pod')
                failed_levels.append(BuildTestLevel.POD_NONDOCKER)

        # test uses at Pod level (with docker)
        if test_level >= BuildTestLevel.POD_DOCKER:
            p_name = random_name()
            logger.info(
                f'test to initialize a pod via docker image {image.tags[0]} named {p_name}'
            )
            try:
                with Pod(set_pod_parser().parse_args([
                        '--uses',
                        f'docker://{image.tags[0]}',
                        '--name',
                        p_name,
                        '--timeout-ready',
                        str(timeout_ready),
                # parenthesized so the base argv is kept when daemon_arg is False
                ] + (['--daemon'] if daemon_arg else []))):
                    pass
                p_names.append(p_name)
                logger.info(
                    f'successfully tested to initialize a pod via docker')
            except:
                logger.error(f'failed to initialize a pod via docker image')
                failed_levels.append(BuildTestLevel.POD_DOCKER)

        # test uses at Flow level
        if test_level >= BuildTestLevel.FLOW:
            p_name = random_name()
            logger.info(
                f'test to build a flow from docker image {image.tags[0]} named {p_name} '
                f'with daemon={daemon_arg} and timeout_ready={timeout_ready}')
            try:
                with Flow().add(
                        name=p_name,
                        uses=f'docker://{image.tags[0]}',
                        daemon=daemon_arg,
                        timeout_ready=timeout_ready,
                ):
                    pass
                p_names.append(p_name)
                logger.info(
                    'successfully tested to build a flow from docker image')
            except:
                logger.error(f'failed to build a flow from docker image')
                failed_levels.append(BuildTestLevel.FLOW)
        return p_names, failed_levels

    def dry_run(self) -> Dict:
        """
        Perform a dry-run.

        :return: a dict with the manifest info.
        """
        try:
            s = self._check_completeness()
            s['is_build_success'] = True
        except Exception as ex:
            s = {'is_build_success': False, 'exception': str(ex)}
        return s

    def _write_summary_to_file(self, summary: Dict) -> None:
        file_path = get_summary_path(f'{summary["name"]}:{summary["version"]}')
        with open(file_path, 'w+') as f:
            json.dump(summary, f)
        self.logger.debug(f'stored the summary from build to {file_path}')

    def _freeze_jina_version(self) -> None:
        import pkg_resources

        requirements_path = get_exist_path(self.args.path, 'requirements.txt')
        if requirements_path and os.path.exists(requirements_path):
            new_requirements = []
            update = False
            with open(requirements_path, 'r') as fp:
                requirements = pkg_resources.parse_requirements(fp)
                for req in requirements:
                    if 'jina' in str(req):
                        update = True
                        self.logger.info(
                            f'Freezing jina version to {jina_version}')
                        new_requirements.append(f'jina=={jina_version}')
                    else:
                        new_requirements.append(str(req))

            if update:
                with open(requirements_path, 'w') as fp:
                    fp.write('\n'.join(new_requirements))

    def _check_completeness(self) -> Dict:
        dockerfile_path = get_exist_path(self.args.path, self.args.file)
        manifest_path = get_exist_path(self.args.path, 'manifest.yml')
        self.config_yaml_path = get_exist_path(self.args.path, 'config.yml')
        readme_path = get_exist_path(self.args.path, 'README.md')
        requirements_path = get_exist_path(self.args.path, 'requirements.txt')

        yaml_glob = set(glob.glob(os.path.join(self.args.path, '*.yml')))
        yaml_glob.difference_update({manifest_path, self.config_yaml_path})

        if not self.config_yaml_path:
            self.config_yaml_path = yaml_glob.pop()

        py_glob = glob.glob(os.path.join(self.args.path, '*.py'))

        test_glob = glob.glob(os.path.join(self.args.path, 'tests/test_*.py'))

        completeness = {
            'Dockerfile': dockerfile_path,
            'manifest.yml': manifest_path,
            'config.yml': self.config_yaml_path,
            'README.md': readme_path,
            'requirements.txt': requirements_path,
            '*.yml': yaml_glob,
            '*.py': py_glob,
            'tests': test_glob,
        }

        self.logger.info(f'completeness check\n' + '\n'.join(
            f'{colored("✓", "green") if v else colored("✗", "red"):>4} {k:<20} {v}'
            for k, v in completeness.items()) + '\n')

        if not (completeness['Dockerfile'] and completeness['manifest.yml']):
            self.logger.critical(
                'Dockerfile or manifest.yml is not given, can not build')
            raise FileNotFoundError(
                'Dockerfile or manifest.yml is not given, can not build')

        self.manifest = self._read_manifest(manifest_path)
        self.manifest['jina_version'] = jina_version
        self.executor_name = safe_url_name(
            f'{self.args.repository}/' +
            f'{self.manifest["type"]}.{self.manifest["kind"]}.{self.manifest["name"]}'
        )
        self.tag = self.executor_name + f':{self.manifest["version"]}-{jina_version}'
        return completeness

    def _read_manifest(self, path: str, validate: bool = True) -> Dict:
        with resource_stream(
                'jina', '/'.join(
                    ('resources', 'hub-builder', 'manifest.yml'))) as fp:
            tmp = JAML.load(
                fp
            )  # do not expand variables at here, i.e. DO NOT USE expand_dict(yaml.load(fp))

        with open(path) as fp:
            tmp.update(JAML.load(fp))

        if validate:
            self._validate_manifest(tmp)

        return tmp

    def _validate_manifest(self, manifest: Dict) -> None:
        required = {'name', 'type', 'version'}

        # check the required field in manifest
        for r in required:
            if r not in manifest:
                raise ValueError(
                    f'{r} is missing in the manifest.yml, it is required')

        # check if all fields are there
        for r in _allowed:
            if r not in manifest:
                self.logger.warning(
                    f'{r} is missing in your manifest.yml, you may want to check it'
                )

        # check name
        check_name(manifest['name'])
        # check_image_type
        check_image_type(manifest['type'])
        # check version number
        check_version(manifest['version'])
        # check license
        check_license(manifest['license'])
        # check platform
        if not isinstance(manifest['platform'], list):
            # wrap a single platform string in a list; list(str) would split it into characters
            manifest['platform'] = [manifest['platform']]
        check_platform(manifest['platform'])

        # replace all chars in value to safe chars
        for k, v in manifest.items():
            if v and isinstance(v, str):
                manifest[k] = remove_control_characters(v)

        # show manifest key-values
        for k, v in manifest.items():
            self.logger.debug(f'{k}: {v}')

    def _write_slack_message(self, *args):
        def _expand_fn(v):
            if isinstance(v, str):
                for d in args:
                    try:
                        v = v.format(**d)
                    except KeyError:
                        pass
            return v

        if 'JINAHUB_SLACK_WEBHOOK' in os.environ:
            with resource_stream(
                    'jina',
                    '/'.join(('resources', 'hub-builder-success',
                              'slack-jinahub.json')),
            ) as fp:
                tmp = expand_dict(json.load(fp),
                                  _expand_fn,
                                  resolve_cycle_ref=False)
                req = urllib.request.Request(
                    os.environ['JINAHUB_SLACK_WEBHOOK'])
                req.add_header('Content-Type',
                               'application/json; charset=utf-8')
                jdb = json.dumps(tmp).encode('utf-8')  # needs to be bytes
                req.add_header('Content-Length', str(len(jdb)))
                with urllib.request.urlopen(req, jdb) as f:
                    res = f.read()
                    self.logger.info(f'push to Slack: {res}')

    # alias of "new" in cli
    create = new
    init = new

    def _image_version_exists(self, name, module_version, req_jina_version):
        manifests = _list(self.logger, name)
        # check if matching module version and jina version exists
        if manifests:
            matching = [
                m for m in manifests
                if m['version'] == module_version and 'jina_version' in
                m.keys() and m['jina_version'] == req_jina_version
            ]
            return len(matching) > 0
        return False
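Note that HubIO above keeps two handles on the same daemon: a high-level DockerClient for rich Image/Container objects and a low-level APIClient for streaming builds and raw inspection. A minimal sketch of that dual-client pattern (the alpine:latest tag is just a placeholder for an image present locally):

import docker
from docker import APIClient

# high-level client: returns rich objects (Image, Container, ...)
client = docker.from_env()

# low-level client: returns plain dicts and streaming generators
raw_client = APIClient(base_url='unix://var/run/docker.sock')

image = client.images.get('alpine:latest')           # docker.models.images.Image
inspected = raw_client.inspect_image(image.tags[0])  # the same data as a raw dict
print(image.short_id, inspected['Size'])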
Example #51
0
 def __init__(self):
     self.docker_client = APIClient(version='auto')
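version='auto' asks docker-py to negotiate the API version with the daemon at connect time instead of assuming a default. A quick connectivity check under that setting (the printed version string is illustrative):

from docker import APIClient

client = APIClient(version='auto')  # negotiate the API version with the daemon
assert client.ping()                # True when the daemon is reachable
print(client.api_version)           # negotiated version string, e.g. '1.41'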
Example #52
0
def check_image(build_job):
    from docker import APIClient

    docker = APIClient(version='auto')
    return docker.images(get_tagged_image(build_job))
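APIClient.images(name) returns a list of image dicts, and an empty list when nothing matches, so the return value of check_image doubles as a truthiness test. A hedged sketch (the tag is hypothetical):

from docker import APIClient

docker = APIClient(version='auto')
# images(<name>) returns a list of image dicts; an empty list means no match
if not docker.images('myrepo/build:abc123'):  # hypothetical tag
    print('image not found locally, a build is needed')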
Example #53
0
def remove_all_containers(client: APIClient):
    for ctnr in client.containers(quiet=True):
        client.remove_container(ctnr, force=True)
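Note that containers(quiet=True) lists only running containers, as bare {'Id': ...} stubs; passing all=True also includes exited ones. A sketch of an extended variant (the include_stopped flag is an addition for illustration, not part of the original helper):

from docker import APIClient

def remove_all_containers(client: APIClient, include_stopped: bool = False):
    # quiet=True yields bare {'Id': ...} dicts; all=True also lists exited containers
    for ctnr in client.containers(quiet=True, all=include_stopped):
        client.remove_container(ctnr, force=True)

remove_all_containers(APIClient(version='auto'), include_stopped=True)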
Example #54
0
from containers import main
from docker import APIClient
import os, appinit

client = APIClient(base_url="unix://var/run/docker.sock")


def run(service, force=False):
    path = os.path.dirname(appinit.__file__)
    prefix = main.settings.get_variable("docker-prefix")

    image_name = '%s-base:latest' % prefix

    print("Building base image.")
    kwargs = {
        'nocache': force,
        'decode': True,
        'forcerm': True,
        'path': path + "/docker/base/",
        'dockerfile': path + "/docker/base/Dockerfile",
        'rm': True,
        'tag': image_name,
    }
    for line in client.build(**kwargs):
        if "stream" in line: print(line['stream'])
    print("Done -- building base image.")
Example #56
0
class DockerBuilder(BaseBuilder):
    """A builder using the local Docker client"""
    def __init__(self,
                 registry=None,
                 image_name=constants.DEFAULT_IMAGE_NAME,
                 base_image=constants.DEFAULT_BASE_IMAGE,
                 preprocessor=None,
                 push=True,
                 dockerfile_path=None):
        super().__init__(
            registry=registry,
            image_name=image_name,
            push=push,
            base_image=base_image,
            preprocessor=preprocessor,
        )
        # store the dockerfile path; _build reads self.dockerfile_path
        self.dockerfile_path = dockerfile_path

    def build(self):
        logging.info("Building image using docker")
        self.docker_client = APIClient(version='auto')
        self._build()
        if self.push:
            self.publish()

    def _build(self):
        docker_command = self.preprocessor.get_command()
        logger.warning("Docker command: {}".format(docker_command))
        if not docker_command:
            logger.warning(
                "Not setting a command for the output docker image.")
        install_reqs_before_copy = self.preprocessor.is_requirements_txt_file_present(
        )
        dockerfile_path = dockerfile.write_dockerfile(
            docker_command=docker_command,
            dockerfile_path=self.dockerfile_path,
            path_prefix=self.preprocessor.path_prefix,
            base_image=self.base_image,
            install_reqs_before_copy=install_reqs_before_copy)
        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'
        context_file, context_hash = self.preprocessor.context_tar_gz()
        self.image_tag = self.full_image_name(context_hash)
        logger.warning('Building docker image {}...'.format(self.image_tag))
        with open(context_file, 'rb') as fileobj:
            bld = self.docker_client.build(path='.',
                                           custom_context=True,
                                           fileobj=fileobj,
                                           tag=self.image_tag,
                                           encoding='utf-8')
        for line in bld:
            self._process_stream(line)

    def publish(self):
        logger.warning('Publishing image {}...'.format(self.image_tag))
        for line in self.docker_client.push(self.image_tag, stream=True):
            self._process_stream(line)

    def _process_stream(self, line):
        raw = line.decode('utf-8').strip()
        lns = raw.split('\n')
        for ln in lns:
            try:
                ljson = json.loads(ln)
                if ljson.get('error'):
                    msg = str(ljson.get('error', ljson))
                    logger.error('Build failed: ' + msg)
                    raise Exception('Image build failed: ' + msg)
                else:
                    if ljson.get('stream'):
                        msg = 'Build output: {}'.format(
                            ljson['stream'].strip())
                    elif ljson.get('status'):
                        msg = 'Push output: {} {}'.format(
                            ljson['status'], ljson.get('progress'))
                    elif ljson.get('aux'):
                        msg = 'Push finished: {}'.format(ljson.get('aux'))
                    else:
                        msg = str(ljson)
                    logger.info(msg)

            except json.JSONDecodeError:
                logger.warning('JSON decode error: {}'.format(ln))
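_process_stream hand-decodes each raw line because build and push were called without decode=True; passing it makes the client yield parsed dicts and shortens the handler considerably. A sketch of the equivalent build loop, with docker_client, fileobj, and image_tag standing in for the attributes above:

for chunk in docker_client.build(path='.', custom_context=True,
                                 fileobj=fileobj, tag=image_tag,
                                 decode=True):
    if chunk.get('error'):
        raise Exception('Image build failed: ' + str(chunk['error']))
    if chunk.get('stream'):
        logger.info('Build output: %s', chunk['stream'].strip())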
Example #58
0
from invoke import task
from docker import APIClient
import os
from invoke_tools import lxc, system, vcs

cli = APIClient(base_url='unix://var/run/docker.sock',
                timeout=600,
                version="auto")


def __check_branch():
    if os.getenv("TRAVIS_PULL_REQUEST") != "false":
        exit("This is a PR, so not deploying.")

    if os.getenv("TRAVIS_BRANCH") == "master":
        return "production"
    elif os.getenv("TRAVIS_BRANCH") == "develop":
        return "beta"
    else:
        exit("Not master or develop, so not deploying.")


@task
def build_dev_image(ctx):
    """
  Builds development image to run tests on
  """
    git = vcs.Git()
    version = git.get_version()

    lxc.Docker.build(
Example #59
0
class DagsterDockerOperator(DockerOperator):
    """Dagster operator for Apache Airflow.

    Wraps a modified DockerOperator, incorporating https://github.com/apache/airflow/pull/4315
    and an implementation of https://issues.apache.org/jira/browse/AIRFLOW-3825.

    Additionally, if a Docker client can be initialized using docker.from_env, this operator,
    unlike the standard DockerOperator, does not require docker_url, tls_config, or api_version
    to be set explicitly.

    Parameters:
    host_tmp_dir (str): Specify the location of the temporary directory on the host which will
        be mapped to tmp_dir. If not provided, defaults to the standard system temp directory.
    """

    def __init__(self, dagster_operator_parameters, *args):
        kwargs = dagster_operator_parameters.op_kwargs
        tmp_dir = kwargs.pop("tmp_dir", DOCKER_TEMPDIR)
        host_tmp_dir = kwargs.pop("host_tmp_dir", seven.get_system_temp_directory())
        self.host_tmp_dir = host_tmp_dir

        run_config = dagster_operator_parameters.run_config
        if "filesystem" in run_config["intermediate_storage"]:
            if (
                "config" in (run_config["intermediate_storage"].get("filesystem", {}) or {})
                and "base_dir"
                in (
                    (run_config["intermediate_storage"].get("filesystem", {}) or {}).get(
                        "config", {}
                    )
                    or {}
                )
                and run_config["intermediate_storage"]["filesystem"]["config"]["base_dir"]
                != tmp_dir
            ):
                warnings.warn(
                    "Found base_dir '{base_dir}' set in filesystem storage config, which was not "
                    "the tmp_dir we expected ('{tmp_dir}', mounting host_tmp_dir "
                    "'{host_tmp_dir}' from the host). We assume you know what you are doing, but "
                    "if you are having trouble executing containerized workloads, this may be the "
                    "issue".format(
                        base_dir=run_config["intermediate_storage"]["filesystem"]["config"][
                            "base_dir"
                        ],
                        tmp_dir=tmp_dir,
                        host_tmp_dir=host_tmp_dir,
                    )
                )
            else:
                run_config["intermediate_storage"]["filesystem"] = dict(
                    run_config["intermediate_storage"]["filesystem"] or {},
                    **{
                        "config": dict(
                            (
                                (
                                    run_config["intermediate_storage"].get("filesystem", {}) or {}
                                ).get("config", {})
                                or {}
                            ),
                            **{"base_dir": tmp_dir},
                        )
                    },
                )

        self.docker_conn_id_set = kwargs.get("docker_conn_id") is not None
        self.run_config = run_config
        self.pipeline_name = dagster_operator_parameters.pipeline_name
        self.pipeline_snapshot = dagster_operator_parameters.pipeline_snapshot
        self.execution_plan_snapshot = dagster_operator_parameters.execution_plan_snapshot
        self.parent_pipeline_snapshot = dagster_operator_parameters.parent_pipeline_snapshot
        self.mode = dagster_operator_parameters.mode
        self.step_keys = dagster_operator_parameters.step_keys
        self.recon_repo = dagster_operator_parameters.recon_repo
        self._run_id = None

        self.instance_ref = dagster_operator_parameters.instance_ref
        check.invariant(self.instance_ref)
        self.instance = DagsterInstance.from_ref(self.instance_ref)

        # These shenanigans are so we can override DockerOperator.get_hook in order to configure
        # a docker client using docker.from_env, rather than messing with the logic of
        # DockerOperator.execute
        if not self.docker_conn_id_set:
            try:
                from_env().version()
            except Exception:  # pylint: disable=broad-except
                pass
            else:
                kwargs["docker_conn_id"] = True

        if "environment" not in kwargs:
            kwargs["environment"] = get_aws_environment()

        super(DagsterDockerOperator, self).__init__(
            task_id=dagster_operator_parameters.task_id,
            dag=dagster_operator_parameters.dag,
            tmp_dir=tmp_dir,
            host_tmp_dir=host_tmp_dir,
            xcom_push=True,
            # We do this because log lines won't necessarily be emitted in order (!) -- so we can't
            # just check the last log line to see if it's JSON.
            xcom_all=True,
            *args,
            **kwargs,
        )

    @contextmanager
    def get_host_tmp_dir(self):
        yield self.host_tmp_dir

    def execute_raw(self, context):
        """Modified only to use the get_host_tmp_dir helper."""
        self.log.info("Starting docker container from image %s", self.image)

        tls_config = self.__get_tls_config()
        if self.docker_conn_id:
            self.cli = self.get_hook().get_conn()
        else:
            self.cli = APIClient(base_url=self.docker_url, version=self.api_version, tls=tls_config)

        if self.force_pull or len(self.cli.images(name=self.image)) == 0:
            self.log.info("Pulling docker image %s", self.image)
            for l in self.cli.pull(self.image, stream=True):
                output = seven.json.loads(l.decode("utf-8").strip())
                if "status" in output:
                    self.log.info("%s", output["status"])

        with self.get_host_tmp_dir() as host_tmp_dir:
            self.environment["AIRFLOW_TMP_DIR"] = self.tmp_dir
            self.volumes.append("{0}:{1}".format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_docker_command(context.get("ts")),
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    auto_remove=self.auto_remove,
                    binds=self.volumes,
                    network_mode=self.network_mode,
                    shm_size=self.shm_size,
                    dns=self.dns,
                    dns_search=self.dns_search,
                    cpu_shares=int(round(self.cpus * 1024)),
                    mem_limit=self.mem_limit,
                ),
                image=self.image,
                user=self.user,
                working_dir=self.working_dir,
            )
            self.cli.start(self.container["Id"])

            res = []
            line = ""
            for new_line in self.cli.logs(
                container=self.container["Id"], stream=True, stdout=True, stderr=False
            ):
                line = new_line.strip()
                if hasattr(line, "decode"):
                    line = line.decode("utf-8")
                self.log.info(line)
                res.append(line)

            result = self.cli.wait(self.container["Id"])
            if result["StatusCode"] != 0:
                raise AirflowException(
                    "docker container failed with result: {result} and logs: {logs}".format(
                        result=repr(result), logs="\n".join(res)
                    )
                )

            if self.xcom_push_flag:
                # Try to avoid any kind of race condition?
                return res if self.xcom_all else str(line)

    # This is a class-private name on DockerOperator for no good reason --
    # all that the status quo does is inhibit extension of the class.
    # See https://issues.apache.org/jira/browse/AIRFLOW-3880
    def __get_tls_config(self):
        # pylint: disable=no-member
        return super(DagsterDockerOperator, self)._DockerOperator__get_tls_config()

    @property
    def run_id(self):
        if self._run_id is None:
            return ""
        else:
            return self._run_id

    def query(self, airflow_ts):
        check.opt_str_param(airflow_ts, "airflow_ts")

        recon_pipeline = self.recon_repo.get_reconstructable_pipeline(self.pipeline_name)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=recon_pipeline.get_python_origin(),
                pipeline_run_id=self.run_id,
                instance_ref=self.instance_ref,
                step_keys_to_execute=self.step_keys,
            )
        )

        command = "dagster api execute_step {}".format(json.dumps(input_json))
        self.log.info("Executing: {command}\n".format(command=command))
        return command

    def get_docker_command(self, airflow_ts):
        """Deliberately renamed from get_command to avoid shadoowing the method of the base class"""
        check.opt_str_param(airflow_ts, "airflow_ts")

        if self.command is not None and self.command.strip().find("[") == 0:
            commands = ast.literal_eval(self.command)
        elif self.command is not None:
            commands = self.command
        else:
            commands = self.query(airflow_ts)
        return commands

    def get_hook(self):
        if self.docker_conn_id_set:
            return super(DagsterDockerOperator, self).get_hook()

        class _DummyHook:
            def get_conn(self):
                return from_env().api

        return _DummyHook()

    def execute(self, context):
        if "run_id" in self.params:
            self._run_id = self.params["run_id"]
        elif "dag_run" in context and context["dag_run"] is not None:
            self._run_id = context["dag_run"].run_id

        try:
            tags = {AIRFLOW_EXECUTION_DATE_STR: context.get("ts")} if "ts" in context else {}

            self.instance.register_managed_run(
                pipeline_name=self.pipeline_name,
                run_id=self.run_id,
                run_config=self.run_config,
                mode=self.mode,
                solids_to_execute=None,
                step_keys_to_execute=None,
                tags=tags,
                root_run_id=None,
                parent_run_id=None,
                pipeline_snapshot=self.pipeline_snapshot,
                execution_plan_snapshot=self.execution_plan_snapshot,
                parent_pipeline_snapshot=self.parent_pipeline_snapshot,
            )

            res = self.execute_raw(context)
            self.log.info("Finished executing container.")

            if not res:
                raise AirflowException("Missing query response")

            try:
                events = [deserialize_json_to_dagster_namedtuple(line) for line in res if line]

            except Exception:  # pylint: disable=broad-except
                raise AirflowException(
                    "Could not parse response {response}".format(response=repr(res))
                )

            if len(events) == 1 and isinstance(events[0], StepExecutionSkipped):
                raise AirflowSkipException(
                    "Dagster emitted skip event, skipping execution in Airflow"
                )

            check_events_for_failures(events)
            check_events_for_skips(events)

            return events

        finally:
            self._run_id = None
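execute_raw follows the canonical low-level lifecycle: create_container, start, stream logs, then wait for the exit code. A stripped-down, self-contained sketch of the same sequence (image and command are placeholders):

from docker import APIClient

cli = APIClient(base_url='unix://var/run/docker.sock')
container = cli.create_container(image='alpine:latest', command='echo hello')
cli.start(container['Id'])

# stream logs line by line until the container stops
for raw in cli.logs(container=container['Id'], stream=True,
                    stdout=True, stderr=True):
    print(raw.decode('utf-8').rstrip())

result = cli.wait(container['Id'])  # e.g. {'StatusCode': 0, ...}
if result['StatusCode'] != 0:
    raise RuntimeError('container exited with status {}'.format(result['StatusCode']))
cli.remove_container(container['Id'])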
Example #60
0
class HubIO:
    """ :class:`HubIO` provides the way to interact with Jina Hub registry.
    You can use it with CLI to package a directory into a Jina Hub image and publish it to the world.

    Examples:
        - :command:`jina hub build my_pod/` build the image
        - :command:`jina hub build my_pod/ --push` build the image and push to the public registry
        - :command:`jina hub pull jinahub/pod.dummy_mwu_encoder:0.0.6` to download the image
    """
    def __init__(self, args: 'argparse.Namespace'):
        self.logger = get_logger(self.__class__.__name__, **vars(args))
        self.args = args
        try:
            import docker
            from docker import APIClient

            self._client = docker.from_env()

            # low-level client
            self._raw_client = APIClient(base_url='unix://var/run/docker.sock')
        except (ImportError, ModuleNotFoundError):
            self.logger.critical(
                'requires "docker" dependency, please install it via "pip install jina[docker]"'
            )
            raise

    def new(self) -> None:
        """Create a new executor using cookiecutter template """
        try:
            from cookiecutter.main import cookiecutter
        except (ImportError, ModuleNotFoundError):
            self.logger.critical(
                'requires "cookiecutter" dependency, please install it via "pip install cookiecutter"'
            )
            raise

        import click
        cookiecutter_template = self.args.template
        if self.args.type == 'app':
            cookiecutter_template = 'https://github.com/jina-ai/cookiecutter-jina.git'
        elif self.args.type == 'pod':
            cookiecutter_template = 'https://github.com/jina-ai/cookiecutter-jina-hub.git'

        try:
            cookiecutter(cookiecutter_template,
                         overwrite_if_exists=self.args.overwrite,
                         output_dir=self.args.output_dir)
        except click.exceptions.Abort:
            self.logger.info('nothing is created, bye!')

    def push(self, name: str = None, readme_path: str = None) -> None:
        """ A wrapper of docker push 
        - Checks for the tempfile, returns without push if it cannot find
        - Pushes to docker hub, returns withput writing to db if it fails
        - Writes to the db
        """
        name = name or self.args.name
        file_path = get_summary_path(name)
        if not os.path.isfile(file_path):
            self.logger.error(f'can not find the build summary file')
            return

        try:
            self._push_docker_hub(name, readme_path)
        except:
            self.logger.error('can not push to the docker hub registry')
            return

        with open(file_path) as f:
            result = json.load(f)
        if result['is_build_success']:
            self._write_summary_to_db(summary=result)

    def _push_docker_hub(self,
                         name: str = None,
                         readme_path: str = None) -> None:
        """ Helper push function """
        check_registry(self.args.registry, name, _repo_prefix)
        self._check_docker_image(name)
        self.login()
        with ProgressBar(task_name=f'pushing {name}', batch_unit='') as t:
            for line in self._client.images.push(name,
                                                 stream=True,
                                                 decode=True):
                t.update(1)
                self.logger.debug(line)
        self.logger.success(f'🎉 {name} is now published!')

        if False and readme_path:
            # unfortunately Docker Hub Personal Access Tokens cannot be used as they are not supported by the API
            _volumes = {
                os.path.dirname(os.path.abspath(readme_path)): {
                    'bind': '/workspace'
                }
            }
            _env = {
                'DOCKERHUB_USERNAME': self.args.username,
                'DOCKERHUB_PASSWORD': self.args.password,
                'DOCKERHUB_REPOSITORY': name.split(':')[0],
                'README_FILEPATH': '/workspace/README.md',
            }

            self._client.containers.run('peterevans/dockerhub-description:2.1',
                                        auto_remove=True,
                                        volumes=_volumes,
                                        environment=_env)

        share_link = f'https://api.jina.ai/hub/?jh={urllib.parse.quote_plus(name)}'

        try:
            webbrowser.open(share_link, new=2)
        except:
            pass
        finally:
            self.logger.info(
                f'Check out the usage {colored(share_link, "cyan", attrs=["underline"])} and share it with others!'
            )

    def pull(self) -> None:
        """A wrapper of docker pull """
        check_registry(self.args.registry, self.args.name, _repo_prefix)
        self.login()
        try:
            with TimeContext(f'pulling {self.args.name}', self.logger):
                image = self._client.images.pull(self.args.name)
            if isinstance(image, list):
                image = image[0]
            image_tag = image.tags[0] if image.tags else ''
            self.logger.success(
                f'🎉 pulled {image_tag} ({image.short_id}) uncompressed size: {get_readable_size(image.attrs["Size"])}'
            )
        except:
            self.logger.error(
                f'can not pull image {self.args.name} from {self.args.registry}'
            )
            raise

    def _check_docker_image(self, name: str) -> None:
        # check local image
        image = self._client.images.get(name)
        for r in _allowed:
            if f'{_label_prefix}{r}' not in image.labels.keys():
                self.logger.warning(
                    f'{r} is missing in your docker image labels, you may want to check it'
                )
        try:
            if name != safe_url_name(f'{_repo_prefix}' +
                                     '{type}.{kind}.{name}:{version}'.format(
                                         **{
                                             k.replace(_label_prefix, ''): v
                                             for k, v in image.labels.items()
                                         })):
                raise ValueError(
                    f'image {name} does not match with label info in the image'
                )
        except KeyError:
            self.logger.error('missing key in the label of the image')
            raise

        self.logger.info(
            f'✅ {name} is a valid Jina Hub image, ready to publish')

    def login(self) -> None:
        """A wrapper of docker login """
        if self.args.username and self.args.password:
            self._client.login(username=self.args.username,
                               password=self.args.password,
                               registry=self.args.registry)
        else:
            raise ValueError(
                'no username/password specified, docker login failed')

    def build(self) -> Dict:
        """A wrapper of docker build """
        if self.args.dry_run:
            result = self.dry_run()
        else:
            is_build_success, is_push_success = True, False
            _logs = []
            _excepts = []

            with TimeContext(f'building {colored(self.args.path, "green")}',
                             self.logger) as tc:
                try:
                    self._check_completeness()

                    streamer = self._raw_client.build(
                        decode=True,
                        path=self.args.path,
                        tag=self.tag,
                        pull=self.args.pull,
                        dockerfile=self.dockerfile_path_revised,
                        rm=True)

                    for chunk in streamer:
                        if 'stream' in chunk:
                            for line in chunk['stream'].splitlines():
                                if is_error_message(line):
                                    self.logger.critical(line)
                                    _excepts.append(line)
                                elif 'warning' in line.lower():
                                    self.logger.warning(line)
                                else:
                                    self.logger.info(line)
                                _logs.append(line)
                except Exception as ex:
                    # if pytest fails it should end up here as well
                    is_build_success = False
                    _excepts.append(str(ex))

            if is_build_success:
                # compile it again, but this time don't show the log
                image, log = self._client.images.build(
                    path=self.args.path,
                    tag=self.tag,
                    pull=self.args.pull,
                    dockerfile=self.dockerfile_path_revised,
                    rm=True)

                # success

                _details = {
                    'inspect': self._raw_client.inspect_image(image.tags[0]),
                    'tag': image.tags[0],
                    'hash': image.short_id,
                    'size': get_readable_size(image.attrs['Size']),
                }

                self.logger.success(
                    '🎉 built {tag} ({hash}) uncompressed size: {size}'.
                    format_map(_details))

            else:
                self.logger.error(
                    f'can not build the image, please double check the log')
                _details = {}

            if is_build_success:
                if self.args.test_uses:
                    try:
                        # assume failure until the image runs cleanly in a Flow
                        is_build_success = False
                        from jina.flow import Flow
                        p_name = random_name()
                        with Flow().add(name=p_name,
                                        uses=image.tags[0],
                                        daemon=self.args.daemon):
                            pass
                        if self.args.daemon:
                            self._raw_client.stop(p_name)
                        self._raw_client.prune_containers()
                        is_build_success = True
                    except PeaFailToStart:
                        self.logger.error(
                            'cannot use the image in a Flow, please check your file bundle'
                        )
                    except Exception as ex:
                        self.logger.error(
                            f'something went wrong, but it is probably not your fault. {repr(ex)}'
                        )

                _version = self.manifest.get('version', '0.0.1')
                info, env_info = get_full_version()
                _host_info = {
                    'jina': info,
                    'jina_envs': env_info,
                    'docker': self._raw_client.info(),
                    'build_args': vars(self.args)
                }

            _build_history = {
                'time': get_now_timestamp(),
                'host_info': _host_info if is_build_success and self.args.host_info else '',
                'duration': tc.readable_duration,
                'logs': _logs,
                'exception': _excepts
            }

            if self.args.prune_images:
                self.logger.info('deleting unused images')
                self._raw_client.prune_images()

            result = {
                'name': getattr(self, 'canonical_name', ''),
                'version': _version if is_build_success else '0.0.1',
                'path': self.args.path,
                'manifest_info': self.manifest if is_build_success else '',
                'details': _details,
                'is_build_success': is_build_success,
                'build_history': [_build_history]
            }

            # only successful build (NOT dry run) writes the summary to disk
            if result['is_build_success']:
                self._write_summary_to_file(summary=result)
                if self.args.push:
                    try:
                        self._push_docker_hub(image.tags[0], self.readme_path)
                        self._write_summary_to_db(summary=result)
                        self._write_slack_message(result, _details,
                                                  _build_history)
                    except Exception as ex:
                        self.logger.error(
                            f'cannot complete the push due to {repr(ex)}')

        if not result['is_build_success'] and self.args.raise_error:
            # drop the very verbose build log before raising; a dry-run result
            # carries no build_history at all
            if result.get('build_history'):
                result['build_history'][0].pop('logs', None)
            raise RuntimeError(result)

        return result
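
    # A minimal, illustrative sketch of driving this builder programmatically;
    # `HubIO` is a placeholder for the enclosing class, and the Namespace
    # fields are exactly the ones build() reads above:
    #
    #     from argparse import Namespace
    #     args = Namespace(path='hub/my-encoder', pull=True, dry_run=False,
    #                      test_uses=False, daemon=False, prune_images=False,
    #                      host_info=False, push=False, raise_error=True,
    #                      username='', password='', registry='')
    #     summary = HubIO(args).build()
    #     if summary['is_build_success']:
    #         print(summary['details']['tag'], summary['details']['size'])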

    def dry_run(self) -> Dict:
        try:
            s = self._check_completeness()
            s['is_build_success'] = True
        except Exception as ex:
            s = {'is_build_success': False, 'exception': str(ex)}
        return s

    def _write_summary_to_db(self, summary: Dict) -> None:
        """ Inserts / Updates summary document in mongodb """
        if not is_db_envs_set():
            self.logger.warning(
                'MongoDB environment vars are not set! bookkeeping skipped.')
            return

        build_summary = handle_dot_in_keys(document=summary)
        _build_query = {
            'name': build_summary['name'],
            'version': build_summary['version']
        }
        _current_build_history = build_summary['build_history']
        with MongoDBHandler(
                hostname=os.environ['JINA_DB_HOSTNAME'],
                username=os.environ['JINA_DB_USERNAME'],
                password=os.environ['JINA_DB_PASSWORD'],
                database_name=os.environ['JINA_DB_NAME'],
                collection_name=os.environ['JINA_DB_COLLECTION']) as db:
            existing_doc = db.find(query=_build_query)
            if existing_doc:
                build_summary['build_history'] = existing_doc[
                    'build_history'] + _current_build_history
                _modified_count = db.replace(document=build_summary,
                                             query=_build_query)
                self.logger.debug(
                    f'Updated the build + push summary in db. {_modified_count} documents modified'
                )
            else:
                _inserted_id = db.insert(document=build_summary)
                self.logger.debug(
                    f'Inserted the build + push summary in db with id {_inserted_id}'
                )
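
    # For reference, the "append to build_history or insert" upsert above could
    # be expressed as a single round trip with plain pymongo; this is only a
    # sketch assuming a pymongo Collection is at hand, not how MongoDBHandler
    # works internally:
    #
    #     collection.update_one(
    #         {'name': summary['name'], 'version': summary['version']},
    #         {'$push': {'build_history': {'$each': summary['build_history']}},
    #          '$set': {k: v for k, v in summary.items()
    #                   if k != 'build_history'}},
    #         upsert=True)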

    def _write_summary_to_file(self, summary: Dict) -> None:
        file_path = get_summary_path(f'{summary["name"]}:{summary["version"]}')
        with open(file_path, 'w+') as f:
            json.dump(summary, f)
        self.logger.debug(f'stored the summary from build to {file_path}')

    def _check_completeness(self) -> Dict:
        self.dockerfile_path = get_exist_path(self.args.path, 'Dockerfile')
        self.manifest_path = get_exist_path(self.args.path, 'manifest.yml')
        self.readme_path = get_exist_path(self.args.path, 'README.md')
        self.requirements_path = get_exist_path(self.args.path,
                                                'requirements.txt')

        yaml_glob = glob.glob(os.path.join(self.args.path, '*.yml'))
        # exclude the manifest itself from the YAML config list
        if self.manifest_path in yaml_glob:
            yaml_glob.remove(self.manifest_path)

        py_glob = glob.glob(os.path.join(self.args.path, '*.py'))

        test_glob = glob.glob(os.path.join(self.args.path, 'tests/test_*.py'))

        completeness = {
            'Dockerfile': self.dockerfile_path,
            'manifest.yml': self.manifest_path,
            'README.md': self.readme_path,
            'requirements.txt': self.requirements_path,
            '*.yml': yaml_glob,
            '*.py': py_glob,
            'tests': test_glob
        }

        self.logger.info('completeness check\n' + '\n'.join(
            '%4s %-20s %s' %
            (colored('✓', 'green') if v else colored('✗', 'red'), k, v)
            for k, v in completeness.items()) + '\n')

        if not (completeness['Dockerfile'] and completeness['manifest.yml']):
            self.logger.critical(
                'Dockerfile or manifest.yml is missing, cannot build')
            raise FileNotFoundError(
                'Dockerfile or manifest.yml is missing, cannot build')

        self.manifest = self._read_manifest(self.manifest_path)
        self.dockerfile_path_revised = self._get_revised_dockerfile(
            self.dockerfile_path, self.manifest)
        self.tag = safe_url_name(
            _repo_prefix + '{type}.{kind}.{name}:{version}'.format(**self.manifest))
        self.canonical_name = safe_url_name(
            _repo_prefix + '{type}.{kind}.{name}'.format(**self.manifest))
        return completeness
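
    # The completeness check above implies the following bundle layout; only
    # Dockerfile and manifest.yml are mandatory, the rest are recommended:
    #
    #     my-hub-image/
    #     |- Dockerfile          (required)
    #     |- manifest.yml        (required)
    #     |- README.md
    #     |- requirements.txt
    #     |- *.yml               (e.g. config shipped with the image)
    #     |- *.py
    #     `- tests/
    #        `- test_*.py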

    def _read_manifest(self, path: str, validate: bool = True) -> Dict:
        with resource_stream(
                'jina', '/'.join(
                    ('resources', 'hub-builder', 'manifest.yml'))) as fp:
            # load the packaged defaults; do not expand variables here,
            # i.e. DO NOT USE expand_dict(yaml.load(fp)).
            # safe_load avoids arbitrary object construction (assuming PyYAML)
            tmp = yaml.safe_load(fp)

        with open(path) as fp:
            # user values override the packaged defaults
            tmp.update(yaml.safe_load(fp))

        if validate:
            self._validate_manifest(tmp)

        return tmp
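
    # Merge semantics in miniature: keys in the user's manifest override the
    # packaged defaults, untouched defaults survive (values here are made up):
    #
    #     defaults = {'version': '0.0.1', 'platform': ['linux/amd64']}
    #     user = {'name': 'my-encoder', 'version': '0.2.0'}
    #     defaults.update(user)
    #     # {'version': '0.2.0', 'platform': ['linux/amd64'], 'name': 'my-encoder'}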

    def _validate_manifest(self, manifest: Dict) -> None:
        required = {'name', 'type', 'version'}

        # check the required field in manifest
        for r in required:
            if r not in manifest:
                raise ValueError(
                    f'{r} is missing in the manifest.yaml, it is required')

        # check if all fields are there
        for r in _allowed:
            if r not in manifest:
                self.logger.warning(
                    f'{r} is missing in your manifest.yml, you may want to check it'
                )

        # check name
        check_name(manifest['name'])
        # check_image_type
        check_image_type(manifest['type'])
        # check version number
        check_version(manifest['version'])
        # check license
        check_license(manifest['license'])
        # check platform; wrap a single entry into a list (note: list() on a
        # str would split it into characters, hence the explicit wrapping)
        if not isinstance(manifest['platform'], list):
            manifest['platform'] = [manifest['platform']]
        check_platform(manifest['platform'])

        # strip control characters from all string values
        for k, v in manifest.items():
            if v and isinstance(v, str):
                manifest[k] = remove_control_characters(v)

        # show manifest key-values
        for k, v in manifest.items():
            self.logger.debug(f'{k}: {v}')
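
    # A manifest.yml that would pass the validation above; name, type and
    # version are hard requirements, the remaining keys are the ones checked
    # here, and all values are illustrative only:
    #
    #     name: my-encoder
    #     kind: encoder
    #     type: pod
    #     version: 0.2.0
    #     license: apache-2.0
    #     platform:
    #       - linux/amd64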

    def _get_revised_dockerfile(self, dockerfile_path: str,
                                manifest: Dict) -> str:
        # inject one LABEL per manifest key right after the FROM line
        revised_dockerfile = []
        with open(dockerfile_path) as fp:
            for line in fp:
                revised_dockerfile.append(line)
                if line.startswith('FROM'):
                    revised_dockerfile.append('LABEL ')
                    revised_dockerfile.append(' \\      \n'.join(
                        f'{_label_prefix}{k}="{v}"'
                        for k, v in manifest.items()))

        # write the revised Dockerfile to a temp file that outlives this call
        with tempfile.NamedTemporaryFile(
                'w', encoding='utf8', delete=False) as fp:
            fp.writelines(revised_dockerfile)
            f = fp.name

        for line in revised_dockerfile:
            self.logger.debug(line)
        return f
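
    # Effect of the rewrite above, assuming _label_prefix is 'ai.jina.hub.'
    # (the real prefix is defined elsewhere in this module). A Dockerfile
    # beginning with
    #
    #     FROM python:3.8-slim
    #
    # becomes, in the revised temp copy,
    #
    #     FROM python:3.8-slim
    #     LABEL ai.jina.hub.name="my-encoder" \
    #     ai.jina.hub.version="0.2.0" \
    #     ...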

    def _write_slack_message(self, *args):
        def _expand_fn(v):
            if isinstance(v, str):
                for d in args:
                    try:
                        v = v.format(**d)
                    except KeyError:
                        pass
            return v

        if 'JINAHUB_SLACK_WEBHOOK' in os.environ:
            with resource_stream(
                    'jina', '/'.join(('resources', 'hub-builder-success',
                                      'slack-jinahub.json'))) as fp:
                tmp = expand_dict(json.load(fp),
                                  _expand_fn,
                                  resolve_cycle_ref=False)
                req = urllib.request.Request(
                    os.environ['JINAHUB_SLACK_WEBHOOK'])
                req.add_header('Content-Type',
                               'application/json; charset=utf-8')
                jdb = json.dumps(tmp).encode('utf-8')  # needs to be bytes
                req.add_header('Content-Length', str(len(jdb)))
                with urllib.request.urlopen(req, jdb) as f:
                    res = f.read()
                    self.logger.info(f'push to Slack: {res}')
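
    # Note on _expand_fn above: str.format raises KeyError if any placeholder
    # is missing, so a template string in the Slack JSON is only expanded by a
    # dict that supplies *all* of its placeholders; partially matching dicts
    # leave it untouched. E.g. '{tag} ({hash})' is filled from _details, while
    # '{duration}' is filled from _build_history.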

    # alias of "new" in cli
    create = new
    init = new