def mark_as_done(self, task, job, execution):
    """Rename the execution's container to ``defunct-<current name>``.

    The Docker client is obtained from ``self.pool`` using the host and port
    stored in the execution metadata, the container is looked up by the
    stored ``container_id`` and then renamed.

    Args:
        task: Owner of the job; only ``task_id`` is read.
        job: Job being finalized; ``job_id`` is used for log context.
        execution: Job execution whose ``metadata`` must contain
            ``docker_host``, ``docker_port`` and ``container_id``.

    Raises:
        KeyError: If one of the required metadata keys is missing.
        HostUnavailableError: If talking to the Docker host raises a
            connection error. The connection error is attached as
            ``__cause__``.
    """
    h = execution.metadata["docker_host"]
    p = execution.metadata["docker_port"]
    host, port, cl = self.pool.get_client(self, task.task_id, h, p)

    container_id = execution.metadata["container_id"]
    logger = self.logger.bind(
        task_id=task.task_id,
        job=str(job.job_id),
        execution_id=str(execution.execution_id),
        operation="docker_host.mark_as_done",
        host=host,
        port=port,
        container_id=container_id,
    )

    try:
        logger.debug("Finding container...")
        container = cl.containers.get(container_id)
        logger.info("Container found.")

        new_name = f"defunct-{container.name}"
        logger.debug("Renaming container...", new_name=new_name)
        container.rename(new_name)
        logger.debug("Container renamed.", new_name=new_name)
    except requests.exceptions.ConnectionError as err:
        error = traceback.format_exc()
        logger.error("Failed to connect to Docker Host.", error=error)
        # Chain explicitly so the connection failure is reported as the
        # direct cause, matching the other executor methods.
        raise HostUnavailableError(host, port, err) from err
def get_container_by_id(self, container_id, host, port, client):
    """Fetch a container from a Docker host through that host's circuit.

    The lookup runs inside the circuit returned by
    ``self.get_circuit(f"{host}:{port}")``.

    Args:
        container_id: Identifier passed to ``client.containers.get``.
        host: Docker host name, used for the circuit key, logs and errors.
        port: Docker host port, used for the circuit key, logs and errors.
        client: Docker client exposing ``containers.get``.

    Returns:
        Whatever ``client.containers.get(container_id)`` returns.

    Raises:
        HostUnavailableError: If the lookup raises a connection error or the
            circuit raises ``pybreaker.CircuitBreakerError``.
        ContainerUnavailableError: If the client raises ``NullResource``;
            that error is attached as ``__cause__``.
    """
    logger = self.logger.bind(
        host=host,
        port=port,
        container_id=container_id,
        operation="docker_host.get_container_by_id",
    )

    circuit = self.get_circuit(f"{host}:{port}")

    @circuit
    def run(logger):
        try:
            logger = logger.bind(container_id=container_id)
            logger.debug("Finding container...")
            container = client.containers.get(container_id)
            logger.info("Container found.")

            return container
        except requests.exceptions.ConnectionError as err:
            raise HostUnavailableError(host, port, err) from err
        except NullResource as err:
            # Chain explicitly, like the other handlers in this function.
            raise ContainerUnavailableError(
                f"Container {container_id} was not found in {host}:{port}!"
            ) from err

    try:
        return run(logger)
    except pybreaker.CircuitBreakerError as err:
        raise HostUnavailableError(host, port, err) from err
def get_streaming_logs(self, task_id, job, execution):
    """Yield the container's stdout/stderr stream as UTF-8 decoded text.

    This is a generator: nothing (including the client lookup) runs until
    the first item is requested.

    Args:
        task_id: Identifier of the owning task.
        job: Job whose ``job_id`` is used for log context.
        execution: Job execution whose ``metadata`` must contain
            ``docker_host``, ``docker_port`` and ``container_id``.

    Yields:
        Each chunk produced by ``container.logs(stdout=True, stderr=True,
        stream=True)``, decoded with UTF-8.

    Raises:
        KeyError: If one of the required metadata keys is missing.
        HostUnavailableError: If talking to the Docker host raises a
            connection error. The connection error is attached as
            ``__cause__``.
    """
    h = execution.metadata["docker_host"]
    p = execution.metadata["docker_port"]
    host, port, cl = self.pool.get_client(self, task_id, h, p)

    container_id = execution.metadata["container_id"]
    logger = self.logger.bind(
        task_id=task_id,
        job=str(job.job_id),
        execution_id=str(execution.execution_id),
        operation="docker_host.get_streaming_logs",
        host=host,
        port=port,
        container_id=container_id,
    )

    try:
        container = cl.containers.get(container_id)

        for log in container.logs(stdout=True, stderr=True, stream=True):
            yield log.decode("utf-8")
    except requests.exceptions.ConnectionError as err:
        error = traceback.format_exc()
        logger.error("Failed to connect to Docker Host.", error=error)
        # Chain explicitly so the connection failure is reported as the
        # direct cause, matching the other executor methods.
        raise HostUnavailableError(host, port, err) from err
def run(logger):
    """Start the job's container and store its id in the execution metadata.

    Reads ``job_prefix``, ``job``, ``execution``, ``cl``, ``image``, ``tag``,
    ``command``, ``host`` and ``port`` from the enclosing scope.

    Args:
        logger: Bound logger; it is re-bound with the container name and
            the environment variables before the container is started.

    Raises:
        HostUnavailableError: If starting the container raises a connection
            error. ``docker_host`` and ``docker_port`` are removed from the
            execution metadata first, and the connection error is attached
            as ``__cause__``.
    """
    try:
        container_name = f"{job_prefix}-{execution.execution_id}"
        envs = job.metadata.get("envs", {})
        logger = logger.bind(container_name=container_name, envs=envs)
        logger.debug("Running the Job in Docker Host...")
        container = cl.containers.run(
            image=f"{image}:{tag}",
            name=container_name,
            command=command,
            detach=True,
            environment=envs,
        )
        execution.metadata["container_id"] = container.id
        logger.info(
            "Container started successfully. Container ID stored as Job Execution metadata.",
            container_id=container.id,
        )
    except requests.exceptions.ConnectionError as err:
        error = traceback.format_exc()
        logger.error(
            "Failed to connect to Docker Host. Will retry job later with a new host.",
            error=error,
        )

        # Forget the unreachable host, as announced in the log message above.
        if "docker_host" in execution.metadata:
            del execution.metadata["docker_host"]

        if "docker_port" in execution.metadata:
            del execution.metadata["docker_port"]

        # Chain explicitly so the connection failure is the direct cause.
        raise HostUnavailableError(host, port, err) from err
def test_downloading_image2(worker):
    """
    download_image returns False and re-enqueues the job when the executor's
    update_image raises HostUnavailableError.

    The enqueued id must be stored in the job metadata and be the single
    entry (rank 0) of the scheduled queue.
    """
    application = worker.app.app

    with application.app_context():
        _task, job, execution = JobExecutionFixture.new_defaults()

        # Executor double whose image update always reports a dead host.
        failing_executor = MagicMock()
        failing_executor.update_image.side_effect = HostUnavailableError(
            "docker", "9999", "failed"
        )

        outcome = job_mod.download_image(
            failing_executor,
            job,
            execution,
            job.image,
            "latest",
            job.command,
            application.logger,
        )
        expect(outcome).to_be_false()

        expect(job.metadata["enqueued_id"]).not_to_be_null()

        scheduled = Queue.SCHEDULED_QUEUE_NAME
        expect(application.redis.zcard(scheduled)).to_equal(1)

        rank = application.redis.zrank(scheduled, job.metadata["enqueued_id"])
        expect(rank).to_equal(0)
def stop_job(self, task, job, execution):
    """Stop the container that backs a job execution.

    Args:
        task: Owner of the job; only ``task_id`` is read.
        job: Job whose ``job_id`` is used for log context.
        execution: Job execution whose ``metadata`` provides
            ``container_id``, ``docker_host`` and ``docker_port``.

    Returns:
        ``False`` when the execution has no container id or the container
        lookup yields ``None``; ``True`` once the container was stopped.

    Raises:
        HostUnavailableError: If stopping the container raises a connection
            error or the host's circuit raises
            ``pybreaker.CircuitBreakerError``.
    """
    log = self.logger.bind(
        task_id=task.task_id,
        job_id=str(job.job_id),
        execution_id=str(execution.execution_id),
        operation="docker_executor.stop_job",
    )

    meta = execution.metadata

    # A missing key and an explicit None both mean "never started".
    if meta.get("container_id") is None:
        log.warn(
            "Can't stop Job Execution, since it has not been started. Aborting..."
        )
        return False

    host, port, client = self.pool.get_client(
        self, task.task_id, meta["docker_host"], meta["docker_port"]
    )
    log = log.bind(host=host, port=port)
    breaker = self.get_circuit(f"{host}:{port}")

    container_id = meta.get("container_id")
    log = log.bind(container_id=container_id)
    container = self.get_container_by_id(container_id, host, port, client)

    if container is None:
        log.warn(
            "Can't stop Job Execution, since container was not found. Aborting..."
        )
        return False

    def halt(bound_log):
        try:
            bound_log.info("Container found.")
            bound_log.debug("Stopping container...")
            container.stop()
            bound_log.info("Container stopped.")
        except requests.exceptions.ConnectionError as err:
            bound_log.error(
                "Failed to connect to Docker Host.", error=traceback.format_exc()
            )
            raise HostUnavailableError(host, port, err) from err

    guarded_halt = breaker(halt)

    try:
        guarded_halt(log)
    except pybreaker.CircuitBreakerError as err:
        raise HostUnavailableError(host, port, err) from err

    return True
def update_image(self, task, job, execution, image, tag, blacklisted_hosts=None):
    """Pull ``image:tag`` on a Docker host and remember which host was used.

    The host, port and client come from ``self.pool.get_client`` with the
    blacklist applied; the pull runs inside that host's circuit.

    Args:
        task: Owner of the job; only ``task_id`` is read.
        job: Job whose ``job_id`` is used for log context.
        execution: Job execution whose ``metadata`` receives ``docker_host``
            and ``docker_port`` after a successful pull.
        image: Image name passed to the pull.
        tag: Image tag passed to the pull.
        blacklisted_hosts: Hosts to exclude. When ``None``, the result of
            ``self.get_blacklisted_hosts()`` is used.

    Raises:
        HostUnavailableError: If the pull raises a connection error (the
            ``docker_host``/``docker_port`` metadata entries are removed
            first) or the circuit raises ``pybreaker.CircuitBreakerError``.
    """
    if blacklisted_hosts is None:
        blacklisted_hosts = self.get_blacklisted_hosts()

    log = self.logger.bind(
        task_id=task.task_id,
        job_id=str(job.job_id),
        execution_id=str(execution.execution_id),
        image=image,
        tag=tag,
        blacklisted_hosts=blacklisted_hosts,
        operation="docker_executor.update_image",
    )

    host, port, client = self.pool.get_client(
        self, task.task_id, blacklist=blacklisted_hosts
    )
    breaker = self.get_circuit(f"{host}:{port}")
    log = log.bind(host=host, port=port)

    def pull(bound_log):
        try:
            bound_log.debug("Updating image in docker host...")
            client.images.pull(image, tag=tag)
            execution.metadata["docker_host"] = host
            execution.metadata["docker_port"] = port
            bound_log.info(
                "Image updated successfully. Docker host and port stored in "
                "Job Execution for future reference."
            )
        except requests.exceptions.ConnectionError as err:
            bound_log.error(
                "Failed to connect to Docker Host. Will retry job later with a new host.",
                error=traceback.format_exc(),
            )

            # Drop the host reference so it is not reused for this execution.
            for key in ("docker_host", "docker_port"):
                if key in execution.metadata:
                    del execution.metadata[key]

            raise HostUnavailableError(host, port, err) from err

    guarded_pull = breaker(pull)

    try:
        guarded_pull(log)
    except pybreaker.CircuitBreakerError as err:
        raise HostUnavailableError(host, port, err) from err
def run(logger):
    """Stop the container taken from the enclosing scope.

    Reads ``container``, ``host`` and ``port`` from the enclosing scope.

    Args:
        logger: Bound logger used for progress and error messages.

    Raises:
        HostUnavailableError: If stopping the container raises a connection
            error.
    """
    try:
        logger.info("Container found.")
        logger.debug("Stopping container...")
        container.stop()
        logger.info("Container stopped.")
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(
            "Failed to connect to Docker Host.", error=traceback.format_exc()
        )
        raise HostUnavailableError(host, port, conn_err) from conn_err
def run(logger):
    """Look up and return the container named by the enclosing scope.

    Reads ``container_id``, ``client``, ``host`` and ``port`` from the
    enclosing scope.

    Args:
        logger: Bound logger; it is re-bound with the container id.

    Returns:
        Whatever ``client.containers.get(container_id)`` returns.

    Raises:
        HostUnavailableError: If the lookup raises a connection error.
    """
    try:
        scoped = logger.bind(container_id=container_id)
        scoped.debug("Finding container...")
        found = client.containers.get(container_id)
        scoped.info("Container found.")
        return found
    except requests.exceptions.ConnectionError as conn_err:
        raise HostUnavailableError(host, port, conn_err) from conn_err
def run(logger):
    """Find the execution's container and stop it.

    Reads ``execution``, ``cl``, ``host`` and ``port`` from the enclosing
    scope.

    Args:
        logger: Bound logger; it is re-bound with the container id.

    Raises:
        KeyError: If the execution metadata has no ``container_id``.
        HostUnavailableError: If finding or stopping the container raises a
            connection error. The connection error is attached as
            ``__cause__``.
    """
    try:
        container_id = execution.metadata["container_id"]
        logger = logger.bind(container_id=container_id)

        logger.debug("Finding container...")
        container = cl.containers.get(container_id)
        logger.info("Container found.")

        logger.debug("Stopping container...")
        container.stop()
        logger.info("Container stopped.")
    except requests.exceptions.ConnectionError as err:
        error = traceback.format_exc()
        logger.error("Failed to connect to Docker Host.", error=error)
        # Chain explicitly so the connection failure is the direct cause.
        raise HostUnavailableError(host, port, err) from err
def mark_as_done(self, task, job, execution):
    """Rename the execution's container to ``defunct-<current name>``.

    Args:
        task: Owner of the job; only ``task_id`` is read.
        job: Job whose ``job_id`` is used for log context.
        execution: Job execution whose ``metadata`` provides
            ``docker_host``, ``docker_port`` and (optionally)
            ``container_id``.

    Returns:
        ``False`` when the container lookup yields ``None``; ``True`` once
        the container was renamed.

    Raises:
        HostUnavailableError: If renaming raises a connection error or
            ``pybreaker.CircuitBreakerError``.
    """
    meta = execution.metadata
    host, port, client = self.pool.get_client(
        self, task.task_id, meta["docker_host"], meta["docker_port"]
    )

    container_id = meta.get("container_id")
    log = self.logger.bind(
        task_id=task.task_id,
        job=str(job.job_id),
        execution_id=str(execution.execution_id),
        operation="docker_host.mark_as_done",
        host=host,
        port=port,
        container_id=container_id,
    )

    container = self.get_container_by_id(container_id, host, port, client)
    if container is None:
        return False

    try:
        defunct_name = f"defunct-{container.name}"
        log.debug("Renaming container...", new_name=defunct_name)
        container.rename(defunct_name)
        log.debug("Container renamed.", new_name=defunct_name)
    except (
        pybreaker.CircuitBreakerError,
        requests.exceptions.ConnectionError,
    ) as err:
        log.error("Failed to connect to Docker Host.", error=traceback.format_exc())
        raise HostUnavailableError(host, port, err) from err

    return True
def run(logger):
    """Pull the image and record the Docker host in the execution metadata.

    Reads ``client``, ``image``, ``tag``, ``execution``, ``host`` and
    ``port`` from the enclosing scope.

    Args:
        logger: Bound logger used for progress and error messages.

    Raises:
        HostUnavailableError: If the pull raises a connection error. The
            ``docker_host`` and ``docker_port`` metadata entries are removed
            before raising.
    """
    try:
        logger.debug("Updating image in docker host...")
        client.images.pull(image, tag=tag)
        execution.metadata["docker_host"] = host
        execution.metadata["docker_port"] = port
        logger.info(
            "Image updated successfully. Docker host and port stored in "
            "Job Execution for future reference."
        )
    except requests.exceptions.ConnectionError as conn_err:
        logger.error(
            "Failed to connect to Docker Host. Will retry job later with a new host.",
            error=traceback.format_exc(),
        )

        # Drop the host reference so it is not reused for this execution.
        for key in ("docker_host", "docker_port"):
            if key in execution.metadata:
                del execution.metadata[key]

        raise HostUnavailableError(host, port, conn_err) from conn_err
def run(self, task, job, execution, image, tag, command, blacklisted_hosts=None):
    """Start a detached container for the job on its recorded Docker host.

    The container is named ``<JOB_PREFIX>-<execution_id>`` and receives the
    job's ``envs`` as environment and ``additional_dns_entries`` as extra
    hosts. Its id is stored in ``execution.metadata["container_id"]``.

    Args:
        task: Owner of the job; only ``task_id`` is read.
        job: Job whose ``metadata`` may provide ``envs`` and
            ``additional_dns_entries``.
        execution: Job execution whose ``metadata`` must already contain
            ``docker_host`` and ``docker_port``.
        image: Image name.
        tag: Image tag.
        command: Command to run in the container.
        blacklisted_hosts: Only added to the log context here.

    Returns:
        ``True`` once the container was started.

    Raises:
        RuntimeError: If ``docker_host`` is missing from the metadata.
        HostUnavailableError: If starting the container raises a connection
            error (the ``docker_host``/``docker_port`` metadata entries are
            removed first) or the circuit raises
            ``pybreaker.CircuitBreakerError``.
    """
    log = self.logger.bind(
        task_id=task.task_id,
        job_id=str(job.job_id),
        execution_id=str(execution.execution_id),
        image=image,
        tag=tag,
        command=command,
        blacklisted_hosts=blacklisted_hosts,
        operation="docker_executor.run",
    )

    meta = execution.metadata
    if "docker_host" not in meta:
        raise RuntimeError(
            "Can't run job without docker_host and docker_port in execution metadata."
        )

    host, port, client = self.pool.get_client(
        self, task.task_id, meta["docker_host"], meta["docker_port"]
    )
    log = log.bind(host=host, port=port)
    breaker = self.get_circuit(f"{host}:{port}")

    def launch(bound_log):
        try:
            container_name = f"{JOB_PREFIX}-{execution.execution_id}"
            envs = job.metadata.get("envs", {})
            dns_entries = dict(job.metadata.get("additional_dns_entries", []))

            bound_log = bound_log.bind(
                container_name=container_name,
                envs=envs,
                additional_dns_entries=dns_entries,
            )

            bound_log.debug("Running the Job in Docker Host...")
            started = client.containers.run(
                image=f"{image}:{tag}",
                name=container_name,
                command=command,
                detach=True,
                environment=envs,
                extra_hosts=dns_entries,
            )

            execution.metadata["container_id"] = started.id
            bound_log.info(
                "Container started successfully. Container ID stored as "
                "Job Execution metadata.",
                container_id=started.id,
            )
        except requests.exceptions.ConnectionError as err:
            bound_log.error(
                "Failed to connect to Docker Host. Will retry job later with a new host.",
                error=traceback.format_exc(),
            )

            # Drop the host reference so it is not reused for this execution.
            for key in ("docker_host", "docker_port"):
                if key in execution.metadata:
                    del execution.metadata[key]

            raise HostUnavailableError(host, port, err) from err

    guarded_launch = breaker(launch)

    try:
        guarded_launch(log)
    except pybreaker.CircuitBreakerError as err:
        raise HostUnavailableError(host, port, err) from err

    return True