def execute_actor(actor_id,
                  worker_id,
                  execution_id,
                  image,
                  msg,
                  user=None,
                  d=None,
                  privileged=False,
                  mounts=None,
                  leave_container=False,
                  fifo_host_path=None,
                  socket_host_path=None,
                  mem_limit=None,
                  max_cpus=None,
                  tenant=None):
    """
    Creates and runs an actor container and supervises the execution, collecting statistics about resource
    consumption from the Docker daemon.

    :param actor_id: the dbid of the actor; for updating worker status
    :param worker_id: the worker id; also for updating worker status
    :param execution_id: the id of the execution.
    :param image: the actor's image; worker must have already downloaded this image to the local docker registry.
    :param msg: the message being passed to the actor.
    :param user: string in the form {uid}:{gid} representing the uid and gid to run the command as.
    :param d: dictionary representing the environment to instantiate within the actor container. When no FIFO is
        used, the key 'MSG' is added to this dictionary. Defaults to a fresh empty dict per call.
    :param privileged: whether this actor is "privileged"; i.e., its container should run in privileged mode with the
        docker daemon mounted.
    :param mounts: list of dictionaries representing the mounts to add; each dictionary mount should have 3 keys:
        host_path, container_path and format (which should have value 'ro' or 'rw'). Defaults to no mounts.
    :param leave_container: if True, the actor container is not removed after the execution.
    :param fifo_host_path: If not None, a string representing a path on the host to a FIFO used for passing binary data
        to the actor.
    :param socket_host_path: a string representing a path on the host to a socket used for collecting results from
        the actor. NOTE(review): the path is unlinked and bound unconditionally, so None does not look usable here
        despite being the default -- confirm against callers.
    :param mem_limit: The maximum amount of memory the Actor container can use; should be the same format as the
        --memory Docker flag. The string '-1' means unlimited.
    :param max_cpus: The maximum number of CPUs each actor will have available to them. Does not guarantee these CPU
        resources; serves as upper bound. -1 (or any value that cannot be converted to int) means unlimited.
    :param tenant: passed through to Execution.set_logs when logs are saved during the run.
    :return: a 5-tuple (result, logs, container_state, exit_code, start_time):
        `result`: dict with keys 'cpu', 'io' and 'runtime' (statistics about resource consumption);
        `logs`: output from docker logs;
        `container_state`: the 'State' object from docker inspect, or {'unavailable': True} if it could not be read;
        `exit_code`: the container's ExitCode or the string 'undetermined';
        `start_time`: value of get_current_utc_time() taken right before the container was started.
    :raises DockerStartContainerError: if the FIFO cannot be written, the results socket cannot be set up or the
        container cannot be started.
    """
    logger.debug("top of execute_actor(); (worker {};{})".format(worker_id, execution_id))

    # build fresh containers per call: the environment dict is written to below (MSG), so a shared default
    # object would carry one execution's message over into the next one.
    if d is None:
        d = {}
    if mounts is None:
        mounts = []

    # initially set the global force_quit variable to False
    globals.force_quit = False

    # initial stats object, environment, binds and volumes
    result = {'cpu': 0, 'io': 0, 'runtime': 0}

    # instantiate docker client
    cli = docker.APIClient(base_url=dd, version="auto")

    # don't try to pass binary messages through the environment as these can cause
    # broken pipe errors. the binary data will be passed through the FIFO momentarily.
    if not fifo_host_path:
        d['MSG'] = msg
    binds = {}
    volumes = []

    # if container is privileged, mount the docker daemon so that additional
    # containers can be started.
    logger.debug("privileged: {};(worker {};{})".format(privileged, worker_id, execution_id))
    if privileged:
        binds = {
            '/var/run/docker.sock': {
                'bind': '/var/run/docker.sock',
                'ro': False
            }
        }
        volumes = ['/var/run/docker.sock']

    # add a bind key and dictionary as well as a volume for each mount
    for m in mounts:
        binds[m.get('host_path')] = {
            'bind': m.get('container_path'),
            'ro': m.get('format') == 'ro'
        }
        volumes.append(m.get('host_path'))

    # mem_limit
    # -1 => unlimited memory
    if mem_limit == '-1':
        mem_limit = None

    # max_cpus
    try:
        max_cpus = int(max_cpus)
    except (TypeError, ValueError, OverflowError):
        # None or a non-numeric value => no cpu limit
        max_cpus = None
    # -1 => unlimited cpus
    if max_cpus == -1:
        max_cpus = None

    host_config = cli.create_host_config(binds=binds,
                                         privileged=privileged,
                                         mem_limit=mem_limit,
                                         nano_cpus=max_cpus)
    logger.debug("host_config object created by (worker {};{}).".format(worker_id, execution_id))

    # write binary data to FIFO if it exists:
    fifo = None
    if fifo_host_path:
        try:
            fifo = os.open(fifo_host_path, os.O_RDWR)
            os.write(fifo, msg)
        except Exception as e:
            logger.error("Error writing the FIFO. Exception: {};(worker {};{})".format(
                e, worker_id, execution_id))
            os.remove(fifo_host_path)
            raise DockerStartContainerError("Error writing to fifo: {}; "
                                            "(worker {};{})".format(e, worker_id, execution_id))

    # set up results socket -----------------------
    # make sure socket doesn't already exist:
    try:
        os.unlink(socket_host_path)
    except OSError as e:
        if os.path.exists(socket_host_path):
            logger.error("socket at {} already exists; Exception: {}; (worker {};{})".format(
                socket_host_path, e, worker_id, execution_id))
            raise DockerStartContainerError("Got an OSError trying to create the results docket; "
                                            "exception: {}".format(e))

    # use retry logic since, when the compute node is under load, we see errors initially trying to create the socket
    # server object.
    keep_trying = True
    count = 0
    server = None
    while keep_trying and count < 10:
        keep_trying = False
        count = count + 1
        try:
            server = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
        except Exception as e:
            keep_trying = True
            logger.info("Could not instantiate socket at {}. "
                        "Count: {}; Will keep trying. "
                        "Exception: {}; type: {}; (worker {};{})".format(
                            socket_host_path, count, e, type(e), worker_id, execution_id))
        try:
            server.bind(socket_host_path)
        except Exception as e:
            keep_trying = True
            logger.info("Could not bind socket at {}. "
                        "Count: {}; Will keep trying. "
                        "Exception: {}; type: {}; (worker {};{})".format(
                            socket_host_path, count, e, type(e), worker_id, execution_id))
        try:
            os.chmod(socket_host_path, 0o777)
            logger.debug("results socket permissions set to 777. socket_host_path: {}".format(socket_host_path))
        except Exception as e:
            msg = f"Got exception trying to set permissions on the results socket. Not sure what to do. e: {e}"
            logger.error(msg)
            # for now, we'll just swallow it but this is really a TODO.
        try:
            server.settimeout(RESULTS_SOCKET_TIMEOUT)
        except Exception as e:
            keep_trying = True
            logger.info("Could not set timeout for socket at {}. "
                        "Count: {}; Will keep trying. "
                        "Exception: {}; type: {}; (worker {};{})".format(
                            socket_host_path, count, e, type(e), worker_id, execution_id))
    if not server:
        msg = "Failed to instantiate results socket. " \
              "Abaco compute host could be overloaded. (worker {};{})".format(worker_id, execution_id)
        logger.error(msg)
        raise DockerStartContainerError(msg)

    logger.debug("results socket server instantiated. path: {} (worker {};{})".format(
        socket_host_path, worker_id, execution_id))

    # instantiate the results channel:
    results_ch = ExecutionResultsChannel(actor_id, execution_id)

    # create and start the container
    logger.debug("Final container environment: {};(worker {};{})".format(d, worker_id, execution_id))
    logger.debug("Final binds: {} and host_config: {} for the container.(worker {};{})".format(
        binds, host_config, worker_id, execution_id))
    container = cli.create_container(image=image,
                                     environment=d,
                                     user=user,
                                     volumes=volumes,
                                     host_config=host_config)
    # get the UTC time stamp
    start_time = get_current_utc_time()
    # start the timer to track total execution time.
    start = timeit.default_timer()
    logger.debug("right before cli.start: {}; container id: {}; "
                 "(worker {};{})".format(start, container.get('Id'), worker_id, execution_id))
    try:
        cli.start(container=container.get('Id'))
    except Exception as e:
        # if there was an error starting the container, user will need to debug
        logger.info("Got exception starting actor container: {}; (worker {};{})".format(
            e, worker_id, execution_id))
        raise DockerStartContainerError("Could not start container {}. Exception {}".format(
            container.get('Id'), str(e)))

    # local bool tracking whether the actor container is still running
    running = True
    Execution.update_status(actor_id, execution_id, RUNNING)

    logger.debug("right before creating stats_cli: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))
    # create a separate cli for checking stats objects since these should be fast and we don't want to wait
    stats_cli = docker.APIClient(base_url=dd, timeout=1, version="auto")
    logger.debug("right after creating stats_cli: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    # under load, we can see UnixHTTPConnectionPool ReadTimeout's trying to create the stats_obj
    # so here we are trying up to 3 times to create the stats object for a possible total of 3s
    # timeouts
    ct = 0
    stats_obj = None
    logs = None
    while ct < 3:
        try:
            stats_obj = stats_cli.stats(container=container.get('Id'), decode=True)
            break
        except ReadTimeout:
            ct += 1
        except Exception as e:
            logger.error("Unexpected exception creating stats_obj. Exception: {}; (worker {};{})".format(
                e, worker_id, execution_id))
            # count this attempt as well; otherwise a persistent error would retry forever.
            # under load, errors can occur attempting to create the stats object while the container is
            # still running; the container status is checked explicitly in the main loop below.
            ct += 1
    logger.debug("right after attempting to create stats_obj: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    # a counter of the number of iterations through the main "running" loop;
    # this counter is used to determine when less frequent actions, such as log aggregation, need to run.
    loop_idx = 0
    while running and not globals.force_quit:
        loop_idx += 1
        logger.debug("top of while running loop; loop_idx: {}".format(loop_idx))
        datagram = None
        stats = None
        try:
            datagram = server.recv(MAX_RESULT_FRAME_SIZE)
        except socket.timeout:
            pass
        except Exception as e:
            logger.error("got exception from server.recv: {}; (worker {};{})".format(
                e, worker_id, execution_id))
        logger.debug("right after try/except datagram block: {}; (worker {};{})".format(
            timeit.default_timer(), worker_id, execution_id))
        if datagram:
            try:
                results_ch.put(datagram)
            except Exception as e:
                logger.error("Error trying to put datagram on results channel. "
                             "Exception: {}; (worker {};{})".format(e, worker_id, execution_id))
        logger.debug("right after results ch.put: {}; (worker {};{})".format(
            timeit.default_timer(), worker_id, execution_id))

        # only try to collect stats if we have a stats_obj:
        if stats_obj:
            logger.debug("we have a stats_obj; trying to collect stats. (worker {};{})".format(
                worker_id, execution_id))
            try:
                logger.debug("waiting on a stats obj: {}; (worker {};{})".format(
                    timeit.default_timer(), worker_id, execution_id))
                stats = next(stats_obj)
                logger.debug("got the stats obj: {}; (worker {};{})".format(
                    timeit.default_timer(), worker_id, execution_id))
            except StopIteration:
                # we have read the last stats object - no need for processing
                logger.debug("Got StopIteration; no stats object. (worker {};{})".format(
                    worker_id, execution_id))
            except ReadTimeoutError:
                # this is a ReadTimeoutError from docker, not requests.
                logger.info("next(stats) just timed out: {}; (worker {};{})".format(
                    timeit.default_timer(), worker_id, execution_id))
                # UPDATE - 07-2018: under load, a ReadTimeoutError from the attempt to get a stats object
                # does NOT imply the container has stopped; we need to explicitly check the container status
                # to be sure.

        # if we got a stats object, add it to the results; it is possible stats collection timed out and the object
        # is None
        if stats:
            logger.debug("adding stats to results; (worker {};{})".format(worker_id, execution_id))
            try:
                result['cpu'] += stats['cpu_stats']['cpu_usage']['total_usage']
            except KeyError as e:
                logger.info("Got a KeyError trying to fetch the cpu object: {}; "
                            "(worker {};{})".format(e, worker_id, execution_id))
            try:
                result['io'] += stats['networks']['eth0']['rx_bytes']
            except KeyError as e:
                logger.info("Got KeyError exception trying to grab the io object. "
                            "running: {}; Exception: {}; (worker {};{})".format(
                                running, e, worker_id, execution_id))

        # grab the logs every 5th iteration --
        if loop_idx % 5 == 0:
            logs = cli.logs(container.get('Id'))
            Execution.set_logs(execution_id, logs, actor_id, tenant, worker_id)
            logs = None

        # checking the container status to see if it is still running ----
        if running:
            logger.debug("about to check container status: {}; (worker {};{})".format(
                timeit.default_timer(), worker_id, execution_id))
            # we need to wait for the container id to be available
            i = 0
            c = None
            while i < 10:
                try:
                    c = cli.containers(all=True, filters={'id': container.get('Id')})[0]
                    break
                except IndexError:
                    logger.error("Got an IndexError trying to get the container object. "
                                 "(worker {};{})".format(worker_id, execution_id))
                    time.sleep(0.1)
                    i += 1
            logger.debug("done checking status: {}; i: {}; (worker {};{})".format(
                timeit.default_timer(), i, worker_id, execution_id))
            # if we were never able to get the container object, we need to stop processing and kill this
            # worker; the docker daemon could be under heavy load, but we need to not launch another
            # actor container with this worker, because the existing container may still be running,
            if i == 10 or not c:
                # we'll try to stop the container
                logger.error("Never could retrieve the container object! Attempting to stop container; "
                             "container id: {}; (worker {};{})".format(
                                 container.get('Id'), worker_id, execution_id))
                # stop_container could raise an exception - if so, we let it pass up and have the worker
                # shut itself down.
                stop_container(cli, container.get('Id'))
                logger.info("container {} stopped. (worker {};{})".format(
                    container.get('Id'), worker_id, execution_id))
                # if we were able to stop the container, we can set running to False and keep the
                # worker running
                running = False
                continue
            state = c.get('State')
            if not state == 'running':
                logger.debug("container finished, final state: {}; (worker {};{})".format(
                    state, worker_id, execution_id))
                running = False
                continue
            else:
                # container still running; check if a force_quit has been sent OR
                # we are beyond the max_run_time
                # (max_run_time is not defined in this function; it is read from the enclosing module scope)
                runtime = timeit.default_timer() - start
                if globals.force_quit or (max_run_time > 0 and max_run_time < runtime):
                    logs = cli.logs(container.get('Id'))
                    if globals.force_quit:
                        logger.info("issuing force quit: {}; (worker {};{})".format(
                            timeit.default_timer(), worker_id, execution_id))
                    else:
                        logger.info("hit runtime limit: {}; (worker {};{})".format(
                            timeit.default_timer(), worker_id, execution_id))
                    cli.stop(container.get('Id'))
                    running = False
            logger.debug("right after checking container state: {}; (worker {};{})".format(
                timeit.default_timer(), worker_id, execution_id))
    logger.info("container stopped:{}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))
    stop = timeit.default_timer()
    globals.force_quit = False

    # get info from container execution, including exit code; Exceptions from any of these commands
    # should not cause the worker to shutdown or prevent starting subsequent actor containers.
    exit_code = 'undetermined'
    # default used when the container cannot be inspected at all, so the return below never hits an unbound name
    container_state = {'unavailable': True}
    try:
        container_info = cli.inspect_container(container.get('Id'))
        try:
            container_state = container_info['State']
            try:
                exit_code = container_state['ExitCode']
            except KeyError as e:
                logger.error("Could not determine ExitCode for container {}. "
                             "Exception: {}; (worker {};{})".format(
                                 container.get('Id'), e, worker_id, execution_id))
                exit_code = 'undetermined'
            # Converting the ISO8601 time strings to datetime objects
            try:
                # Slicing to 23 to account for accuracy up to milliseconds and replace to get rid of ISO 8601 'Z'
                startedat_ISO = container_state['StartedAt'].replace('Z', '')[:23]
                finishedat_ISO = container_state['FinishedAt'].replace('Z', '')[:23]

                container_state['StartedAt'] = datetime.datetime.strptime(startedat_ISO, "%Y-%m-%dT%H:%M:%S.%f")
                container_state['FinishedAt'] = datetime.datetime.strptime(finishedat_ISO, "%Y-%m-%dT%H:%M:%S.%f")
            except Exception as e:
                # single message string: a second positional argument would be treated as a %-format argument
                logger.error(f"Datetime conversion failed for container {container.get('Id')}. "
                             f"Exception: {e}; (worker {worker_id};{execution_id})")
                container_state = {'unavailable': True}
        except KeyError as e:
            logger.error(f"Could not determine final state for container {container.get('Id')}. "
                         f"Exception: {e}; (worker {worker_id};{execution_id})")
            container_state = {'unavailable': True}
    except docker.errors.APIError as e:
        logger.error(f"Could not inspect container {container.get('Id')}. "
                     f"Exception: {e}; (worker {worker_id};{execution_id})")
    logger.debug("right after getting container_info: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    # get logs from container
    if not logs:
        logs = cli.logs(container.get('Id'))
    if not logs:
        # there are issues where container do not have logs associated with them when they should.
        logger.info("Container id {} had NO logs associated with it. "
                    "(worker {};{})".format(container.get('Id'), worker_id, execution_id))
    logger.debug("right after getting container logs: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    # get any additional results from the execution:
    while True:
        datagram = None
        try:
            datagram = server.recv(MAX_RESULT_FRAME_SIZE)
        except socket.timeout:
            break
        except Exception as e:
            logger.error("Got exception from server.recv: {}; (worker {};{})".format(
                e, worker_id, execution_id))
            # nothing bounds this loop except a timeout, so stop draining on any other socket error
            # instead of retrying the failing recv forever.
            break
        if datagram:
            try:
                results_ch.put(datagram)
            except Exception as e:
                logger.error("Error trying to put datagram on results channel. "
                             "Exception: {}; (worker {};{})".format(e, worker_id, execution_id))
    logger.debug("right after getting last execution results from datagram socket: {}; "
                 "(worker {};{})".format(timeit.default_timer(), worker_id, execution_id))
    if socket_host_path:
        server.close()
        os.remove(socket_host_path)
    logger.debug("right after removing socket: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    # remove actor container with retrying logic -- check for specific filesystem errors from the docker daemon:
    if not leave_container:
        keep_trying = True
        count = 0
        while keep_trying and count < 10:
            keep_trying = False
            count = count + 1
            try:
                cli.remove_container(container=container)
                logger.info("Actor container removed. (worker {};{})".format(worker_id, execution_id))
            except Exception as e:
                # if the container is already gone we definitely want to quit:
                if 'No such container' in str(e):
                    logger.info("Got 'no such container' exception - quiting. "
                                "Exception: {}; (worker {};{})".format(e, worker_id, execution_id))
                    break
                # if we get a resource busy/internal server error from docker, we need to keep trying to remove the
                # container.
                elif 'device or resource busy' in str(e) or 'failed to remove root filesystem' in str(e):
                    logger.error("Got resource busy/failed to remove filesystem exception trying to remove "
                                 "actor container; will keep trying."
                                 "Count: {}; Exception: {}; (worker {};{})".format(
                                     count, e, worker_id, execution_id))
                    time.sleep(1)
                    keep_trying = True
                else:
                    logger.error("Unexpected exception trying to remove actor container. Giving up."
                                 "Exception: {}; type: {}; (worker {};{})".format(
                                     e, type(e), worker_id, execution_id))
    else:
        logger.debug("leaving actor container since leave_container was True. "
                     "(worker {};{})".format(worker_id, execution_id))
    logger.debug("right after removing actor container: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    if fifo_host_path:
        try:
            os.close(fifo)
            os.remove(fifo_host_path)
        except Exception as e:
            logger.debug(f"got Exception trying to clean up fifo_host_path; e: {e}")
    if results_ch:
        results_ch.close()
    result['runtime'] = int(stop - start)
    logger.debug("right after removing fifo; about to return: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))
    return result, logs, container_state, exit_code, start_time
def start_database(benchmarker_config, test, database):
    '''
    Sets up a container for the given database and port, and starts said docker
    container.

    The image "techempower/<database>:latest" is looked up locally, then
    pulled; if neither works it is built from
    <fwroot>/toolset/setup/docker/databases/<database>/<database>.dockerfile.
    The container is started detached under the name "tfb-database" and the
    function then polls test_database() once per second for up to 60 seconds.

    benchmarker_config -- object providing fwroot, database_docker_host,
                          network and network_mode
    test               -- not used by this function
    database           -- name of the database image/directory

    Returns the started container object, whether or not the database became
    ready (a message is logged when it did not).
    '''
    image_name = "techempower/%s:latest" % database
    log_prefix = image_name + ": "

    database_dir = os.path.join(benchmarker_config.fwroot, "toolset", "setup",
                                "docker", "databases", database)
    docker_file = "%s.dockerfile" % database

    pulled = False
    client = docker.DockerClient(
        base_url=benchmarker_config.database_docker_host)
    try:
        # Don't pull if we have it
        client.images.get(image_name)
        pulled = True
        log("Found published image; skipping build", prefix=log_prefix)
    except Exception:
        # Pull the dependency image.
        # `except Exception` (not a bare except) so that Ctrl-C / SystemExit
        # during a long pull aborts instead of silently falling back to a build.
        try:
            log("Attempting docker pull for image (this can take some time)",
                prefix=log_prefix)
            client.images.pull(image_name)
            pulled = True
            log("Found published image; skipping build", prefix=log_prefix)
        except Exception:
            # Deliberate best-effort: any pull failure means we build below.
            pass

    if not pulled:
        build_output = docker.APIClient(
            base_url=benchmarker_config.database_docker_host).build(
                path=database_dir,
                dockerfile=docker_file,
                tag="techempower/%s" % database)
        for line in build_output:
            # NOTE(review): startswith() with a text literal and the encode()
            # below assume the build stream yields native `str` lines --
            # confirm against the interpreter / docker SDK version in use.
            if line.startswith('{"stream":'):
                line = json.loads(line)
                # single-entry mapping: take its only value without indexing
                # dict.keys(), which is not subscriptable on every Python
                line = next(iter(line.values())).encode('utf-8')
                log(line,
                    prefix=log_prefix,
                    color=Fore.WHITE + Style.BRIGHT \
                        if re.match(r'^Step \d+\/\d+', line) else '')

    sysctl = {'net.core.somaxconn': 65535, 'kernel.sem': "250 32000 256 512"}

    ulimit = [{'name': 'nofile', 'hard': 65535, 'soft': 65535}]

    # the client created above targets the same docker host, so it is reused
    container = client.containers.run(
        "techempower/%s" % database,
        name="tfb-database",
        network=benchmarker_config.network,
        network_mode=benchmarker_config.network_mode,
        detach=True,
        ulimits=ulimit,
        sysctls=sysctl)

    # Sleep until the database accepts connections
    slept = 0
    max_sleep = 60
    database_ready = False
    while not database_ready and slept < max_sleep:
        time.sleep(1)
        slept += 1
        database_ready = test_database(benchmarker_config, database)

    if not database_ready:
        log("Database was not ready after startup", prefix=log_prefix)

    return container
def check():
    """Autoscale the slave workers, then re-arm itself to run again in 120 seconds.

    Reads every child of /worker/slave from ZooKeeper to build a pid -> container-id
    map, derives the number of slaves needed from the module-level ``count``
    (one slave per 20, minimum one), and either creates and starts the missing
    ``zook_worker`` containers (copying the master's ``<pid>.db`` file for each)
    or removes the surplus ones, highest pid first. ``count`` is reset to 0
    afterwards.
    """
    global count

    # map each registered slave's pid to the container id stored in its znode
    cid_by_pid = {}
    for child in zk.get_children("/worker/slave"):
        raw, _stat = zk.get("/worker/slave/" + child)
        text = raw.decode("utf-8")
        pid_at = text.find('PID')
        slave_pid = int(text[pid_at + 5:len(text) + 1])
        cid_at = text.find('CID')
        cid_by_pid[slave_pid] = text[cid_at + 6:cid_at + 18]
    print("DICT: ", cid_by_pid)

    pids = list(cid_by_pid)
    print("LIST: ", pids)
    # highest pid first: those are the ones removed when scaling down
    pids.sort(reverse=True)
    current_slaves = len(pids)

    # one slave per 20 (rounded up), but never fewer than one
    if count == 0:
        needed_slaves = 1
    else:
        whole, leftover = divmod(count, 20)
        needed_slaves = whole if leftover == 0 else whole + 1

    todo = needed_slaves - current_slaves
    print("-----------SLAVES WE HAVE------------------", current_slaves)
    print("-----------TOTAL SLAVES WE NEED------------", needed_slaves)
    print("-----------TODO----------------------------", todo)

    if todo > 0:
        # spawn todo number of slaves
        master_raw, _stat = zk.get("/worker/master")
        master_text = master_raw.decode("utf-8")
        master_pid_at = master_text.find('PID')
        masterdb = str(master_text[master_pid_at + 5:len(master_text) + 1]) + ".db"
        for i in range(todo):
            client = docker.from_env()
            print("VALUE OF I", i)
            shared_volumes = {
                '/var/run/docker.sock': {
                    'bind': '/var/run/docker.sock',
                    'mode': 'rw'
                },
                '/home/ubuntu/one/cloud/stage4/zook': {
                    'bind': '/code',
                    'mode': 'rw'
                }
            }
            new_container = client.containers.create(
                image="zook_worker:latest",
                command="python /code/worker.py",
                volumes=shared_volumes,
                network="zook_default",
                detach=True)
            print("Trying to start a new container")
            new_container.start()
            print("--------NEW CONTAINER--------", new_container)

            # copy to new container db from master db
            low_level = docker.APIClient()
            new_pid = low_level.inspect_container(new_container.id)['State']['Pid']
            print("-----new container pid-----", new_pid)
            os.system("cp " + masterdb + " " + str(new_pid) + ".db")
    elif todo < 0:
        # kill todo number of slaves
        for idx in range(-todo):
            victim_pid = pids[idx]
            victim_cid = cid_by_pid[victim_pid]
            znode = "/worker/slave/slave" + str(victim_pid)
            zk.set(znode, b"dummy data")
            os.system("rm " + str(victim_pid) + ".db")
            zk.delete(znode, version=-1, recursive=False)
            client = docker.from_env()
            client.containers.get(victim_cid).kill()

    count = 0
    # check for scaling every 120 seconds
    threading.Timer(120.0, check).start()
def __init__(self):
    """Create the two Docker clients kept on this object.

    ``self.client`` is built from the environment via ``docker.from_env()``;
    ``self.buildclient`` is a low-level ``docker.APIClient`` addressed at the
    local daemon socket.
    """
    daemon_socket = 'unix://var/run/docker.sock'
    self.client = docker.from_env()
    self.buildclient = docker.APIClient(base_url=daemon_socket)
def _get_docker_client(self) -> "docker.APIClient":
    """Return a new low-level Docker client for ``self.base_url``.

    The API version is negotiated automatically (``version="auto"``).
    """
    # the docker package is slow to import, so it is loaded only when a client
    # is actually requested; this keeps the 'import prefect' time low
    from docker import APIClient

    return APIClient(base_url=self.base_url, version="auto")
def __init__(self, endpoint, name=None, dry_run=False, **config):
    """Store the settings and open a low-level Docker client for ``endpoint``.

    :param endpoint: base URL handed to ``docker.APIClient``.
    :param name: display name; falls back to ``endpoint`` when empty or None.
    :param dry_run: stored as-is on ``self.dry_run``.
    :param config: extra keyword arguments; accepted but not used here.
    """
    self.dry_run = dry_run
    self.name = name or endpoint
    self.cli = docker.APIClient(base_url=endpoint)
def setUpClass(cls):
    """Attach the Docker clients shared by the class.

    ``cls._client`` is the environment-configured high-level client and
    ``cls._low_level_client`` a default-configured ``docker.APIClient``.
    """
    high_level = docker.from_env()
    cls._client = high_level
    low_level = docker.APIClient()
    cls._low_level_client = low_level
def deploy_ebotserver(ebot_ip, tls_config: any, topo: IO) -> None:
    """
    Deploy ebot (ebot, ebot web and their DB) on a physical server.

    Three containers are created on the Docker daemon at tcp://<ebot_ip>:2376
    (mysql, ebot, ebotweb), each using host networking and an "always" restart
    policy. One line per container is written to the topology file, then the
    containers are started in that order with a 10 second pause between them.

    :param ebot_ip: IP of the server on which ebot should be deployed
    :param tls_config: TLS configuration to use
    :param topo: File descriptor to the topology file
    """
    client = docker.APIClient(base_url="tcp://{}:2376".format(ebot_ip), tls=tls_config)

    def host_config(mounts, resolve_peers):
        # settings common to the three containers; the ebot ones additionally
        # resolve the "mysql" and "ebot" host names to the server itself
        options = {
            "restart_policy": {"Name": "always"},
            "mounts": mounts,
            "network_mode": "host",
        }
        if resolve_peers:
            options["extra_hosts"] = {"mysql": ebot_ip, "ebot": ebot_ip}
        return client.create_host_config(**options)

    # --- database ---------------------------------------------------------
    db_host_config = host_config(
        [docker.types.Mount(target="/var/lib/mysql", source="ebot_mysql", type="volume")],
        resolve_peers=False,
    )
    db_environment = {
        "MYSQL_DATABASE": "ebotv3",
        "MYSQL_USER": "******",
        "MYSQL_PASSWORD": "******",
        "MYSQL_ROOT_PASSWORD": "******",
    }
    db_container = client.create_container(
        "mysql:5.7",
        detach=True,
        host_config=db_host_config,
        environment=db_environment,
        command="mysqld",
        name="db_container",
    )
    topo.write("db_container;{};{}\n".format(ebot_ip, client.base_url))

    # --- ebot -------------------------------------------------------------
    ebot_host_config = host_config(
        [
            docker.types.Mount("/ebot/logs", "ebot_logs", type="volume"),
            docker.types.Mount("/ebot/demos", "ebot_demo", type="volume"),
        ],
        resolve_peers=True,
    )
    ebot_environment = {
        "EXTERNAL_IP": ebot_ip,
        "MYSQL_HOST": "mysql",
        "MYSQL_PORT": "3306",
        "MYSQL_DB": "ebotv3",
        "MYSQL_USER": "******",
        "MYSQL_PASS": "******",
        "LO3_METHOD": "restart",
        "KO3_METHOD": "restart",
        "DEMO_DOWNLOAD": "true",
        "REMIND_RECORD": "false",
        "DAMAGE_REPORT": "true",
        "DELAY_READY": "false",
        "NODE_STARTUP_METHOD": "node",
        "TOORNAMENT_PLUGIN_KEY": "",
    }
    ebot_container = client.create_container(
        "hsfactory/ebot",
        detach=True,
        hostname="ebot",
        host_config=ebot_host_config,
        environment=ebot_environment,
        name="ebot_container",
    )
    topo.write("ebot_container;{};{}\n".format(ebot_ip, client.base_url))

    # --- ebot web ---------------------------------------------------------
    web_host_config = host_config(
        [
            docker.types.Mount("/opt/ebot/logs", "ebot_logs", type="volume"),
            docker.types.Mount("/opt/ebot/demos", "ebot_demo", type="volume"),
        ],
        resolve_peers=True,
    )
    web_environment = {
        "EBOT_IP": ebot_ip,
        "EBOT_PORT": "12360",
        "EBOT_ADMIN_USER": "******",
        "EBOT_ADMIN_PASS": "******",
        "EBOT_ADMIN_MAIL": "insalade@ebot",
        "MYSQL_HOST": "mysql",
        "MYSQL_PORT": "3306",
        "MYSQL_DB": "ebotv3",
        "MYSQL_USER": "******",
        "MYSQL_PASS": "******",
        "DEMO_DOWNLOAD": "true",
        "DEFAULT_RULES": "esl5on5",
        "TOORNAMENT_ID": "",
        "TOORNAMENT_SECRET": "",
        "TOORNAMENT_PLUGIN_KEY": "",
        "TOORNAMENT_API_KEY": "",
    }
    ebotweb_container = client.create_container(
        "hsfactory/ebotweb",
        detach=True,
        host_config=web_host_config,
        environment=web_environment,
        name="ebotweb_container",
    )
    topo.write("ebotweb_container;{};{}\n".format(ebot_ip, client.base_url))

    # start in dependency order, pausing between the starts
    client.start(db_container)
    time.sleep(10)
    client.start(ebot_container)
    time.sleep(10)
    client.start(ebotweb_container)
def docker_api_client():
    """
    Build a default-configured low-level docker APIClient object - needed to
    inspect containers.
    """
    api_client = docker.APIClient()
    return api_client
#!/usr/bin/env python import docker import time import traceback client = docker.from_env() cmd = 'bash -c "for i in `seq 1 10`; do echo $i >> /tmp/foo.txt && sleep 1; done"' container = client.containers.create( 'bluesky', cmd, name="foo") try: container.start() api_client = docker.APIClient() while True: print('status: {}'.format(container.status)) try: api_client.top(container.id) except docker.errors.APIError as e: print("no longer running") break e = api_client.exec_create(container.id, 'tail -1 /tmp/foo.txt') r = api_client.exec_start(e['Id']) print(r) time.sleep(2) # for l in api_client.logs(container.id, stream=True): # print(l) #api_client.wait(container.id) except Exception as e:
def deploy_csgoserver(nb_csgo: int, servers: List[Dict[str, str]], ebot_ip: str, image: str, tls_config: any,
                      topo: IO) -> None:
    """
    Deploy csgo containers over physical servers.

    Containers are numbered 0..nb_csgo-1 and handed out in contiguous chunks of
    ceil(nb_csgo / len(servers)) per server; the last chunk is cut off at
    nb_csgo so that exactly nb_csgo containers are created. Each container is
    created with host networking on the daemon at tcp://<server>:2376, started,
    and recorded as one line in the topology file.

    :param nb_csgo: Number of container to deploy
    :param servers: List of physical servers on which the containers will be deployed
    :param ebot_ip: IP address of ebot (#FIXME confirm with original author); the address written to the topology
        file for each container is this address incremented once per container
    :param image: Name of the docker image to deploy
    :param tls_config: TLS configuration to use
    :param topo: File descriptor to the topology file
    :raises ValueError: if ebot_ip is not a valid IP address
    """
    ip = ipaddress.ip_address(ebot_ip)
    if not servers:
        # nothing to deploy on (and nothing to divide by below)
        return

    hostport = 27015
    clientport = hostport + nb_csgo
    stvport = clientport + nb_csgo
    hostname = "csgoinsalan"
    per_server = int(ceil(nb_csgo / len(servers)))

    for y, server in enumerate(servers):
        first = per_server * y
        # without the cap an uneven split would create more than nb_csgo containers
        last = min(per_server * (y + 1), nb_csgo)
        if first >= last:
            continue
        # one client per server is enough; it is identical for every container deployed there
        client = docker.APIClient(base_url="tcp://{}:2376".format(server), tls=tls_config)
        for i in range(first, last):
            ip = ip + 1
            tokens = config["csgo"]["tokens"]
            container = client.create_container(
                image,
                detach=True,
                hostname=hostname,
                host_config=client.create_host_config(
                    extra_hosts={hostname: server},
                    restart_policy={"Name": "always"},
                    network_mode="host",
                ),
                environment={
                    "IP": "{}".format(server),
                    "CSGO_HOSTNAME": "csgo-server-{}".format(i),
                    "CSGO_PASSWORD": "",
                    "RCON_PASSWORD": "******",
                    # FIXME : check before anything instead of failing midway
                    "STEAM_ACCOUNT_TOKEN": tokens[i] if len(tokens) > i else "",
                    "HOST_PORT": str(hostport + i),
                    "CLIENT_PORT": str(clientport + i),
                    "STV_PORT": str(stvport + i),
                },
                name="csgo-servers-{}".format(i),
            )
            client.start(container)
            topo.write("csgo-servers-{};{};{}\n".format(i, str(ip), client.base_url))
def new_docker_api_client_creator():
    """Return a new low-level Docker client bound to ``docker_url``."""
    api_client = docker.APIClient(base_url=docker_url)
    return api_client
def dc(self):
    """Return the low-level Docker client, creating it on first use.

    The client is built from a copy of ``self.docker_kwargs`` with
    ``version='auto'`` and cached on ``self._dc`` for later calls.
    """
    if self._dc is None:
        client_options = self.docker_kwargs.copy()
        self._dc = docker.APIClient(version='auto', **client_options)
    return self._dc
def docker() -> pydocker.APIClient:
    """Return a low-level Docker client for the local daemon socket.

    The API version is negotiated automatically (``version="auto"``).
    """
    options = {"base_url": "unix://var/run/docker.sock", "version": "auto"}
    return pydocker.APIClient(**options)
def __init__(self):
    """Open a low-level Docker client on the local daemon socket as ``self.client``."""
    daemon_socket = 'unix://var/run/docker.sock'
    self.client = docker.APIClient(base_url=daemon_socket)
def __init__(self):
    """Open a low-level Docker client on the local daemon socket as ``self.cli``.

    The API version is negotiated automatically (``version="auto"``).
    """
    daemon_socket = 'unix://var/run/docker.sock'
    self.cli = docker.APIClient(base_url=daemon_socket, version="auto")
def _build_image(self, push: bool = True) -> tuple:
    """
    Build a Docker image using the docker python library.

    The Dockerfile is generated into a temporary directory, the image is built
    and tagged ``<full_name>:<image_tag>`` and, when requested and a
    `registry_url` is set, pushed and then removed from the local daemon.

    Args:
        - push (bool, optional): Whether or not to push the built Docker image, this
            requires the `registry_url` to be set

    Returns:
        - tuple: the strings `self.image_name`, `self.image_tag`

    Raises:
        - AssertionError: if `image_name` or `image_tag` is not a string
        - SerializationError: if no image named `full_name` exists after the build
        - InterruptedError: if either pushing or pulling the image fails
            (NOTE(review): raised by `pull_image` / `push_image`, not visible here -- confirm)
    """
    assert isinstance(self.image_name, str), "Image name must be provided"
    assert isinstance(self.image_tag, str), "An image tag must be provided"

    # Make temporary directory to hold serialized flow, healthcheck script, and dockerfile
    with tempfile.TemporaryDirectory() as tempdir:

        # Build the dockerfile
        if self.base_image and not self.local_image:
            self.pull_image()

        self.create_dockerfile_object(directory=tempdir)
        client = docker.APIClient(base_url=self.base_url, version="auto")
        try:
            # Verify that a registry url has been provided for images that should be pushed
            if self.registry_url:
                full_name = str(PurePosixPath(self.registry_url, self.image_name))
            else:
                if push is True:
                    warnings.warn(
                        "This Docker storage object has no `registry_url`, and will not be pushed.",
                        UserWarning,
                    )
                full_name = self.image_name

            # Use the docker client to build the image
            logging.info("Building the flow's Docker storage...")
            output = client.build(
                path=tempdir,
                tag="{}:{}".format(full_name, self.image_tag),
                forcerm=True,
            )
            self._parse_generator_output(output)

            if len(client.images(name=full_name)) == 0:
                raise SerializationError(
                    "Your flow failed one of its deployment health checks!  Please ensure that all necessary files and dependencies have been included."
                )

            # Push the image if requested
            if push and self.registry_url:
                self.push_image(full_name, self.image_tag)

                # Remove the image locally after being pushed
                client.remove_image(image="{}:{}".format(full_name, self.image_tag), force=True)
        finally:
            # the client holds an HTTP session to the daemon; release it on every exit path
            client.close()

    return self.image_name, self.image_tag
def __init__(self, kard, *args, **kwargs):
    """Keep ``kard`` and open a low-level Docker client as ``self.docker``.

    Positional and keyword arguments are forwarded to ``docker.APIClient``,
    always together with ``version='auto'``.
    """
    api_client = docker.APIClient(*args, version='auto', **kwargs)
    self.docker = api_client
    self.kard = kard
parser.add_argument('--debug', action='store_true', help='Debug switch.', default=False) args = parser.parse_args(argv[1:]) return args if __name__ == '__main__': args = parse_args(sys.argv) dockerclient = docker.from_env() if (args.debug): print(str(dockerclient.containers.list())) print("etherpad container : " + str(dockerclient.containers.list(filters={"name": args.name}))) contether = dockerclient.containers.list(filters={"name": args.name})[0] try: APIK = contether.exec_run(cmd="cat /opt/etherpad-lite/APIKEY.txt") APIKEY = APIK.output c = docker.APIClient() IP = c.inspect_container( contether.id)['NetworkSettings']['Networks']['bridge']['IPAddress'] print('APIKEY: ' + str(APIKEY) + ' IP: ' + IP) except Exception as err: traceback.print_exc(file=sys.stderr) print("Error APIKEY or IP : %s" % (err))
def get_ip_address(container_id):
    """Return the IP address of ``container_id`` on the ``bridge`` network.

    A new default ``docker.APIClient`` is created for each call and the
    address is read from the container's inspect data.
    """
    inspection = docker.APIClient().inspect_container(container_id)
    bridge_network = inspection['NetworkSettings']['Networks']['bridge']
    return bridge_network['IPAddress']
# https://github.com/TomasTomecek/sen/blob/master/sen/util.py#L158
def calculate_cpu_percent(d):
    """Compute a container's CPU usage percentage from one stats sample.

    :param d: a decoded stats dictionary with ``cpu_stats`` and
        ``precpu_stats`` sections (the shape read below).
    :return: CPU usage as a float percentage; ``0.0`` when the system CPU
        counter did not advance between the two samples.

    The CPU count is taken from ``cpu_stats.cpu_usage.percpu_usage`` when
    that list is present and non-empty, and otherwise from
    ``cpu_stats.online_cpus`` (falling back to 1).  Missing counters in the
    previous sample are treated as zero instead of raising ``KeyError``.
    """
    cpu_stats = d["cpu_stats"]
    precpu_stats = d.get("precpu_stats") or {}
    cpu_usage = cpu_stats["cpu_usage"]
    precpu_usage = precpu_stats.get("cpu_usage") or {}

    # percpu_usage is not always reported; online_cpus is the documented
    # alternative source for the number of CPUs.
    percpu_usage = cpu_usage.get("percpu_usage")
    if percpu_usage:
        cpu_count = len(percpu_usage)
    else:
        cpu_count = cpu_stats.get("online_cpus") or 1

    cpu_percent = 0.0
    cpu_delta = float(cpu_usage["total_usage"]) - \
        float(precpu_usage.get("total_usage", 0))
    system_delta = float(cpu_stats.get("system_cpu_usage", 0)) - \
        float(precpu_stats.get("system_cpu_usage", 0))
    if system_delta > 0.0:
        cpu_percent = cpu_delta / system_delta * 100.0 * cpu_count
    return cpu_percent


client = docker.APIClient(base_url='unix://var/run/docker.sock')
# client = docker.Client(base_url='unix://var/run/docker.sock')
while(1):
    # ids = map(itemgetter('Id'), client.containers(quiet=True))
    # container_info = map(itemgetter('Names'), client_on_.containers(quiet=True))
    info_on_container = client.containers()
    # print(client.containers())
    # print("\n\n\n")
    # time.sleep(3)
    # continue
    # print(containers)
    # print ("ids:", dir(ids))
    # print(inspect.getmembers(ids))
    # print(ids.names)
def docker_container_with_neo4j(self,neo4jImportCmd,port,volume,ns,archivalName):
    """Start a Neo4j container, verify the imported node count and publish the outcome.

    The container is created from the image named by the ``dockerImageName``
    and ``dockerImageTag`` config keys on the Docker daemon at
    ``tcp://<dockerHost>:<dockerPort>``, with ``neo4jImportCmd`` passed in
    through the ``COMMAND_HERE`` environment variable.  After a fixed wait a
    cypher-shell count query is executed inside the container and its result
    is compared against ``ns.totalNode``.

    :param neo4jImportCmd: string appended to ``COMMAND_HERE=`` in the
        container environment.
    :param port: forwarded as ``port_bindings`` to ``create_host_config``.
    :param volume: forwarded as ``binds`` to ``create_host_config``.
    :param ns: object whose ``totalNode`` attribute is the expected node count.
    :param archivalName: copied into the published response and into the
        exception health data.
    :return: nothing; results are published through ``publishToolsData`` and
        failures through ``publishHealthDataForExceptions``.

    NOTE(review): the original indentation of this method was lost; the
    nesting below (in particular what belongs to the ``"running"`` check)
    is a reconstruction and should be confirmed against version control.
    """
    print("INFO::Creating Docker Instance")
    responseTemplate = self.getResponseTemplate()
    # NOTE(review): dataTransferMetadata is read but never used in this method.
    dataTransferMetadata = self.config.get('dynamicTemplate', {}).get('dataTransferMetadata', None)
    mq_response={}
    try:
        cli= docker.APIClient(base_url='tcp://'+self.config.get('dockerHost','')+':'+ str(self.config.get('dockerPort','')))
        #for line in cli.pull(self.config.get('dockerImageName','')+':'+self.config.get('dockerImageTag',''),auth_config=docker_auth, stream=True):
        #    print(json.dumps(json.loads(line), indent=4 ))
        ##### creating container and importing csv ######
        container_id = cli.create_container(self.config.get('dockerImageName','')+':'+self.config.get('dockerImageTag',''), 'ls', ports=self.config.get('dynamicTemplate', {}).get('bindPort',''), volumes=self.config.get('dynamicTemplate', {}).get('mountVolume',''),environment=['COMMAND_HERE='+neo4jImportCmd],host_config=cli.create_host_config(port_bindings=port,binds=volume))
        # NOTE(review): response is assigned but never used.
        response = cli.start(container=container_id.get('Id'))
        #print(response)
        #print(container_id)
        ##### getting status of spawned container #####
        container_details=cli.inspect_container(container_id["Id"])
        # sourceUrl is built from the host IP/port published for the first configured bindPort.
        mq_response["sourceUrl"]="http://"+container_details["NetworkSettings"]["Ports"][str(self.config.get('dynamicTemplate', {}).get('bindPort','')[0])+"/tcp"][0]["HostIp"]+":"+container_details["NetworkSettings"]["Ports"][str(self.config.get('dynamicTemplate', {}).get('bindPort','')[0])+"/tcp"][0]["HostPort"]
        mq_response["archivalName"]=archivalName
        if container_details["State"]["Status"]=="running":
            mq_response["status"]="Success"
        # The string literal below is disabled code kept by the original author; it has no effect.
        '''
        #####getting list of conatiners and its details#####
        containers_list=cli.containers()
        print("***************************")
        for x in range(len(containers_list)):
            print("Id : "+containers_list[x]["Id"])
            print("Status : "+containers_list[x]["State"])
            print("Ports : "+str(containers_list[x]["Ports"]))
            print("Mounts : "+str(containers_list[x]["Mounts"]))
            print("***************")
        #####executing command inside a conatiner#####
        '''
        # Fixed 70 second wait before querying; presumably gives Neo4j time to start -- confirm.
        time.sleep(70)
        print("INFO::DockerContainer Creation Success..."+mq_response["sourceUrl"])
        #print(container_id["Id"])
        # NOTE(review): the Neo4j credentials are hard-coded in this command line.
        exec_id=cli.exec_create(container_id["Id"],cmd=['/bin/bash','-c','cd neo4j-Insights && bin/cypher-shell -a bolt://localhost:7687 -u neo4j -p C0gnizant@1 "match(n) return count (n)"'])
        #print(exec_id)
        result=cli.exec_start(exec_id["Id"])
        print("INFO::Cypher Test Exec Result::"+str(result))
        if "Connection refused" in str(result):
            raise Exception('Docker ERROR::Cyhper Test Exec. Connection refused')
        # The count is taken from the second segment of the stringified exec output.
        nodecount=str(result).split("\\n")[1]
        if(int(nodecount)!=ns.totalNode):
            mq_response["message"]="Mismatched node count : Expected Node Count = "+str(ns.totalNode)+ "Imported Node Count = "+str(nodecount)
            #print('****************MQ RESPONSE****************')
            #print(mq_response)
            parsedData = self.parseResponse(responseTemplate, mq_response, {})
            #print(parsedData)
            print("ERROR::Cont Mismatch. Neo4j Import Error(s)")
            self.publishToolsData(parsedData)
        else:
            mq_response["message"]="Node Count = "+str(nodecount)
            #print('****************MQ RESPONSE****************')
            #print(mq_response)
            print("Docker Container Succesfully started")
            data=list()
            data += self.parseResponse(responseTemplate, mq_response)
            #print(data)
            metadata = { "dataUpdateSupported" : False }
            self.publishToolsData(data,metadata)
        ##comment these
    except Exception as ex:
        # Any failure above is logged and reported as health data tagged with the archival name.
        logging.error(ex)
        print("ERROR::Docker Exception")
        print(ex)
        #ex= archivalName+" \n Docker Exception--->"+ str(ex)
        additionalProperties = {'archivalName':archivalName}
        self.publishHealthDataForExceptions(ex, additionalProperties=additionalProperties)
def connect_api(url):
    """Create and return a ``docker.APIClient`` whose ``base_url`` is ``url``."""
    api_client = docker.APIClient(base_url=url)
    return api_client
def import_base_image(self, base_image_file_name):
    """Import ``base_image_file_name`` as an image through a default Docker API client.

    NOTE(review): the repository name is taken from
    ``self.base_image_file_name`` rather than from the argument; confirm
    that this is intended.
    """
    api_client = docker.APIClient()
    api_client.import_image(
        src=base_image_file_name,
        repository=self.base_image_file_name,
    )
import time

import docker

from evality import Evality


def main(evality):
    """Run an interactive command loop against ``evality``.

    Starts by running a hello-world command, then keeps prompting for
    commands until the user enters ``!q`` or closes stdin.  Each result is
    printed together with the elapsed wall-clock time.

    ``evality.quit()`` is always called on the way out, including when a
    command raises or the user interrupts the prompt, so the session is not
    left running after a failure.
    """
    cmd = "print('hello world')"
    try:
        while cmd != "!q":
            t0 = time.time()
            res = evality.run_cmd(cmd)
            t1 = time.time()
            print(res, f"{t1 - t0} seconds")
            try:
                cmd = input("cmd> ")
            except EOFError:
                # stdin was closed (Ctrl-D / piped input ended): treat as quit
                break
    finally:
        evality.quit()


if __name__ == "__main__":
    docker_client = docker.from_env()
    api_client = docker.APIClient(base_url="unix://var/run/docker.sock")
    evality = Evality(docker_client, api_client)
    main(evality)
def make_image(image, redis_key):
    """Build the Docker image ``image`` and push it to the registry.

    The build context is ``<dockerfile_path>/<project>``, where ``project``
    is the last path component of ``image`` without its tag.  Progress and
    failure messages are pushed to the Redis list ``redis_key`` and written
    through ``_flow_log``.

    :param image: full image reference, e.g. ``registry/group/project:tag``.
    :param redis_key: Redis list key that receives status messages.
    :return: ``True`` when the image was built and the pushed tag matches the
        tag in ``image``; ``False`` on every failure path.
    """
    # Local import so this block does not rely on a file-level ``import json``.
    import json

    try:
        Redis.lpush(redis_key, 'start build image %s......' % image)
        _flow_log('start build image %s......' % image)
        project = image.split('/')[-1].split(':')[0]
        dockerfile = "%s/%s" % (dockerfile_path, project)
        if not os.path.exists(dockerfile):
            Redis.lpush(redis_key,
                        'dockerfile %s path not exists!' % dockerfile, 'fail')
            _flow_log('dockerfile %s path not exists!' % dockerfile)
            return False

        # ---- build ----
        try:
            client = docker.APIClient(base_url=docker_base_url)
            response = list(client.build(path=dockerfile, rm=True, tag=image))
            # The daemon streams JSON documents; parse the last one as JSON
            # rather than evaluating daemon output as Python code.
            result = json.loads(response[-1])
            if 'Successfully' not in str(result):
                Redis.lpush(redis_key, 'fail:%s' % result)
                _flow_log('fail:%s' % result)
                return False
            Redis.lpush(redis_key, "docker build %s success!" % image)
            _flow_log("docker build %s success!" % image)
        except Exception as e:
            logging.error(e)
            if 'BaseException' not in str(e):
                Redis.lpush(redis_key, 'fail:%s' % e)
                _flow_log('fail:%s' % e)
            # Previously this path fell off the end and returned None.
            return False

        # ---- push (only reached after a successful build) ----
        try:
            Files = tools.get_k8s_packages()
            response = list(
                client.push(image,
                            stream=True,
                            auth_config={
                                'username': docker_user,
                                'password': docker_password
                            }))
            result = json.loads(response[-1])['aux']['Tag']
            version = image.split(':')[-1]
            if version != result:
                Redis.lpush(redis_key, 'fail:%s' % result)
                _flow_log('fail:%s' % result)
                return False
            # Remove the code package(s) from the build context.
            for entry in os.listdir(dockerfile):
                if Files[project].split('.')[0] in entry:
                    target = '%s/%s' % (dockerfile, entry)
                    try:
                        os.remove(target)
                    except OSError:
                        # not a plain file (e.g. an unpacked directory)
                        shutil.rmtree(target)
            Redis.lpush(redis_key, "docker push %s success!" % image)
            _flow_log("docker push %s success!" % image)
            return True
        except Exception as e:
            logging.error(e)
            Redis.lpush(redis_key, 'fail:%s' % e)
            _flow_log('fail:%s' % e)
            return False
    except Exception as e:
        logging.error(e)
        if 'BaseException' not in str(e):
            Redis.lpush(redis_key, 'fail:%s' % e)
            _flow_log('fail:%s' % e)
        return False
def __build_dependencies(benchmarker_config,
                         test,
                         docker_buildargs,
                         build_log_dir=os.devnull):
    '''
    Builds all the dependency docker images for the given test. Does not
    build the test docker image.

    Dependencies are gathered from "<test.directory>/<test.name>.dockerfile",
    reversed and de-duplicated, then each one is either found/pulled as a
    published image or built from its own "<dependency>.dockerfile".

    :param benchmarker_config: object providing `fwroot`, `build` and
        `server_docker_host`, all read below.
    :param test: object providing `directory` and `name`.
    :param docker_buildargs: passed through as `buildargs` to the docker build.
    :param build_log_dir: directory receiving one "<dependency>.log" per built
        dependency; with the default `os.devnull` the log is written to
        the null device instead.
    :return: 1 as soon as a pull or build fails; otherwise the function falls
        off the end without an explicit return value.
    '''
    dependencies = OrderedSet(
        list(
            reversed(
                __gather_dependencies(
                    benchmarker_config,
                    os.path.join(test.directory,
                                 "%s.dockerfile" % test.name)))))

    # Fallback location searched when the test directory has no dockerfile
    # for a dependency.
    docker_dir = os.path.join(benchmarker_config.fwroot, "toolset", "setup",
                              "docker")
    for dep in dependencies:
        log_prefix = dep + ": "
        pulled = False

        # Do not pull techempower/ images if we are building specifically
        if not benchmarker_config.build and 'techempower/' not in dep:
            client = docker.DockerClient(
                base_url=benchmarker_config.server_docker_host)
            try:
                # If we have it, use it
                client.images.get(dep)
                pulled = True
                log("Found published image; skipping build", prefix=log_prefix)
            except:
                # Pull the dependency image
                try:
                    log("Attempting docker pull for image (this can take some time)",
                        prefix=log_prefix)
                    client.images.pull(dep)
                    pulled = True
                    log("Found published image; skipping build",
                        prefix=log_prefix)
                except:
                    log("Docker pull failed; %s could not be found; terminating"
                        % dep,
                        prefix=log_prefix,
                        color=Fore.RED)
                    return 1

        if not pulled:
            # "repo/name:tag" -> "name"; indexing [1] assumes the reference
            # contains a '/'.
            dep_ref = dep.strip().split(':')[0].strip()
            dependency = dep_ref.split('/')[1]
            build_log_file = build_log_dir
            if build_log_dir is not os.devnull:
                build_log_file = os.path.join(build_log_dir,
                                              "%s.log" % dependency.lower())
            with open(build_log_file, 'w') as build_log:
                # Prefer a dockerfile next to the test; otherwise search
                # docker_dir.
                docker_file = os.path.join(test.directory,
                                           dependency + ".dockerfile")
                if not docker_file or not os.path.exists(docker_file):
                    docker_file = find(docker_dir, dependency + ".dockerfile")
                if not docker_file:
                    log("Docker build failed; %s could not be found; terminating"
                        % (dependency + ".dockerfile"),
                        prefix=log_prefix,
                        file=build_log,
                        color=Fore.RED)
                    return 1

                # Build the dependency image
                try:
                    for line in docker.APIClient(
                            base_url=benchmarker_config.server_docker_host
                    ).build(path=os.path.dirname(docker_file),
                            dockerfile="%s.dockerfile" % dependency,
                            tag=dep,
                            buildargs=docker_buildargs,
                            forcerm=True):
                        # Only "stream" records are logged; everything else
                        # in the build output is ignored.
                        # NOTE(review): `line.keys()[0]` and calling
                        # startswith with a str prefix on the raw line only
                        # work under Python 2 semantics -- confirm the
                        # target interpreter.
                        if line.startswith('{"stream":'):
                            line = json.loads(line)
                            line = line[line.keys()[0]].encode('utf-8')
                            # "Step N/M" lines are highlighted in bright white.
                            log(line,
                                prefix=log_prefix,
                                file=build_log,
                                color=Fore.WHITE + Style.BRIGHT \
                                    if re.match(r'^Step \d+\/\d+', line) else '')
                except Exception:
                    tb = traceback.format_exc()
                    log("Docker dependency build failed; terminating",
                        prefix=log_prefix,
                        file=build_log,
                        color=Fore.RED)
                    log(tb, prefix=log_prefix, file=build_log)
                    return 1
def __init__(self, token: str, channel: int):
    """Set up the Telegram bot, the Docker API client and empty state.

    ``token`` is handed to ``telegram.Bot``; ``channel`` is stored as given.
    """
    self._channel = channel
    self._x_data = []
    bot = telegram.Bot(token=token)
    self._bot = bot
    api_client = docker.APIClient()
    self._docker = api_client
'draft': { 'mergeable': False, 'desc': 'the merge is blocked due to the pull request being a draft' }, 'has_hooks': { 'mergeable': True, 'desc': 'mergeable with passing commit status and pre-receive hooks' }, 'unknown': { 'mergeable': True, 'desc': 'the state cannot currently be determined' }, 'unstable': { 'mergeable': True, 'desc': 'mergeable with non-passing commit status' } } DOCKER_DIST_MANIFESTS = { 'v1': 'application/vnd.docker.distribution.manifest.v1+json', 'v2': 'application/vnd.docker.distribution.manifest.v2+json' } DOCKER_DIST_MANIFEST_LIST = 'application/vnd.docker.distribution.manifest.list.v2+json' docker_rwlock = fasteners.InterProcessReaderWriterLock(docker_pushpull_rwlock) docker_api = docker.APIClient(base_url=docker_daemon_socket) # Get the labels of a local docker image, raise exception # if image doesn't exist or has invalid labels. def get_local_image_labels(host_name, user_name, image_name, image_tag, image_labels): image_full = ((host_name + '/' if host_name else '') + (user_name + '/' if user_name else '') + image_name + ':' + image_tag) info = docker_api.inspect_image(image_full) logging.info('local image %s labels: %s', image_full, info['Config']['Labels']) labels = info['Config']['Labels'] if (labels): labels_ok = True for label in image_labels: if not labels[label]: labels_ok = False
def run_container_with_docker(image,
                              command,
                              name=None,
                              environment=None,
                              mounts=None,
                              log_file=None,
                              auto_remove=False,
                              client_id=None,
                              client_access_token=None,
                              client_refresh_token=None,
                              actor_id=None,
                              tenant=None,
                              api_server=None,
                              client_secret=None):
    """
    Run a container with docker mounted in it.
    Note: this function always mounts the abaco conf file so it should not be used by execute_actor().

    :param image: image to create the container from.
    :param command: command to run in the container.
    :param name: optional container name.
    :param environment: dict of environment variables for the container. When a
           dict is passed it is updated in place with the defaults below; when
           omitted, a fresh dict is created for every call.
    :param mounts: list of dicts with keys host_path, container_path and format
           ('ro' or 'rw'); one bind and one volume is added per entry.
    :param log_file: name of the log file inside the logs host directory; chosen
           from the log file strategy when not passed.
    :param auto_remove: passed to the docker host config.
    :param client_id, client_access_token, client_refresh_token, actor_id, tenant,
           api_server, client_secret: added to the environment under the same
           key unless that key is already present.
    :return: the container dict returned by the docker client.
    :raises DockerError: if abaco_conf_host_path is not configured or the
           container cannot be created/started.
    """
    logger.debug("top of run_container_with_docker().")
    # Never share a default container between calls: `environment` is written
    # to below, so a shared default dict would carry the credentials and ids
    # of one call over into every later call.
    if environment is None:
        environment = {}
    if mounts is None:
        mounts = []
    cli = docker.APIClient(base_url=dd, version="auto")

    # bind the docker socket as r/w since this container gets docker.
    volumes = ['/var/run/docker.sock']
    binds = {
        '/var/run/docker.sock': {
            'bind': '/var/run/docker.sock',
            'ro': False
        }
    }
    # add a bind key and dictionary as well as a volume for each mount
    for m in mounts:
        binds[m.get('host_path')] = {
            'bind': m.get('container_path'),
            'ro': m.get('format') == 'ro'
        }
        volumes.append(m.get('host_path'))

    # mount the abaco conf file. first we look for the environment variable, falling back to the value in Config.
    try:
        abaco_conf_host_path = os.environ.get('abaco_conf_host_path')
        if not abaco_conf_host_path:
            abaco_conf_host_path = Config.get('spawner',
                                              'abaco_conf_host_path')
        logger.debug("docker_utils using abaco_conf_host_path={}".format(
            abaco_conf_host_path))
        # mount config file at the root of the container as r/o
        volumes.append('/service.conf')
        binds[abaco_conf_host_path] = {'bind': '/service.conf', 'ro': True}
    except configparser.NoOptionError as e:
        # if we're here, it's bad. we don't have a config file. better to cut and run,
        msg = "Did not find the abaco_conf_host_path in Config. Exception: {}".format(
            e)
        logger.error(msg)
        raise DockerError(msg) from e

    # also add these to the environment if not already there (caller-supplied
    # values always win).
    env_defaults = (
        ('abaco_conf_host_path', abaco_conf_host_path),
        ('client_id', client_id),
        ('client_access_token', client_access_token),
        ('actor_id', actor_id),
        ('tenant', tenant),
        ('api_server', api_server),
        ('client_secret', client_secret),
        ('client_refresh_token', client_refresh_token),
    )
    for key, value in env_defaults:
        environment.setdefault(key, value)

    # if not passed, determine what log file to use
    if not log_file:
        if get_log_file_strategy() == 'split':
            log_file = 'worker.log'
        else:
            log_file = 'abaco.log'

    # mount the logs file.
    volumes.append('/var/log/service.log')
    # first check to see if the logs directory config was set:
    try:
        logs_host_dir = Config.get('logs', 'host_dir')
    except (configparser.NoSectionError, configparser.NoOptionError):
        # if the directory is not configured, default it to abaco_conf_host_path
        logs_host_dir = os.path.dirname(abaco_conf_host_path)
    # read/write bind; spelled with the 'ro' key like the other binds above
    # ('rw' is not a key the docker client looks at).
    binds['{}/{}'.format(logs_host_dir, log_file)] = {
        'bind': '/var/log/service.log',
        'ro': False
    }

    host_config = cli.create_host_config(binds=binds, auto_remove=auto_remove)
    logger.debug("binds: {}".format(binds))

    # add the container to a specific docker network, if configured
    netconf = None
    try:
        docker_network = Config.get('spawner', 'docker_network')
    except Exception:
        docker_network = None
    if docker_network:
        netconf = cli.create_networking_config(
            {docker_network: cli.create_endpoint_config()})

    # create and start the container
    try:
        container = cli.create_container(image=image,
                                         environment=environment,
                                         volumes=volumes,
                                         host_config=host_config,
                                         command=command,
                                         name=name,
                                         networking_config=netconf)
        cli.start(container=container.get('Id'))
        logger.debug('container successfully started')
    except Exception as e:
        msg = "Got exception trying to run container from image: {}. Exception: {}".format(
            image, e)
        logger.info(msg)
        raise DockerError(msg) from e
    logger.info("container started successfully: {}".format(container))
    return container