Example #1
def api_image_run(sid: uuid.UUID, port: int):
    user: User = xtoken_user(AuthAPI.getXToken())

    tag = randomString(15)
    db: wrappers.Collection = mongo.db.images
    image: Image = deserialize_json(
        Image, db.find_one({
            "uid": user.uuid,
            "uuid": str(sid)
        }))

    try:
        container_id = DockerImageAPI.run(image.tag, "", port)
        container_uuid = str(uuid.uuid4())
        container = Container(user.uuid, tag, "start", str(sid), port,
                              container_id, container_uuid)
        db: wrappers.Collection = mongo.db.containers
        db.insert_one(container.__dict__)

        DockerContainerAPI.start(container)
        docker_daemon.notify(container_id)

        return json_result(0, "image run")
    except Exception:
        return json_result(-1, "image not found")
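For orientation, a minimal client-side sketch of how an endpoint like this might be called; the route and the X-Token header name are assumptions, since the example does not show the Flask routing:

import requests  # hedged sketch: URL, port and header name are assumptions

resp = requests.post(
    "http://localhost:5000/api/image/<image-uuid>/run/2222",  # assumed route
    headers={"X-Token": "<auth-token>"})
print(resp.text)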
Example #2
def docker_run(sid: uuid.UUID):
    if request.method != "POST":
        return json_result(-1, "POST only")

    sid = str(sid)
    tag = request.form["tag"]
    sshport = int(request.form["sshport"])
    uid = session.get("uuid")

    db: wrappers.Collection = mongo.db.images
    image: Image = deserialize_json(Image,
                                    db.find_one({
                                        "uid": uid,
                                        "uuid": sid
                                    }))
    if not image:
        return json_result(-1, "image not found")

    container_id = DockerImageAPI.run(tag, "",
                                      sshport)  # image.run(tag, port=sshport)
    container_uuid = str(uuid.uuid4())
    container = Container(uid, tag, "start", sid, sshport, container_id,
                          container_uuid)
    db: wrappers.Collection = mongo.db.containers
    db.insert_one(container.__dict__)

    DockerContainerAPI.start(container)
    docker_daemon.notify(container_id)

    return json_result(0, "Successfully run")
Example #3
def configure():
    global status, models, containers, config

    logging.info("configuration started...")

    # read data
    data = request.get_json()
    config = ContainersManagerConfiguration(json_data=data)
    logging.info("configuration: %s", str(config.__dict__))

    # build models and containers list
    # models
    if config.models:
        logging.info("Found %d models from configuration", len(config.models))
        for model in config.models:
            m = Model(name=model["name"],
                      version=model["version"],
                      sla=model["sla"],
                      alpha=model["alpha"],
                      tfs_model_url=model["tfs_model_url"],
                      initial_replicas=model["initial_replicas"])
            if "profiled_rt" in model:
                m.profiled_rt = model["profiled_rt"]
            models.append(m)

    logging.info("+ %d models", len(models))

    # containers
    if config.containers:
        logging.info("Found %d containers from configuration",
                     len(config.containers))
        for container in config.containers:
            containers.append(
                Container(container["model"], container["version"],
                          container["active"], container["container"],
                          container["node"], container["port"],
                          container["device"], container["quota"]))
    logging.info(
        "+ %d CPU containers",
        len(list(filter(lambda m: m.device == Device.CPU, containers))))
    logging.info(
        "+ %d GPU containers",
        len(list(filter(lambda m: m.device == Device.GPU, containers))))
    logging.info([container.to_json() for container in containers])

    status = "configured"
    logging.info(status)

    return {"result": "ok"}, 200
Example #4
def start_requests_store():
    global status, config, models, containers

    # get models information
    models = [Model(json_data=json_model) for json_model in get_data(config.models_endpoint)]
    logging.info("Models: %s", [model.to_json() for model in models])

    # get containers information
    containers = [Container(json_data=json_container) for json_container in get_data(config.containers_endpoint)]
    logging.info("Containers: %s", [container.to_json() for container in containers])

    status = "active"
    logging.info(status)

    return {"result": "ok"}, 200
    def init(self):
        # get the models
        self.models = {
            json_model["name"]: Model(json_data=json_model)
            for json_model in self.get_data(self.models_endpoint)
        }
        log_str = "Loaded " + str(len(self.models)) + " models: " + str(
            [model.name for model in self.models.values()])
        self.logs.append({
            "ts": time.time(),
            "date": str(datetime.datetime.now()),
            "msg": log_str
        })

        # get the containers
        self.containers = [
            Container(json_data=json_container)
            for json_container in self.get_data(self.containers_endpoint)
        ]

        # group containers by nodes
        self.nodes = set(map(lambda c: c.node, self.containers))
        self.containers_on_node = {}
        for node in self.nodes:
            self.containers_on_node[node] = list(
                filter(lambda c: c.node == node, self.containers))
        log_str = "Containers by node: " + str([{
            node: [c.to_json() for c in self.containers_on_node[node]]
        } for node in self.containers_on_node])
        self.logs.append({
            "ts": time.time(),
            "date": str(datetime.datetime.now()),
            "msg": log_str
        })

        # init controllers
        self.controllers = []
        t = time.time()
        for container in list(
                filter(lambda c: c.device == Device.CPU and c.active,
                       self.containers)):
            c = Controller(container)
            c.next_action = t
            self.controllers.append(c)
Example #6
def admin_containers():
    containers = docker_api.container.getContainers()
    result = []
    for container in containers:
        rcontainer: Container = Container()
        rcontainer.uid = "system"
        rcontainer.uuid = "system"
        rcontainer.short_id = container["Id"]
        rcontainer.tag = container["Image"]
        rcontainer.status = container["State"]
        r: Container = docker_api.container.find_by_shortid(
            rcontainer.short_id)
        if r is not None:
            rcontainer.uid = r.uid
            rcontainer.uuid = r.uuid
            rcontainer.status = r.status
        result.append(rcontainer)

    return render_template("/admin/container.html", containers=result)
Example #7
def read_config_file(config_file):
    """
    Read the configuration file and init the containers variable
    """
    with open(config_file, 'r') as file:
        data = file.read()
        config = yaml.load(data, Loader=yaml.FullLoader)

        # models
        if config["models"]:
            logging.info("Found %d models", len(config["models"]))

            for model in config["models"]:
                if "profiled_rt" in model:
                    models.append(
                        Model(model["name"], model["version"], model["sla"],
                              model["alpha"], model["profiled_rt"]))
                else:
                    models.append(
                        Model(model["name"], model["version"], model["sla"],
                              model["alpha"]))

        logging.info("+ %d models", len(models))

        # containers
        if config["containers"]:
            logging.info("Found %d containers", len(config["containers"]))

            for container in config["containers"]:
                containers.append(
                    Container(container["model"], container["version"],
                              container["active"], container["container"],
                              container["node"], container["port"],
                              container["device"], container["quota"]))
        logging.info(
            "+ %d CPU containers",
            len(list(filter(lambda m: m.device == Device.CPU, containers))))
        logging.info(
            "+ %d GPU containers",
            len(list(filter(lambda m: m.device == Device.GPU, containers))))
        logging.info([container.to_json() for container in containers])
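Based solely on the keys this function accesses, a config file it would accept looks roughly like the following; the sketch writes an illustrative YAML document and feeds it to read_config_file, assuming the module globals (models, containers) are already initialized. All values are made up:

SAMPLE_CONFIG = """
models:
  - name: resnet          # illustrative
    version: 1
    sla: 0.5
    alpha: 0.9
    profiled_rt: 0.1      # optional, per the branch above
containers:
  - model: resnet
    version: 1
    active: true
    container: nodemanager-rest-cpu-1
    node: node-1
    port: 8501
    device: CPU           # assumption: conversion to the Device enum is not shown
    quota: null
"""

with open("config.yml", "w") as f:
    f.write(SAMPLE_CONFIG)
read_config_file("config.yml")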
Example #8
def docker_build():
    """
    GET
    :param uid: user uuid

    POST
    :param tag: docker tag
    :param dockfile: dockerfile uuid
    :param rootpass: root password for ssh
    :param sshport: ssh port forwarding

    build Dockerfile
    """
    if request.method != "POST":
        return json_result(-1, "POST only")

    uid = session.get("uuid")
    username = session.get("username")
    tag = request.form["tag"]
    dockfile = request.form["dockfile"]
    rootpass = request.form["rootpass"]
    sshport = int(request.form["sshport"])

    fn = "upload/{}/{}/Dockerfile".format(username, dockfile)
    with open(fn, "r") as f:
        df = f.read()

    name = tag.split(":")[0]
    ver = "latest"
    if len(tag.split(":")) == 1:
        ver = tag.split(":")[1]
    tag = randomString(20 - len(name)) + name + ":" + ver

    image_uuid = str(uuid.uuid4())
    container_uuid = str(uuid.uuid4())

    image = Image(uid, "", tag, "installing", sshport, "", image_uuid)
    db: wrappers.Collection = mongo.db.images
    db.insert_one(image.__dict__)

    # look up the uploaded Dockerfile record
    dockerfiles: wrappers.Collection = mongo.db.dockerfile
    dockerfile_doc: Dockerfile = deserialize_json(
        Dockerfile, dockerfiles.find_one({"uuid": dockfile}))
    if dockerfile_doc is None:
        return json_result(-1, "Dockerfile does not exist")

    try:
        # image build
        image.status = "build"
        db.replace_one({"uuid": image_uuid}, image.__dict__)
        result, imgs = DockerImageAPI.build(dockerfile_doc.path, rootpass, tag)
        image.short_id = imgs[0]["Id"].split(":")[1]
        print(result)

        image.status = "done"
        db.replace_one({"uuid": image_uuid}, image.__dict__)
    except Exception:
        image.status = "fail"
        db.replace_one({"uuid": image_uuid}, image.__dict__)
        return json_result(-1, "Dockerfile::Image::build failed")

    # container start
    container_id = DockerImageAPI.run(tag, "",
                                      sshport)  # image.run(tag, port=sshport)
    container = Container(uid, tag, "start", image_uuid, sshport, container_id,
                          container_uuid)
    container.start(container_id)
    db: wrappers.Collection = mongo.db.containers
    db.insert_one(container.__dict__)

    docker_daemon.notify(container_id)

    result_stream = []
    for item in result:
        try:
            result_stream += [item["stream"]]
        except KeyError:
            continue

    return json_result(0, "".join(result_stream))
Example #9
def configure():
    global status, active, reqs_queues, config

    if not config:
        logging.info("reading config from file")
        if not read_config_from_file():
            logging.error("configuration reading error")
            return False
        else:
            logging.info("configuration read from file")

    logging.info("configuration read: " + str(config.__dict__))
    logging.info("Getting models from: %s", config.models_endpoint)
    logging.info("Getting containers from: %s", config.containers_endpoint)

    # init models
    models = [
        Model(json_data=json_model)
        for json_model in get_data(config.models_endpoint)
    ]
    if len(models) > 0:
        logging.info("Models: %s", [model.to_json() for model in models])
    else:
        logging.warning("No models found")

    # init containers
    containers = [
        Container(json_data=json_container)
        for json_container in get_data(config.containers_endpoint)
    ]
    if len(containers) > 0:
        logging.info("Containers: %s",
                     [container.to_json() for container in containers])
    else:
        logging.warning("No containers found")
    logging.info("Found %d models and %d containers", len(models),
                 len(containers))

    # init requests queues
    reqs_queues = {model.name: queue.Queue() for model in models}
    responses_list = {model.name: [] for model in models}

    # init policy
    queues_policies = QueuesPolicies(reqs_queues, responses_list, models,
                                     logging)
    gpu_policy = queues_policies.policies.get(config.gpu_queues_policy)
    cpu_policy = queues_policies.policies.get(config.cpu_queues_policy)
    logging.info("Policy for GPUs: %s", config.gpu_queues_policy)
    logging.info("Policy for CPUs: %s", config.cpu_queues_policy)

    # disable logging if verbose == 0
    logging.info("Verbose: %d", config.verbose)
    if config.verbose == 0:
        app.logger.disabled = True
        logging.getLogger('werkzeug').setLevel(logging.WARNING)

    # init dispatchers
    status = "Init dispatchers"
    logging.info(status)
    dispatcher_gpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.GPU)
    dispatcher_cpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.CPU)

    # start the send requests thread
    status = "Start send reqs thread"
    logging.info(status)
    log_consumer_threads_pool = ThreadPoolExecutor(
        max_workers=config.max_log_consumers)
    for i in range(config.max_log_consumers):
        log_consumer_threads_pool.submit(log_consumer)

    # start the queues consumer threads
    status = "Start queues consumer threads"
    logging.info(status)

    if list(filter(lambda c: c.device == Device.GPU and c.active, containers)):
        # threads that poll the request queues and dispatch them to GPUs
        polling_gpu_threads_pool = ThreadPoolExecutor(
            max_workers=config.max_polling_threads)
        for i in range(config.max_polling_threads):
            polling_gpu_threads_pool.submit(queues_pooling, dispatcher_gpu,
                                            gpu_policy,
                                            config.max_consumers_gpu)

    if list(filter(lambda c: c.device == Device.CPU and c.active, containers)):
        # threads that poll the request queues and dispatch them to CPUs
        polling_cpu_threads_pool = ThreadPoolExecutor(
            max_workers=config.max_polling_threads)
        for i in range(config.max_polling_threads):
            polling_cpu_threads_pool.submit(queues_pooling, dispatcher_cpu,
                                            cpu_policy,
                                            config.max_consumers_cpu)

    status = "active"
    active = True
    logging.info(status)
    return True
Example #10
def create_app(
        containers_manager="http://localhost:5001",
        requests_store="http://localhost:5002",
        verbose=1,
        gpu_queues_policy=QueuesPolicy.HEURISTIC_1,
        cpu_queues_policy=QueuesPolicy.ROUND_ROBIN,
        max_log_consumers=1,
        max_polling=1,  # the number of threads waiting for requests
        max_consumers_cpu=100,
        max_consumers_gpu=100):  # the number of concurrent threads requests
    global reqs_queues, requests_store_host, status, gpu_policy, cpu_policy, responses_list
    requests_store_host = requests_store + "/requests"

    # init log
    coloredlogs.install(level='DEBUG', milliseconds=True)
    # log_format = "%(asctime)s:%(levelname)s:%(name)s: %(filename)s:%(lineno)d:%(message)s"
    # logging.basicConfig(level='DEBUG', format=log_format)

    # init models and containers
    status = "Init models and containers"
    logging.info(status)
    models_endpoint = containers_manager + "/models"
    containers_endpoint = containers_manager + "/containers"
    logging.info("Getting models from: %s", models_endpoint)
    logging.info("Getting containers from: %s", containers_endpoint)

    models = [
        Model(json_data=json_model) for json_model in get_data(models_endpoint)
    ]
    logging.info("Models: %s", [model.to_json() for model in models])
    containers = [
        Container(json_data=json_container)
        for json_container in get_data(containers_endpoint)
    ]
    logging.info("Containers: %s",
                 [container.to_json() for container in containers])
    logging.info("Found %d models and %d containers", len(models),
                 len(containers))

    # init reqs queues
    reqs_queues = {model.name: queue.Queue() for model in models}
    responses_list = {model.name: [] for model in models}

    # init policy
    queues_policies = QueuesPolicies(reqs_queues, responses_list, models,
                                     logging)
    gpu_policy = queues_policies.policies.get(gpu_queues_policy)
    cpu_policy = queues_policies.policies.get(cpu_queues_policy)
    logging.info("Policy for GPUs: %s", gpu_queues_policy)
    logging.info("Policy for CPUs: %s", cpu_queues_policy)

    # disable logging if verbose == 0
    logging.info("Verbose: %d", verbose)
    if verbose == 0:
        app.logger.disabled = True
        logging.getLogger('werkzeug').setLevel(logging.WARNING)

    # init dispatchers
    status = "Init dispatchers"
    logging.info(status)
    dispatcher_gpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.GPU)
    dispatcher_cpu = Dispatcher(app.logger, models, containers,
                                DispatchingPolicy.ROUND_ROBIN, Device.CPU)

    # start the send requests thread
    status = "Start send reqs thread"
    logging.info(status)
    log_consumer_threads_pool = ThreadPoolExecutor(
        max_workers=max_log_consumers)
    for i in range(max_log_consumers):
        log_consumer_threads_pool.submit(log_consumer)

    # start the queues consumer threads
    status = "Start queues consumer threads"
    logging.info(status)

    if list(filter(lambda c: c.device == Device.GPU and c.active, containers)):
        # threads that poll the request queues and dispatch them to GPUs
        polling_gpu_threads_pool = ThreadPoolExecutor(max_workers=max_polling)
        for i in range(max_polling):
            polling_gpu_threads_pool.submit(queues_pooling, dispatcher_gpu,
                                            gpu_policy, max_consumers_gpu)

    if list(filter(lambda c: c.device == Device.CPU and c.active, containers)):
        # threads that poll the request queues and dispatch them to CPUs
        polling_cpu_threads_pool = ThreadPoolExecutor(max_workers=max_polling)
        for i in range(max_polling):
            polling_cpu_threads_pool.submit(queues_pooling, dispatcher_cpu,
                                            cpu_policy, max_consumers_cpu)

    # start
    status = "Running"
    logging.info(status)
    return app
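A minimal way to stand the dispatcher up with this factory, assuming the containers manager and requests store are already reachable at their default addresses (the listen port below is an assumption):

dispatcher_app = create_app(containers_manager="http://localhost:5001",
                            requests_store="http://localhost:5002",
                            verbose=1)
dispatcher_app.run(host="0.0.0.0", port=5003)  # port is an assumption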
Example #11
    status = "running"

    parser = argparse.ArgumentParser()
    parser.add_argument('--containers_manager', type=str, required=True)
    args = parser.parse_args()

    # init log
    log_format = "%(asctime)s:%(levelname)s:%(name)s:" \
                 "%(filename)s:%(lineno)d:%(message)s"
    logging.basicConfig(level='DEBUG', format=log_format)

    # get models information
    models_endpoint = args.containers_manager + "/models"
    logging.info("Getting models from: %s", models_endpoint)
    models = [
        Model(json_data=json_model) for json_model in get_data(models_endpoint)
    ]
    logging.info("Models: %s", [model.to_json() for model in models])

    # get containers information
    containers_endpoint = args.containers_manager + "/containers"
    logging.info("Getting containers from: %s", containers_endpoint)
    containers = [
        Container(json_data=json_container)
        for json_container in get_data(containers_endpoint)
    ]
    logging.info("Containers: %s",
                 [container.to_json() for container in containers])

    app.run(host='0.0.0.0', port=5002)
Example #12
    def k8s_deployment_generator(k8s_config: K8sConfiguration):
        # add containers
        containers = []
        k8s_containers = []
        # add actuator container
        k8s_container = client.V1Container(
            name="nodemanager-actuator",
            image=k8s_config.actuator_image,
            ports=[
                client.V1ContainerPort(container_port=k8s_config.actuator_port)
            ],
            volume_mounts=[
                client.V1VolumeMount(name="docker-sock", mount_path="/var/run")
            ],
            image_pull_policy=k8s_config.k8s_image_pull_policy)
        k8s_containers.append(k8s_container)

        # add CPU containers
        base_port = 8501
        for i, model in enumerate(
                ConfigurationsGenerator.model_list(k8s_config.models)):
            container_name = "nodemanager-rest-cpu-" + str(i + 1)
            k8s_container = client.V1Container(
                name=container_name,
                image=k8s_config.tfs_image,
                args=[
                    "--model_config_file=" + k8s_config.tfs_config_file_name,
                    "--rest_api_port=" + str(base_port)
                ],
                ports=[client.V1ContainerPort(container_port=base_port)],
                volume_mounts=[
                    client.V1VolumeMount(name="shared-models",
                                         mount_path=k8s_config.tfs_models_path)
                ])
            k8s_containers.append(k8s_container)
            containers.append(
                Container(model=model.name,
                          version=model.version,
                          active=False,
                          container=container_name,
                          node=None,
                          port=base_port,
                          device=Device.CPU,
                          quota=None))
            base_port += 1

        # add GPU containers
        for gpu in range(k8s_config.available_gpus):
            container_name = "nodemanager-rest-gpu-" + str(gpu + 1)
            k8s_container = client.V1Container(
                name=container_name,
                image=k8s_config.tfs_image + "-gpu",
                args=[
                    "--model_config_file=" + k8s_config.tfs_config_file_name,
                    "--rest_api_port=" + str(base_port)
                ],
                ports=[client.V1ContainerPort(container_port=base_port)],
                volume_mounts=[
                    client.V1VolumeMount(name="shared-models",
                                         mount_path=k8s_config.tfs_models_path)
                ],
                env=[
                    client.V1EnvVar(name="NVIDIA_VISIBLE_DEVICES",
                                    value=str(gpu + 1))
                ])
            k8s_containers.append(k8s_container)
            containers.append(
                Container(model="all",
                          version=1,
                          active=False,
                          container=container_name,
                          node=None,
                          port=base_port,
                          device=Device.GPU,
                          quota=None))
            base_port += 1

        # add volumes
        volumes = [
            client.V1Volume(
                name="docker-sock",
                host_path=client.V1HostPathVolumeSource(path="/var/run")),
            client.V1Volume(name="shared-models",
                            empty_dir=client.V1EmptyDirVolumeSource())
        ]

        # set pod anti-affinity so replicas are spread across nodes
        affinity = client.V1Affinity(
            pod_anti_affinity=client.V1PodAntiAffinity(
                required_during_scheduling_ignored_during_execution=[
                    client.V1PodAffinityTerm(
                        topology_key="kubernetes.io/hostname")
                ]))

        # init containers
        init_containers = []
        for i, model in enumerate(
                ConfigurationsGenerator.model_list(k8s_config.models)):
            container_name = "tfs-init-" + str(i + 1)
            init_containers.append(
                client.V1Container(
                    name=container_name,
                    image=k8s_config.tfs_init_image,
                    args=[
                        "-f", "/home/models/", "-d",
                        "/home/models/" + model.name, "-c",
                        k8s_config.tfs_config_endpoint, "-m",
                        model.tfs_model_url
                    ],
                    image_pull_policy=k8s_config.k8s_image_pull_policy,
                    volume_mounts=[
                        client.V1VolumeMount(
                            name="shared-models",
                            mount_path=k8s_config.tfs_models_path)
                    ]))

        # add pod spec
        pod_spec = client.V1PodSpec(containers=k8s_containers,
                                    volumes=volumes,
                                    affinity=affinity,
                                    init_containers=init_containers,
                                    host_network=k8s_config.k8s_host_network,
                                    dns_policy="Default")
        # add pod template spec
        pod_template_spec = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"run": "nodemanager"}),
            spec=pod_spec)
        # add deployment spec
        deployment_spec = client.V1DeploymentSpec(
            selector=client.V1LabelSelector(
                match_labels={"run": "nodemanager"}),
            template=pod_template_spec,
            replicas=k8s_config.initial_replicas)
        # build deployment
        deployment = client.V1Deployment(api_version="apps/v1",
                                         kind="Deployment",
                                         metadata=client.V1ObjectMeta(
                                             name="nodemanager-deploy",
                                             labels={"run": "nodemanager"}),
                                         spec=deployment_spec)

        return containers, deployment
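The returned deployment can be submitted with the standard kubernetes client; a sketch, assuming kubeconfig access and a populated K8sConfiguration (the enclosing class of k8s_deployment_generator is not shown, so the call below is schematic):

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod

# k8s_config: a populated K8sConfiguration (construction is not shown here)
containers, deployment = k8s_deployment_generator(k8s_config)
client.AppsV1Api().create_namespaced_deployment(namespace="default",
                                                body=deployment)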
Example #13
    def __init__(self, name: str, element_set: ElementSet):
        self._element_set = element_set
        self._name = name
        self._container = Container(element_set)
        self._fill()