Example #1
0
    def __init__(
        self,
        num_workers,
        hosts,
        vineyard_socket,
        log_level,
        timeout_seconds,
    ):
        """Store the launcher settings on the instance.

        Nothing is started here: the analytical engine process handle is
        only initialised to ``None``.

        Args:
            num_workers: Kept unchanged on ``_num_workers``.
            hosts: Kept unchanged on ``_hosts``.
            vineyard_socket: Kept unchanged on ``_vineyard_socket``.
            log_level: Passed through ``parse_as_glog_level`` and the result
                is kept on ``_glog_level``.
            timeout_seconds: Kept unchanged on ``_timeout_seconds``.
        """
        super().__init__()

        # Settings handed in by the caller.
        self._num_workers = num_workers
        self._hosts = hosts
        self._vineyard_socket = vineyard_socket
        self._glog_level = parse_as_glog_level(log_level)
        self._timeout_seconds = timeout_seconds

        # Handle of the analytical engine process; unset until one exists.
        self._analytical_engine_process = None
Example #2
0
    def __init__(
        self,
        num_workers,
        hosts,
        etcd_addrs,
        vineyard_socket,
        shared_mem,
        log_level,
        instance_id,
        timeout_seconds,
    ):
        """Store the launcher settings and create the instance workspace.

        Besides recording its arguments, the constructor creates the
        directory ``WORKSPACE/<instance_id>`` (if it does not exist yet) and
        initialises every process handle and port to ``None``.

        Args:
            num_workers: Kept unchanged on ``_num_workers``.
            hosts: Kept unchanged on ``_hosts``.
            etcd_addrs: Kept unchanged on ``_etcd_addrs``.
            vineyard_socket: Kept unchanged on ``_vineyard_socket``.
            shared_mem: Kept unchanged on ``_shared_mem``.
            log_level: Passed through ``parse_as_glog_level`` and the result
                is kept on ``_glog_level``.
            instance_id: Kept on ``_instance_id``; also names the workspace
                sub-directory.
            timeout_seconds: Kept unchanged on ``_timeout_seconds``.
        """
        super().__init__()

        # Settings handed in by the caller.
        self._num_workers = num_workers
        self._hosts = hosts
        self._etcd_addrs = etcd_addrs
        self._vineyard_socket = vineyard_socket
        self._shared_mem = shared_mem
        self._glog_level = parse_as_glog_level(log_level)
        self._instance_id = instance_id
        self._timeout_seconds = timeout_seconds

        # Common path prefix for vineyard sockets under the temp directory.
        self._vineyard_socket_prefix = os.path.join(
            get_tempdir(), "vineyard.sock.")

        # A GraphScope instance may have multiple sessions, created by
        # reconnecting to the coordinator, so the workspace is per instance.
        self._instance_workspace = os.path.join(WORKSPACE, instance_id)
        os.makedirs(self._instance_workspace, exist_ok=True)
        # Assigned later, when a client connects to the coordinator.
        self._session_workspace = None

        # etcd: ports and process handle.
        self._etcd_peer_port = None
        self._etcd_client_port = None
        self._etcd_process = None

        # zookeeper: port and zetcd process handle.
        self._zookeeper_port = None
        self._zetcd_process = None

        # vineyardd: RPC port and process handle.
        self._vineyard_rpc_port = None
        self._vineyardd_process = None

        # Analytical engine process handle.
        self._analytical_engine_process = None

        # Learning instance processes, initially empty.
        self._learning_instance_processes = {}

        # The launcher starts out in the closed state.
        self._closed = True
Example #3
0
    def __init__(self,
                 namespace=None,
                 service_type=None,
                 gs_image=None,
                 etcd_image=None,
                 zookeeper_image=None,
                 gie_graph_manager_image=None,
                 coordinator_name=None,
                 coordinator_service_name=None,
                 etcd_cpu=None,
                 etcd_mem=None,
                 zookeeper_cpu=None,
                 zookeeper_mem=None,
                 gie_graph_manager_cpu=None,
                 gie_graph_manager_mem=None,
                 engine_cpu=None,
                 engine_mem=None,
                 vineyard_cpu=None,
                 vineyard_mem=None,
                 vineyard_shared_mem=None,
                 image_pull_policy=None,
                 image_pull_secrets=None,
                 volumes=None,
                 num_workers=None,
                 instance_id=None,
                 log_level=None,
                 timeout_seconds=None,
                 waiting_for_delete=None,
                 delete_namespace=None,
                 **kwargs):
        """Set up Kubernetes API clients and record the launcher settings.

        The constructor loads a Kubernetes configuration, builds the API
        client objects, derives per-instance resource names by appending
        ``instance_id`` to the class-level name prefixes, and stores the
        remaining arguments on the instance. Runtime state (endpoints, pod
        lists, process handles) is initialised to empty values.

        Args:
            namespace, service_type, num_workers: Stored unchanged.
            gs_image, engine_cpu, engine_mem: Engine container settings.
            vineyard_cpu, vineyard_mem, vineyard_shared_mem: Vineyard
                container settings.
            etcd_image, etcd_cpu, etcd_mem: etcd pod settings.
            zookeeper_image, zookeeper_cpu, zookeeper_mem: Zookeeper pod
                settings.
            gie_graph_manager_image, gie_graph_manager_cpu,
                gie_graph_manager_mem: Interactive engine graph manager
                settings.
            coordinator_name, coordinator_service_name: Stored unchanged.
            image_pull_policy: Stored unchanged.
            image_pull_secrets: Comma-separated string, split into a list;
                ``None`` yields an empty list.
            volumes: JSON document, decoded with ``json.loads``.
            instance_id: Suffix appended to every resource name prefix; it
                must support ``+`` with those prefixes.
            log_level: Converted with ``parse_as_glog_level``.
            timeout_seconds, waiting_for_delete, delete_namespace: Stored
                unchanged.
            **kwargs: Accepted and ignored.
        """
        # Try the in-cluster configuration first and fall back to the local
        # kube config. Only ordinary errors trigger the fallback, so
        # KeyboardInterrupt / SystemExit are no longer swallowed here.
        try:
            kube_config.load_incluster_config()
        except Exception:
            kube_config.load_kube_config()
        self._api_client = kube_client.ApiClient()
        self._core_api = kube_client.CoreV1Api(self._api_client)
        self._app_api = kube_client.AppsV1Api(self._api_client)

        self._instance_id = instance_id

        # random for multiple k8s cluster in the same namespace
        self._engine_name = self._engine_name_prefix + self._instance_id
        self._etcd_name = self._etcd_name_prefix + self._instance_id
        self._etcd_service_name = self._etcd_service_name_prefix + self._instance_id

        self._gie_graph_manager_name = (self._gie_graph_manager_name_prefix +
                                        self._instance_id)
        self._gie_graph_manager_service_name = (
            self._gie_graph_manager_service_name_prefix + self._instance_id)
        self._vineyard_service_name = (self._vineyard_service_name_prefix +
                                       self._instance_id)

        self._namespace = namespace
        self._service_type = service_type
        self._num_workers = num_workers

        self._coordinator_name = coordinator_name
        self._coordinator_service_name = coordinator_service_name

        self._resource_object = ResourceManager(self._api_client)

        # engine container info
        self._gs_image = gs_image
        self._engine_cpu = engine_cpu
        self._engine_mem = engine_mem

        # vineyard container info
        self._vineyard_cpu = vineyard_cpu
        self._vineyard_mem = vineyard_mem
        self._vineyard_shared_mem = vineyard_shared_mem

        # etcd pod info
        self._etcd_image = etcd_image
        self._etcd_cpu = etcd_cpu
        self._etcd_mem = etcd_mem
        self._etcd_endpoint = None

        # zookeeper pod info
        self._zookeeper_image = zookeeper_image
        self._zookeeper_cpu = zookeeper_cpu
        self._zookeeper_mem = zookeeper_mem

        # interactive engine graph manager info
        self._gie_graph_manager_image = gie_graph_manager_image
        self._gie_graph_manager_cpu = gie_graph_manager_cpu
        self._gie_graph_manager_mem = gie_graph_manager_mem

        self._image_pull_policy = image_pull_policy

        # image pull secrets: comma-separated string -> list of names
        if image_pull_secrets is not None:
            self._image_pull_secrets = image_pull_secrets.split(",")
        else:
            self._image_pull_secrets = []

        # NOTE(review): json.loads rejects None, so callers must pass a JSON
        # string despite the None default -- confirm the intended default.
        self._volumes = json.loads(volumes)

        self._host0 = None
        self._pod_name_list = None
        self._pod_ip_list = None
        self._pod_host_ip_list = None

        self._analytical_engine_endpoint = None
        self._vineyard_service_endpoint = None

        self._closed = False
        self._glog_level = parse_as_glog_level(log_level)
        self._timeout_seconds = timeout_seconds
        self._waiting_for_delete = waiting_for_delete
        self._delete_namespace = delete_namespace

        self._analytical_engine_process = None

        # 8000 ~ 9000 is exposed
        self._learning_engine_ports_usage = 8000
        self._graphlearn_services = {}
        self._learning_instance_processes = {}
Example #4
0
    def __init__(self,
                 namespace=None,
                 service_type=None,
                 gs_image=None,
                 etcd_image=None,
                 dataset_image=None,
                 coordinator_name=None,
                 coordinator_service_name=None,
                 etcd_addrs=None,
                 etcd_num_pods=None,
                 etcd_cpu=None,
                 etcd_mem=None,
                 engine_cpu=None,
                 engine_mem=None,
                 vineyard_daemonset=None,
                 vineyard_cpu=None,
                 vineyard_mem=None,
                 vineyard_shared_mem=None,
                 mars_worker_cpu=None,
                 mars_worker_mem=None,
                 mars_scheduler_cpu=None,
                 mars_scheduler_mem=None,
                 with_mars=False,
                 image_pull_policy=None,
                 image_pull_secrets=None,
                 volumes=None,
                 mount_dataset=None,
                 num_workers=None,
                 preemptive=None,
                 instance_id=None,
                 log_level=None,
                 timeout_seconds=None,
                 waiting_for_delete=None,
                 delete_namespace=None,
                 **kwargs):
        """Set up Kubernetes API clients and record the launcher settings.

        The constructor resolves an API client, snapshots its own arguments
        into ``_saved_locals``, derives per-instance resource names by
        appending ``instance_id`` to the class-level name prefixes, creates
        the directory ``WORKSPACE/<instance_id>`` and initialises runtime
        state (endpoints, pod lists, process handles) to empty values.

        Arguments that are not copied onto a dedicated attribute below stay
        reachable only through ``_saved_locals``; presumably other methods
        read them from there (not visible in this block).

        Args:
            etcd_num_pods: Requested number of etcd pods. Values below 1,
                and ``None``, are treated as 1.
            image_pull_secrets: Comma-separated string, split into a list;
                ``None`` yields an empty list.
            volumes: JSON document, decoded with ``json.loads``.
            vineyard_daemonset: Passed to ``_exists_vineyard_daemonset``;
                when that returns true, the vineyard service name becomes
                ``<vineyard_daemonset>-rpc``.
            instance_id: Suffix appended to every resource name prefix and
                name of the workspace sub-directory.
            log_level: Converted with ``parse_as_glog_level``.
            **kwargs: Accepted; kept only as part of ``_saved_locals``.

        All remaining parameters are captured in ``_saved_locals`` as given.
        """

        super().__init__()
        self._api_client = resolve_api_client()
        self._core_api = kube_client.CoreV1Api(self._api_client)
        self._app_api = kube_client.AppsV1Api(self._api_client)

        # Snapshot of the names local to this call (the constructor
        # arguments). No new local variable is introduced in this method so
        # that the snapshot keeps holding exactly the arguments.
        self._saved_locals = locals()
        self._num_workers = self._saved_locals["num_workers"]
        self._instance_id = self._saved_locals["instance_id"]

        # random for multiple k8s cluster in the same namespace
        self._engine_name = self._engine_name_prefix + self._saved_locals[
            "instance_id"]
        self._etcd_addrs = etcd_addrs
        self._etcd_name = self._etcd_name_prefix + self._saved_locals[
            "instance_id"]
        self._etcd_service_name = (self._etcd_service_name_prefix +
                                   self._saved_locals["instance_id"])
        self._mars_scheduler_name = (self._mars_scheduler_name_prefix +
                                     self._saved_locals["instance_id"])

        self._coordinator_name = coordinator_name
        self._coordinator_service_name = coordinator_service_name

        self._resource_object = ResourceManager(self._api_client)

        # etcd pod info
        # At least one etcd pod is always used. The declared default of None
        # cannot be compared with an int by max(), so it maps to the minimum.
        if self._saved_locals["etcd_num_pods"] is None:
            self._etcd_num_pods = 1
        else:
            self._etcd_num_pods = max(1, self._saved_locals["etcd_num_pods"])
        self._etcd_endpoint = None

        # image pull secrets: comma-separated string -> list of names
        if image_pull_secrets is not None:
            self._image_pull_secrets = image_pull_secrets.split(",")
        else:
            self._image_pull_secrets = []

        # NOTE(review): json.loads rejects None, so callers must pass a JSON
        # string despite the None default -- confirm the intended default.
        self._volumes = json.loads(volumes)

        self._host0 = None
        self._pod_name_list = None
        self._pod_ip_list = None
        self._pod_host_ip_list = None

        self._analytical_engine_endpoint = None
        self._vineyard_service_endpoint = None
        self._mars_service_endpoint = None

        self._closed = False
        self._glog_level = parse_as_glog_level(log_level)

        self._analytical_engine_process = None
        self._zetcd_process = None

        # 8000 ~ 9000 is exposed
        self._learning_engine_ports_usage = 8000
        self._graphlearn_services = dict()
        self._learning_instance_processes = {}

        # workspace
        self._instance_workspace = os.path.join(
            WORKSPACE, self._saved_locals["instance_id"])
        os.makedirs(self._instance_workspace, exist_ok=True)
        self._session_workspace = None

        # component service name: reuse the RPC service of an existing
        # vineyard daemonset when there is one, otherwise use a per-instance
        # service name.
        if self._exists_vineyard_daemonset(
                self._saved_locals["vineyard_daemonset"]):
            self._vineyard_service_name = (
                self._saved_locals["vineyard_daemonset"] + "-rpc")
        else:
            self._vineyard_service_name = (self._vineyard_service_name_prefix +
                                           self._saved_locals["instance_id"])
        self._mars_service_name = (self._mars_service_name_prefix +
                                   self._saved_locals["instance_id"])