def reservation(self):
    """Deploy the user-supplied flist as a container, optionally mounting a volume.

    Registers chatflow metadata, reserves the volume first (when requested) so it
    can be mounted into the container, then deploys the container and waits for it.

    Raises:
        DeploymentFailed: if the volume or container workload does not deploy.
    """
    self.solution_metadata.update(
        {
            "name": self.solution_name,
            "form_info": {
                "chatflow": "flist",
                "Solution name": self.solution_name,
                "env": self.env,
            },
        }
    )
    mounts = {}
    if self.container_volume_attach:
        # The volume must exist before the container references it below.
        vol_id = deployer.deploy_volume(
            self.pool_id,
            self.selected_node.node_id,
            self.vol_size,
            solution_uuid=self.solution_id,
            **self.solution_metadata,
        )
        if not deployer.wait_workload(vol_id, self):
            raise DeploymentFailed(
                f"Failed to deploy volume on node {self.selected_node.node_id} {vol_id}", wid=vol_id
            )
        mounts[self.vol_mount_point] = vol_id
    self.resv_id = deployer.deploy_container(
        pool_id=self.pool_id,
        node_id=self.selected_node.node_id,
        network_name=self.network_view.name,
        ip_address=self.ip_address,
        flist=self.flist_link,
        cpu=self.resources["cpu"],
        memory=self.resources["memory"],
        disk_size=self.resources["disk_size"],
        env=self.env,
        interactive=self.interactive,
        entrypoint=self.entrypoint,
        log_config=self.log_config,
        volumes=mounts,
        public_ipv6=self.public_ipv6,
        **self.solution_metadata,
        solution_uuid=self.solution_id,
    )
    if not deployer.wait_workload(self.resv_id, self):
        raise DeploymentFailed(
            f"Failed to deploy workload {self.resv_id}",
            solution_uuid=self.solution_id,
            wid=self.resv_id,
        )
def reservation(self):
    """Deploy the 3bot container from the versioned hub flist and wait for it.

    Raises:
        DeploymentFailed: if the container workload does not deploy.
    """
    # The flist is pinned to the configured hub and the selected 3bot version.
    flist_url = f"{self.HUB_URL}/3bot-{self.version}.flist"
    self.solution_metadata.update(
        {
            "name": self.solution_name,
            "form_info": {"chatflow": "ubuntu", "Solution name": self.solution_name},
        }
    )
    self.resv_id = deployer.deploy_container(
        pool_id=self.pool_id,
        node_id=self.selected_node.node_id,
        network_name=self.network_view.name,
        ip_address=self.ip_address,
        flist=flist_url,
        cpu=self.resources["cpu"],
        memory=self.resources["memory"],
        disk_size=self.resources["disk_size"],
        env={"pub_key": self.public_key},
        interactive=False,
        entrypoint="/bin/bash /start.sh",
        log_config=self.log_config,
        public_ipv6=self.public_ipv6,
        **self.solution_metadata,
        solution_uuid=self.solution_id,
    )
    if not deployer.wait_workload(self.resv_id, self):
        raise DeploymentFailed(f"Failed to deploy workload {self.resv_id}", wid=self.resv_id)
def reservation(self):
    """Delegate the domain to the selected gateway and wait for the workload."""
    self.resv_id = deployer.delegate_domain(
        self.pool_id, self.gateway_id, self.domain, **self.solution_metadata, solution_uuid=self.solution_id
    )
    success = deployer.wait_workload(self.resv_id, self)
    if not success:
        # NOTE(review): sibling reservation methods in this file raise
        # DeploymentFailed(..., wid=...) here; confirm whether plain
        # StopChatFlow (no wid) is intentional for domain delegation.
        raise StopChatFlow(f"Failed to deploy workload {self.resv_id}")
def etcd_ip(self):
    """Attach the selected node to the network, then ask the user for one IP per etcd node.

    Builds ``self.ip_addresses`` and the etcd initial-cluster string.

    Raises:
        DeploymentFailed: if a network-update workload fails.
    """
    result = deployer.add_network_node(
        self.network_view.name,
        self.selected_node,
        self.pool_id,
        self.network_view,
        bot=self,
        owner=self.solution_metadata.get("owner"),
    )
    if result:
        self.md_show_update("Deploying Network on Nodes....")
        for wid in result["ids"]:
            if not deployer.wait_workload(wid, self, breaking_node_id=self.selected_node.node_id):
                raise DeploymentFailed(
                    f"Failed to add node {self.selected_node.node_id} to network {wid}", wid=wid
                )
        self.network_view = self.network_view.copy()
    self.ip_addresses = []
    # NOTE(review): "etcd_clutser" is a typo for "etcd_cluster", but the attribute
    # is read by the reservation step, so the name is kept as-is.
    self.etcd_clutser = ""
    for idx in range(self.no_nodes.value):
        free_ips = self.network_view.get_node_free_ips(self.selected_node)
        ip = self.drop_down_choice(
            f"Please choose IP Address for ETCD Node {idx + 1}",
            free_ips,
            default=free_ips[0],
            required=True,
        )
        self.network_view.used_ips.append(ip)
        self.ip_addresses.append(ip)
        # etcd initial-cluster entry format: name=peer-url followed by a comma.
        self.etcd_clutser += f"etcd_{idx + 1}=http://{ip}:2380,"
def reservation(self):
    """Deploy one etcd container per chosen IP address and wait for every workload.

    Raises:
        DeploymentFailed: if any etcd container workload does not deploy.
    """
    self.etcd_flist = "https://hub.grid.tf/essam.3bot/bitnami-etcd-latest.flist"
    self.solution_metadata.update(
        {
            "name": self.solution_name,
            "form_info": {"chatflow": "etcd", "Solution name": self.solution_name},
        }
    )
    self.resv_ids = deployer.deploy_etcd_containers(
        self.pool_id,
        self.selected_node.node_id,
        self.network_view.name,
        self.ip_addresses,
        self.etcd_clutser,
        self.etcd_flist,
        self.resources["cpu"],
        self.resources["memory"],
        self.resources["disk_size"],
        self.public_ipv6,
        **self.solution_metadata,
        solution_uuid=self.solution_id,
    )
    for wid in self.resv_ids:
        if not deployer.wait_workload(wid, self):
            raise DeploymentFailed(
                f"Failed to deploy workload {wid}", solution_uuid=self.solution_id, wid=wid
            )
def deploy_s3_zdb(self, pool_id, scheduler, storage_per_zdb, password, solution_uuid, no_nodes=None):
    """Deploy zdb namespaces for S3 on IPv6-capable nodes until `no_nodes` succeed.

    Collects candidate nodes in batches sized to the remaining deficit, deploys a
    zdb on each, and keeps only the workloads that come up. Returns the list of
    successful workload ids, or None (after logging) when the scheduler runs out
    of nodes before enough zdbs are deployed.
    """
    deployment_nodes = []
    wids = []
    no_nodes = no_nodes or S3_NO_DATA_NODES + S3_NO_PARITY_NODES
    for node in scheduler.nodes_by_capacity(pool_id=pool_id, hru=math.ceil(storage_per_zdb), ip_version="IPv6"):
        self.vdc_deployer.info(f"node {node.node_id} selected for zdb")
        deployment_nodes.append(node)
        # Keep accumulating candidates until we have enough to cover the deficit.
        if len(deployment_nodes) < no_nodes - len(wids):
            continue
        self.vdc_deployer.info(
            f"staring zdb deployment on nodes {[node.node_id for node in deployment_nodes]}"
        )
        result = []
        for node in deployment_nodes:
            self.vdc_deployer.info(f"deploying zdb on node: {node.node_id}")
            result.append(
                deployer.deploy_zdb(
                    pool_id=pool_id,
                    node_id=node.node_id,
                    size=int(storage_per_zdb),
                    disk_type=DiskType.HDD,
                    mode=ZDBMode.Seq,
                    password=password,
                    # form_info={"chatflow": "minio"},
                    # name=self.vdc_name,
                    solution_uuid=solution_uuid,
                    identity_name=self.identity.instance_name,
                    description=self.vdc_deployer.description,
                )
            )
        # Failed workloads are logged and skipped; the batch loop retries the deficit.
        for wid in result:
            try:
                success = deployer.wait_workload(
                    wid, bot=self.bot, expiry=5, identity_name=self.identity.instance_name, cancel_by_uuid=False
                )
                if not success:
                    raise DeploymentFailed()
                wids.append(wid)
                self.vdc_deployer.info(f"zdb deployed successfully wid: {wid}")
            except DeploymentFailed:
                self.vdc_deployer.error(f"failed to deploy zdb wid: {wid}")
                continue
        if len(wids) == no_nodes:
            self.vdc_deployer.info(f"{no_nodes} zdbs deployed successfully on pool {pool_id}")
            return wids
        # Start the next batch from scratch; already-successful wids are kept.
        deployment_nodes = []
    self.vdc_deployer.error("no nodes available to deploy zdb")
def container_ip(self):
    """Add the selected node to a copy of the network and prompt for a container IP.

    Raises:
        DeploymentFailed: if a network-update workload fails.
    """
    self.network_view_copy = self.network_view.copy()
    result = deployer.add_network_node(
        self.network_view.name,
        self.selected_node,
        self.pool_id,
        self.network_view_copy,
        bot=self,
        owner=self.solution_metadata.get("owner"),
    )
    if result:
        # Wait on every network-update workload; abort on the first failure.
        for wid in result["ids"]:
            if not deployer.wait_workload(wid, self, breaking_node_id=self.selected_node.node_id):
                raise DeploymentFailed(
                    f"Failed to add node {self.selected_node.node_id} to network {wid}", wid=wid
                )
        self.network_view_copy = self.network_view_copy.copy()
    free_ips = self.network_view_copy.get_node_free_ips(self.selected_node)
    self.ip_address = self.drop_down_choice(
        "Please choose IP Address for your solution", free_ips, default=free_ips[0], required=True
    )
def get_specific_public_ip(self, pool_id, node_id, address, solution_uuid=None):
    """Reserve a specific public IP address on the given node.

    Returns the workload id on success, or None (after logging) on failure.
    """
    self.vdc_deployer.info(
        f"attempting to reserve public ip: {address} on farm: {self.farm_name} pool: {pool_id} node: {node_id}"
    )
    wid = deployer.deploy_public_ip(
        pool_id,
        node_id,
        address,
        identity_name=self.identity.instance_name,
        description=self.vdc_deployer.description,
        solution_uuid=solution_uuid,
    )
    try:
        deployed = deployer.wait_workload(
            wid, self.bot, 5, cancel_by_uuid=False, identity_name=self.identity.instance_name
        )
        if not deployed:
            raise DeploymentFailed(f"Public ip workload failed. wid: {wid}")
        return wid
    except DeploymentFailed as e:
        # Best effort: log the failure and fall through, returning None.
        self.vdc_deployer.error(
            f"Failed to reserve public ip {address} on node {node_id} due to error {str(e)}"
        )
def _add_nodes_to_network(self, pool_id, nodes_generator, wids, no_nodes, network_view):
    """Pull nodes from the scheduler and add them to the VDC network in batches.

    Accumulates candidate nodes until the batch covers the remaining deficit
    (`no_nodes - len(wids)`), then registers them on the network and waits for
    the resulting workloads. Nodes whose network workload fails are dropped from
    the batch and replaced on the next iteration.

    Returns the list of successfully-attached nodes, or None when the generator
    is exhausted before `no_nodes` nodes are attached.

    Raises nothing: DeploymentFailed is handled internally.
    """
    deployment_nodes = []
    self.vdc_deployer.info(f"Adding nodes to network. no_nodes: {no_nodes}, wids: {wids}")
    for node in nodes_generator:
        self.vdc_deployer.info(f"node {node.node_id} selected")
        deployment_nodes.append(node)
        if len(deployment_nodes) < no_nodes - len(wids):
            continue
        self.vdc_deployer.info(f"Adding nodes {[node.node_id for node in deployment_nodes]} to network")
        # add nodes to network
        network_view = network_view.copy()
        result = []
        try:
            network_result = deployer.add_multiple_network_nodes(
                self.vdc_name,
                [node.node_id for node in deployment_nodes],
                [pool_id] * len(deployment_nodes),
                network_view,
                self.bot,
                self.identity.instance_name,
            )
            self.vdc_deployer.info(f"Network update result: {network_result}")
            if network_result:
                result += network_result["ids"]
            for wid in result:
                try:
                    success = deployer.wait_workload(
                        wid, self.bot, 5, identity_name=self.identity.instance_name, cancel_by_uuid=False
                    )
                    if not success:
                        raise DeploymentFailed()
                except DeploymentFailed:
                    # for failed network deployments
                    workload = self.zos.workloads.get(wid)
                    self.vdc_deployer.error(f"Failed to add node {workload.info.node_id} to network. wid: {wid}")
                    # BUGFIX: keep the surviving candidates (was appending the
                    # outer loop variable `node`, filling the list with copies
                    # of the last scheduled node instead of the good ones).
                    deployment_nodes = [
                        d_node for d_node in deployment_nodes if d_node.node_id != workload.info.node_id
                    ]
        except DeploymentFailed as e:
            # for dry run exceptions
            if e.wid:
                workload = self.zos.workloads.get(e.wid)
                self.vdc_deployer.error(f"Failed to add node {workload.info.node_id} to network. wid: {e.wid}")
                # BUGFIX: same surviving-node filter as above.
                deployment_nodes = [
                    d_node for d_node in deployment_nodes if d_node.node_id != workload.info.node_id
                ]
            else:
                self.vdc_deployer.error(f"Network deployment failed on multiple nodes due to error {str(e)}")
                deployment_nodes = []
            continue
        if len(deployment_nodes) == no_nodes:
            self.vdc_deployer.info("Required nodes added to network successfully")
            return deployment_nodes
def reservation(self):
    """Deploy an ubuntu-20.04 virtual machine and wait for the workload."""
    metadata = {
        "form_info": {
            "chatflow": "vmachine",
            "name": self.solution_name,
            "solution_uuid": self.solution_id
        },
    }
    self.solution_metadata.update(metadata)
    # NOTE(review): this assignment shadows the `reservation` method on the
    # instance with the workload id; confirm nothing calls self.reservation()
    # again afterwards.
    self.reservation, self.public_ip = deployer.deploy_vmachine(
        node_id=self.selected_node.node_id,
        network_name=self.network_view.name,
        name="ubuntu-20.04",
        ip_address=self.ip_address,
        ssh_keys=self.ssh_keys,
        pool_id=self.pool_id,
        size=self.vm_size,
        enable_public_ip=self.enable_public_ip,
        **self.solution_metadata,
    )
    success = deployer.wait_workload(self.reservation, self)
    if not success:
        raise DeploymentFailed(
            f"Failed to deploy workload {self.reservation}",
            solution_uuid=self.solution_id,
            wid=self.reservation,
        )
def zdb_reservation(self):
    """Deploy the zdb namespaces backing minio, tagging them with a fresh password.

    Raises:
        DeploymentFailed: if any zdb workload does not deploy.
    """
    self.password = uuid.uuid4().hex
    self.metadata = {
        "Solution Name": self.solution_name,
        "Solution Type": "minio",
        "zdb_password": self.password,
    }
    self.solution_metadata.update(self.metadata)
    self.zdb_result = deployer.deploy_minio_zdb(
        pool_id=self.zdb_pool_ids[0],
        password=self.password,
        node_ids=[node.node_id for node in self.zdb_nodes],
        zdb_no=self.zdb_number,
        pool_ids=self.zdb_pool_ids,
        solution_uuid=self.solution_id,
        disk_size=self.zdb_disk_size,
        disk_type=self.zdb_disk_type,
        **self.solution_metadata,
    )
    for wid in self.zdb_result:
        if not deployer.wait_workload(wid, self):
            raise DeploymentFailed(
                f"failed to deploy zdb workload {wid}", solution_uuid=self.solution_id, wid=wid
            )
def reservation(self):
    """Deploy the kubernetes cluster (master plus workers) and wait for each reservation.

    Raises:
        DeploymentFailed: if any cluster workload does not deploy.
    """
    self.solution_metadata.update(
        {
            "name": self.solution_name,
            "form_info": {"chatflow": "kubernetes", "Solution name": self.solution_name},
        }
    )
    self.reservations = deployer.deploy_kubernetes_cluster(
        pool_id=self.selected_pool_ids[0],
        node_ids=[node.node_id for node in self.selected_nodes],
        network_name=self.network_view.name,
        cluster_secret=self.cluster_secret,
        ssh_keys=self.ssh_keys,
        size=self.cluster_size,
        ip_addresses=self.ip_addresses,
        slave_pool_ids=self.selected_pool_ids[1:],
        solution_uuid=self.solution_id,
        **self.solution_metadata,
    )
    for resv in self.reservations:
        wid = resv["reservation_id"]
        if not deployer.wait_workload(wid, self):
            raise DeploymentFailed(
                f"Failed to deploy workload {wid}", solution_uuid=self.solution_id, wid=wid
            )
def _deploy_trc_proxy(
    self,
    scheduler,
    wid,
    subdomain,
    gateway,
    pool_id,
    secret,
    ip_address,
    port,
    tls_port,
    gateway_pool_id,
    solution_uuid,
    description,
):
    """Expose workload `wid` at `subdomain` via a TRC proxy + nginx container.

    Tries scheduler nodes one by one; on success returns the subdomain. On a
    partial failure the already-reserved workloads are decommissioned before
    the next node is tried. Returns None when all candidate nodes fail.
    """
    cont_id = None
    proxy_id = None
    for node in scheduler.nodes_by_capacity(cru=1, mru=1, sru=0.25):
        try:
            self.vdc_deployer.info(
                f"Deploying trc proxy for wid: {wid} on node: {node.node_id} subdomain: {subdomain} gateway: {gateway.node_id}"
            )
            # Reserves both the gateway proxy and the nginx/trc container.
            cont_id, proxy_id = deployer.expose_address(
                reserve_proxy=True,
                pool_id=pool_id,
                gateway_id=gateway.node_id,
                network_name=self.vdc_name,
                trc_secret=secret,
                domain_name=subdomain,
                local_ip=ip_address,
                port=port,
                tls_port=tls_port,
                proxy_pool_id=gateway_pool_id,
                bot=self.bot,
                solution_uuid=solution_uuid,
                node_id=node.node_id,
                exposed_wid=wid,
                identity_name=self.identity.instance_name,
                description=description,
            )
            success = deployer.wait_workload(
                cont_id, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not success:
                self.vdc_deployer.error(
                    f"Nginx container for wid: {wid} failed on node: {node.node_id}, nginx_wid: {cont_id}"
                )
                # container only failed. no need to decomission subdomain
                self.zos.workloads.decomission(proxy_id)
                continue
            return subdomain
        except DeploymentFailed:
            # Reservation itself failed: clean up whichever workloads were created.
            self.vdc_deployer.error(
                f"proxy reservation for wid: {wid} failed on node: {node.node_id}, subdomain: {subdomain}, gateway: {gateway.node_id}"
            )
            if cont_id:
                self.zos.workloads.decomission(cont_id)
            if proxy_id:
                self.zos.workloads.decomission(proxy_id)
            continue
def ip_selection(self):
    """Attach all selected nodes to the network, then prompt for master and slave IPs.

    Raises:
        DeploymentFailed: if a network-update workload fails.
    """
    self.md_show_update("Deploying Network on Nodes....")
    result = deployer.add_multiple_network_nodes(
        self.network_view.name,
        [node.node_id for node in self.selected_nodes],
        self.selected_pool_ids,
        self.network_view,
        self,
        owner=self.solution_metadata.get("owner"),
    )
    if result:
        for wid in result["ids"]:
            if not deployer.wait_workload(wid, self):
                workload = j.sals.zos.get().workloads.get(wid)
                raise DeploymentFailed(
                    f"Failed to add node {workload.info.node_id} to network {wid}", wid=wid
                )
        self.network_view = self.network_view.copy()
    # One IP per node: index 0 is the master, the rest are slaves.
    self.ip_addresses = []
    master_free_ips = self.network_view.get_node_free_ips(self.selected_nodes[0])
    master_ip = self.drop_down_choice(
        "Please choose IP Address for Master node",
        master_free_ips,
        required=True,
        default=master_free_ips[0],
    )
    self.ip_addresses.append(master_ip)
    self.network_view.used_ips.append(master_ip)
    for idx in range(1, len(self.selected_nodes)):
        free_ips = self.network_view.get_node_free_ips(self.selected_nodes[idx])
        slave_ip = self.drop_down_choice(
            f"Please choose IP Address for Slave node {idx}",
            free_ips,
            required=True,
            default=free_ips[0],
        )
        self.ip_addresses.append(slave_ip)
        self.network_view.used_ips.append(slave_ip)
def network_reservation(self):
    """Create a new network or add access to an existing one, then wait for workloads.

    On "Create" the whole solution is rolled back if waiting is interrupted.
    """
    if self.action == "Create":
        try:
            self.config = deployer.deploy_network(
                self.solution_name,
                self.access_node,
                self.ip_range,
                self.ipversion,
                self.pool,
                **self.solution_metadata,
            )
        except Exception as e:
            raise StopChatFlow(f"Failed to register workload due to error {str(e)}")
    else:
        self.config = deployer.add_access(
            self.network_view.name,
            self.network_view,
            self.access_node.node_id,
            self.pool,
            self.ipversion == "IPv4",
            bot=self,
            **self.solution_metadata,
        )
    for wid in self.config["ids"]:
        try:
            success = deployer.wait_workload(wid, self, breaking_node_id=self.access_node.node_id)
        except StopChatFlow as e:
            # Only a freshly-created network is cancelled; an existing network
            # keeps its prior workloads. The interruption is re-raised either way.
            if self.action == "Create":
                solutions.cancel_solution(self.config["ids"])
            raise e
        if not success:
            raise DeploymentFailed(f"Failed to deploy workload {wid}", wid=wid)
def network_reservation(self):
    """Wait for the network workloads in ``self.config``; cancel the solution on interruption.

    Raises:
        StopChatFlow: re-raised after cancelling, when waiting is interrupted.
        DeploymentFailed: if a workload finishes unsuccessfully.
    """
    # create config first
    for wid in self.config["ids"]:
        try:
            deployed = deployer.wait_workload(wid, self, breaking_node_id=self.access_node.node_id)
        except StopChatFlow as e:
            # Roll back every workload of this network before propagating.
            solutions.cancel_solution(self.config["ids"])
            raise e
        if not deployed:
            raise DeploymentFailed(f"Failed to deploy workload {wid}", wid=wid)
def minio_reservation(self):
    """Deploy the minio container(s) wired to the previously deployed zdb namespaces."""
    zdb_configs = []
    for zid in self.zdb_result:
        zdb_configs.append(deployer.get_zdb_url(zid, self.password))
    metadata = {
        "name": self.solution_name,
        "form_info": {
            "chatflow": "minio",
            "Solution name": self.solution_name,
            "zdb_password": self.password
        },
    }
    self.solution_metadata.update(metadata)
    if self.mode == "Master/Slave":
        # Mutating metadata["form_info"] after update() still propagates:
        # dict.update copies the reference, so solution_metadata shares this dict.
        metadata["form_info"]["Slave IP"] = self.ip_addresses[1]
    self.minio_result = deployer.deploy_minio_containers(
        pool_id=self.minio_pool_ids[0],
        network_name=self.network_view.name,
        minio_nodes=[n.node_id for n in self.minio_nodes],
        minio_ip_addresses=self.ip_addresses,
        zdb_configs=zdb_configs,
        ak=self.ak,
        sk=self.sk,
        ssh_key=self.public_ssh_key,
        cpu=self.minio_cont_resources["cpu"],
        memory=self.minio_cont_resources["memory"],
        data=self.data,
        parity=self.parity,
        disk_size=1,
        log_config=self.log_config,
        mode=self.mode,
        bot=self,
        pool_ids=self.minio_pool_ids,
        solution_uuid=self.solution_id,
        public_ipv6=self.public_ipv6,
        **self.solution_metadata,
    )
    for resv_id in self.minio_result:
        success = deployer.wait_workload(resv_id, self)
        if not success:
            raise DeploymentFailed(
                f"Failed to deploy Minio container workload {resv_id}",
                solution_uuid=self.solution_id,
                wid=resv_id,
            )
def ip_selection(self):
    """Attach every minio node to the network, then prompt for primary/secondary IPs.

    Raises:
        DeploymentFailed: if a network-update workload fails.
    """
    self.md_show_update("Deploying Network on Nodes....")
    for idx, node in enumerate(self.minio_nodes):
        pool_id = self.minio_pool_ids[idx]
        result = deployer.add_network_node(
            self.network_view.name,
            node,
            pool_id,
            self.network_view,
            bot=self,
            owner=self.solution_metadata.get("owner"),
        )
        if not result:
            # Node already part of the network; nothing to wait for.
            continue
        for wid in result["ids"]:
            if not deployer.wait_workload(wid, bot=self, breaking_node_id=node.node_id):
                raise DeploymentFailed(f"Failed to add node {node.node_id} to network {wid}", wid=wid)
        self.network_view = self.network_view.copy()
    self.ip_addresses = []
    primary_ips = self.network_view.get_node_free_ips(self.minio_nodes[0])
    primary_ip = self.drop_down_choice(
        "Please choose IP Address for Primary container", primary_ips, required=True, default=primary_ips[0]
    )
    self.ip_addresses.append(primary_ip)
    self.network_view.used_ips.append(primary_ip)
    if self.mode == "Master/Slave":
        secondary_ips = self.network_view.get_node_free_ips(self.minio_nodes[1])
        secondary_ip = self.drop_down_choice(
            "Please choose IP Address for Secondary container",
            secondary_ips,
            required=True,
            default=secondary_ips[0],
        )
        self.ip_addresses.append(secondary_ip)
        self.network_view.used_ips.append(secondary_ip)
def wg_reservation(self):
    """Create an IPv6 gateway (4to6) workload keyed to the user's wireguard public key."""
    if not self.publickey:
        # Generate a keypair only when the user did not supply a public key.
        self.privatekey, self.publickey = j.tools.wireguard.generate_key_pair()
        self.privatekey = self.privatekey.decode()
    self.resv_id = deployer.create_ipv6_gateway(
        self.gateway_id,
        self.pool_id,
        self.publickey,
        SolutionType="4to6GW",
        solution_uuid=self.solution_id,
        **self.solution_metadata,
    )
    success = deployer.wait_workload(self.resv_id, self)
    if not success:
        raise StopChatFlow(f"Failed to deploy workload {self.resv_id}")
    # NOTE(review): other methods in this file access workloads via
    # j.sals.zos.get().workloads — confirm j.sals.zos.workloads is valid here.
    self.reservation_result = j.sals.zos.workloads.get(self.resv_id).info.result
def zdb_reservation(self):
    """Deploy zdb namespaces under a freshly generated password and wait for each.

    Raises:
        DeploymentFailed: if any zdb workload does not deploy.
    """
    self.password = uuid.uuid4().hex
    self.zdb_result = deployer.deploy_minio_zdb(
        pool_id=self.zdb_pool_ids[0],
        password=self.password,
        node_ids=[node.node_id for node in self.zdb_nodes],
        zdb_no=self.zdb_number,
        pool_ids=self.zdb_pool_ids,
        solution_uuid=self.solution_id,
        **self.solution_metadata,
    )
    for wid in self.zdb_result:
        if not deployer.wait_workload(wid, self):
            raise DeploymentFailed(
                f"failed to deploy zdb workload {wid}", solution_uuid=self.solution_id, wid=wid
            )
def container_ip(self):
    """Add the selected node to a network copy and prompt the user for the container IP.

    Raises:
        StopChatFlow: if a network-update workload fails.
    """
    self.network_view_copy = self.network_view.copy()
    result = deployer.add_network_node(
        self.network_view.name,
        self.selected_node,
        self.pool_id,
        self.network_view_copy,
        bot=self,
        **self.solution_metadata,
    )
    if result:
        for wid in result["ids"]:
            if not deployer.wait_workload(wid, self, breaking_node_id=self.selected_node.node_id):
                raise StopChatFlow(
                    f"Failed to add node {self.selected_node.node_id} to network {wid}"
                )
        self.network_view_copy = self.network_view_copy.copy()
    free_ips = self.network_view_copy.get_node_free_ips(self.selected_node)
    self.ip_address = self.drop_down_choice("Please choose IP Address for your solution", free_ips)
def deploy_master(
    self,
    pool_id,
    scheduler,
    k8s_flavor,
    cluster_secret,
    ssh_keys,
    solution_uuid,
    network_view,
    datastore_endpoint="",
    network_subnet="",
    private_ip="",
    public_ip=None,
):
    """Deploy the kubernetes master on the first capable node that succeeds.

    Tries scheduler nodes one at a time: attaches the node to the network,
    reserves a public ip (a specific one when `public_ip` is given), then
    deploys the master workload. Returns the master's private ip on success,
    or None when the scheduler is exhausted. Raises j.exceptions.Runtime if
    node selection fails with IndexError.
    """
    master_ip = None
    # deploy_master
    k8s_resources_dict = VDC_SIZE.K8S_SIZES[k8s_flavor]
    nodes_generator = scheduler.nodes_by_capacity(**k8s_resources_dict, pool_id=pool_id, public_ip=True)
    while not master_ip:
        try:
            try:
                master_node = next(nodes_generator)
            except StopIteration:
                # No more candidate nodes: give up (caller sees None).
                return
            self.vdc_deployer.info(
                f"Deploying kubernetes master on node {master_node.node_id} with datastore: {datastore_endpoint}"
            )
            # add node to network
            try:
                result = deployer.add_network_node(
                    self.vdc_name,
                    master_node,
                    pool_id,
                    network_view,
                    self.bot,
                    self.identity.instance_name,
                    subnet=network_subnet,
                )
                if result:
                    for wid in result["ids"]:
                        success = deployer.wait_workload(
                            wid, self.bot, 3, identity_name=self.identity.instance_name, cancel_by_uuid=False
                        )
                        if not success:
                            self.vdc_deployer.error(f"Failed to deploy network for kubernetes master wid: {wid}")
                            raise DeploymentFailed
            except DeploymentFailed:
                self.vdc_deployer.error(
                    f"Failed to deploy network for kubernetes master on node {master_node.node_id}"
                )
                continue
        except IndexError:
            self.vdc_deployer.error("All attempts to deploy kubernetes master node have failed")
            raise j.exceptions.Runtime("All attempts to deploy kubernetes master node have failed")
        # reserve public_ip
        if public_ip:
            public_ip_wid = self.vdc_deployer.public_ip.get_specific_public_ip(
                pool_id, master_node.node_id, public_ip, solution_uuid=solution_uuid
            )
        else:
            public_ip_wid = self.vdc_deployer.public_ip.get_public_ip(
                pool_id, master_node.node_id, solution_uuid=solution_uuid
            )
        if not public_ip_wid:
            self.vdc_deployer.error(f"Failed to reserve public ip on node {master_node.node_id}")
            continue
        # deploy master
        if private_ip:
            private_ip_address = private_ip
        else:
            network_view = network_view.copy()
            private_ip_address = network_view.get_free_ip(master_node)
        self.vdc_deployer.info(f"Kubernetes master ip: {private_ip_address}")
        wid = deployer.deploy_kubernetes_master(
            pool_id,
            master_node.node_id,
            network_view.name,
            cluster_secret,
            ssh_keys,
            private_ip_address,
            size=k8s_flavor.value,
            identity_name=self.identity.instance_name,
            # form_info={"chatflow": "kubernetes"},
            # name=self.vdc_name,
            secret=cluster_secret,
            solution_uuid=solution_uuid,
            description=self.vdc_deployer.description,
            public_ip_wid=public_ip_wid,
            datastore_endpoint=datastore_endpoint,
            disable_default_ingress=False,
        )
        self.vdc_deployer.info(f"Kubernetes master wid: {wid}")
        try:
            success = deployer.wait_workload(
                wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not success:
                raise DeploymentFailed()
            master_ip = private_ip_address
            return master_ip
        except DeploymentFailed:
            # Master failed: release the public ip before trying the next node.
            self.zos.workloads.decomission(public_ip_wid)
            self.vdc_deployer.error(f"Failed to deploy kubernetes master wid: {wid}")
            continue
    self.vdc_deployer.error(f"All attempts to deploy kubernetes master have failed")
def _add_workers(
    self,
    pool_id,
    nodes_generator,
    k8s_flavor,
    cluster_secret,
    ssh_keys,
    solution_uuid,
    network_view,
    master_ip,
    no_nodes,
    public_ip=False,
    external=True,
):
    """Deploy `no_nodes` kubernetes workers, retrying with fresh nodes on failure.

    Returns the list of worker workload ids on success, or None when no nodes
    remain to attach to the network.
    """
    # deploy workers
    wids = []
    while True:
        result = []
        public_wids = []
        deployment_nodes = self._add_nodes_to_network(pool_id, nodes_generator, wids, no_nodes, network_view)
        if not deployment_nodes:
            self.vdc_deployer.error("No available nodes to deploy kubernetes workers")
            return
        self.vdc_deployer.info(
            f"Deploying kubernetes workers on nodes {[node.node_id for node in deployment_nodes]}"
        )
        network_view = network_view.copy()
        # deploy workers
        for node in deployment_nodes:
            if public_ip:
                public_ip_wid = self.vdc_deployer.public_ip.get_public_ip(pool_id, node.node_id, solution_uuid)
                if not public_ip_wid:
                    self.vdc_deployer.error(f"Failed to deploy reserve public ip on node {node.node_id}")
                    continue
            else:
                # 0 marks "no public ip reserved" for the cleanup path below.
                public_ip_wid = 0
            self.vdc_deployer.info(f"Deploying kubernetes worker on node {node.node_id}")
            ip_address = network_view.get_free_ip(node)
            self.vdc_deployer.info(f"Kubernetes worker ip address: {ip_address}")
            result.append(
                deployer.deploy_kubernetes_worker(
                    pool_id,
                    node.node_id,
                    network_view.name,
                    cluster_secret,
                    ssh_keys,
                    ip_address,
                    master_ip,
                    size=k8s_flavor.value,
                    secret=cluster_secret,
                    identity_name=self.identity.instance_name,
                    # form_info={"chatflow": "kubernetes"},
                    # name=self.vdc_name,
                    solution_uuid=solution_uuid,
                    description=self.vdc_deployer.description,
                    public_ip_wid=public_ip_wid,
                    external=external,
                )
            )
            public_wids.append(public_ip_wid)
        for idx, wid in enumerate(result):
            try:
                success = deployer.wait_workload(
                    wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
                )
                if not success:
                    raise DeploymentFailed()
                wids.append(wid)
                self.vdc_deployer.info(f"Kubernetes worker deployed successfully wid: {wid}")
            except DeploymentFailed:
                # Release this worker's public ip (if any) and keep going.
                if public_wids[idx]:
                    self.zos.workloads.decomission(public_wids[idx])
                self.vdc_deployer.error(f"Failed to deploy kubernetes worker wid: {wid}")
        self.vdc_deployer.info(f"successful kubernetes workers ids: {wids}")
        if len(wids) == no_nodes:
            self.vdc_deployer.info(f"All workers deployed successfully")
            return wids
    # NOTE(review): unreachable as written — the `while True` loop only exits
    # via the returns above; confirm whether this error path was intended.
    self.vdc_deployer.error("All tries to deploy kubernetes workers have failed")
def deploy_external_etcd(self, farm_name, no_nodes=ETCD_CLUSTER_SIZE, solution_uuid=None):
    """Deploy an etcd cluster of `no_nodes` containers on the given farm.

    Retries with fresh nodes until a full cluster deploys; returns the list of
    cluster ip addresses, or None when no nodes remain.
    """
    network_view = deployer.get_network_view(self.vdc_name, identity_name=self.identity.instance_name)
    pool_id, _ = self.vdc_deployer.get_pool_id_and_reservation_id(farm_name)
    scheduler = Scheduler(pool_id=pool_id)
    nodes_generator = scheduler.nodes_by_capacity(cru=ETCD_CPU, sru=ETCD_DISK / 1024, mru=ETCD_MEMORY / 1024)
    solution_uuid = solution_uuid or uuid.uuid4().hex
    while True:
        deployment_nodes = self._add_nodes_to_network(pool_id, nodes_generator, [], no_nodes, network_view)
        if not deployment_nodes:
            self.vdc_deployer.error("no available nodes to deploy etcd cluster")
            return
        self.vdc_deployer.info(f"deploying etcd cluster on nodes {[node.node_id for node in deployment_nodes]}")
        network_view = network_view.copy()
        ip_addresses = []
        node_ids = []
        etcd_cluster = ""
        # Build the etcd initial-cluster string: etcd_<n>=http://<ip>:2380,
        for idx, node in enumerate(deployment_nodes):
            address = network_view.get_free_ip(node)
            ip_addresses.append(address)
            etcd_cluster += f"etcd_{idx+1}=http://{address}:2380,"
            node_ids.append(node.node_id)
        secret_env = None
        # etcd_backup_config = j.core.config.get("VDC_S3_CONFIG", {})
        # restic_url = etcd_backup_config.get("S3_URL", "")
        # restic_bucket = etcd_backup_config.get("S3_BUCKET", "")
        # restic_ak = etcd_backup_config.get("S3_AK", "")
        # restic_sk = etcd_backup_config.get("S3_SK", "")
        # if all([self.vdc_deployer.restore, restic_url, restic_bucket, restic_ak, restic_sk]):
        #     secret_env = {
        #         "RESTIC_REPOSITORY": f"s3:{restic_url}/{restic_bucket}/{self.vdc_instance.owner_tname}/{self.vdc_instance.vdc_name}",
        #         "AWS_ACCESS_KEY_ID": restic_ak,
        #         "AWS_SECRET_ACCESS_KEY": restic_sk,
        #         "RESTIC_PASSWORD": self.vdc_deployer.password_hash,
        #     }
        # Derive the logging channel suffix from the explorer environment.
        explorer = None
        if "test" in j.core.identity.me.explorer_url:
            explorer = "test"
        elif "dev" in j.core.identity.me.explorer_url:
            explorer = "dev"
        else:
            explorer = "main"
        log_config = j.core.config.get("VDC_LOG_CONFIG", {})
        if log_config:
            log_config["channel_name"] = f"{self.vdc_instance.instance_name}_{explorer}"
        pool_ids = [pool_id for i in range(no_nodes)]
        wids = deployer.deploy_etcd_containers(
            pool_ids,
            node_ids,
            network_view.name,
            ip_addresses,
            etcd_cluster,
            ETCD_FLIST,
            ETCD_CPU,
            ETCD_MEMORY,
            ETCD_DISK,
            entrypoint="",
            ssh_key=self.vdc_deployer.ssh_key.public_key.strip(),
            identity_name=self.identity.instance_name,
            solution_uuid=solution_uuid,
            description=self.vdc_deployer.description,
            secret_env=secret_env,
            log_config=log_config,
        )
        try:
            for wid in wids:
                success = deployer.wait_workload(
                    wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
                )
                if not success:
                    self.vdc_deployer.error(f"etcd cluster workload: {wid} failed to deploy")
                    raise DeploymentFailed()
        except DeploymentFailed:
            # All-or-nothing: tear down the whole batch and retry with new nodes.
            for wid in wids:
                self.zos.workloads.decomission(wid)
            continue
        return ip_addresses
def reservation(self):
    """Expose an existing solution on a domain via subdomain + proxy + TCP router.

    Schedules a node for the TCP router container, attaches it to the solution's
    network, reserves the subdomain and reverse proxy on the gateway (skipping
    the subdomain for custom domains), then deploys the router container.

    Raises:
        DeploymentFailed: if any of the workloads does not deploy.
        StopChatFlow: when no free IP is available for the router container.
    """
    metadata = {
        "name": self.domain,
        "form_info": {
            "Solution name": self.domain,
            "chatflow": "exposed"
        }
    }
    self.solution_metadata.update(metadata)
    query = {"mru": 1, "cru": 1, "sru": 1}
    self.selected_node = deployer.schedule_container(self.pool_id, **query)
    self.network_name = self.solution["Network"]
    result = deployer.add_network_node(
        self.network_name,
        self.selected_node,
        self.pool_id,
        bot=self,
        owner=self.solution_metadata.get("owner"),
    )
    if result:
        for wid in result["ids"]:
            success = deployer.wait_workload(wid, self, breaking_node_id=self.selected_node.node_id)
            if not success:
                raise DeploymentFailed(f"Failed to add node to network {wid}", wid=wid)
    self.network_view = deployer.get_network_view(self.network_name)
    self.tcprouter_ip = self.network_view.get_free_ip(self.selected_node)
    if not self.tcprouter_ip:
        raise StopChatFlow(
            f"No available ips one for network {self.network_view.name} node {self.selected_node.node_id}"
        )
    if self.domain_type != "Custom Domain":
        self.dom_id = deployer.create_subdomain(
            pool_id=self.domain_pool.pool_id,
            gateway_id=self.domain_gateway.node_id,
            subdomain=self.domain,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        success = deployer.wait_workload(self.dom_id, self)
        if not success:
            # FIX: pass wid so the failed workload can be identified/cancelled,
            # consistent with every other DeploymentFailed raise in this file.
            raise DeploymentFailed(
                f"Failed to reserve sub-domain workload {self.dom_id}",
                solution_uuid=self.solution_id,
                wid=self.dom_id,
            )
    self.proxy_id = deployer.create_proxy(
        pool_id=self.domain_pool.pool_id,
        gateway_id=self.domain_gateway.node_id,
        domain_name=self.domain,
        trc_secret=self.secret,
        **self.solution_metadata,
        solution_uuid=self.solution_id,
    )
    success = deployer.wait_workload(self.proxy_id, self)
    if not success:
        # FIX: same wid consistency as above.
        raise DeploymentFailed(
            f"Failed to reserve reverse proxy workload {self.proxy_id}",
            solution_uuid=self.solution_id,
            wid=self.proxy_id,
        )
    self.tcprouter_id = deployer.expose_address(
        pool_id=self.pool_id,
        gateway_id=self.domain_gateway.node_id,
        network_name=self.network_name,
        local_ip=self.solution_ip,
        port=self.port,
        tls_port=self.tls_port,
        trc_secret=self.secret,
        bot=self,
        **self.solution_metadata,
        solution_uuid=self.solution_id,
    )
    success = deployer.wait_workload(self.tcprouter_id, self)
    if not success:
        raise DeploymentFailed(
            f"Failed to reserve TCP Router container workload {self.tcprouter_id}",
            solution_uuid=self.solution_id,
            wid=self.tcprouter_id,
        )
def deploy_s3_minio_container(self, pool_id, ak, sk, ssh_key, scheduler, zdb_wids, solution_uuid, password):
    """Deploy a minio container backed by the given ZDB namespaces.

    Resolves the ZDB connection URLs first, then tries each candidate node
    from the scheduler in turn: the node is attached to the VDC network and a
    minio container is deployed on it. The first fully successful deployment
    wins; per-node failures are logged and the next node is tried.

    Args:
        pool_id: capacity pool to deploy on.
        ak: minio access key.
        sk: minio secret key.
        ssh_key: authorized SSH key for the container.
        scheduler: node scheduler providing candidate nodes by capacity.
        zdb_wids: workload ids of the backing ZDB namespaces.
        solution_uuid: uuid recorded in the workload metadata.
        password: password used to build the ZDB connection URLs.

    Returns:
        The minio container workload id on success; implicitly None when all
        candidate nodes failed (exhaustion is logged, not raised).
    """
    zdb_configs = []
    self.vdc_deployer.info(f"deploying minio for zdbs: {zdb_wids}")
    for zid in zdb_wids:
        zdb_configs.append(deployer.get_zdb_url(zid, password, identity_name=self.identity.instance_name))
    self.vdc_deployer.info(f"zdb_configs: {zdb_configs}")
    network_view = deployer.get_network_view(self.vdc_name, identity_name=self.identity.instance_name)
    # presumably MINIO_MEMORY/MINIO_DISK are in MB and nodes_by_capacity
    # expects GB, hence the /1024 — TODO confirm units against the constants
    for node in scheduler.nodes_by_capacity(
        cru=MINIO_CPU, mru=MINIO_MEMORY / 1024, sru=MINIO_DISK / 1024, ip_version="IPv6"
    ):
        self.vdc_deployer.info(f"node {node.node_id} selected for minio")
        try:
            result = deployer.add_network_node(
                self.vdc_name, node, pool_id, network_view, self.bot, self.identity.instance_name
            )
            if result:
                for wid in result["ids"]:
                    success = deployer.wait_workload(
                        wid, self.bot, 5, identity_name=self.identity.instance_name, cancel_by_uuid=False
                    )
                    if not success:
                        self.vdc_deployer.error(f"workload {wid} failed when adding node to network")
                        raise DeploymentFailed()
        except DeploymentFailed:
            # network attach failed on this node; try the next candidate
            self.vdc_deployer.error(f"failed to deploy minio network on node {node.node_id}.")
            continue
        # refresh the view so the free-IP lookup sees the node just added
        network_view = network_view.copy()
        ip_address = network_view.get_free_ip(node)
        self.vdc_deployer.info(f"minio ip address {ip_address}")
        try:
            result = deployer.deploy_minio_containers(
                pool_id,
                self.vdc_name,
                [node.node_id],
                [ip_address],
                zdb_configs,
                ak,
                sk,
                ssh_key,
                MINIO_CPU,
                MINIO_MEMORY,
                S3_NO_DATA_NODES,
                S3_NO_PARITY_NODES,
                public_ipv6=True,
                disk_size=int(MINIO_DISK / 1024),
                bot=self.bot,
                identity_name=self.identity.instance_name,
                # form_info={"chatflow": "minio"},
                # name=self.vdc_name,
                solution_uuid=solution_uuid,
                description=self.vdc_deployer.description,
            )
        except DeploymentFailed as e:
            if e.wid:
                workload = self.zos.workloads.get(e.wid)
                self.vdc_deployer.error(
                    f"failed to deploy minio volume wid: {e.wid} on node {workload.info.node_id}"
                )
            else:
                self.vdc_deployer.error(f"failed to deploy minio volume due to error {str(e)}")
            continue
        wid = result[0]
        try:
            success = deployer.wait_workload(
                wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not success:
                raise DeploymentFailed()
            self.vdc_deployer.info(f"minio container deployed successfully wid: {wid}")
            return wid
        except DeploymentFailed:
            self.vdc_deployer.error(f"failed to deploy minio container wid: {wid}")
            continue
    # scheduler exhausted without a successful deployment; caller gets None
    self.vdc_deployer.error("no nodes available to deploy minio container")
def deploy_vmachine(
    self,
    solution_name,
    vm_size,
    pool_id,
    nodes_generator,
    ssh_keys,
    solution_uuid,
    network_view,
    enable_public_ip,
    vmachine_type,
    description="",
):
    """Deploy a virtual machine on the first candidate node that succeeds.

    Iterates over ``nodes_generator``, attaching each candidate node to the
    VDC network and deploying the VM there. Per-node failures are logged and
    the next candidate is tried; on a failed deployment with a reserved
    public IP, that IP workload is decommissioned before retrying.

    Args:
        solution_name: name recorded in the workload form_info metadata.
        vm_size: size passed through to the deployer.
        pool_id: capacity pool to deploy on.
        nodes_generator: iterator yielding candidate nodes.
        ssh_keys: authorized SSH keys for the machine.
        solution_uuid: uuid recorded in the workload form_info metadata.
        network_view: view of the VDC network (copied per attempt).
        enable_public_ip: whether to also reserve a public IP.
        vmachine_type: machine name/type passed through to the deployer.
        description: optional workload description.

    Returns:
        dict with ``public_ip``, ``ip_address`` and ``vm_wid`` on success, or
        None when the node generator is exhausted before any attempt succeeds.

    Raises:
        j.exceptions.Runtime: when node selection raises IndexError.
    """
    # The original loop condition was `while not vmachine_ip`, but vmachine_ip
    # was never assigned, so the loop could only exit via return/raise and the
    # trailing "All attempts ... failed" log call after it was unreachable.
    # `while True` makes the real control flow explicit; the dead line is gone.
    while True:
        try:
            try:
                vmachine_node = next(nodes_generator)
            except StopIteration:
                # no candidate nodes left; caller receives None
                return
            self.vdc_deployer.info(f"Deploying virtual machine on node {vmachine_node.node_id}")
            # add node to network
            try:
                result = deployer.add_network_node(
                    self.vdc_name, vmachine_node, pool_id, network_view, self.bot, self.identity.instance_name
                )
                if result:
                    for wid in result["ids"]:
                        success = deployer.wait_workload(
                            wid, self.bot, 3, identity_name=self.identity.instance_name, cancel_by_uuid=False
                        )
                        if not success:
                            self.vdc_deployer.error("Failed to deploy network for virtual machine")
                            raise DeploymentFailed
            except DeploymentFailed:
                # network attach failed on this node; try the next candidate
                self.vdc_deployer.error(
                    f"Failed to deploy network for virtual machine on node {vmachine_node.node_id}"
                )
                continue
        except IndexError:
            self.vdc_deployer.error("All attempts to deploy virtual machine on nodes node have been failed")
            raise j.exceptions.Runtime("All attempts to deploy virtual machine on nodes node have been failed")
        # refresh the view so the free-IP lookup sees the node just added
        network_view = network_view.copy()
        private_ip_address = network_view.get_free_ip(vmachine_node)
        self.vdc_deployer.info(f"Virtual machine ip: {private_ip_address}")
        metadata = {"form_info": {"chatflow": "vmachine", "name": solution_name, "solution_uuid": solution_uuid}}
        wid, public_ip = deployer.deploy_vmachine(
            node_id=vmachine_node.node_id,
            network_name=network_view.name,
            name=vmachine_type,
            ip_address=private_ip_address,
            ssh_keys=ssh_keys,
            pool_id=pool_id,
            size=vm_size,
            enable_public_ip=enable_public_ip,
            description=description,
            **metadata,
        )
        self.vdc_deployer.info(f"virtual machine machine wid: {wid}")
        try:
            success = deployer.wait_workload(
                wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not success:
                raise DeploymentFailed()
            return {"public_ip": public_ip, "ip_address": private_ip_address, "vm_wid": wid}
        except DeploymentFailed:
            if enable_public_ip:
                # release the reserved public-IP workload before retrying elsewhere
                self.zos.workloads.decomission(self.zos.workloads.get(wid).public_ip)
            self.vdc_deployer.error(f"Failed to deploy virtual machine wid: {wid}")
            continue
def reservation(self):
    """Reserve everything needed to expose a solution over a domain.

    Like the TRC-only variant, but the exposing workload depends on
    ``self.proxy_type``: "NGINX" uses expose_and_create_certificate (with
    optional HTTPS enforcement), anything else falls back to expose_address;
    a separate gateway proxy is only reserved for proxy_type "TRC".

    Raises:
        DeploymentFailed: when any reserved workload fails to deploy.
        StopChatFlow: when the selected node has no free IP on the network.
    """
    metadata = {
        "name": self.domain,
        "form_info": {"Solution name": self.domain, "chatflow": "exposed"},
    }
    self.solution_metadata.update(metadata)
    # minimal capacity query for the TCP-router container
    query = {"mru": 1, "cru": 1, "sru": 1}
    self.selected_node = deployer.schedule_container(self.pool_id, **query)
    self.network_name = self.solution["Network"]
    # attach the selected node to the network; result holds the new network
    # workload ids (falsy when the node is already part of the network)
    result = deployer.add_network_node(
        self.network_name, self.selected_node, self.pool_id, bot=self, owner=self.solution_metadata.get("owner")
    )
    if result:
        for wid in result["ids"]:
            success = deployer.wait_workload(wid, self, breaking_node_id=self.selected_node.node_id)
            if not success:
                raise DeploymentFailed(f"Failed to add node to network {wid}", wid=wid)
    self.network_view = deployer.get_network_view(self.network_name)
    self.tcprouter_ip = self.network_view.get_free_ip(self.selected_node)
    if not self.tcprouter_ip:
        raise StopChatFlow(
            f"No available ips one for network {self.network_view.name} node {self.selected_node.node_id}"
        )
    # a managed sub-domain is only reserved when the user did not bring a custom domain
    if self.domain_type != "Custom Domain":
        self.dom_id = deployer.create_subdomain(
            pool_id=self.domain_pool.pool_id,
            gateway_id=self.domain_gateway.node_id,
            subdomain=self.domain,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        success = deployer.wait_workload(self.dom_id, self)
        if not success:
            raise DeploymentFailed(
                f"Failed to reserve sub-domain workload {self.dom_id}", solution_uuid=self.solution_id
            )
    # a separate gateway reverse proxy is only needed for the TRC proxy type
    if self.proxy_type == "TRC":
        self.proxy_id = deployer.create_proxy(
            pool_id=self.domain_pool.pool_id,
            gateway_id=self.domain_gateway.node_id,
            domain_name=self.domain,
            trc_secret=self.secret,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        success = deployer.wait_workload(self.proxy_id, self)
        if not success:
            raise DeploymentFailed(
                f"Failed to reserve reverse proxy workload {self.proxy_id}", solution_uuid=self.solution_id
            )
    # per-solution redis logging channel, derived from the global sink config
    trc_log_config = j.core.config.get("LOGGING_SINK", {})
    if trc_log_config:
        trc_log_config["channel_name"] = f"{self.threebot_name}-{self.solution_name}-trc".lower()
    if self.proxy_type == "NGINX":
        # NGINX path terminates TLS itself and can enforce HTTPS
        self.tcprouter_id = deployer.expose_and_create_certificate(
            domain=self.domain,
            email=self.email,
            pool_id=self.pool_id,
            gateway_id=self.domain_gateway.node_id,
            network_name=self.network_name,
            solution_ip=self.solution_ip,
            solution_port=self.port,
            trc_secret=self.secret,
            bot=self,
            enforce_https=self.force_https,
            log_config=trc_log_config,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
    else:
        # expose_address returns (wid, ...); only the workload id is kept
        self.tcprouter_id, _ = deployer.expose_address(
            pool_id=self.pool_id,
            gateway_id=self.domain_gateway.node_id,
            network_name=self.network_name,
            local_ip=self.solution_ip,
            port=self.port,
            tls_port=self.tls_port,
            trc_secret=self.secret,
            bot=self,
            log_config=trc_log_config,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
    success = deployer.wait_workload(self.tcprouter_id, self)
    if not success:
        raise DeploymentFailed(
            f"Failed to reserve TCP Router container workload {self.tcprouter_id}",
            solution_uuid=self.solution_id,
            wid=self.tcprouter_id,
        )
def reservation(self):
    """Deploy the monitoring stack: a data volume plus three containers.

    Reserves a volume on the second selected node, then deploys the three
    monitoring tools (redis, prometheus, grafana by position) on their
    respective nodes/pools; prometheus mounts the volume, and prometheus and
    grafana ship their logs to the redis container's channels.

    Raises:
        DeploymentFailed: when the volume or any container fails to deploy.
    """
    self.solution_metadata.update(
        {
            "name": self.solution_name,
            "form_info": {"chatflow": "monitoring", "Solution name": self.solution_name},
        }
    )

    # Volume lives on the second node (index 1), alongside prometheus.
    self.md_show_update("Deploying Volume....")
    vol_id = deployer.deploy_volume(
        self.selected_pool_ids[1],
        self.selected_nodes[1].node_id,
        self.vol_size,
        solution_uuid=self.solution_id,
        **self.solution_metadata,
    )
    if not deployer.wait_workload(vol_id, self):
        raise DeploymentFailed(
            f"Failed to deploy volume on node {self.selected_nodes[1].node_id} {vol_id}", wid=vol_id
        )

    def _redis_log_config(channel_name):
        # Both log shippers point at the first container's redis instance.
        return {
            "channel_type": "redis",
            "channel_host": self.ip_addresses[0],
            "channel_port": 6379,
            "channel_name": channel_name,
        }

    # Per-container configs, positionally aligned with the three tools.
    volume_configs = [{}, {self.vol_mount_point: vol_id}, {}]
    log_configs = [{}, _redis_log_config("prometheus"), _redis_log_config("grafana")]

    self.reservation_ids = []
    for idx in range(3):
        tool = self.tools_names[idx]
        self.md_show_update(f"Deploying {tool}....")
        requirements = self.query[tool]
        wid = deployer.deploy_container(
            pool_id=self.selected_pool_ids[idx],
            node_id=self.selected_nodes[idx].node_id,
            network_name=self.network_view.name,
            ip_address=self.ip_addresses[idx],
            flist=self.flists[idx],
            cpu=requirements["cpu"],
            memory=requirements["memory"],
            disk_size=requirements["disk_size"],
            env=self.env_var_dict,
            interactive=False,
            entrypoint="",
            volumes=volume_configs[idx],
            log_config=log_configs[idx],
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        self.reservation_ids.append(wid)
        if not deployer.wait_workload(wid, self):
            raise DeploymentFailed(f"Failed to deploy {tool}", solution_uuid=self.solution_id, wid=wid)
def reservation(self):
    """Deploy a gollum wiki: sub-domain, exposed address, then the container.

    Reserves three workloads in order, appending each id to
    ``self.workload_ids``: the gateway sub-domain, the TRC proxy exposing
    ports 80/443, and finally the gollum container itself (configured with
    the user's GitHub credentials). Sets ``self.container_url`` once the
    domain is exposed.

    Raises:
        StopChatFlow: when any of the three workloads fails to deploy; a
            failed exposure also cancels the workloads created so far.
    """
    self.workload_ids = []
    self.solution_metadata.update(
        {
            "name": self.solution_name,
            "form_info": {"chatflow": "gollum", "Solution name": self.solution_name},
        }
    )

    # 1) sub-domain on the gateway
    subdomain_wid = deployer.create_subdomain(
        pool_id=self.pool_id,
        gateway_id=self.gateway.node_id,
        subdomain=self.domain,
        addresses=self.addresses,
        solution_uuid=self.solution_id,
        **self.solution_metadata,
    )
    self.workload_ids.append(subdomain_wid)
    if not deployer.wait_workload(subdomain_wid, self):
        raise StopChatFlow(
            f"Failed to create subdomain {self.domain} on gateway {self.gateway.node_id} {subdomain_wid}"
        )

    # 2) expose the container's ports 80/443 through a TRC proxy
    proxy_wid, _ = deployer.expose_address(
        pool_id=self.pool_id,
        gateway_id=self.gateway.node_id,
        network_name=self.network_view.name,
        local_ip=self.ip_address,
        port=80,
        tls_port=443,
        trc_secret=self.secret,
        node_id=self.selected_node.node_id,
        reserve_proxy=True,
        domain_name=self.domain,
        proxy_pool_id=self.gateway_pool.pool_id,
        solution_uuid=self.solution_id,
        **self.solution_metadata,
    )
    self.workload_ids.append(proxy_wid)
    if not deployer.wait_workload(proxy_wid, self):
        # roll back what was reserved so far before aborting the chatflow
        solutions.cancel_solution(self.workload_ids)
        raise StopChatFlow(
            f"Failed to create TRC container on node {self.selected_node.node_id} {proxy_wid}"
        )
    self.container_url = f"https://{self.domain}/"

    # 3) the gollum container itself, configured from the user's GitHub details
    container_env = {
        "pub_key": self.public_key,
        "GITHUB_USER": self.github_user.value,
        "GITHUB_EMAIL": self.github_email.value,
        "GITHUB_REPO": self.github_repo.value,
        "GITHUB_TOKEN": self.github_token.value,
    }
    start_cmd = f'/bin/bash /start.sh "{self.domain}" "{self.email}"'
    container_wid = deployer.deploy_container(
        pool_id=self.pool_id,
        node_id=self.selected_node.node_id,
        network_name=self.network_view.name,
        ip_address=self.ip_address,
        flist=self.flist_url,
        cpu=self.resources["cpu"],
        memory=self.resources["memory"],
        disk_size=self.resources["disk_size"],
        env=container_env,
        interactive=False,
        entrypoint=start_cmd,
        solution_uuid=self.solution_id,
        **self.solution_metadata,
    )
    self.workload_ids.append(container_wid)
    if not deployer.wait_workload(container_wid, self):
        raise StopChatFlow(
            f"Failed to create container on node {self.selected_node.node_id} {container_wid}"
        )