def test_user_group_with_template(request, dind_cc, user_mc):
    """Exercise cluster access granted through a group role binding.

    Runs ``test_user_no_template`` first, then rewrites the user's
    ``userattributes`` object so that its local group principals contain a
    single test group, binds that group to the ``cluster-member`` role
    template and finally waits on the ``kubectl_available`` check.
    """
    group_principal = 'local_group://test-123'

    test_user_no_template(request, dind_cc, user_mc)

    custom_api = CustomObjectsApi(dind_cc.admin_mc.k8s_client)
    attributes = custom_api.get_cluster_custom_object(
        'management.cattle.io', 'v3', 'userattributes', user_mc.user.id)

    # Replace the user's local group principals with the single test group.
    attributes['GroupPrincipals']['local']['Items'] = [
        {'metadata': {'name': group_principal}},
    ]
    custom_api.replace_cluster_custom_object(
        'management.cattle.io', 'v3', 'userattributes', user_mc.user.id,
        attributes)

    binding = {
        'clusterId': dind_cc.cluster.id,
        'groupPrincipalId': group_principal,
        'roleTemplateId': 'cluster-member',
    }
    dind_cc.admin_mc.client.create_clusterRoleTemplateBinding(binding)

    wait_for(kubectl_available(request, dind_cc, user_mc.client))
def delete_policy(custom_objects: CustomObjectsApi, name, namespace) -> None:
    """
    Delete a Policy and hand its getter to ``ensure_item_removal``.

    :param custom_objects: CustomObjectsApi
    :param name: name of the Policy to delete
    :param namespace: namespace the Policy lives in
    :return:
    """
    # Coordinates shared by the delete call and the follow-up removal check.
    resource = ("k8s.nginx.org", "v1", namespace, "policies", name)

    print(f"Delete a Policy: {name}")
    custom_objects.delete_namespaced_custom_object(*resource)
    ensure_item_removal(custom_objects.get_namespaced_custom_object, *resource)
    print(f"Policy was removed with name '{name}'")
def delete_v_s_route(custom_objects: CustomObjectsApi, name, namespace) -> None:
    """
    Delete a VirtualServerRoute and hand its getter to ``ensure_item_removal``.

    :param custom_objects: CustomObjectsApi
    :param name: name of the VirtualServerRoute to delete
    :param namespace: namespace the VirtualServerRoute lives in
    :return:
    """
    # Coordinates shared by the delete call and the follow-up removal check.
    resource = ("k8s.nginx.org", "v1", namespace, "virtualserverroutes", name)

    print(f"Delete a VirtualServerRoute: {name}")
    delete_options = client.V1DeleteOptions()
    custom_objects.delete_namespaced_custom_object(*resource, delete_options)
    ensure_item_removal(custom_objects.get_namespaced_custom_object, *resource)
    print(f"VirtualServerRoute was removed with the name '{name}'")
def patch_v_s_route_from_yaml(
    custom_objects: CustomObjectsApi, name, yaml_manifest, namespace
) -> None:
    """
    Patch an existing VirtualServerRoute with the content of a yaml manifest.

    :param custom_objects: CustomObjectsApi
    :param name: name of the VirtualServerRoute to patch
    :param yaml_manifest: an absolute path to file
    :param namespace: namespace of the VirtualServerRoute
    :return:
    """
    print(f"Update a VirtualServerRoute: {name}")
    with open(yaml_manifest) as manifest_file:
        manifest = yaml.safe_load(manifest_file)

    try:
        custom_objects.patch_namespaced_custom_object(
            "k8s.nginx.org", "v1", namespace, "virtualserverroutes", name, manifest
        )
    except ApiException:
        logging.exception(f"Failed with exception while patching VirtualServerRoute: {name}")
        raise
    else:
        # The reported name comes from the manifest, not from the ``name`` argument.
        print(f"VirtualServerRoute updated with name '{manifest['metadata']['name']}'")
def delete_ap_policy(custom_objects: CustomObjectsApi, name, namespace) -> None:
    """
    Delete an AppProtect policy and hand its getter to ``ensure_item_removal``.

    :param custom_objects: CustomObjectsApi
    :param name: name of the AP policy to delete
    :param namespace: namespace the AP policy lives in
    :return:
    """
    # Coordinates shared by the delete call and the follow-up removal check.
    resource = ("appprotect.f5.com", "v1beta1", namespace, "appolicies", name)

    print(f"Delete a AP policy: {name}")
    custom_objects.delete_namespaced_custom_object(*resource)
    ensure_item_removal(custom_objects.get_namespaced_custom_object, *resource)
    # Fixed pause after the removal check, before reporting success.
    time.sleep(3)
    print(f"AP policy was removed with name: {name}")
def delete_virtual_server(custom_objects: CustomObjectsApi, name, namespace) -> None:
    """
    Delete a VirtualServer and hand its getter to ``ensure_item_removal``.

    :param custom_objects: CustomObjectsApi
    :param name: name of the VirtualServer to delete
    :param namespace: namespace the VirtualServer lives in
    :return:
    """
    # Coordinates shared by the delete call and the follow-up removal check.
    resource = ("k8s.nginx.org", "v1", namespace, "virtualservers", name)

    print(f"Delete a VirtualServer: {name}")
    custom_objects.delete_namespaced_custom_object(*resource)
    ensure_item_removal(custom_objects.get_namespaced_custom_object, *resource)
    print(f"VirtualServer was removed with name '{name}'")
def __init__(self, alg):
    """ Initializes the connection and loads the algorithm manifest.

    If the algorithm object includes a remote cluster, connect to that
    cluster with its host, port, token and CA certificate. Otherwise, use
    the in-cluster configuration.

    When the algorithm has a manifest, it is parsed into ``self.manifest``
    and every entry of ``spec.volumeClaimTemplates`` that lacks a
    ``storageClassName`` in its ``spec`` receives the value of the
    ``WORKFLOW_STORAGE_CLASS`` environment variable.

    :param alg: Algorithm object; ``cluster``, ``manifest`` and ``pk`` are
                read from it.
    """
    if alg.cluster:
        host = alg.cluster.host
        port = alg.cluster.port
        token = alg.cluster.token
        # ``ssl_ca_cert`` is given a file path, so the certificate text is
        # written to a temporary file first. The file is not removed here.
        fd, cert = tempfile.mkstemp(text=True)
        with open(fd, 'w') as f:
            f.write(alg.cluster.cert)
        conf = Configuration()
        conf.api_key['authorization'] = token
        conf.host = f'{PROTO}{host}:{port}'
        conf.verify_ssl = True
        conf.ssl_ca_cert = cert
        api_client = ApiClient(conf)
        self.corev1 = CoreV1Api(api_client)
        self.custom = CustomObjectsApi(api_client)
    else:
        load_incluster_config()
        self.corev1 = CoreV1Api()
        self.custom = CustomObjectsApi()

    # Read in the manifest.
    if alg.manifest:
        # Close the handle once parsed instead of leaving it open.
        # NOTE(review): assumes the object returned by ``alg.manifest.open``
        # is a context manager, as file-like objects normally are — confirm.
        with alg.manifest.open(mode='r') as manifest_file:
            self.manifest = yaml.safe_load(manifest_file)

        if 'volumeClaimTemplates' in self.manifest['spec']:
            for claim in self.manifest['spec']['volumeClaimTemplates']:
                storage_class_name = claim['spec'].get('storageClassName', None)
                if storage_class_name is None:
                    # Write the default where it was looked up: in the
                    # claim's spec, not at the top level of the claim.
                    claim['spec']['storageClassName'] = os.getenv('WORKFLOW_STORAGE_CLASS')
                    logger.warning(f"Implicitly sc to pvc of Algo:{alg.pk}")

    # Save off the algorithm.
    self.alg = alg
def delete_ap_logconf(custom_objects: CustomObjectsApi, name, namespace) -> None:
    """
    Delete an AppProtect logconf and hand its getter to ``ensure_item_removal``.

    :param custom_objects: CustomObjectsApi
    :param name: name of the AP logconf to delete
    :param namespace: namespace the AP logconf lives in
    :return:
    """
    # Coordinates shared by the delete call and the follow-up removal check.
    resource = ("appprotect.f5.com", "v1beta1", namespace, "aplogconfs", name)

    print(f"Delete AP logconf: {name}")
    delete_options = client.V1DeleteOptions()
    custom_objects.delete_namespaced_custom_object(*resource, delete_options)
    ensure_item_removal(custom_objects.get_namespaced_custom_object, *resource)
    print(f"AP logconf was removed with name: {name}")
def __init__(self):
    """ Initializes the connection.

    When ``REMOTE_TRANSCODE_HOST`` is set, the client is built from the
    ``REMOTE_TRANSCODE_*`` environment variables (host, port, token and CA
    certificate). Otherwise the in-cluster configuration is loaded.
    ``setup_common_steps`` is called last in both cases.
    """
    remote_host = os.getenv('REMOTE_TRANSCODE_HOST')

    if not remote_host:
        load_incluster_config()
        self.corev1 = CoreV1Api()
        self.custom = CustomObjectsApi()
    else:
        remote_port = os.getenv('REMOTE_TRANSCODE_PORT')
        configuration = Configuration()
        configuration.api_key['authorization'] = os.getenv('REMOTE_TRANSCODE_TOKEN')
        configuration.host = f'https://{remote_host}:{remote_port}'
        configuration.verify_ssl = True
        configuration.ssl_ca_cert = os.getenv('REMOTE_TRANSCODE_CERT')

        remote_client = ApiClient(configuration)
        self.corev1 = CoreV1Api(remote_client)
        self.custom = CustomObjectsApi(remote_client)

    self.setup_common_steps()
def _delete_custom_objects(  # type: ignore
    group: str,
    version: str,
    plural: str,
    namespace: str,
    logger: kopf.Logger,
    use_async=True,
    **_: Any
):
    """Delete the whole collection of a custom resource in one namespace.

    Parameters
    ----------
    group, version, plural:
        Identify the custom resource to delete.
    namespace:
        Namespace whose objects are removed.
    logger:
        Logger used for progress and failure messages.
    use_async:
        Forwarded to the client as ``async_req``.
    **_:
        Extra keyword arguments are accepted and ignored.

    Returns
    -------
    The value returned by ``delete_collection_namespaced_custom_object``,
    or ``None`` when the call raised ``ApiException``; that case is logged
    and treated as "the collection did not exist".
    """
    logger.info(f"Deleting {plural}.{group} in ns {namespace}")
    co = CustomObjectsApi()

    try:
        resp = co.delete_collection_namespaced_custom_object(
            group=group,
            version=version,
            namespace=namespace,
            plural=plural,
            grace_period_seconds=0,
            propagation_policy="Background",
            pretty="true",
            async_req=use_async,
            body=V1DeleteOptions(),
        )
        return resp
    except ApiException as e:
        # ``Logger.warn`` is a deprecated alias; use ``warning`` and let the
        # logging framework do the formatting.
        logger.warning(
            "calling CustomObjectsApi->delete_collection_namespaced_custom_object: %s\n", e
        )
        logger.warning("Assume it did not exist")
def create_policy_from_yaml(custom_objects: CustomObjectsApi, yaml_manifest, namespace) -> str:
    """
    Load a Policy manifest from a yaml file and create it in the cluster.

    :param custom_objects: CustomObjectsApi
    :param yaml_manifest: an absolute path to file
    :param namespace: namespace to create the Policy in
    :return: str, the ``metadata.name`` from the manifest
    """
    print("Create a Policy:")
    with open(yaml_manifest) as manifest_file:
        policy = yaml.safe_load(manifest_file)

    try:
        custom_objects.create_namespaced_custom_object(
            "k8s.nginx.org", "v1alpha1", namespace, "policies", policy
        )
    except ApiException as ex:
        logging.exception(
            f"Exception: {ex} occured while creating Policy: {policy['metadata']['name']}"
        )
        raise
    else:
        print(f"Policy created with name '{policy['metadata']['name']}'")
        return policy["metadata"]["name"]
def list_model_builders(
    namespace: str,
    project_name: str,
    project_version: str,
    client: Optional[kubeclient.CoreV1Api] = None,
    selectors: Optional[Dict[str, str]] = None,
):
    """
    Get a list of pods which were responsible for building models for a given
    project and version

    Parameters
    ----------
    namespace: str
        Namespace to operate in
    project_name: str
        Project name
    project_version: str
        Project version
    client: Optional[kubernetes.client.CoreV1Api]
        The client used to list pods; it must provide ``list_namespaced_pod``.
        When not given, the configuration is loaded and a ``CoreV1Api`` is
        created.
    selectors: Optional[Dict[str, str]]
        A mapping of key value pairs representing the label in the workflow
        to match to a value, if not set then match on project_name and
        project_version. The mapping is not modified; ``app`` is always
        forced to ``gordo-model-builder`` in the selector that is sent.

    Returns
    -------
    list
        One ``ModelBuilderPod`` per pod matching the label selector.
    """
    # Set default selectors if not provided.
    if selectors is None:
        selectors = {
            "applications.gordo.equinor.com/project-name": project_name,
            "applications.gordo.equinor.com/project-version": project_version,
        }

    # Build a new dict rather than updating in place, so a mapping passed by
    # the caller is left untouched.
    selectors = {**selectors, "app": "gordo-model-builder"}

    if client is None:
        load_config()
        client = kubeclient.CoreV1Api()

    label_selector = ",".join(f"{k}={v}" for k, v in selectors.items())
    pods = client.list_namespaced_pod(
        namespace=namespace, label_selector=label_selector
    )
    return [ModelBuilderPod(pod, client=client) for pod in pods.items]
def execute(self, context):
    """Create the custom object and watch it until the job completes.

    The in-cluster configuration is tried first, with the local kube config
    as fallback. If ``create_custom_definition`` reports success, the
    namespaced custom objects are watched (bounded by ``self.timeout``
    seconds) until the object named ``self.job_name`` reports the
    application state ``COMPLETED``.

    :param context: execution context; not used by this method.
    """
    # initialize config
    try:
        config = kube_config.load_incluster_config()
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed by the fallback.
        config = kube_config.load_kube_config()

    # create an instance of the API class
    api_instance = CustomObjectsApi(ApiClient(config))

    # params to create custom object
    params = [self.group, self.version, self.namespace, self.plural]
    crd_created = self.create_custom_definition(api_instance, *params)

    if crd_created:
        w = Watch()
        for event in w.stream(api_instance.list_namespaced_custom_object,
                              *params, timeout_seconds=self.timeout):
            watched = event.get('object', {})
            job_name = watched.get('metadata', {}).get('name')
            job_state = (
                watched.get('status', {})
                .get('applicationState', {})
                .get('state')
            )
            if job_name == self.job_name and job_state == "COMPLETED":
                break
def k8s_api_client(client, cluster_name, kube_path=None):
    """Build a Kubernetes ``ApiClient`` for a cluster from its kubeconfig.

    :param client: object exposing ``rancher_api_client()``; only used when
                   ``kube_path`` is not given.
    :param cluster_name: identifier passed to ``by_id_cluster`` to look up
                         the cluster whose kubeconfig is generated.
    :param kube_path: optional path of a kubeconfig file. When given, the
                      file content is used instead of generating a
                      kubeconfig. Previously this argument was overwritten
                      with ``None`` on entry, so the file was never read.
    :return: the ``api_client`` of a ``CustomObjectsApi`` built on the new
             client, with TLS verification disabled and ``api_key`` cleared.
    """
    if kube_path is not None:
        with open(kube_path, "r") as kube_file:
            kube_config = kube_file.read()
    else:
        cluster = client.rancher_api_client().by_id_cluster(cluster_name)
        kube_config = cluster.generateKubeconfig().config

    loader = KubeConfigLoader(config_dict=yaml.full_load(kube_config))

    # NOTE(review): ``type.__call__`` is kept deliberately; it presumably
    # bypasses a metaclass ``__call__`` on Configuration to get a fresh
    # instance — confirm before simplifying to ``Configuration()``.
    client_configuration = type.__call__(Configuration)
    loader.load_and_set(client_configuration)
    client_configuration.api_key = {}
    client_configuration.verify_ssl = False

    k8s_client = ApiClient(configuration=client_configuration)
    return CustomObjectsApi(api_client=k8s_client).api_client
def create_kubeconfig(request, dind_cc, client):
    """Write a cluster scoped kubeconfig to a file and wait for its token.

    The kubeconfig is requested through ``client`` (retrying on
    ``ApiError``), written to a randomly named ``kubeconfig-*.yml`` file
    that is removed by a finalizer on ``request``, and the function then
    waits until the token named in the kubeconfig can be read as a
    ``clusterauthtokens`` object in the ``cattle-system`` namespace.

    :param request: object providing ``addfinalizer`` for cleanup.
    :param dind_cc: cluster context; ``cluster.id`` and ``admin_mc.client``
                    are read from it.
    :param client: client used to look up the cluster and generate the
                   kubeconfig.
    :return: path of the kubeconfig file that was written.
    """
    # request cluster scoped kubeconfig, permissions may not be synced yet
    def generateKubeconfig(max_attempts=5):
        for attempt in range(1, max_attempts + 1):
            try:
                # get cluster for client
                cluster = client.by_id_cluster(dind_cc.cluster.id)
                return cluster.generateKubeconfig()['config']
            except ApiError:
                if attempt == max_attempts:
                    # out of retries: re-raise the active exception
                    raise
                time.sleep(1)

    cluster_kubeconfig = generateKubeconfig()

    # write cluster scoped kubeconfig; the context manager closes the
    # handle even if the write fails
    cluster_kubeconfig_file = "kubeconfig-" + random_str() + ".yml"
    with open(cluster_kubeconfig_file, "w") as f:
        f.write(cluster_kubeconfig)

    # cleanup file when done
    request.addfinalizer(lambda: os.remove(cluster_kubeconfig_file))

    # extract token name: the part of the first user's token before ':'
    config = yaml.safe_load(cluster_kubeconfig)
    token_name = config['users'][0]['user']['token'].split(':')[0]

    # wait for token to sync
    crd_client = CustomObjectsApi(
        kubernetes_api_client(dind_cc.admin_mc.client, dind_cc.cluster.id))

    def cluster_token_available():
        try:
            return crd_client.get_namespaced_custom_object(
                'cluster.cattle.io', 'v3', 'cattle-system',
                'clusterauthtokens', token_name)
        except ApiException:
            return None

    wait_for(cluster_token_available)

    return cluster_kubeconfig_file
def read_ap_crd(custom_objects: CustomObjectsApi, namespace, plural, name) -> object:
    """
    Fetch an AppProtect custom object (``appprotect.f5.com/v1beta1``).

    :param custom_objects: CustomObjectsApi
    :param namespace: The custom resource's namespace
    :param plural: the custom resource's plural name
    :param name: the custom object's name
    :return: object returned by the API client
    """
    print(f"Getting info for {name} in namespace {namespace}")
    try:
        return custom_objects.get_namespaced_custom_object(
            "appprotect.f5.com", "v1beta1", namespace, plural, name
        )
    except ApiException:
        logging.exception("Exception occurred while reading CRD")
        raise
def read_policy(custom_objects: CustomObjectsApi, namespace, name) -> object:
    """
    Fetch a Policy (``k8s.nginx.org/v1``), pretty-print it and return it.

    :param custom_objects: CustomObjectsApi
    :param namespace: The policy's namespace
    :param name: policy's name
    :return: object returned by the API client
    """
    print(f"Getting info for policy {name} in namespace {namespace}")
    try:
        policy = custom_objects.get_namespaced_custom_object(
            "k8s.nginx.org", "v1", namespace, "policies", name
        )
    except ApiException:
        logging.exception("Exception occurred while reading Policy")
        raise
    pprint(policy)
    return policy
def _create_custom_object_with_plural(custom_object: Manifest, plural: str):
    """Create a namespaced custom object from a manifest.

    The API group and version are taken from the manifest's ``apiVersion``
    (split on the last ``/``). ``ApiException`` is logged and re-raised.

    :param custom_object: manifest providing ``body``, ``name`` and
                          ``namespace``.
    :param plural: plural resource name to create the object under.
    :return: the value returned by ``create_namespaced_custom_object``.
    """
    logging.info("Creating %s %r ", custom_object.body["kind"], custom_object.name)
    try:
        api_group, api_version = custom_object.body.get("apiVersion").rsplit("/", 1)
        api = CustomObjectsApi()
        created = api.create_namespaced_custom_object(
            namespace=custom_object.namespace,
            body=custom_object.body,
            group=api_group,
            version=api_version,
            plural=plural,
        )
        return created
    except ApiException as err:
        logging.error(
            "Failed to create %s %r: %s",
            custom_object.body["kind"],
            custom_object.name,
            err.reason,
        )
        raise
def read_custom_resource_v1alpha1(custom_objects: CustomObjectsApi, namespace, plural, name) -> object:
    """
    Fetch a ``k8s.nginx.org/v1alpha1`` custom object, pretty-print it and
    return it.

    :param custom_objects: CustomObjectsApi
    :param namespace: The custom resource's namespace
    :param plural: the custom resource's plural name
    :param name: the custom object's name
    :return: object returned by the API client
    """
    print(f"Getting info for v1alpha1 crd {name} in namespace {namespace}")
    try:
        resource = custom_objects.get_namespaced_custom_object(
            "k8s.nginx.org", "v1alpha1", namespace, plural, name
        )
    except ApiException:
        logging.exception("Exception occurred while reading CRD")
        raise
    pprint(resource)
    return resource
def list(
    cls,
    crd_api: CustomObjectsApi,
    cluster_deployment: 'ClusterDeployment',
) -> List['Agent']:
    """Return the agents assigned to the given cluster deployment.

    All objects of this resource in the cluster deployment's namespace are
    listed; an object is kept when the reference built from its
    ``spec.clusterDeploymentName`` equals ``cluster_deployment.ref``.

    :param crd_api: client used to list the custom objects.
    :param cluster_deployment: cluster deployment whose agents are wanted;
                               its API client is reused for the results.
    :return: one instance of ``cls`` per matching object.
    """
    def _assigned_cluster(item):
        # Reference of the cluster deployment this object is assigned to.
        owner = item['spec']['clusterDeploymentName']
        return ObjectReference(name=owner['name'],
                               namespace=owner['namespace'])

    listing = crd_api.list_namespaced_custom_object(
        group=cls._api_group,
        version=cls._version,
        plural=cls._plural,
        namespace=cluster_deployment.ref.namespace,
    )

    return [
        cls(
            kube_api_client=cluster_deployment.crd_api.api_client,
            name=item['metadata']['name'],
            namespace=item['metadata']['namespace'],
        )
        for item in listing.get('items', [])
        if _assigned_cluster(item) == cluster_deployment.ref
    ]
def _cleanup(resource):
    """Register a finalizer that deletes the given custom resource.

    :param resource: dict with ``kind``, ``apiVersion`` (``group/version``)
                     and ``metadata`` (``name`` and ``namespace``).
    """
    crd_api = ApiextensionsV1beta1Api(admin_mc.k8s_client)
    custom_api = CustomObjectsApi(admin_mc.k8s_client)

    def clean():
        kind = resource["kind"]
        metadata = resource["metadata"]
        api_version = resource["apiVersion"]

        parts = api_version.split("/")
        if len(parts) != 2:
            raise ValueError(
                f'Error parsing ApiVersion [{api_version}].'
                f'Expected form "group/version"')
        group, version = parts

        # The plural name needed for the delete call comes from the first
        # CRD matching kind, group and version.
        candidates = [
            definition
            for definition in crd_api.list_custom_resource_definition().items
            if definition.spec.names.kind == kind
            and definition.spec.group == group
            and definition.spec.version == version
        ]
        crd = candidates[0]

        try:
            custom_api.delete_namespaced_custom_object(
                group,
                version,
                metadata["namespace"],
                crd.spec.names.plural,
                metadata["name"],
                {})
        except ApiException as e:
            # Errors whose code is in WAIT_HTTP_ERROR_CODES are ignored.
            error_body = json.loads(e.body)
            if error_body["code"] not in WAIT_HTTP_ERROR_CODES:
                raise e

    request.addfinalizer(clean)
def __init__(self, controller: "KubernetesController") -> None:
    """Set up the API wrappers on top of the controller's client.

    ``self.api_ex`` is an ``ApiextensionsV1Api`` and ``self.api`` a
    ``CustomObjectsApi``, both built from ``controller.client``.
    """
    super().__init__(controller)
    k8s_client = controller.client
    self.api_ex = ApiextensionsV1Api(k8s_client)
    self.api = CustomObjectsApi(k8s_client)
class TraefikMiddlewareReconciler(KubernetesObjectReconciler[TraefikMiddleware]):
    """Reconciler for the Traefik forward-auth Middleware custom resource"""

    def __init__(self, controller: "KubernetesController") -> None:
        super().__init__(controller)
        self.api_ex = ApiextensionsV1Api(controller.client)
        self.api = CustomObjectsApi(controller.client)

    def _crd_exists(self) -> bool:
        """Return True when a CRD named CRD_NAME is installed"""
        found = self.api_ex.list_custom_resource_definition(
            field_selector=f"metadata.name={CRD_NAME}"
        )
        return len(found.items) > 0

    def reconcile(self, current: TraefikMiddleware, reference: TraefikMiddleware):
        """Raise NeedsUpdate when the forward-auth address has drifted"""
        super().reconcile(current, reference)
        live_address = current.spec.forwardAuth.address
        wanted_address = reference.spec.forwardAuth.address
        if live_address != wanted_address:
            raise NeedsUpdate()

    def get_reference_object(self) -> TraefikMiddleware:
        """Build the desired Middleware object for this outpost.

        Raises Disabled when no provider of the outpost has forward auth
        enabled, or when the CRD is not installed.
        """
        forward_auth_providers = ProxyProvider.objects.filter(
            outpost__in=[self.controller.outpost],
            forward_auth_mode=True,
        )
        if not forward_auth_providers.exists():
            self.logger.debug("No providers with forward auth enabled.")
            raise Disabled()
        if not self._crd_exists():
            self.logger.debug("CRD doesn't exist")
            raise Disabled()

        metadata = TraefikMiddlewareMetadata(
            name=self.name,
            namespace=self.namespace,
            labels=self.get_object_meta().labels,
        )
        forward_auth = TraefikMiddlewareSpecForwardAuth(
            address=f"http://{self.name}.{self.namespace}:4180/akprox/auth?traefik",
            authResponseHeaders=[
                "Set-Cookie",
                "X-Auth-Username",
                "X-Forwarded-Email",
                "X-Forwarded-Preferred-Username",
                "X-Forwarded-User",
            ],
            trustForwardHeader=True,
        )
        return TraefikMiddleware(
            apiVersion=f"{CRD_GROUP}/{CRD_VERSION}",
            kind="Middleware",
            metadata=metadata,
            spec=TraefikMiddlewareSpec(forwardAuth=forward_auth),
        )

    def create(self, reference: TraefikMiddleware):
        """Create the Middleware from the reference object"""
        payload = asdict(reference)
        return self.api.create_namespaced_custom_object(
            group=CRD_GROUP,
            version=CRD_VERSION,
            plural=CRD_PLURAL,
            namespace=self.namespace,
            body=payload,
            field_manager=FIELD_MANAGER,
        )

    def delete(self, reference: TraefikMiddleware):
        """Delete the Middleware named after this reconciler"""
        return self.api.delete_namespaced_custom_object(
            group=CRD_GROUP,
            version=CRD_VERSION,
            namespace=self.namespace,
            plural=CRD_PLURAL,
            name=self.name,
        )

    def retrieve(self) -> TraefikMiddleware:
        """Fetch the live Middleware and convert it to a TraefikMiddleware"""
        raw = self.api.get_namespaced_custom_object(
            group=CRD_GROUP,
            version=CRD_VERSION,
            namespace=self.namespace,
            plural=CRD_PLURAL,
            name=self.name,
        )
        return from_dict(TraefikMiddleware, raw)

    def update(self, current: TraefikMiddleware, reference: TraefikMiddleware):
        """Patch the live Middleware with the reference object"""
        payload = asdict(reference)
        return self.api.patch_namespaced_custom_object(
            group=CRD_GROUP,
            version=CRD_VERSION,
            namespace=self.namespace,
            plural=CRD_PLURAL,
            name=self.name,
            body=payload,
            field_manager=FIELD_MANAGER,
        )
class ClusterDeployment(BaseCustomResource):
    """
    A CRD that represents cluster in assisted-service.
    On creation the cluster will be registered to the service.
    On deletion it will be unregistered from the service.
    When has sufficient data installation will start automatically.
    """

    _plural = "clusterdeployments"
    # Platform section merged into the spec by both create() and patch().
    _platform_field = {"platform": {"agentBareMetal": {"agentSelector": {}}}}

    def __init__(
        self,
        kube_api_client: ApiClient,
        name: str,
        namespace: str = env_variables["namespace"],
    ):
        super().__init__(name, namespace)
        self.crd_api = CustomObjectsApi(kube_api_client)

    def create_from_yaml(self, yaml_data: dict) -> None:
        """Create the resource from an already assembled body."""
        self.crd_api.create_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            body=yaml_data,
            namespace=self.ref.namespace,
        )

        logger.info("created cluster deployment %s: %s", self.ref, pformat(yaml_data))

    def create(
        self,
        secret: Secret,
        base_domain: str = env_variables["base_domain"],
        agent_cluster_install_ref: Optional[ObjectReference] = None,
        **kwargs,
    ):
        """
        Create the resource.

        :param secret: secret whose reference becomes ``pullSecretRef``.
        :param base_domain: value of ``spec.baseDomain``.
        :param agent_cluster_install_ref: when given, stored as
               ``spec.clusterInstallRef``.
        :param kwargs: extra spec fields; applied last, so they override the
               fields set above.
        """
        body = {
            "apiVersion": f"{HIVE_API_GROUP}/{HIVE_API_VERSION}",
            "kind": "ClusterDeployment",
            "metadata": self.ref.as_dict(),
            "spec": {
                "clusterName": self.ref.name,
                "baseDomain": base_domain,
                "pullSecretRef": secret.ref.as_dict(),
            },
        }
        body["spec"].update(self._platform_field)

        if agent_cluster_install_ref:
            body["spec"]["clusterInstallRef"] = agent_cluster_install_ref.as_dict()

        body["spec"].update(kwargs)
        self.crd_api.create_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            body=body,
            namespace=self.ref.namespace,
        )

        logger.info("created cluster deployment %s: %s", self.ref, pformat(body))

    def patch(
        self,
        secret: Optional[Secret] = None,
        **kwargs,
    ) -> None:
        """
        Patch the resource's spec.

        :param secret: when given, its reference becomes ``pullSecretRef``.
        :param kwargs: spec fields to patch. ``agent_cluster_install_ref``
               is translated into ``clusterInstallRef`` and is not sent
               under its own name. ``platform`` is always set from
               ``_platform_field``.
        """
        spec = dict(kwargs)
        spec.update(self._platform_field)

        if secret:
            spec["pullSecretRef"] = secret.ref.as_dict()

        # Pop the python-level argument so the raw reference object does
        # not end up in the request body next to its dict form.
        install_ref = spec.pop("agent_cluster_install_ref", None)
        if install_ref is not None:
            spec["clusterInstallRef"] = install_ref.as_dict()

        body = {"spec": spec}
        self.crd_api.patch_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=self.ref.name,
            namespace=self.ref.namespace,
            body=body,
        )

        logger.info("patching cluster deployment %s: %s", self.ref, pformat(body))

    def annotate_install_config(self, install_config: str) -> None:
        """Set the ``install-config-overrides`` annotation on the resource."""
        body = {"metadata": {"annotations": {f"{CRD_API_GROUP}/install-config-overrides": install_config}}}

        self.crd_api.patch_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=self.ref.name,
            namespace=self.ref.namespace,
            body=body,
        )

        logger.info("patching cluster install config %s: %s", self.ref, pformat(body))

    def get(self) -> dict:
        """Return the resource as returned by the API client."""
        return self.crd_api.get_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=self.ref.name,
            namespace=self.ref.namespace,
        )

    def delete(self) -> None:
        """Delete the resource."""
        self.crd_api.delete_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=self.ref.name,
            namespace=self.ref.namespace,
        )

        logger.info("deleted cluster deployment %s", self.ref)

    def status(
        self,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_CRD_STATUS_TIMEOUT,
    ) -> dict:
        """
        Status is a section in the CRD that is created after registration to
        assisted service and it defines the observed state of ClusterDeployment.
        Since the status key is created only after resource is processed by the
        controller in the service, it might take a few seconds before appears.
        """

        def _attempt_to_get_status() -> dict:
            return self.get()["status"]

        return waiting.wait(
            _attempt_to_get_status,
            sleep_seconds=0.5,
            timeout_seconds=timeout,
            waiting_for=f"cluster {self.ref} status",
            expected_exceptions=KeyError,
        )

    def condition(
        self,
        cond_type,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_CRD_STATE_TIMEOUT,
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Return ``(status, reason)`` of the first condition whose type is
        ``cond_type``, or ``(None, None)`` when there is no such condition.
        """
        for condition in self.status(timeout).get("conditions", []):
            if cond_type == condition.get("type"):
                return condition.get("status"), condition.get("reason")

        return None, None

    def wait_for_condition(
        self,
        cond_type: str,
        required_status: str,
        required_reason: Optional[str] = None,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_CRD_STATE_TIMEOUT,
    ) -> None:
        """
        Wait until condition ``cond_type`` has ``required_status`` and, when
        ``required_reason`` is given, that reason as well.
        """

        def _has_required_condition() -> Optional[bool]:
            status, reason = self.condition(cond_type=cond_type, timeout=0.5)
            if status == required_status:
                if required_reason:
                    return required_reason == reason
                return True
            return False

        logger.info(
            "Waiting till cluster will be in condition %s with status: %s " "reason: %s",
            cond_type,
            required_status,
            required_reason,
        )

        # condition() waits for the status section with its own short
        # timeout; its TimeoutExpired is listed here as an expected
        # exception of the outer wait.
        waiting.wait(
            _has_required_condition,
            timeout_seconds=timeout,
            waiting_for=f"cluster {self.ref} condition {cond_type} to be in {required_status}",
            expected_exceptions=waiting.exceptions.TimeoutExpired,
        )

    def list_agents(self) -> List[Agent]:
        """Return the agents assigned to this cluster deployment."""
        return Agent.list(self.crd_api, self)

    def wait_for_agents(
        self,
        num_agents: int = 1,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_AGENTS_TIMEOUT,
    ) -> List[Agent]:
        """Wait until exactly ``num_agents`` agents are assigned; return them."""

        def _wait_for_sufficient_agents_number() -> List[Agent]:
            agents = self.list_agents()
            # An empty list is falsy, which keeps the wait going.
            return agents if len(agents) == num_agents else []

        return waiting.wait(
            _wait_for_sufficient_agents_number,
            sleep_seconds=0.5,
            timeout_seconds=timeout,
            waiting_for=f"cluster {self.ref} to have {num_agents} agents",
        )
class Agent(BaseCustomResource):
    """
    Wrapper around the Agent custom resource of assisted-service.
    The service creates an Agent resource when a host registers to the
    cluster and assigns it to the relevant cluster. All assigned agents
    must be approved before the installation can start.
    """

    _plural = "agents"

    def __init__(
        self,
        kube_api_client: ApiClient,
        name: str,
        namespace: str = consts.DEFAULT_NAMESPACE,
    ):
        super().__init__(name, namespace)
        self.crd_api = CustomObjectsApi(kube_api_client)

    @classmethod
    def list(
        cls,
        crd_api: CustomObjectsApi,
        cluster_deployment: "ClusterDeployment",
    ) -> List["Agent"]:
        """
        Return the agents in the cluster deployment's namespace whose
        ``spec.clusterDeploymentName`` refers to that cluster deployment.
        """

        def _assigned_cluster(item):
            owner = item["spec"]["clusterDeploymentName"]
            return ObjectReference(name=owner["name"], namespace=owner["namespace"])

        listing = crd_api.list_namespaced_custom_object(
            group=CRD_API_GROUP,
            version=CRD_API_VERSION,
            plural=cls._plural,
            namespace=cluster_deployment.ref.namespace,
        )

        return [
            cls(
                kube_api_client=cluster_deployment.crd_api.api_client,
                name=item["metadata"]["name"],
                namespace=item["metadata"]["namespace"],
            )
            for item in listing.get("items", [])
            if _assigned_cluster(item) == cluster_deployment.ref
        ]

    def create(self):
        """Always raises: agents are not created through this wrapper."""
        raise RuntimeError("agent resource must be created by the assisted-installer operator")

    def get(self) -> dict:
        """Return the agent as returned by the API client."""
        ref = self.ref
        return self.crd_api.get_namespaced_custom_object(
            group=CRD_API_GROUP,
            version=CRD_API_VERSION,
            plural=self._plural,
            name=ref.name,
            namespace=ref.namespace,
        )

    def patch(self, **kwargs) -> None:
        """Patch the agent's spec with the given fields."""
        ref = self.ref
        spec_patch = {"spec": kwargs}

        self.crd_api.patch_namespaced_custom_object(
            group=CRD_API_GROUP,
            version=CRD_API_VERSION,
            plural=self._plural,
            name=ref.name,
            namespace=ref.namespace,
            body=spec_patch,
        )

        logger.info("patching agent %s: %s", self.ref, pformat(spec_patch))

    def delete(self) -> None:
        """Delete the agent."""
        ref = self.ref
        self.crd_api.delete_namespaced_custom_object(
            group=CRD_API_GROUP,
            version=CRD_API_VERSION,
            plural=self._plural,
            name=ref.name,
            namespace=ref.namespace,
        )

        logger.info("deleted agent %s", self.ref)

    def status(self, timeout: Union[int, float] = DEFAULT_WAIT_FOR_CRD_STATUS_TIMEOUT) -> dict:
        """Wait for the ``status`` section to appear and return it."""

        def _read_status() -> dict:
            # KeyError (no status yet) is an expected exception of the wait.
            return self.get()["status"]

        return waiting.wait(
            _read_status,
            sleep_seconds=0.5,
            timeout_seconds=timeout,
            waiting_for=f"agent {self.ref} status",
            expected_exceptions=KeyError,
        )

    def approve(self) -> None:
        """Patch ``spec.approved`` to True."""
        self.patch(approved=True)

        logger.info("approved agent %s", self.ref)
class ClusterDeployment(BaseCustomResource):
    """
    Wrapper around the ClusterDeployment custom resource, which represents
    a cluster in assisted-service. Creating the resource registers the
    cluster to the service and deleting it unregisters the cluster. The
    installation starts automatically once there is sufficient data.
    """

    _plural = 'clusterdeployments'

    def __init__(
        self,
        kube_api_client: ApiClient,
        name: str,
        namespace: str = env_variables['namespace'],
    ):
        super().__init__(name, namespace)
        self.crd_api = CustomObjectsApi(kube_api_client)

    def create_from_yaml(self, yaml_data: dict) -> None:
        """Create the resource from an already assembled body."""
        self.crd_api.create_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            body=yaml_data,
            namespace=self.ref.namespace,
        )

        logger.info('created cluster deployment %s: %s', self.ref, pformat(yaml_data))

    def create(
        self,
        platform: Platform,
        install_strategy: InstallStrategy,
        secret: Secret,
        base_domain: str = env_variables['base_domain'],
        **kwargs,
    ):
        """
        Create the resource from its parts; ``kwargs`` are merged into the
        spec last and therefore override the fields set here.
        """
        spec = {
            'clusterName': self.ref.name,
            'baseDomain': base_domain,
            'platform': platform.as_dict(),
            'provisioning': {'installStrategy': install_strategy.as_dict()},
            'pullSecretRef': secret.ref.as_dict(),
        }
        body = {
            'apiVersion': f'{HIVE_API_GROUP}/{HIVE_API_VERSION}',
            'kind': 'ClusterDeployment',
            'metadata': self.ref.as_dict(),
            'spec': spec,
        }
        spec.update(kwargs)

        self.crd_api.create_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            body=body,
            namespace=self.ref.namespace,
        )

        logger.info('created cluster deployment %s: %s', self.ref, pformat(body))

    def patch(
        self,
        platform: Optional[Platform] = None,
        install_strategy: Optional[InstallStrategy] = None,
        secret: Optional[Secret] = None,
        **kwargs,
    ) -> None:
        """
        Patch the spec with ``kwargs`` plus whichever of platform, install
        strategy and pull secret were given.
        """
        spec = kwargs
        if platform:
            spec['platform'] = platform.as_dict()
        if install_strategy:
            spec['provisioning'] = {'installStrategy': install_strategy.as_dict()}
        if secret:
            spec['pullSecretRef'] = secret.ref.as_dict()
        body = {'spec': spec}

        self.crd_api.patch_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=self.ref.name,
            namespace=self.ref.namespace,
            body=body,
        )

        logger.info('patching cluster deployment %s: %s', self.ref, pformat(body))

    def annotate_install_config(self, install_config: str) -> None:
        """Set the ``install-config-overrides`` annotation on the resource."""
        annotation_key = f'{CRD_API_GROUP}/install-config-overrides'
        body = {'metadata': {'annotations': {annotation_key: install_config}}}

        self.crd_api.patch_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=self.ref.name,
            namespace=self.ref.namespace,
            body=body,
        )

        logger.info('patching cluster install config %s: %s', self.ref, pformat(body))

    def get(self) -> dict:
        """Return the resource as returned by the API client."""
        ref = self.ref
        return self.crd_api.get_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=ref.name,
            namespace=ref.namespace,
        )

    def delete(self) -> None:
        """Delete the resource."""
        ref = self.ref
        self.crd_api.delete_namespaced_custom_object(
            group=HIVE_API_GROUP,
            version=HIVE_API_VERSION,
            plural=self._plural,
            name=ref.name,
            namespace=ref.namespace,
        )

        logger.info('deleted cluster deployment %s', self.ref)

    def status(
        self,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_CRD_STATUS_TIMEOUT,
    ) -> dict:
        """
        Wait for the ``status`` section of the resource and return it.
        The section is read with ``get()['status']``; a missing key is
        retried until ``timeout`` seconds have passed.
        """

        def _read_status() -> dict:
            return self.get()['status']

        return waiting.wait(
            _read_status,
            sleep_seconds=0.5,
            timeout_seconds=timeout,
            waiting_for=f'cluster {self.ref} status',
            expected_exceptions=KeyError,
        )

    def state(
        self,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_CRD_STATE_TIMEOUT,
    ) -> Tuple[str, str]:
        """
        Return ``(state, state_info)`` taken from the messages of the
        conditions whose reasons are ``AgentPlatformState`` and
        ``AgentPlatformStateInfo``; a value that is not found is None.
        """
        current_state = None
        current_info = None

        for cond in self.status(timeout).get('conditions', []):
            kind = cond.get('reason')
            if kind == 'AgentPlatformState':
                current_state = cond.get('message')
            elif kind == 'AgentPlatformStateInfo':
                current_info = cond.get('message')

            # Stop scanning once both values are known.
            if current_state and current_info:
                break

        return current_state, current_info

    def wait_for_state(
        self,
        required_state: str,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_CRD_STATE_TIMEOUT,
        *,
        raise_on_states: Iterable[str] = FAILURE_STATES,
    ) -> None:
        """
        Wait until the cluster state equals ``required_state`` (compared
        case-insensitively). Raises UnexpectedStateError when the state
        becomes one of ``raise_on_states`` instead.
        """
        required_state = required_state.lower()
        raise_on_states = [x.lower() for x in raise_on_states]

        def _has_required_state() -> Optional[bool]:
            state, state_info = self.state(timeout=0.5)
            if state:
                state = state.lower()

            if state == required_state:
                return True
            if state in raise_on_states:
                raise UnexpectedStateError(
                    f'while waiting for state `{required_state}`, cluster '
                    f'{self.ref} state changed unexpectedly to `{state}`: '
                    f'{state_info}')
            return None

        logger.info("Waiting till cluster will be in %s state", required_state)
        waiting.wait(
            _has_required_state,
            timeout_seconds=timeout,
            waiting_for=f'cluster {self.ref} state to be {required_state}',
            expected_exceptions=waiting.exceptions.TimeoutExpired,
        )

    def list_agents(self) -> List[Agent]:
        """Return the agents assigned to this cluster deployment."""
        return Agent.list(self.crd_api, self)

    def wait_for_agents(
        self,
        num_agents: int = 1,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_AGENTS_TIMEOUT,
    ) -> List[Agent]:
        """Wait until exactly ``num_agents`` agents are assigned; return them."""

        def _agents_when_complete() -> List[Agent]:
            found = self.list_agents()
            if len(found) == num_agents:
                return found
            # An empty list is falsy, which keeps the wait going.
            return []

        return waiting.wait(
            _agents_when_complete,
            sleep_seconds=0.5,
            timeout_seconds=timeout,
            waiting_for=f'cluster {self.ref} to have {num_agents} agents',
        )

    def wait_to_be_installed(
        self,
        timeout: Union[int, float] = DEFAULT_WAIT_FOR_INSTALLATION_COMPLETE_TIMEOUT,
    ) -> None:
        """Wait until ``spec.installed`` of the resource is True."""

        def _is_installed() -> bool:
            return self.get()['spec'].get('installed') is True

        waiting.wait(
            _is_installed,
            timeout_seconds=timeout,
            waiting_for=f'cluster {self.ref} state installed',
            expected_exceptions=waiting.exceptions.TimeoutExpired,
        )

    def download_kubeconfig(self, kubeconfig_path):
        """
        Wait for the admin kubeconfig secret reference to appear in the
        spec, read the secret and write its base64-decoded ``kubeconfig``
        entry to ``kubeconfig_path``.
        """

        def _kubeconfig_secret_ref() -> dict:
            return self.get()['spec']['clusterMetadata']['adminKubeconfigSecretRef']

        secret_ref = waiting.wait(
            _kubeconfig_secret_ref,
            sleep_seconds=1,
            timeout_seconds=240,
            expected_exceptions=KeyError,
            waiting_for=f'kubeconfig secret creation for cluster {self.ref}',
        )

        kubeconfig_secret = Secret(
            kube_api_client=self.crd_api.api_client,
            **secret_ref,
        )
        encoded_kubeconfig = kubeconfig_secret.get().data['kubeconfig']

        with open(kubeconfig_path, 'wt') as kubeconfig_file:
            kubeconfig_file.write(b64decode(encoded_kubeconfig).decode())
def test_legacy_template_migrate_and_delete(admin_mc, admin_cc,
                                            remove_resource, user_mc,
                                            raw_remove_custom_resource):
    """Asserts that any node template not in cattle-global-nt namespace is
    duplicated into cattle-global-nt, then deleted. Also, asserts that
    operations on legacy node templates are forwarded to corresponding
    migrated node templates"""
    admin_client = admin_mc.client
    admin_cc_client = admin_cc.client
    user_client = user_mc.client

    k8s_dynamic_client = CustomObjectsApi(admin_mc.k8s_client)

    ns = admin_cc_client.create_namespace(name="ns-" + random_str(),
                                          clusterId=admin_cc.cluster.id)
    remove_resource(ns)

    node_template_name = "nt-" + random_str()
    body = {
        "metadata": {
            "name": node_template_name,
            "annotations": {
                "field.cattle.io/creatorId": user_mc.user.id
            }
        },
        "kind": "NodeTemplate",
        "apiVersion": "management.cattle.io/v3",
        "azureConfig": {
            "customData": "asdfsadfsd"
        }
    }

    # create a node template that will be recognized as legacy
    dynamic_nt = k8s_dynamic_client.create_namespaced_custom_object(
        "management.cattle.io", "v3", ns.name, 'nodetemplates', body)
    raw_remove_custom_resource(dynamic_nt)

    def migrated_template_exists(template_id):
        """Return the migrated template once visible to the user, else False.

        A 403 is the only tolerated error while the migration is pending.
        """
        try:
            nt = user_client.by_id_node_template(id=template_id)
            remove_resource(nt)
            return nt
        except ApiError as e:
            assert e.error.status == 403
            return False

    legacy_id = dynamic_nt["metadata"]["name"]
    legacy_ns = dynamic_nt["metadata"]["namespace"]
    full_legacy_id = legacy_ns + ":" + legacy_id
    # id the template is expected to get after migration
    migrated_id = "cattle-global-nt:nt-" + ns.id + "-" + legacy_id

    # wait for node template to be migrated
    nt = wait_for(
        lambda: migrated_template_exists(migrated_id),
        fail_handler=lambda:
        "failed waiting for node template to migrate")

    # assert that config has not been removed from node template
    assert nt.azureConfig["customData"] ==\
        dynamic_nt["azureConfig"]["customData"]

    def legacy_template_deleted():
        """Return True once the legacy custom object lookup answers 404."""
        try:
            k8s_dynamic_client.get_namespaced_custom_object(
                "management.cattle.io", "v3", ns.name, 'nodetemplates',
                legacy_id)
            return False
        except ApiException as e:
            return e.status == 404

    # wait for legacy node template to be deleted
    wait_for(
        lambda: legacy_template_deleted(),
        fail_handler=lambda:
        "failed waiting for old node template to delete")

    # retrieve node template via legacy id
    nt = admin_client.by_id_node_template(id=full_legacy_id)
    # retrieve node template via migrated id
    migrated_nt = admin_client.by_id_node_template(id=migrated_id)

    def compare(d1, d2):
        """Compare two objects field by field, ignoring id/ns related keys."""
        if d1 == d2:
            return True
        if d1.keys() != d2.keys():
            return False
        for key in d1.keys():
            if key in ["id", "namespace", "links", "annotations"]:
                continue
            if d1[key] == d2[key]:
                continue
            if callable(d1[key]):
                continue
            if isinstance(d1[key], RestObject):
                # Recurse into the matching value of the *other* object;
                # comparing d1[key] with itself would always succeed and
                # hide nested differences.
                if compare(d1[key], d2[key]):
                    continue
            return False
        return True

    # ensure templates returned are identical aside from fields containing
    # id/ns
    if not compare(nt, migrated_nt):
        raise Exception("forwarded does not match migrated nodetemplate")

    nt.azureConfig.customData = "asdfasdf"
    new_config = nt.azureConfig
    new_config.customData = "adsfasdfadsf"

    # update node template via legacy id
    nt = admin_client.update_by_id_node_template(
        id=full_legacy_id,
        azureConfig=new_config)

    # assert node template is being updated
    assert nt.azureConfig.customData == new_config.customData
    nt2 = admin_client.by_id_node_template(id=migrated_id)
    # assert node template being updated is migrated node template
    assert nt2.azureConfig.customData == new_config.customData

    # delete node template via legacy id
    admin_client.delete(nt)
    wait_for(
        lambda: admin_client.by_id_node_template(migrated_id) is None,
        fail_handler=lambda:
        "failed waiting for migrate node template to delete")
class TatorTranscode(JobManagerMixin):
    """ Interface to kubernetes REST API for starting transcodes.

    Builds Argo ``Workflow`` manifests (``argoproj.io/v1alpha1``) from
    reusable template dictionaries and submits them as custom objects in the
    ``default`` namespace.
    """

    def __init__(self):
        """ Initializes the connection. If environment variables for
            remote transcode are defined, connect to that cluster.

            ``REMOTE_TRANSCODE_HOST`` selects remote mode; port, token and CA
            certificate path come from ``REMOTE_TRANSCODE_PORT``,
            ``REMOTE_TRANSCODE_TOKEN`` and ``REMOTE_TRANSCODE_CERT``.
            Otherwise the in-cluster configuration is loaded.
        """
        host = os.getenv('REMOTE_TRANSCODE_HOST')
        port = os.getenv('REMOTE_TRANSCODE_PORT')
        token = os.getenv('REMOTE_TRANSCODE_TOKEN')
        cert = os.getenv('REMOTE_TRANSCODE_CERT')
        self.remote = host is not None

        if self.remote:
            conf = Configuration()
            conf.api_key['authorization'] = token
            conf.host = f'https://{host}:{port}'
            conf.verify_ssl = True
            conf.ssl_ca_cert = cert
            api_client = ApiClient(conf)
            self.corev1 = CoreV1Api(api_client)
            self.custom = CustomObjectsApi(api_client)
        else:
            load_incluster_config()
            self.corev1 = CoreV1Api()
            self.custom = CustomObjectsApi()

        self.setup_common_steps()

    def setup_common_steps(self):
        """ Sets up the basic steps for a transcode pipeline.

            Populates ``delete_task``, ``unpack_task``, ``data_import``,
            ``prepare_task``, ``transcode_task`` and ``image_upload_task``
            with workflow template dictionaries.
        """
        def spell_out_params(params):
            # Turn a list of names into a list of {'name': ...} entries.
            return [{"name": x} for x in params]

        # Define each task in the pipeline.

        # Deletes the remote TUS file
        self.delete_task = {
            'name': 'delete',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'inputs': {'parameters' : spell_out_params(['url'])},
            'nodeSelector' : {'cpuWorker' : 'yes'},
            'container': {
                'image': '{{workflow.parameters.client_image}}',
                'imagePullPolicy': 'IfNotPresent',
                'command': ['curl',],
                'args': ['-X', 'DELETE', '{{inputs.parameters.url}}'],
                'resources': {
                    'limits': {
                        'memory': '1Gi',
                        'cpu': '250m',
                    },
                },
            },
        }

        # Unpacks a tarball and sets up the work products for follow up
        # dags or steps
        unpack_params = [{'name': f'videos-{x}',
                          'valueFrom': {'path': f'/work/videos_{x}.json'}}
                         for x in range(NUM_WORK_PACKETS)]

        # TODO: Don't make work packets for localizations / states
        unpack_params.extend([{'name': f'localizations-{x}',
                               'valueFrom': {'path': f'/work/localizations_{x}.json'}}
                              for x in range(NUM_WORK_PACKETS)])

        unpack_params.extend([{'name': f'states-{x}',
                               'valueFrom': {'path': f'/work/states_{x}.json'}}
                              for x in range(NUM_WORK_PACKETS)])

        self.unpack_task = {
            'name': 'unpack',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'inputs': {'parameters' : spell_out_params(['original'])},
            'outputs': {'parameters' : unpack_params},
            'nodeSelector' : {'cpuWorker' : 'yes'},
            'container': {
                'image': '{{workflow.parameters.client_image}}',
                'imagePullPolicy': 'IfNotPresent',
                'command': ['bash',],
                'args': ['unpack.sh', '{{inputs.parameters.original}}', '/work'],
                'volumeMounts': [{
                    'name': 'transcode-scratch',
                    'mountPath': '/work',
                }],
                'resources': {
                    'limits': {
                        'memory': '4Gi',
                        'cpu': '1000m',
                    },
                },
            },
        }

        self.data_import = {
            'name': 'data-import',
            'inputs': {'parameters' : spell_out_params(['md5', 'file', 'mode'])},
            'nodeSelector' : {'cpuWorker' : 'yes'},
            'container': {
                'image': '{{workflow.parameters.client_image}}',
                'imagePullPolicy': 'IfNotPresent',
                'command': ['python3',],
                'args': ['importDataFromCsv.py',
                         '--host', '{{workflow.parameters.host}}',
                         '--token', '{{workflow.parameters.token}}',
                         '--project', '{{workflow.parameters.project}}',
                         '--mode', '{{inputs.parameters.mode}}',
                         '--media-md5', '{{inputs.parameters.md5}}',
                         '{{inputs.parameters.file}}'],
                'volumeMounts': [{
                    'name': 'transcode-scratch',
                    'mountPath': '/work',
                }],
                'resources': {
                    'limits': {
                        'memory': '4Gi',
                        'cpu': '1000m',
                    },
                },
            },
        }

        self.prepare_task = {
            'name': 'prepare',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'retryStrategy': {
                'retryPolicy': 'Always',
                'limit': 3,
                'backoff': {
                    'duration': '5s',
                    'factor': 2
                },
            },
            'nodeSelector' : {'cpuWorker' : 'yes'},
            'container': {
                'image': '{{workflow.parameters.client_image}}',
                'imagePullPolicy': 'IfNotPresent',
                'command': ['python3',],
                'args': ['-m', 'tator.transcode.prepare',
                         '--url', '{{workflow.parameters.url}}',
                         '--work_dir', '/work',
                         '--host', '{{workflow.parameters.host}}',
                         '--token', '{{workflow.parameters.token}}',
                         '--project', '{{workflow.parameters.project}}',
                         '--type', '{{workflow.parameters.type}}',
                         '--name', '{{workflow.parameters.upload_name}}',
                         '--section', '{{workflow.parameters.section}}',
                         '--gid', '{{workflow.parameters.gid}}',
                         '--uid', '{{workflow.parameters.uid}}',
                         '--attributes', '{{workflow.parameters.attributes}}',
                         '--media_id', '{{workflow.parameters.media_id}}',
                ],
                'workingDir': '/scripts',
                'volumeMounts': [{
                    'name': 'scratch-prepare',
                    'mountPath': '/work',
                }],
                'resources': {
                    'limits': {
                        'memory': os.getenv('TRANSCODER_MEMORY_LIMIT'),
                        'cpu': os.getenv('TRANSCODER_CPU_LIMIT'),
                    },
                },
            },
            'outputs': {
                'parameters': [{
                    'name': 'workloads',
                    'valueFrom': {'path': '/work/workloads.json'},
                }, {
                    'name': 'media_id',
                    'valueFrom': {'path': '/work/media_id.txt'},
                }],
            },
        }

        self.transcode_task = {
            'name': 'transcode',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'retryStrategy': {
                'retryPolicy': 'Always',
                'limit': 3,
                'backoff': {
                    'duration': '5s',
                    'factor': 2
                },
            },
            'nodeSelector' : {'cpuWorker' : 'yes'},
            'inputs': {'parameters' : spell_out_params(['original', 'transcoded', 'media',
                                                        'category', 'raw_width', 'raw_height',
                                                        'configs', 'id'])},
            'container': {
                'image': '{{workflow.parameters.client_image}}',
                'imagePullPolicy': 'IfNotPresent',
                'command': ['python3',],
                'args': ['-m', 'tator.transcode.transcode',
                         '--url', '{{workflow.parameters.url}}',
                         '--work_dir', '/work',
                         '--host', '{{workflow.parameters.host}}',
                         '--token', '{{workflow.parameters.token}}',
                         '--media', '{{inputs.parameters.media}}',
                         '--category', '{{inputs.parameters.category}}',
                         '--raw_width', '{{inputs.parameters.raw_width}}',
                         '--raw_height', '{{inputs.parameters.raw_height}}',
                         '--configs', '{{inputs.parameters.configs}}'],
                'workingDir': '/scripts',
                'volumeMounts': [{
                    'name': 'scratch-{{inputs.parameters.id}}',
                    'mountPath': '/work',
                }],
                'resources': {
                    'limits': {
                        'memory': os.getenv('TRANSCODER_MEMORY_LIMIT'),
                        'cpu': os.getenv('TRANSCODER_CPU_LIMIT'),
                    },
                },
            },
        }

        self.image_upload_task = {
            'name': 'image-upload',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'retryStrategy': {
                'retryPolicy': 'Always',
                'limit': 3,
                'backoff': {
                    'duration': '5s',
                    'factor': 2
                },
            },
            'nodeSelector' : {'cpuWorker' : 'yes'},
            'container': {
                'image': '{{workflow.parameters.client_image}}',
                'imagePullPolicy': 'IfNotPresent',
                'command': ['python3',],
                'args': [
                    'imageLoop.py',
                    '--host', '{{workflow.parameters.host}}',
                    '--token', '{{workflow.parameters.token}}',
                    '--project', '{{workflow.parameters.project}}',
                    '--gid', '{{workflow.parameters.gid}}',
                    '--uid', '{{workflow.parameters.uid}}',
                    # TODO: If we made section a DAG argument, we could
                    # conceivably import a tar across multiple sections
                    '--section', '{{workflow.parameters.section}}',
                    '--progressName', '{{workflow.parameters.upload_name}}',
                ],
                'workingDir': '/scripts',
                'volumeMounts': [{
                    'name': 'transcode-scratch',
                    'mountPath': '/work',
                }],
                'resources': {
                    'limits': {
                        'memory': '1Gi',
                        'cpu': '250m',
                    },
                },
            },
        }

    def get_download_task(self, headers=None):
        """ Return the ``download`` template dictionary.

            :param headers: optional extra ``wget`` arguments inserted between
                            the output option and the url; ``None`` means no
                            extra arguments.
        """
        # A None default avoids sharing one mutable list between calls.
        extra_args = [] if headers is None else list(headers)
        # Download task exports the human readable filename a
        # workflow global to support the onExit handler
        return {
            'name': 'download',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'retryStrategy': {
                'retryPolicy': 'Always',
                'limit': 3,
                'backoff': {
                    'duration': '5s',
                    'factor': 2
                },
            },
            'inputs': {'parameters' : [{'name': 'original'},
                                       {'name': 'url'}]},
            'nodeSelector' : {'cpuWorker' : 'yes'},
            'container': {
                'image': '{{workflow.parameters.client_image}}',
                'imagePullPolicy': 'IfNotPresent',
                'command': ['wget',],
                'args': ['-O', '{{inputs.parameters.original}}']
                        + extra_args
                        + ['{{inputs.parameters.url}}'],
                'volumeMounts': [{
                    'name': 'transcode-scratch',
                    'mountPath': '/work',
                }],
                'resources': {
                    'limits': {
                        'memory': '4Gi',
                        'cpu': '1000m',
                    },
                },
            },
        }

    def get_unpack_and_transcode_tasks(self, paths, url):
        """ Generate a task object describing the dependencies of a transcode from tar

            :param paths: mapping of parameter name to value, passed as DAG
                          arguments next to ``url``.
            :param url: value of the ``url`` DAG argument.
            :returns: the ``unpack-pipeline`` template dictionary.
        """

        # Generate an args structure for the DAG
        args = [{'name': 'url', 'value': url}]
        for key in paths:
            args.append({'name': key, 'value': paths[key]})
        parameters = {"parameters" : args}

        def make_item_arg(name):
            # Renders to '{{item.<name>}}'; the doubled braces are f-string
            # escapes.
            return {'name': name,
                    'value': f'{{{{item.{name}}}}}'}

        instance_args = ['entity_type',
                         'name',
                         'md5']

        item_parameters = {"parameters" : [make_item_arg(x) for x in instance_args]}
        # unpack work list
        item_parameters["parameters"].append({"name": "url",
                                              "value": "None"})
        item_parameters["parameters"].append({"name": "original",
                                              "value": "{{item.dirname}}/{{item.name}}"})
        item_parameters["parameters"].append({"name": "transcoded",
                                              "value": "{{item.dirname}}/{{item.base}}_transcoded"})
        item_parameters["parameters"].append({"name": "thumbnail",
                                              "value": "{{item.dirname}}/{{item.base}}_thumbnail.jpg"})
        item_parameters["parameters"].append({"name": "thumbnail_gif",
                                              "value": "{{item.dirname}}/{{item.base}}_thumbnail_gif.gif"})
        item_parameters["parameters"].append({"name": "segments",
                                              "value": "{{item.dirname}}/{{item.base}}_segments.json"})

        state_import_parameters = {"parameters" : [make_item_arg(x) for x in ["md5", "file"]]}
        localization_import_parameters = {"parameters" : [make_item_arg(x) for x in ["md5", "file"]]}

        state_import_parameters["parameters"].append({"name": "mode", "value": "state"})
        localization_import_parameters["parameters"].append({"name": "mode", "value": "localizations"})

        unpack_task = {
            'name': 'unpack-pipeline',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'dag': {
                # First download, unpack and delete archive. Then Iterate over each video and upload
                # Lastly iterate over all localization and state files.
                'tasks' : [{'name': 'download-task',
                            'template': 'download',
                            'arguments': parameters},
                           {'name': 'unpack-task',
                            'template': 'unpack',
                            'arguments': parameters,
                            'dependencies' : ['download-task']},
                           {'name': 'delete-task',
                            'template': 'delete',
                            'arguments': parameters,
                            'dependencies' : ['unpack-task']}
                           ]
            }
        } # end of dag

        unpack_task['dag']['tasks'].extend([{'name': f'transcode-task-{x}',
                                             'template': 'transcode-pipeline',
                                             'arguments' : item_parameters,
                                             'withParam' : f'{{{{tasks.unpack-task.outputs.parameters.videos-{x}}}}}',
                                             'dependencies' : ['unpack-task']} for x in range(NUM_WORK_PACKETS)])
        unpack_task['dag']['tasks'].append({'name': 'image-upload-task',
                                            'template': 'image-upload',
                                            'dependencies' : ['unpack-task']})

        # Imports wait for every transcode packet and for the image upload.
        deps = [f'transcode-task-{x}' for x in range(NUM_WORK_PACKETS)]
        deps.append('image-upload-task')
        unpack_task['dag']['tasks'].extend([{'name': f'state-import-task-{x}',
                                             'template': 'data-import',
                                             'arguments' : state_import_parameters,
                                             'dependencies' : deps,
                                             'withParam': f'{{{{tasks.unpack-task.outputs.parameters.states-{x}}}}}'} for x in range(NUM_WORK_PACKETS)])

        unpack_task['dag']['tasks'].extend([{'name': f'localization-import-task-{x}',
                                             'template': 'data-import',
                                             'arguments' : localization_import_parameters,
                                             'dependencies' : deps,
                                             'withParam': f'{{{{tasks.unpack-task.outputs.parameters.localizations-{x}}}}}'} for x in range(NUM_WORK_PACKETS)])
        return unpack_task

    def get_transcode_dag(self, media_id=None):
        """ Return the DAG that describes transcoding a single media file

            :param media_id: when given, used (as a string) for the ``media``
                             argument of the transcode task; otherwise the
                             ``media_id`` output of the prepare task is used.
        """
        def make_passthrough_arg(name):
            # Renders to '{{inputs.parameters.<name>}}'.
            return {'name': name,
                    'value': f'{{{{inputs.parameters.{name}}}}}'}

        instance_args = ['url',
                         'original',
                         'transcoded',
                         'thumbnail',
                         'thumbnail_gif',
                         'segments',
                         'entity_type',
                         'name',
                         'md5']
        passthrough_parameters = {"parameters" : [make_passthrough_arg(x) for x in instance_args]}

        if media_id is None:
            media_value = '{{tasks.prepare-task.outputs.parameters.media_id}}'
        else:
            media_value = str(media_id)

        pipeline_task = {
            'name': 'transcode-pipeline',
            'metadata': {
                'labels': {'app': 'transcoder'},
            },
            'inputs': passthrough_parameters,
            'dag': {
                'tasks': [{
                    'name': 'prepare-task',
                    'template': 'prepare',
                }, {
                    'name': 'transcode-task',
                    'template': 'transcode',
                    'arguments': {
                        'parameters': passthrough_parameters['parameters'] + [{
                            'name': 'category',
                            'value': '{{item.category}}',
                        }, {
                            'name': 'raw_width',
                            'value': '{{item.raw_width}}',
                        }, {
                            'name': 'raw_height',
                            'value': '{{item.raw_height}}',
                        }, {
                            'name': 'configs',
                            'value': '{{item.configs}}',
                        }, {
                            'name': 'id',
                            'value': '{{item.id}}',
                        }, {
                            'name': 'media',
                            'value': media_value,
                        }],
                    },
                    'dependencies': ['prepare-task'],
                    'withParam': '{{tasks.prepare-task.outputs.parameters.workloads}}',
                }],
            },
        }
        return pipeline_task

    def get_transcode_task(self, item, url):
        """ Generate a task object describing the dependencies of a transcode

            :param item: mapping of parameter name to value, passed as DAG
                         arguments next to ``url``.
            :param url: value of the ``url`` DAG argument.
            :returns: the ``single-file-pipeline`` template dictionary.
        """
        # Generate an args structure for the DAG
        args = [{'name': 'url', 'value': url}]
        for key in item:
            args.append({'name': key, 'value': item[key]})
        parameters = {"parameters" : args}

        pipeline = {
            'name': 'single-file-pipeline',
            'dag': {
                # A single task that hands the arguments to the
                # transcode-pipeline template.
                'tasks' : [{'name': 'transcode-task',
                            'template': 'transcode-pipeline',
                            'arguments' : parameters}]
            }
        }
        return pipeline

    def start_tar_import(self,
                         project,
                         entity_type,
                         token,
                         url,
                         name,
                         section,
                         md5,
                         gid,
                         uid,
                         user,
                         upload_size,
                         attributes):
        """ Initiate a transcode based on the contents on an archive

            Submission is attempted up to ``MAX_SUBMIT_RETRIES`` times with
            ``SUBMIT_RETRY_BACKOFF`` seconds between attempts.

            :raises Exception: if ``entity_type`` is not ``-1`` or if every
                               submission attempt fails.
        """
        if entity_type != -1:
            raise Exception("entity type is not -1!")

        # The scratch volume is sized at four times the upload when the
        # upload size is known, else from the environment.
        pvc_size = os.getenv('TRANSCODER_PVC_SIZE')
        if upload_size:
            pvc_size = bytes_to_mi_str(upload_size * 4)

        args = {'original': '/work/' + name,
                'name': name}
        host = f'{PROTO}{os.getenv("MAIN_HOST")}'
        global_args = {'upload_name': name,
                       'url': url,
                       'host': host,
                       'rest_url': f'{host}/rest',
                       'tus_url' : f'{host}/files/',
                       'project' : str(project),
                       'type': '-1',
                       'token' : str(token),
                       'section' : section,
                       'gid': gid,
                       'uid': uid,
                       'user': str(user),
                       'client_image' : get_client_image_name(),
                       'attributes' : json.dumps(attributes),
                       'media_id': '-1'}
        global_parameters = [{"name": key, "value": value}
                             for key, value in global_args.items()]

        pipeline_task = self.get_unpack_and_transcode_tasks(args, url)
        # Define the workflow spec.
        manifest = {
            'apiVersion': 'argoproj.io/v1alpha1',
            'kind': 'Workflow',
            'metadata': {
                'generateName': 'transcode-workflow-',
                'labels': {
                    'job_type': 'upload',
                    'project': str(project),
                    'gid': gid,
                    'uid': uid,
                    'user': str(user),
                },
                'annotations': {
                    'name': name,
                    'section': section,
                },
            },
            'spec': {
                'entrypoint': 'unpack-pipeline',
                'podGC': {'strategy': 'OnPodCompletion'},
                'arguments': {'parameters' : global_parameters},
                'ttlStrategy': {'secondsAfterSuccess': 300,
                                'secondsAfterFailure': 86400},
                'volumeClaimTemplates': [{
                    'metadata': {
                        'name': 'transcode-scratch',
                    },
                    'spec': {
                        'storageClassName': _select_storage_class(),
                        'accessModes': [ 'ReadWriteOnce' ],
                        'resources': {
                            'requests': {
                                'storage': pvc_size,
                            }
                        }
                    }
                }],
                'parallelism': 4,
                'templates': [
                    self.prepare_task,
                    self.get_download_task(),
                    self.delete_task,
                    self.transcode_task,
                    self.image_upload_task,
                    self.unpack_task,
                    self.get_transcode_dag(),
                    pipeline_task,
                    self.data_import
                ],
            },
        }

        # Create the workflow
        for attempt in range(MAX_SUBMIT_RETRIES):
            try:
                self.custom.create_namespaced_custom_object(
                    group='argoproj.io',
                    version='v1alpha1',
                    namespace='default',
                    plural='workflows',
                    body=manifest,
                )
                break
            except ApiException:
                logger.info("Failed to submit workflow:")
                logger.info("%s", manifest)
                # No point in backing off after the final attempt.
                if attempt < MAX_SUBMIT_RETRIES - 1:
                    time.sleep(SUBMIT_RETRY_BACKOFF)
        else:
            # Only reached when no attempt hit `break`, i.e. none succeeded.
            raise Exception(f"Failed to submit workflow {MAX_SUBMIT_RETRIES} times!")

    def start_transcode(self, project,
                        entity_type, token, url, name,
                        section, md5, gid, uid,
                        user, upload_size,
                        attributes, media_id):
        """ Creates an argo workflow for performing a transcode.

            One scratch volume claim is created for the prepare step and one
            per workload slot. After submission the job is recorded through
            ``TatorCache().set_job``.
        """
        MAX_WORKLOADS = 7 # 5 resolutions + audio + archival
        # Define paths for transcode outputs.
        base, _ = os.path.splitext(name)
        args = {
            'original': '/work/' + name,
            'transcoded': '/work/' + base + '_transcoded',
            'thumbnail': '/work/' + base + '_thumbnail.jpg',
            'thumbnail_gif': '/work/' + base + '_thumbnail_gif.gif',
            'segments': '/work/' + base + '_segments.json',
            'entity_type': str(entity_type),
            'md5' : md5,
            'name': name
        }

        # Sized at four times the upload when the upload size is known.
        pvc_size = os.getenv('TRANSCODER_PVC_SIZE')
        if upload_size:
            pvc_size = bytes_to_mi_str(upload_size * 4)

        host = f'{PROTO}{os.getenv("MAIN_HOST")}'
        global_args = {'upload_name': name,
                       'url': url,
                       'host': host,
                       'rest_url': f'{host}/rest',
                       'tus_url' : f'{host}/files/',
                       'token' : str(token),
                       'project' : str(project),
                       'type': str(entity_type),
                       'section' : section,
                       'gid': gid,
                       'uid': uid,
                       'user': str(user),
                       'client_image' : get_client_image_name(),
                       'attributes' : json.dumps(attributes),
                       'media_id': '-1' if media_id is None else str(media_id)}
        global_parameters = [{"name": key, "value": value}
                             for key, value in global_args.items()]

        pipeline_task = self.get_transcode_task(args, url)
        # Define the workflow spec.
        manifest = {
            'apiVersion': 'argoproj.io/v1alpha1',
            'kind': 'Workflow',
            'metadata': {
                'generateName': 'transcode-workflow-',
                'labels': {
                    'job_type': 'upload',
                    'project': str(project),
                    'gid': gid,
                    'uid': uid,
                    'user': str(user),
                },
                'annotations': {
                    'name': name,
                    'section': section,
                },
            },
            'spec': {
                'entrypoint': 'single-file-pipeline',
                'podGC': {'strategy': 'OnPodCompletion'},
                'arguments': {'parameters' : global_parameters},
                'ttlStrategy': {'secondsAfterSuccess': 300,
                                'secondsAfterFailure': 86400},
                'volumeClaimTemplates': [{
                    'metadata': {
                        'name': f'scratch-{workload}',
                    },
                    'spec': {
                        'storageClassName': os.getenv('SCRATCH_STORAGE_CLASS'),
                        'accessModes': [ 'ReadWriteOnce' ],
                        'resources': {
                            'requests': {
                                'storage': pvc_size,
                            }
                        }
                    }
                } for workload in ['prepare'] + list(range(MAX_WORKLOADS))],
                'templates': [
                    self.prepare_task,
                    self.transcode_task,
                    self.image_upload_task,
                    self.get_transcode_dag(media_id),
                    pipeline_task,
                ],
            },
        }

        # Create the workflow
        self.custom.create_namespaced_custom_object(
            group='argoproj.io',
            version='v1alpha1',
            namespace='default',
            plural='workflows',
            body=manifest,
        )

        # Cache the job for cancellation/authentication.
        TatorCache().set_job({'uid': uid,
                              'gid': gid,
                              'user': user,
                              'project': project,
                              'algorithm': -1,
                              'datetime': datetime.datetime.utcnow().isoformat() + 'Z'})
class ClusterImageSet(BaseResource):
    """
    Wrapper around the ``clusterimagesets`` custom resource
    (``hive.openshift.io/v1``), which carries the release image URI.
    Upon creating a cluster deployment, the release image is fetched by the
    assisted-service from the image set.
    """

    _api_group = "hive.openshift.io"
    _api_version = "v1"
    _plural = "clusterimagesets"

    def __init__(
        self,
        kube_api_client: ApiClient,
        name: str,
        namespace: str = consts.DEFAULT_NAMESPACE,
    ):
        super().__init__(name, namespace)
        self.crd_api = CustomObjectsApi(kube_api_client)

    def _crd_coordinates(self) -> dict:
        """Keyword arguments identifying this resource kind in the CRD API."""
        return {
            "group": self._api_group,
            "version": self._api_version,
            "plural": self._plural,
        }

    def create_from_yaml(self, yaml_data: dict) -> None:
        """Create the cluster-scoped object from an already built body."""
        self.crd_api.create_cluster_custom_object(
            body=yaml_data,
            **self._crd_coordinates(),
        )

        log.info("created cluster imageset %s: %s", self.ref, pformat(yaml_data))

    def create(self, releaseImage: str):
        """Create the object with ``spec.releaseImage`` set to *releaseImage*."""
        api_version = f"{self._api_group}/{self._api_version}"
        metadata = self.ref.as_dict()
        body = {
            "apiVersion": api_version,
            "kind": "ClusterImageSet",
            "metadata": metadata,
            "spec": {
                "releaseImage": releaseImage,
            },
        }
        self.crd_api.create_cluster_custom_object(
            body=body,
            **self._crd_coordinates(),
        )

        log.info("created cluster imageset %s: %s", self.ref, pformat(body))

    def get(self) -> dict:
        """Return the cluster-scoped object named ``self.ref.name``."""
        resource = self.crd_api.get_cluster_custom_object(
            name=self.ref.name,
            **self._crd_coordinates(),
        )
        return resource

    def delete(self) -> None:
        """Delete the cluster-scoped object named ``self.ref.name``."""
        self.crd_api.delete_cluster_custom_object(
            name=self.ref.name,
            **self._crd_coordinates(),
        )

        log.info("deleted cluster imageset %s", self.ref)
def get_podsetting_spec(podsetting_name, team_name):
    """Fetch a ``podsettings`` custom object (``orbit.aws/v1``).

    Loads the kube config, then reads the object called *podsetting_name*
    from the namespace *team_name*.

    :param podsetting_name: name of the custom object to read.
    :param team_name: namespace the object lives in.
    :return: the value returned by ``get_namespaced_custom_object``.
    """
    load_kube_config()
    custom_objects_api = CustomObjectsApi()
    podsetting = custom_objects_api.get_namespaced_custom_object(
        group="orbit.aws",
        version="v1",
        namespace=team_name,
        plural="podsettings",
        name=podsetting_name,
    )
    return podsetting
class TatorAlgorithm(JobManagerMixin):
    """ Interface to kubernetes REST API for starting algorithms.
    """

    def __init__(self, alg):
        """ Initializes the connection. If algorithm object includes
            a remote cluster, use that. Otherwise, use this cluster.

            :param alg: algorithm object; ``alg.cluster`` (host, port, token,
                        cert) selects a remote cluster and ``alg.manifest``
                        holds the workflow manifest. ``self.manifest`` is only
                        set when ``alg.manifest`` is truthy.
        """
        if alg.cluster:
            host = alg.cluster.host
            port = alg.cluster.port
            token = alg.cluster.token
            # The certificate is written to a temp file because the client
            # configuration takes a CA certificate path.
            fd, cert = tempfile.mkstemp(text=True)
            with open(fd, 'w') as f:
                f.write(alg.cluster.cert)
            conf = Configuration()
            conf.api_key['authorization'] = token
            conf.host = f'{PROTO}{host}:{port}'
            conf.verify_ssl = True
            conf.ssl_ca_cert = cert
            api_client = ApiClient(conf)
            self.corev1 = CoreV1Api(api_client)
            self.custom = CustomObjectsApi(api_client)
        else:
            load_incluster_config()
            self.corev1 = CoreV1Api()
            self.custom = CustomObjectsApi()

        # Read in the manifest.
        if alg.manifest:
            manifest_file = alg.manifest.open(mode='r')
            try:
                self.manifest = yaml.safe_load(manifest_file)
            finally:
                # Release the handle even if parsing fails.
                manifest_file.close()

        # Save off the algorithm.
        self.alg = alg

    def start_algorithm(self, media_ids, sections, gid, uid, token, project, user,
                        extra_params: list=None):
        """ Starts an algorithm job, substituting in parameters in the
            workflow spec.

            Submission is attempted up to ``MAX_SUBMIT_RETRIES`` times with
            ``SUBMIT_RETRY_BACKOFF`` seconds between attempts.

            :param extra_params: optional list of dictionaries with 'name' and
                                 'value' entries appended to the workflow
                                 parameters; ``None`` means none.
            :returns: the response of the workflow creation call.
            :raises Exception: if every submission attempt fails.
        """
        # Make a copy of the manifest from the database.
        manifest = copy.deepcopy(self.manifest)

        # Update the storage class of the spec if executing locally.
        if self.alg.cluster is None:
            if 'volumeClaimTemplates' in manifest['spec']:
                for claim in manifest['spec']['volumeClaimTemplates']:
                    claim['spec']['storageClassName'] = _select_storage_class()
                    logger.warning(f"Implicitly sc to pvc of Algo:{self.alg.pk}")

        # Add in workflow parameters.
        manifest['spec']['arguments'] = {'parameters': [
            {
                'name': 'name',
                'value': self.alg.name,
            }, {
                'name': 'media_ids',
                'value': media_ids,
            }, {
                'name': 'sections',
                'value': sections,
            }, {
                'name': 'gid',
                'value': gid,
            }, {
                'name': 'uid',
                'value': uid,
            }, {
                'name': 'host',
                'value': f'{PROTO}{os.getenv("MAIN_HOST")}',
            }, {
                'name': 'rest_url',
                'value': f'{PROTO}{os.getenv("MAIN_HOST")}/rest',
            }, {
                'name': 'rest_token',
                'value': str(token),
            }, {
                'name': 'tus_url',
                'value': f'{PROTO}{os.getenv("MAIN_HOST")}/files/',
            }, {
                'name': 'project_id',
                'value': str(project),
            },
        ]}

        # Add the non-standard extra parameters if provided
        # Expected format of extra_params: list of dictionaries with 'name' and 'value' entries
        # for each of the parameters. e.g. {{'name': 'hello_param', 'value': [1]}}
        if extra_params:
            manifest['spec']['arguments']['parameters'].extend(extra_params)

        # Set labels and annotations for job management
        if 'labels' not in manifest['metadata']:
            manifest['metadata']['labels'] = {}
        if 'annotations' not in manifest['metadata']:
            manifest['metadata']['annotations'] = {}
        manifest['metadata']['labels'] = {
            **manifest['metadata']['labels'],
            'job_type': 'algorithm',
            'project': str(project),
            'gid': gid,
            'uid': uid,
            'user': str(user),
        }
        manifest['metadata']['annotations'] = {
            **manifest['metadata']['annotations'],
            'name': self.alg.name,
            'sections': sections,
            'media_ids': media_ids,
        }

        for attempt in range(MAX_SUBMIT_RETRIES):
            try:
                response = self.custom.create_namespaced_custom_object(
                    group='argoproj.io',
                    version='v1alpha1',
                    namespace='default',
                    plural='workflows',
                    body=manifest,
                )
                break
            except ApiException:
                logger.info("Failed to submit workflow:")
                logger.info("%s", manifest)
                # No point in backing off after the final attempt.
                if attempt < MAX_SUBMIT_RETRIES - 1:
                    time.sleep(SUBMIT_RETRY_BACKOFF)
        else:
            # Only reached when no attempt hit `break`, i.e. none succeeded.
            raise Exception(f"Failed to submit workflow {MAX_SUBMIT_RETRIES} times!")

        # Cache the job for cancellation/authentication.
        TatorCache().set_job({'uid': uid,
                              'gid': gid,
                              'user': user,
                              'project': project,
                              'algorithm': self.alg.pk,
                              'datetime': datetime.datetime.utcnow().isoformat() + 'Z'})

        return response