def _create_k8s_job_config(self, job_name: str) -> dict:
    # Load details
    cluster_details = self.cluster_details
    job_details = load_job_details(cluster_name=self.cluster_name, job_name=job_name)
    cluster_id = cluster_details['id']
    job_id = job_details["id"]

    # Check and load k8s context
    self._check_and_load_k8s_context()

    # Get config template
    with open(os.path.expanduser(f"{GlobalPaths.MARO_K8S_LIB}/k8s_configs/job.yml")) as fr:
        k8s_job_config = yaml.safe_load(fr)
    with open(os.path.expanduser(f"{GlobalPaths.MARO_K8S_LIB}/k8s_configs/container.yml")) as fr:
        k8s_container_config = yaml.safe_load(fr)

    # Fill configs
    k8s_job_config['metadata']['name'] = f"{job_id}"
    azure_file_config = k8s_job_config['spec']['template']['spec']['volumes'][0]['azureFile']
    azure_file_config['secretName'] = f"{cluster_id}-k8s-secret"
    azure_file_config['shareName'] = f"{cluster_id}-fs"

    # Create and fill container config
    for component_type, component_details in job_details['components'].items():
        for component_index in range(component_details['num']):
            container_config = self._create_k8s_container_config(
                job_details=job_details,
                k8s_container_config_template=k8s_container_config,
                component_type=component_type,
                component_index=component_index
            )
            k8s_job_config['spec']['template']['spec']['containers'].append(container_config)

    return k8s_job_config
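# Usage sketch (an assumption, not part of the original source): one plausible way the
# config returned by _create_k8s_job_config could be submitted is to dump it to a
# temporary YAML file and hand it to `kubectl apply -f`. The helper name
# _apply_k8s_job_config is hypothetical; only stdlib calls and the kubectl CLI are used.
import subprocess
import tempfile

import yaml

def _apply_k8s_job_config(k8s_job_config: dict) -> None:
    # Dump the rendered job config to a temporary YAML file and apply it with kubectl
    with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as fw:
        yaml.safe_dump(k8s_job_config, fw)
        config_path = fw.name
    subprocess.run(["kubectl", "apply", "-f", config_path], check=True)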
def stop_schedule(self, schedule_name: str):
    # Load details
    schedule_details = load_schedule_details(cluster_name=self.cluster_name, schedule_name=schedule_name)
    job_names = schedule_details['job_names']

    for job_name in job_names:
        # Load job details
        job_details = load_job_details(cluster_name=self.cluster_name, job_name=job_name)
        job_schedule_tag = job_details['tags']['schedule']

        # Remote stop job
        if job_schedule_tag == schedule_name:
            self.grass_executor.remote_create_killed_job_ticket(job_name=job_name)
            self.grass_executor.remote_delete_pending_job_ticket(job_name=job_name)
def stop_schedule(self, schedule_name: str):
    # Load details
    schedule_details = load_schedule_details(cluster_name=self.cluster_name, schedule_name=schedule_name)
    job_names = schedule_details['job_names']

    for job_name in job_names:
        # Load job details
        job_details = load_job_details(cluster_name=self.cluster_name, job_name=job_name)
        job_schedule_tag = job_details['tags']['schedule']

        # Stop job
        if job_schedule_tag == schedule_name:
            self.stop_job(job_name=job_name)
def _set_job_id(self, job_name: str):
    # Load details
    job_details = load_job_details(cluster_name=self.cluster_name, job_name=job_name)

    # Set job id
    job_details['id'] = generate_job_id()

    # Set component id
    for component, component_details in job_details['components'].items():
        component_details['id'] = generate_component_id()

    # Save details
    save_job_details(cluster_name=self.cluster_name, job_name=job_name, job_details=job_details)
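# Illustrative sketch (an assumption): generate_job_id / generate_component_id used
# above are not shown in this section; they presumably just mint unique string
# identifiers, e.g. something uuid-based along these lines.
import uuid

def generate_job_id() -> str:
    # Short, collision-unlikely identifier for a job
    return uuid.uuid4().hex

def generate_component_id() -> str:
    # Same scheme for per-component identifiers
    return uuid.uuid4().hex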
def get_job_logs(self, job_name: str, export_dir: str = './'):
    # Load details
    job_details = load_job_details(cluster_name=self.cluster_name, job_name=job_name)
    job_id = job_details['id']

    # Get pods details
    pods_details = self.get_pods_details()

    # Export logs
    for pod_details in pods_details:
        if pod_details["metadata"]["name"].startswith(job_id):
            for container_details in pod_details["spec"]["containers"]:
                self._export_log(
                    pod_id=pod_details["metadata"]["name"],
                    container_name=container_details["name"],
                    export_dir=export_dir
                )
def get_job_logs(self, job_name: str, export_dir: str = './'):
    # Load details
    cluster_details = self.cluster_details
    job_details = load_job_details(cluster_name=self.cluster_name, job_name=job_name)
    admin_username = cluster_details['user']['admin_username']
    master_public_ip_address = cluster_details['master']['public_ip_address']
    job_id = job_details['id']

    # Copy logs from master
    try:
        copy_files_from_node(
            local_dir=export_dir,
            remote_path=f"~/.maro/logs/{job_id}",
            admin_username=admin_username,
            node_ip_address=master_public_ip_address
        )
    except CommandError:
        logger.error_red("No logs have been created at this time")
def get_job_logs(self, job_name: str, export_dir: str = './'):
    # Load details
    job_details = load_job_details(cluster_name=self.cluster_name, job_name=job_name)
    job_id = job_details['id']

    # Get pods details
    command = "kubectl get pods -o json"
    return_str = SubProcess.run(command)
    pods_details = json.loads(return_str)['items']

    # Export logs
    for pod_details in pods_details:
        if pod_details["metadata"]["name"].startswith(job_id):
            for container_details in pod_details["spec"]["containers"]:
                self._export_log(
                    pod_id=pod_details["metadata"]["name"],
                    container_name=container_details["name"],
                    export_dir=export_dir
                )
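# Sketch of the _export_log helper referenced above (an assumption, not the original
# implementation): it presumably fetches container logs via `kubectl logs` and writes
# them under export_dir, one file per container grouped by pod. The file layout here
# is a guess.
import os
import subprocess

def _export_log(pod_id: str, container_name: str, export_dir: str) -> None:
    # One directory per pod, one log file per container
    pod_dir = os.path.expanduser(os.path.join(export_dir, pod_id))
    os.makedirs(pod_dir, exist_ok=True)
    logs = subprocess.run(
        ["kubectl", "logs", pod_id, "-c", container_name],
        capture_output=True, text=True, check=True
    ).stdout
    with open(os.path.join(pod_dir, f"{container_name}.log"), "w") as fw:
        fw.write(logs)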