def k8s_manager(self):
    """Return this instance's ``AsyncK8SManager``, creating it on first use.

    The manager is cached on ``self._k8s_manager``. When that attribute is
    falsy, a new manager is built from ``self.namespace`` and
    ``self.in_cluster`` and stored before being returned.
    """
    cached = self._k8s_manager
    if cached:
        return cached

    # Nothing cached yet: build the manager once and remember it.
    self._k8s_manager = AsyncK8SManager(
        namespace=self.namespace,
        in_cluster=self.in_cluster,
    )
    return self._k8s_manager
async def collect_logs(request):
    """Collect the logs of a run's Kubernetes operation and upload them.

    Reads ``run_uuid`` from the request path parameters, looks up the
    matching Kubernetes operation, queries its logs and uploads them.

    Returns:
        An empty ``Response`` when no logs were found; otherwise a
        ``Response`` carrying a background task that calls
        ``clean_tmp_logs`` for the run.

    Raises:
        HTTPException: with status 400 when the Kubernetes resource is not
            found or when uploading the logs fails.
    """
    run_uuid = request.path_params["run_uuid"]
    resource_name = get_resource_name(run_uuid=run_uuid)
    k8s_manager = AsyncK8SManager(
        namespace=settings.CLIENT_CONFIG.namespace,
        in_cluster=settings.CLIENT_CONFIG.in_cluster,
    )
    await k8s_manager.setup()
    try:
        k8s_operation = await get_k8s_operation(
            k8s_manager=k8s_manager, resource_name=resource_name
        )
        if not k8s_operation:
            raise HTTPException(
                detail="Run's logs was not collected, resource was not found.",
                status_code=status.HTTP_400_BAD_REQUEST,
            )
        operation_logs, _ = await query_k8s_operation_logs(
            instance=run_uuid, k8s_manager=k8s_manager, last_time=None
        )
    finally:
        # Close the manager on every exit path, including the "not found"
        # error above and any failure while querying the logs.
        await k8s_manager.close()

    if not operation_logs:
        return Response()

    try:
        await upload_logs(run_uuid=run_uuid, logs=operation_logs)
    except Exception as e:
        # Any upload failure is reported to the caller as a 400; the original
        # exception is chained so it remains visible in tracebacks.
        raise HTTPException(
            detail="Run's logs was not collected, an error was raised while uploading the data %s."
            % e,
            status_code=status.HTTP_400_BAD_REQUEST,
        ) from e

    task = BackgroundTask(clean_tmp_logs, run_uuid=run_uuid)
    return Response(background=task)
async def collect_logs(request):
    """Collect the logs of a run's Kubernetes operation and schedule their upload.

    Reads ``owner``, ``project`` and ``run_uuid`` from the request path
    parameters, resolves the run instance, checks that the Kubernetes
    operation exists and fetches its logs.

    Returns:
        An empty ``Response`` when no logs were found; otherwise a
        ``Response`` carrying a background task that calls ``upload_logs``
        with the collected logs.

    Raises:
        HTTPException: with status 400 when the Kubernetes resource is not
            found.
    """
    owner = request.path_params["owner"]
    project = request.path_params["project"]
    run_uuid = request.path_params["run_uuid"]
    resource_name = get_resource_name(run_uuid=run_uuid)
    operation = get_run_instance(owner=owner, project=project, run_uuid=run_uuid)
    k8s_manager = AsyncK8SManager(
        namespace=settings.CLIENT_CONFIG.namespace,
        in_cluster=settings.CLIENT_CONFIG.in_cluster,
    )
    await k8s_manager.setup()
    try:
        k8s_operation = await get_k8s_operation(
            k8s_manager=k8s_manager, resource_name=resource_name
        )
        if not k8s_operation:
            raise HTTPException(
                detail="Run's logs was not collected, resource was not found.",
                status_code=status.HTTP_400_BAD_REQUEST,
            )
        operation_logs, _ = await get_k8s_operation_logs(
            operation=operation, k8s_manager=k8s_manager, last_time=None
        )
    finally:
        # Close the manager on every exit path, including the "not found"
        # error above and any failure while fetching the logs.
        await k8s_manager.close()

    if not operation_logs:
        return Response()

    task = BackgroundTask(upload_logs, run_uuid=run_uuid, logs=operation_logs)
    return Response(background=task)
async def get_logs(request):
    """Return a page of logs for a run, live from Kubernetes or from the archive.

    Path parameters: ``owner``, ``project``, ``run_uuid``.
    Query parameters: ``force`` (bypasses the archive cache check),
    ``last_time`` (parsed as a datetime) and ``last_file``.

    Behaviour:
        * No ``last_file`` and the Kubernetes operation exists: logs are
          fetched from the operation; ``last_time`` is reset to ``None`` once
          the operation reports a ``completionTime``.
        * Otherwise, if ``last_time`` was given: streaming stops and an empty
          log list is returned with both cursors cleared.
        * Otherwise: logs are read from the archive, continuing from
          ``last_file``.

    Returns:
        A ``UJSONResponse`` with the dict form of a ``V1Logs`` holding
        ``last_time``, ``last_file`` and ``logs``.
    """
    owner = request.path_params["owner"]
    project = request.path_params["project"]
    run_uuid = request.path_params["run_uuid"]
    force = to_bool(request.query_params.get("force"), handle_none=True)
    resource_name = get_resource_name(run_uuid=run_uuid)
    operation = get_run_instance(owner=owner, project=project, run_uuid=run_uuid)
    last_time = QueryParams(request.url.query).get("last_time")
    if last_time:
        last_time = dt_parser.parse(last_time).astimezone()
    last_file = QueryParams(request.url.query).get("last_file")

    k8s_manager = None
    k8s_operation = None
    if not last_file:
        # The cluster is only consulted when the caller is not already paging
        # through archived files.
        k8s_manager = AsyncK8SManager(
            namespace=settings.CLIENT_CONFIG.namespace,
            in_cluster=settings.CLIENT_CONFIG.in_cluster,
        )
        await k8s_manager.setup()

    try:
        if k8s_manager is not None:
            k8s_operation = await get_k8s_operation(
                k8s_manager=k8s_manager, resource_name=resource_name
            )

        if not last_file and k8s_operation:
            last_file = None
            operation_logs, last_time = await get_k8s_operation_logs(
                operation=operation,
                last_time=last_time,
                k8s_manager=k8s_manager,
                stream=True,
            )
            if k8s_operation["status"].get("completionTime"):
                last_time = None
        elif last_time:  # Streaming should stop
            last_file = None
            last_time = None
            operation_logs = []
        else:
            last_time = None
            operation_logs, last_file = await get_archived_operation_logs(
                run_uuid=run_uuid, last_file=last_file, check_cache=not force
            )
    finally:
        # Close the manager even when a lookup or log fetch raises, so the
        # client is not leaked on error paths.
        if k8s_manager is not None:
            await k8s_manager.close()

    response = V1Logs(last_time=last_time, last_file=last_file, logs=operation_logs)
    return UJSONResponse(response.to_dict())
async def collect_logs(request: Request) -> Response:
    """Collect the logs of a run's Kubernetes operation and upload them.

    Reads ``run_uuid`` and ``run_kind`` from the request path parameters,
    looks up the matching Kubernetes operation, queries its logs and uploads
    them.

    Returns:
        * ``UJSONResponse`` with status 400 and an ``errors`` message when the
          resource is not found or the upload fails (also logged as a
          warning).
        * ``Response`` with status 404 when no logs were found.
        * ``Response`` carrying a ``clean_tmp_logs`` background task when
          ``settings.AGENT_CONFIG.is_replica`` is set.
        * ``Response`` with status 200 otherwise.
    """
    run_uuid = request.path_params["run_uuid"]
    run_kind = request.path_params["run_kind"]
    resource_name = get_resource_name_for_kind(run_uuid=run_uuid, run_kind=run_kind)
    k8s_manager = AsyncK8SManager(
        namespace=settings.CLIENT_CONFIG.namespace,
        in_cluster=settings.CLIENT_CONFIG.in_cluster,
    )
    await k8s_manager.setup()
    try:
        k8s_operation = await get_k8s_operation(
            k8s_manager=k8s_manager, resource_name=resource_name
        )
        if not k8s_operation:
            errors = "Run's logs was not collected, resource was not found."
            logger.warning(errors)
            return UJSONResponse(
                content={"errors": errors},
                status_code=status.HTTP_400_BAD_REQUEST,
            )
        operation_logs, _ = await query_k8s_operation_logs(
            instance=run_uuid, k8s_manager=k8s_manager, last_time=None
        )
    finally:
        # Close the manager on every exit path, including the early
        # "not found" return above and any failure while querying the logs.
        await k8s_manager.close()

    if not operation_logs:
        return Response(status_code=status.HTTP_404_NOT_FOUND)

    try:
        await upload_logs(run_uuid=run_uuid, logs=operation_logs)
    except Exception as e:
        # Upload failures are reported in the response body rather than
        # raised, so the caller always receives a structured error.
        errors = (
            "Run's logs was not collected, an error was raised while uploading the data %s."
            % e
        )
        logger.warning(errors)
        return UJSONResponse(
            content={"errors": errors},
            status_code=status.HTTP_400_BAD_REQUEST,
        )

    if settings.AGENT_CONFIG.is_replica:
        task = BackgroundTask(clean_tmp_logs, run_uuid=run_uuid)
        return Response(background=task)
    return Response(status_code=status.HTTP_200_OK)
async def get_logs(request: Request) -> UJSONResponse:
    """Return a page of logs for a run.

    Path parameter: ``run_uuid``.
    Query parameters: ``force`` (bypasses the archive cache check),
    ``last_time`` (parsed as a datetime) and ``last_file``.

    Behaviour:
        * With ``last_time``: the Kubernetes operation is looked up; when it
          exists its logs are fetched via ``get_operation_logs``, otherwise
          the logs come from ``get_tmp_operation_logs``.
        * Without ``last_time``: logs (and the ``files`` list) are read from
          the archive, continuing from ``last_file``.

    Returns:
        A ``UJSONResponse`` with the dict form of a ``V1Logs`` holding
        ``last_time``, ``last_file``, ``logs`` and ``files``.
    """
    run_uuid = request.path_params["run_uuid"]
    force = to_bool(request.query_params.get("force"), handle_none=True)
    last_time = QueryParams(request.url.query).get("last_time")
    if last_time:
        last_time = parse_datetime(last_time).astimezone()
    last_file = QueryParams(request.url.query).get("last_file")
    files = []

    if last_time:
        resource_name = get_resource_name(run_uuid=run_uuid)
        k8s_manager = AsyncK8SManager(
            namespace=settings.CLIENT_CONFIG.namespace,
            in_cluster=settings.CLIENT_CONFIG.in_cluster,
        )
        await k8s_manager.setup()
        try:
            k8s_operation = await get_k8s_operation(
                k8s_manager=k8s_manager, resource_name=resource_name
            )
            if k8s_operation:
                operation_logs, last_time = await get_operation_logs(
                    k8s_manager=k8s_manager,
                    k8s_operation=k8s_operation,
                    instance=run_uuid,
                    last_time=last_time,
                )
            else:
                operation_logs, last_time = await get_tmp_operation_logs(
                    run_uuid=run_uuid, last_time=last_time
                )
        finally:
            # Close the manager even when a lookup or log fetch raises, so
            # the client is not leaked on error paths.
            await k8s_manager.close()
    else:
        operation_logs, last_file, files = await get_archived_operation_logs(
            run_uuid=run_uuid, last_file=last_file, check_cache=not force
        )

    response = V1Logs(
        last_time=last_time, last_file=last_file, logs=operation_logs, files=files
    )
    return UJSONResponse(response.to_dict())
async def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Run the sidecar loop: watch a container and periodically sync its data.

    The loop sleeps ``sleep_interval`` seconds between checks of whether
    ``container_id`` is still running in the current pod. Every
    ``sync_interval`` successful checks (as normalised by
    ``get_sync_interval``) it runs a sync pass. One final sync pass runs
    after the loop ends.

    Args:
        container_id: Container whose running state is polled.
        sleep_interval: Seconds to sleep between polls.
        sync_interval: Requested sync cadence, passed to ``get_sync_interval``.
        monitor_outputs: When true, sync artifacts and summaries on each pass.
        monitor_logs: When true, sync logs on each pass.

    Raises:
        PolyaxonContainerException: when the pod id environment variable is
            missing or the run info cannot be resolved.
    """
    sync_interval = get_sync_interval(
        interval=sync_interval, sleep_interval=sleep_interval
    )
    try:
        pod_id = os.environ[POLYAXON_KEYS_K8S_POD_ID]
    except KeyError as e:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context."
        ) from e

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    k8s_manager = AsyncK8SManager(namespace=CLIENT_CONFIG.namespace, in_cluster=True)
    await k8s_manager.setup()
    try:
        pod = await k8s_manager.get_pod(pod_id, reraise=True)
        retry = 1
        is_running = True
        counter = 0
        state = {
            "last_artifacts_check": None,
            "last_logs_check": None,
        }

        async def monitor():
            # One sync pass; reads the enclosing ``is_running`` at call time.
            if monitor_logs:
                await sync_logs(
                    run_uuid=run_uuid,
                    k8s_manager=k8s_manager,
                    pod=pod,
                    last_time=None,
                    stream=True,
                    is_running=is_running,
                )
            if monitor_outputs:
                last_check = state["last_artifacts_check"]
                state["last_artifacts_check"] = sync_artifacts(
                    last_check=last_check,
                    run_uuid=run_uuid,
                )
                sync_summaries(
                    last_check=last_check,
                    run_uuid=run_uuid,
                    client=client,
                )

        # Stop when the container is no longer running or after repeated
        # consecutive API failures.
        while is_running and retry <= 3:
            await asyncio.sleep(sleep_interval)
            try:
                is_running = await k8s_manager.is_pod_running(pod_id, container_id)
            except ApiException as e:
                retry += 1
                logger.info("Exception %s", repr(e))
                logger.info("Sleeping ...")
                # Back off a little longer after each consecutive failure.
                await asyncio.sleep(retry)
                continue

            logger.debug("Syncing ...")
            if is_running:
                # A successful check resets the failure budget.
                retry = 1

            counter += 1
            if counter == sync_interval:
                counter = 0
                try:
                    await monitor()
                except Exception as e:
                    # Best effort: a failed periodic sync must not stop the loop.
                    logger.warning("Polyaxon sidecar error: %s", repr(e))

        # Final sync pass once the loop has ended.
        await monitor()
        logger.info("Cleaning non main containers")
    finally:
        # Close the manager even if the pod lookup, the loop or the final
        # sync raises, so the client is not leaked.
        await k8s_manager.close()