def image_boxes(
    asset_path: str,
    tensor_image,
    tensor_boxes,
    rescale=1,
    dataformats="CHW",
    asset_rel_path: str = None,
):
    """Build an image event with regions of interest drawn from boxes.

    The image is converted with `to_np`, rearranged by `convert_to_HWC`
    according to `dataformats`, multiplied (as float32) by the factor from
    `calculate_scale_factor` and cast to uint8. The boxes are converted with
    `to_np` and forwarded to `make_image` as `rois`.

    Returns:
        `UNKNOWN` (after logging a warning) when `np` is falsy, otherwise
        the value returned by `make_image`.
    """
    if not np:
        logger.warning(NUMPY_ERROR_MESSAGE)
        return UNKNOWN

    hwc_image = convert_to_HWC(to_np(tensor_image), dataformats)
    boxes = to_np(tensor_boxes)
    scaled_image = hwc_image.astype(np.float32) * calculate_scale_factor(hwc_image)
    pixels = scaled_image.astype(np.uint8)
    return make_image(
        asset_path,
        pixels,
        rescale=rescale,
        rois=boxes,
        asset_rel_path=asset_rel_path,
    )
def make_grid(data, ncols=8):
    """Tile a batch of images into one grid image.

    Args:
        data: numpy array laid out as (N, C, H, W) with C in {1, 3, 4}.
            Single-channel input is replicated to three channels.
        ncols: maximum number of images per grid row; capped at N.

    Returns:
        A zero-initialised array (numpy's default float dtype) of shape
        (C, H * nrows, W * ncols) with the images placed row by row, or
        `UNKNOWN` (after logging a warning) when `np` is falsy.

    Raises:
        AssertionError: if `data` is not a 4-D numpy array with 1, 3 or 4
            channels.
    """
    if not np:
        logger.warning(NUMPY_ERROR_MESSAGE)
        return UNKNOWN

    assert isinstance(data, np.ndarray), "plugin error, should pass numpy array here"
    # Validate the rank before touching shape[1..3]; the previous combined
    # check (`a and b or c`) let non-4-D input through when shape[1] == 4.
    assert data.ndim == 4, "plugin error, expected a 4 dimensional array"
    if data.shape[1] == 1:
        data = np.concatenate([data, data, data], 1)
    assert data.shape[1] in (3, 4), "plugin error, expected 1, 3 or 4 channels"

    nimg, channels, height, width = data.shape
    ncols = min(nimg, ncols)
    nrows = int(np.ceil(float(nimg) / ncols))
    canvas = np.zeros((channels, height * nrows, width * ncols))
    for index in range(nimg):
        # Row-major placement: image `index` lands in cell (row, col).
        row, col = divmod(index, ncols)
        top = row * height
        left = col * width
        canvas[:, top:top + height, left:left + width] = data[index]
    return canvas
def audio(asset_path: str, tensor, sample_rate=44100, asset_rel_path: str = None):
    """Write `tensor` as a mono 16-bit WAV file and return its audio event.

    Args:
        asset_path: destination file; its location is prepared with
            `check_or_create_path` before writing.
        tensor: audio samples, converted with `to_np` and squeezed; must be
            one dimensional after squeezing. Values outside [-1, 1] are
            clipped after a warning is logged.
        sample_rate: frame rate written to the WAV header.
        asset_rel_path: path recorded in the event instead of `asset_path`
            when provided.

    Returns:
        A `V1EventAudio` describing the written file, or `UNKNOWN` (after
        logging a warning) when `np` is falsy.

    Raises:
        AssertionError: if the squeezed tensor is not one dimensional.
    """
    if not np:
        logger.warning(NUMPY_ERROR_MESSAGE)
        return UNKNOWN

    import struct
    import wave

    tensor = to_np(tensor).squeeze()
    if abs(tensor).max() > 1:
        # Reported through the module logger like the other warnings here.
        logger.warning("warning: audio amplitude out of range, auto clipped.")
        tensor = tensor.clip(-1, 1)
    assert tensor.ndim == 1, "input tensor should be 1 dimensional."

    # Scale to signed 16-bit range; int() truncates toward zero.
    samples = [int(32767.0 * x) for x in tensor]
    pack_sample = struct.Struct("<h").pack
    # Join once instead of growing a bytes object sample by sample.
    frames = b"".join(pack_sample(sample) for sample in samples)

    check_or_create_path(asset_path, is_dir=False)
    # The context manager closes the file even if a write fails.
    with wave.open(asset_path, "wb") as wave_write:
        wave_write.setnchannels(1)
        wave_write.setsampwidth(2)
        wave_write.setframerate(sample_rate)
        wave_write.writeframes(frames)

    return V1EventAudio(
        sample_rate=sample_rate,
        num_channels=1,
        length_frames=len(samples),
        path=asset_rel_path or asset_path,
        content_type="audio/wav",
    )
async def notify(request: Request) -> Response:
    """Validate a notification request and schedule `notify_run`.

    Reads `namespace`, `owner`, `project` and `run_uuid` from the path
    parameters and `name`, `condition` and `connections` from the JSON body.
    Responds with HTTP 400 and an `errors` payload when the condition or the
    connections are missing, or when the agent config declares no
    connections. Otherwise returns an empty response carrying a background
    task that runs `notify_run`.
    """

    def _bad_request(errors):
        # Every rejection is logged and returned with the same payload shape.
        logger.warning(errors)
        return UJSONResponse(
            content={"errors": errors},
            status_code=status.HTTP_400_BAD_REQUEST,
        )

    path_params = request.path_params
    namespace = path_params["namespace"]
    owner = path_params["owner"]
    project = path_params["project"]
    run_uuid = path_params["run_uuid"]

    payload = await request.json()
    run_name = payload.get("name")

    raw_condition = payload.get("condition")
    if not raw_condition:
        return _bad_request("Received a notification request without condition.")
    condition = V1StatusCondition.get_condition(**raw_condition)

    connections = payload.get("connections")
    if not connections:
        return _bad_request("Received a notification request without connections.")

    if not settings.AGENT_CONFIG.connections:
        return _bad_request(
            "Received a notification request, but the agent did not declare connections."
        )

    background = BackgroundTask(
        notify_run,
        namespace=namespace,
        owner=owner,
        project=project,
        run_uuid=run_uuid,
        run_name=run_name,
        condition=condition,
        connections=connections,
    )
    return Response(background=background)
def upload(
    self,
    url,
    files,
    files_size,
    params=None,
    json_data=None,
    timeout=None,
    headers=None,
    session=None,
):
    """Upload `files` to `url` as a multipart PUT request with a progress bar.

    Args:
        url: target URL, forwarded to `self.put`.
        files: multipart fields; normalised with `to_list`.
        files_size: total upload size, compared against
            `settings.WARN_UPLOAD_SIZE` and `settings.MAX_UPLOAD_SIZE`.
        params: query parameters forwarded to `self.put`.
        json_data: optional payload appended as a `json` multipart field.
        timeout: request timeout; defaults to
            `settings.LONG_REQUEST_TIMEOUT` when None.
        headers: extra request headers; the multipart content type is added
            on a copy, the caller's dict is left untouched.
        session: forwarded to `self.put`.

    Returns:
        The response returned by `self.put`.

    Raises:
        PolyaxonShouldExitError: if `files_size` exceeds
            `settings.MAX_UPLOAD_SIZE`.
    """
    if files_size > settings.WARN_UPLOAD_SIZE:
        logger.warning(
            "You are uploading %s, there's a hard limit of %s.\n"
            "If you have data files in the current directory, "
            "please make sure to add them to .polyaxonignore or "
            "add them directly to your data volume, or upload them "
            "separately using `polyaxon data` command and remove them from here.\n",
            # Report the actual upload size, not the warning threshold.
            self.format_sizeof(files_size),
            self.format_sizeof(settings.MAX_UPLOAD_SIZE),
        )

    if files_size > settings.MAX_UPLOAD_SIZE:
        raise PolyaxonShouldExitError(
            "Files too large to sync, please keep it under {}.\n"
            "If you have data files in the current directory, "
            "please add them directly to your data volume, or upload them "
            "separately using `polyaxon data` command and remove them from here.\n".format(
                self.format_sizeof(settings.MAX_UPLOAD_SIZE)
            )
        )

    # Work on a copy so appending the json field never alters the caller's list.
    files = list(to_list(files))
    if json_data:
        files.append(("json", json.dumps(json_data)))

    multipart_encoder = MultipartEncoder(fields=files)
    # Copy the headers so the caller's dict is not modified.
    request_headers = dict(headers) if headers else {}
    request_headers["Content-Type"] = multipart_encoder.content_type

    # Attach progress bar
    progress_callback, callback_bar = self.create_progress_callback(multipart_encoder)
    multipart_encoder_monitor = MultipartEncoderMonitor(
        multipart_encoder, progress_callback
    )

    timeout = timeout if timeout is not None else settings.LONG_REQUEST_TIMEOUT

    try:
        response = self.put(
            url=url,
            params=params,
            data=multipart_encoder_monitor,
            headers=request_headers,
            timeout=timeout,
            session=session,
        )
    finally:
        # always make sure we clear the console
        callback_bar.done()

    return response
def add_value(
    cls,
    run,
    step,
    value,
    log_image: bool = False,
    log_histo: bool = False,
    log_tensor: bool = False,
):
    """Log one summary value on `run` according to the field it carries.

    The populated field is read with `value.WhichOneof("value")`:

    * `simple_value` is always logged with `run.log_metric`.
    * `image` is logged with `run.log_image` when `log_image` is set.
    * `tensor` with a non-empty `string_val` is logged with `run.log_text`
      when `log_tensor` is set; the strings are decoded as UTF-8 and joined
      with ", " from last to first (the order this function has always
      emitted). The input `value` is not modified.
    * `histo` is logged with `run.log_np_histogram` when `log_histo` is set
      and there are at least three bucket limits; otherwise, or when
      logging raises `ValueError`, a warning is logged and the histogram is
      ignored.

    Anything else is silently skipped. Always returns None.
    """
    field = value.WhichOneof("value")

    if field == "simple_value":
        run.log_metric(name=value.tag, step=step, value=value.simple_value)
        return

    if field == "image" and log_image:
        run.log_image(name=value.tag, step=step, data=value.image)
        return

    if field == "tensor" and log_tensor and value.tensor.string_val:
        # Read from a copy instead of popping, so the caller's event keeps
        # its strings.
        raw_strings = list(value.tensor.string_val)
        string_values = [raw.decode("utf-8") for raw in reversed(raw_strings)]
        run.log_text(name=value.tag, step=step, text=", ".join(string_values))
        return

    if field == "histo" and log_histo:
        limits = list(value.histo.bucket_limit)
        if len(limits) < 3:
            logger.warning(
                "Ignoring histogram for tag `%s`, "
                "Found a histogram with only 2 bins.",
                value.tag,
            )
            return

        # Extrapolate the outer edges from the spacing of the neighbouring limits.
        first = limits[0] + limits[0] - limits[1]
        last = limits[-2] + limits[-2] - limits[-3]
        values = list(value.histo.bucket)
        counts = [first] + limits[:-1] + [last]
        try:
            run.log_np_histogram(
                name=value.tag, values=values, counts=counts, step=step
            )
        except ValueError:
            logger.warning(
                "Ignoring histogram for tag `%s`, "
                "Histograms must have few bins",
                value.tag,
            )
def healthz(health_interval):
    """Exit the process with status 1 when the agent health check fails.

    Calls `BaseAgent.pong` with `interval=health_interval`. On a falsy result
    a warning is logged and `sys.exit(1)` is called; otherwise the function
    returns None.
    """
    from polyaxon.agents.base import BaseAgent

    is_healthy = BaseAgent.pong(interval=health_interval)
    if is_healthy:
        return

    logger.warning("Polyaxon agent is not healthy!")
    sys.exit(1)
async def notify_run(
    namespace: str,
    owner: str,
    project: str,
    run_uuid: str,
    run_name: str,
    condition: V1StatusCondition,
    connections: List[str],
):
    """Create one notifier resource per requested connection.

    For every name in `connections`, the connection is looked up in
    `settings.AGENT_CONFIG.notification_connections_by_names`. Names that are
    missing are logged and skipped. For the others a `V1Operation` wrapping a
    `V1Notifier` is compiled, turned into a resource with `compiler.make`
    and submitted with `AsyncSpawner.create`.

    Args:
        namespace: namespace passed to the `AsyncSpawner`.
        owner: owner name, passed as a param and to the compiler.
        project: project name; also passed to the compiler as `project_uuid`.
        run_uuid: run identifier; also passed to the compiler as `run_path`.
        run_name: run name forwarded to the notifier.
        condition: status condition, serialized to JSON for the notifier.
        connections: names of the notification connections to use.
    """
    spawner = AsyncSpawner(namespace=namespace)
    await spawner.k8s_manager.setup()

    for connection in connections:
        connection_type = settings.AGENT_CONFIG.notification_connections_by_names.get(
            connection
        )
        if not connection_type:
            # Log the requested name: `connection_type` is falsy on this
            # path and would not identify the failing connection.
            logger.warning(
                "Could not create notification using connection {}, "
                "the connection was not found or not set correctly.".format(
                    connection
                )
            )
            continue

        operation = V1Operation(
            params={
                "kind": connection_type.kind,
                "owner": owner,
                "project": project,
                "run_uuid": run_uuid,
                "run_name": run_name,
                "condition": ujson.dumps(condition.to_dict()),
            },
            termination=V1Termination(max_retries=3),
            component=V1Component(
                name="slack-notification",
                plugins=V1Plugins(
                    auth=False,
                    collect_logs=False,
                    collect_artifacts=False,
                    collect_resources=False,
                    sync_statuses=False,
                ),
                inputs=[
                    V1IO(name="kind", iotype=types.STR, is_optional=False),
                    V1IO(name="owner", iotype=types.STR, is_optional=False),
                    V1IO(name="project", iotype=types.STR, is_optional=False),
                    V1IO(name="run_uuid", iotype=types.STR, is_optional=False),
                    V1IO(name="run_name", iotype=types.STR, is_optional=True),
                    V1IO(name="condition", iotype=types.STR, is_optional=True),
                    V1IO(name="connection", iotype=types.STR, is_optional=True),
                ],
                run=V1Notifier(
                    connections=[connection],
                    container=get_default_notification_container(),
                ),
            ),
        )
        compiled_operation = OperationSpecification.compile_operation(operation)
        resource = compiler.make(
            owner_name=owner,
            project_name=project,
            project_uuid=project,
            run_uuid=run_uuid,
            run_name=run_name,
            run_path=run_uuid,
            compiled_operation=compiled_operation,
            params=operation.params,
        )
        await spawner.create(
            run_uuid=run_uuid,
            run_kind=compiled_operation.get_run_kind(),
            resource=resource,
        )
async def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Run the async sidecar loop that syncs logs and outputs for a run.

    The loop polls `k8s_manager.is_pod_running` every `sleep_interval`
    seconds and calls the sync routine once every `sync_interval` polls
    (as computed by `get_sync_interval`). It stops when the container is no
    longer running or after repeated `ApiException`s push `retry` past 3,
    then performs one final sync.

    Args:
        container_id: container whose running state is polled.
        sleep_interval: seconds to sleep between polls.
        sync_interval: requested sync interval, normalised by
            `get_sync_interval`.
        monitor_outputs: when true, `sync_artifacts` and `sync_summaries`
            are called on each sync.
        monitor_logs: when true, `sync_logs` is awaited on each sync.

    Raises:
        PolyaxonContainerException: if the pod id environment variable is
            missing or the run info cannot be resolved.
    """
    sync_interval = get_sync_interval(
        interval=sync_interval, sleep_interval=sleep_interval
    )
    try:
        pod_id = os.environ[POLYAXON_KEYS_K8S_POD_ID]
    except KeyError as e:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context."
        ) from e

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)
    k8s_manager = AsyncK8SManager(namespace=CLIENT_CONFIG.namespace, in_cluster=True)
    await k8s_manager.setup()

    # Everything after setup runs under try/finally so the manager is closed
    # even when fetching the pod, the loop or the final sync raises.
    try:
        pod = await k8s_manager.get_pod(pod_id, reraise=True)

        retry = 1
        is_running = True
        counter = 0
        state = {
            "last_artifacts_check": None,
            "last_logs_check": None,
        }

        async def monitor():
            if monitor_logs:
                await sync_logs(
                    run_uuid=run_uuid,
                    k8s_manager=k8s_manager,
                    pod=pod,
                    last_time=None,
                    stream=True,
                    is_running=is_running,
                )
            if monitor_outputs:
                last_check = state["last_artifacts_check"]
                state["last_artifacts_check"] = sync_artifacts(
                    last_check=last_check,
                    run_uuid=run_uuid,
                )
                sync_summaries(
                    last_check=last_check,
                    run_uuid=run_uuid,
                    client=client,
                )

        while is_running and retry <= 3:
            await asyncio.sleep(sleep_interval)
            try:
                is_running = await k8s_manager.is_pod_running(pod_id, container_id)
            except ApiException as e:
                # Back off a little longer on each consecutive API failure.
                retry += 1
                logger.info("Exception %r", e)
                logger.info("Sleeping ...")
                await asyncio.sleep(retry)
                continue

            logger.debug("Syncing ...")
            if is_running:
                retry = 1

            counter += 1
            if counter == sync_interval:
                counter = 0
                try:
                    await monitor()
                except Exception as e:
                    # A failed sync must not stop the sidecar loop.
                    logger.warning("Polyaxon sidecar error: %r", e)

        # Final sync once the loop has ended.
        await monitor()
        logger.info("Cleaning non main containers")
    finally:
        await k8s_manager.close()
def start_sidecar(
    container_id: str,
    sleep_interval: int,
    sync_interval: int,
    monitor_outputs: bool,
    monitor_logs: bool,
):
    """Run the blocking sidecar loop that syncs outputs and logs for a run.

    The loop polls `is_pod_running` every `sleep_interval` seconds and calls
    the sync routine once every `sync_interval` polls (as computed by
    `get_sync_interval`). It stops when the container is no longer running
    or after repeated `ApiException`s push `retry` past 3, then performs one
    final sync.

    Args:
        container_id: container whose running state is polled.
        sleep_interval: seconds to sleep between polls.
        sync_interval: requested sync interval, normalised by
            `get_sync_interval`.
        monitor_outputs: when true, `sync_artifacts` and `sync_summaries`
            are called on each sync.
        monitor_logs: when true, `sync_logs` is called on each sync.

    Raises:
        PolyaxonContainerException: if the run info cannot be resolved or
            `CLIENT_CONFIG.pod_id` is not set.
    """
    sync_interval = get_sync_interval(
        interval=sync_interval, sleep_interval=sleep_interval
    )

    try:
        owner, project, run_uuid = get_run_info()
    except PolyaxonClientException as e:
        raise PolyaxonContainerException(e) from e

    client = RunClient(owner=owner, project=project, run_uuid=run_uuid)

    pod_id = CLIENT_CONFIG.pod_id
    if not pod_id:
        raise PolyaxonContainerException(
            "Please make sure that this job has been "
            "started by Polyaxon with all required context."
        )

    k8s_manager = K8SManager(namespace=CLIENT_CONFIG.namespace, in_cluster=True)
    retry = 1
    is_running = True
    counter = 0
    state = {
        "last_artifacts_check": None,
        "last_logs_check": None,
    }

    def monitor():
        if monitor_outputs:
            last_check = state["last_artifacts_check"]
            state["last_artifacts_check"] = sync_artifacts(
                last_check=last_check,
                run_uuid=run_uuid,
            )
            sync_summaries(
                last_check=last_check,
                run_uuid=run_uuid,
                client=client,
            )
        if monitor_logs:
            state["last_logs_check"] = sync_logs(
                k8s_manager=k8s_manager,
                client=client,
                last_check=state["last_logs_check"],
                run_uuid=run_uuid,
                pod_id=pod_id,
                container_id=container_id,
                owner=owner,
                project=project,
            )

    while is_running and retry <= 3:
        time.sleep(sleep_interval)
        try:
            is_running = is_pod_running(k8s_manager, pod_id, container_id)
        except ApiException as e:
            retry += 1
            logger.info("Exception %r", e)
            logger.info("Sleeping ...")
            time.sleep(1 * retry)
            # Skip the rest of the iteration: `is_running` is stale here, and
            # falling through would reset `retry` and defeat the retry limit.
            continue

        logger.debug("Syncing ...")
        if is_running:
            retry = 1

        counter += 1
        if counter == sync_interval:
            counter = 0
            try:
                monitor()
            except Exception as e:
                # A failed sync must not stop the sidecar loop. `%s` is used
                # because `%e` is a float conversion and fails on exceptions.
                logger.warning("Polyaxon sidecar error: %s", e)

    # Final sync once the loop has ended.
    monitor()
    logger.info("Cleaning non main containers")