def download_file(self, url, path, **kwargs): """This function downloads a single file or several files compressed in tar.gz. If the untar args is not specified it assumes a single file. If untar is provided: False or True it appends the tar.gz extension. If untar is True: it extracts the file. If untar is False: it keeps the file compressed. """ local_path = kwargs.pop("path_to", None) local_path = local_path or get_path( settings.CLIENT_CONFIG.archive_root, self._client.run_uuid ) if path: local_path = get_path(local_path, path) _local_path = local_path untar = kwargs.get("untar") if untar is not None: _local_path = _local_path + ".tar.gz" if untar is False: local_path = _local_path check_or_create_path(_local_path, is_dir=False) if not os.path.exists(_local_path): self.download( filename=_local_path, params={"path": path}, url=url, **kwargs ) return local_path
def resolve_globals_contexts(
    namespace: str,
    owner_name: str,
    project_name: str,
    project_uuid: str,
    run_uuid: str,
    run_name: str,
    run_path: str,
    iteration: int,
    created_at: datetime,
    compiled_at: datetime,
    plugins: V1Plugins = None,
    artifacts_store: V1ConnectionType = None,
    cloning_kind: V1CloningKind = None,
    original_uuid: str = None,
) -> Dict:
    resolved_contexts = {
        "globals": {
            "owner_name": owner_name,
            "project_name": project_name,
            "project_unique_name": "{}.{}".format(owner_name, project_name),
            "project_uuid": project_uuid,
            "run_info": "{}.{}.runs.{}".format(owner_name, project_name, run_uuid),
            "name": run_name,
            "uuid": run_uuid,
            "namespace": namespace,
            "iteration": iteration,
            "context_path": contexts.CONTEXT_ROOT,
            "artifacts_path": contexts.CONTEXT_MOUNT_ARTIFACTS,
            "created_at": created_at,
            "compiled_at": compiled_at,
            "cloning_kind": cloning_kind,
            "original_uuid": original_uuid,
        },
    }

    contexts_spec = PluginsContextsSpec.from_config(plugins)
    if contexts_spec.collect_artifacts:
        run_artifacts_path = contexts.CONTEXT_MOUNT_ARTIFACTS_FORMAT.format(run_path)
        run_outputs_path = contexts.CONTEXT_MOUNT_RUN_OUTPUTS_FORMAT.format(run_path)
        resolved_contexts["globals"]["run_artifacts_path"] = run_artifacts_path
        resolved_contexts["globals"]["run_outputs_path"] = run_outputs_path
    elif artifacts_store:
        run_artifacts_path = get_path(artifacts_store.store_path, run_path)
        run_outputs_path = get_path(run_artifacts_path, "outputs")
        resolved_contexts["globals"]["run_artifacts_path"] = run_artifacts_path
        resolved_contexts["globals"]["run_outputs_path"] = run_outputs_path

    return resolved_contexts

async def upload_file(subpath: str):
    path_from = get_path(settings.AGENT_CONFIG.artifacts_root, subpath)
    path_to = get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath)
    try:
        return manager.upload_file_or_dir(
            connection_type=settings.AGENT_CONFIG.artifacts_store,
            path_from=path_from,
            path_to=path_to,
            is_file=True,
        )
    except (OSError, PolyaxonException) as e:
        logger.warning("Could not upload %s. Error %s" % (path_from, e))
        return None

def tar_dir(download_path: str) -> str:
    outputs_files = get_files_in_path(download_path)
    tar_base_name = os.path.basename(download_path)
    tar_name = "{}.tar.gz".format(tar_base_name)
    target_path = get_path(settings.CLIENT_CONFIG.archive_root, tar_name)
    create_tarfile(files=outputs_files, tar_path=target_path, relative_to=download_path)
    return target_path

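# A stdlib-only sketch of what create_tarfile above is expected to do
# (get_files_in_path and create_tarfile are project helpers; the signature
# mirrored here is taken from the call site, the body is an assumption):
import os
import tarfile

def create_tarfile_sketch(files, tar_path, relative_to):
    # Write each file into a gzip-compressed tar, storing paths relative to
    # `relative_to` so the archive extracts without absolute paths.
    with tarfile.open(tar_path, "w:gz") as tar:
        for f in files:
            tar.add(f, arcname=os.path.relpath(f, relative_to))
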
def test_get_artifacts_path_container_with_managed_mount_store(self):
    store = V1ConnectionType(
        name="test_gcs",
        kind=V1ConnectionKind.VOLUME_CLAIM,
        schema=V1ClaimConnection(mount_path="/claim/path", volume_claim="claim"),
    )
    container = get_artifacts_path_container(
        polyaxon_init=V1PolyaxonInitContainer(
            image="init", image_pull_policy="IfNotPresent"
        ),
        artifacts_store=store,
        run_path="run_uid",
        clean=True,
    )
    init_args = init_artifact_context_args("run_uid")
    init_args.append(
        get_artifacts_store_args(
            artifacts_path=get_path(store.store_path, "run_uid"), clean=True
        )
    )
    assert container == get_base_store_container(
        container=k8s_schemas.V1Container(name="default"),
        container_name=INIT_ARTIFACTS_CONTAINER.format("default"),
        polyaxon_init=V1PolyaxonInitContainer(
            image="init", image_pull_policy="IfNotPresent"
        ),
        store=store,
        env=[],
        env_from=[],
        volume_mounts=[get_artifacts_context_mount()],
        args=[" ".join(init_args)],
        is_artifact_store=True,
    )

def resolve_globals_contexts(
    namespace: str,
    owner_name: str,
    project_name: str,
    project_uuid: str,
    run_uuid: str,
    run_name: str,
    run_path: str,
    iteration: int,
) -> Dict:
    resolved_contexts = {
        "globals": {
            "owner_name": owner_name,
            "project_name": project_name,
            "project_unique_name": "{}.{}".format(owner_name, project_name),
            "project_uuid": project_uuid,
            "run_info": "{}.{}.runs.{}".format(owner_name, project_name, run_uuid),
            "name": run_name,
            "uuid": run_uuid,
            "namespace": namespace,
            "iteration": iteration,
            "artifacts_path": get_path(contexts.CONTEXT_MOUNT_ARTIFACTS, run_path),
        }
    }
    return resolved_contexts

def get_artifacts_path_container(
    polyaxon_init: V1PolyaxonInitContainer,
    artifacts_store: V1ConnectionType,
    run_path: str,
    clean: bool = True,
) -> Optional[k8s_schemas.V1Container]:
    if not artifacts_store:
        raise PolypodException("Init artifacts container requires a store.")

    init_args = init_artifact_context_args(run_path=run_path)
    if not artifacts_store.is_bucket:
        artifacts_path = get_path(artifacts_store.store_path, run_path)
        init_args.append(
            get_artifacts_store_args(artifacts_path=artifacts_path, clean=clean)
        )

    container_name = INIT_ARTIFACTS_CONTAINER.format(DEFAULT)
    container = k8s_schemas.V1Container(name=container_name)
    return get_base_store_container(
        container_name=container_name,
        container=container,
        polyaxon_init=polyaxon_init,
        store=artifacts_store,
        env=[],
        env_from=[],
        volume_mounts=[get_artifacts_context_mount()],
        # If we are dealing with a volume we need to make sure the path exists for the user.
        # We also clean the path if this is not a resume run.
        args=[" ".join(init_args)],
        is_artifact_store=True,
    )

def download_file(self, url, path, **kwargs):
    local_path = get_path(
        settings.CLIENT_CONFIG.archive_root,
        self._client.run_uuid,
    )
    _local_path = local_path
    if path:
        _local_path = get_path(local_path, path)
    if kwargs.get("untar"):
        _local_path = _local_path + ".tar.gz"
    check_or_create_path(_local_path, is_dir=False)
    if not os.path.exists(_local_path):
        self.download(filename=_local_path, params={"path": path}, url=url, **kwargs)
    return local_path

def delete_path(
    subpath: str, workers: int = 0, connection_type: V1ConnectionType = None
):
    connection_type = connection_type or get_artifacts_connection()
    validate_store(connection_type)
    store_path = get_path(connection_type.store_path, subpath)
    store_manager = get_connection_from_type(connection_type=connection_type)
    store_manager.delete(store_path, workers=workers)

def list_files(
    subpath: str, filepath: str = None, connection_type: V1ConnectionType = None
):
    connection_type = connection_type or get_artifacts_connection()
    validate_store(connection_type)
    store_path = get_path(connection_type.store_path, subpath)
    if filepath:
        store_path = get_path(store_path, filepath)
    store_manager = get_connection_from_type(connection_type=connection_type)
    try:
        results = store_manager.ls(store_path)
        results["files"] = {f[0]: f[1] for f in results["files"]}
        return results
    except Exception:
        raise PolyaxonStoresException(
            "Run store path does not exist or bad configuration."
        )

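# The comprehension above assumes store_manager.ls returns "files" as a list of
# (name, size) pairs; a minimal illustration of that conversion (values invented):
listing = {"dirs": ["plots"], "files": [("model.joblib", 2048), ("metrics.json", 512)]}
listing["files"] = {f[0]: f[1] for f in listing["files"]}
assert listing["files"] == {"model.joblib": 2048, "metrics.json": 512}
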
async def delete_file(subpath: str) -> bool:
    try:
        manager.delete_file_or_dir(
            connection_type=settings.AGENT_CONFIG.artifacts_store,
            subpath=get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath),
            is_file=True,
        )
        return True
    except (OSError, PolyaxonException) as e:
        logger.warning("Could not delete %s. Error %s" % (subpath, e))
        return False

async def download_dir(subpath: str, to_tar: bool = False) -> str:
    path_from = get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath)
    path_to = os.path.join(settings.CLIENT_CONFIG.archive_root, subpath)
    check_or_create_path(path_to, is_dir=True)
    return manager.download_file_or_dir(
        connection_type=settings.AGENT_CONFIG.artifacts_store,
        path_from=path_from,
        path_to=path_to,
        is_file=False,
        workers=5,
        to_tar=to_tar,
    )

async def upload_data(subpath: str, data):
    path_to = get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath)
    path_from = os.path.join(settings.AGENT_CONFIG.artifacts_root, subpath)
    check_or_create_path(path_from, is_dir=False)
    async with aiofiles.open(path_from, "w") as filepath_upload:
        await filepath_upload.write(data)
    manager.upload_file_or_dir(
        connection_type=settings.AGENT_CONFIG.artifacts_store,
        path_from=path_from,
        path_to=path_to,
        is_file=True,
    )

async def upload_dir(
    subpath: str, path_from: str, workers: int = 0, last_time: datetime = None
):
    path_to = get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath)
    manager.upload_file_or_dir(
        connection_type=settings.AGENT_CONFIG.artifacts_store,
        path_from=path_from,
        path_to=path_to,
        is_file=False,
        workers=workers,
        last_time=last_time,
    )

async def download_file(subpath: str, check_cache=True) -> str:
    path_from = get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath)
    path_to = os.path.join(settings.CLIENT_CONFIG.archive_root, subpath)
    if check_cache and os.path.exists(path_to):
        # file already exists
        return path_to

    check_or_create_path(path_to, is_dir=False)
    return manager.download_file_or_dir(
        connection_type=settings.AGENT_CONFIG.artifacts_store,
        path_from=path_from,
        path_to=path_to,
        is_file=True,
    )

async def download_dir(subpath: str, to_tar: bool = False) -> Optional[str]:
    path_from = get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath)
    path_to = os.path.join(settings.CLIENT_CONFIG.archive_root, subpath)
    check_or_create_path(path_to, is_dir=True)
    try:
        return manager.download_file_or_dir(
            connection_type=settings.AGENT_CONFIG.artifacts_store,
            path_from=path_from,
            path_to=path_to,
            is_file=False,
            workers=5,
            to_tar=to_tar,
        )
    except (OSError, PolyaxonException) as e:
        logger.warning("Could not download %s. Error %s" % (path_from, e))
        return None

def delete_file_or_dir(
    subpath: str,
    is_file: bool = False,
    workers: int = 0,
    connection_type: V1ConnectionType = None,
):
    connection_type = connection_type or get_artifacts_connection()
    validate_store(connection_type)
    store_path = get_path(connection_type.store_path, subpath)
    store_manager = get_connection_from_type(connection_type=connection_type)
    if is_file:
        store_manager.delete_file(store_path)
    else:
        store_manager.delete(store_path, workers=workers)

async def download_file(subpath: str, check_cache=True) -> Optional[str]:
    path_from = get_path(settings.AGENT_CONFIG.artifacts_store.store_path, subpath)
    path_to = os.path.join(settings.CLIENT_CONFIG.archive_root, subpath)
    if os.path.exists(path_to):
        if check_cache:
            # file already exists
            return path_to
        else:
            os.remove(path_to)

    check_or_create_path(path_to, is_dir=False)
    try:
        return manager.download_file_or_dir(
            connection_type=settings.AGENT_CONFIG.artifacts_store,
            path_from=path_from,
            path_to=path_to,
            is_file=True,
        )
    except PolyaxonException:
        return None

def sync_events_summaries(
    events_path: str,
    events_kind: str,
    last_check: Optional[datetime],
    connection_name: str = None,
) -> Tuple[List, Dict]:
    current_events_path = get_path(events_path, events_kind)

    summaries = []
    last_values = {}
    with get_files_in_path_context(current_events_path) as files:
        for f in files:
            if last_check and not file_modified_since(filepath=f, last_time=last_check):
                continue

            event_name = os.path.basename(f).split(".plx")[0]
            event = V1Events.read(kind=events_kind, name=event_name, data=f)
            if event.df.empty:
                continue

            # Keep only the relative path, starting from the run uuid
            event_rel_path = os.path.relpath(f, CONTEXT_MOUNT_ARTIFACTS)
            summary = event.get_summary()
            run_artifact = V1RunArtifact(
                name=event_name,
                kind=events_kind,
                connection=connection_name,
                summary=summary,
                path=event_rel_path,
                is_input=False,
            )
            summaries.append(run_artifact)
            if events_kind == V1ArtifactKind.METRIC:
                last_values[event_name] = summary[V1ArtifactKind.METRIC]["last"]

    return summaries, last_values

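# file_modified_since is a project helper; a plausible stdlib-only sketch of the
# check it performs (an assumption about its behavior, not the actual implementation):
import os
from datetime import datetime

def file_modified_since_sketch(filepath: str, last_time: datetime) -> bool:
    # Compare the file's mtime against the timestamp of the last sync.
    return datetime.fromtimestamp(os.path.getmtime(filepath)) > last_time
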
def test_get_path(self):
    assert get_path("/foo", "bar") == "/foo/bar"

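# get_path is used throughout these snippets as a path-join helper; a minimal
# sketch of the semantics the test above exercises, assuming it behaves like
# os.path.join (the real helper may handle store schemes such as buckets differently):
import os

def get_path_sketch(base, *parts):
    return os.path.join(base, *parts)

assert get_path_sketch("/foo", "bar") == "/foo/bar"
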
def resolve_globals_contexts(
    namespace: str,
    owner_name: str,
    project_name: str,
    project_uuid: str,
    run_uuid: str,
    run_name: str,
    run_path: str,
    iteration: int,
    created_at: datetime,
    compiled_at: datetime,
    schedule_at: datetime = None,
    started_at: datetime = None,
    finished_at: datetime = None,
    duration: int = None,
    plugins: V1Plugins = None,
    artifacts_store: V1ConnectionType = None,
    cloning_kind: V1CloningKind = None,
    original_uuid: str = None,
) -> Dict:
    resolved_contexts = {
        contexts_sections.GLOBALS: {
            contexts_keys.OWNER_NAME: owner_name,
            contexts_keys.PROJECT_NAME: project_name,
            contexts_keys.PROJECT_UNIQUE_NAME: get_project_instance(
                owner_name, project_name
            ),
            contexts_keys.PROJECT_UUID: project_uuid,
            contexts_keys.RUN_INFO: get_run_instance(
                owner_name, project_name, run_uuid
            ),
            contexts_keys.NAME: run_name,
            contexts_keys.UUID: run_uuid,
            contexts_keys.NAMESPACE: namespace,
            contexts_keys.ITERATION: iteration,
            contexts_keys.CONTEXT_PATH: contexts.CONTEXT_ROOT,
            contexts_keys.ARTIFACTS_PATH: contexts.CONTEXT_MOUNT_ARTIFACTS,
            contexts_keys.CREATED_AT: created_at,
            contexts_keys.COMPILED_AT: compiled_at,
            contexts_keys.SCHEDULE_AT: schedule_at,
            contexts_keys.STARTED_AT: started_at,
            contexts_keys.FINISHED_AT: finished_at,
            contexts_keys.DURATION: duration,
            contexts_keys.CLONING_KIND: cloning_kind,
            contexts_keys.ORIGINAL_UUID: original_uuid,
        },
    }

    contexts_spec = PluginsContextsSpec.from_config(plugins)
    if contexts_spec.collect_artifacts:
        run_artifacts_path = contexts.CONTEXT_MOUNT_ARTIFACTS_FORMAT.format(run_path)
        run_outputs_path = contexts.CONTEXT_MOUNT_RUN_OUTPUTS_FORMAT.format(run_path)
        resolved_contexts[contexts_sections.GLOBALS][
            contexts_keys.RUN_ARTIFACTS_PATH
        ] = run_artifacts_path
        resolved_contexts[contexts_sections.GLOBALS][
            contexts_keys.RUN_OUTPUTS_PATH
        ] = run_outputs_path
    elif artifacts_store:
        run_artifacts_path = get_path(artifacts_store.store_path, run_path)
        run_outputs_path = get_path(run_artifacts_path, "outputs")
        resolved_contexts[contexts_sections.GLOBALS][
            contexts_keys.RUN_ARTIFACTS_PATH
        ] = run_artifacts_path
        resolved_contexts[contexts_sections.GLOBALS][
            contexts_keys.RUN_OUTPUTS_PATH
        ] = run_outputs_path

    return resolved_contexts

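# get_project_instance and get_run_instance replace the inline formats used in
# the earlier variants of this function; based on those variants they reduce to:
def get_project_instance_sketch(owner_name, project_name):
    return "{}.{}".format(owner_name, project_name)

def get_run_instance_sketch(owner_name, project_name, run_uuid):
    return "{}.{}.runs.{}".format(owner_name, project_name, run_uuid)

assert get_project_instance_sketch("acme", "classification") == "acme.classification"
assert (
    get_run_instance_sketch("acme", "classification", "8aac02e3")
    == "acme.classification.runs.8aac02e3"
)
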
def resolve_contexts(
    namespace: str,
    owner_name: str,
    project_name: str,
    project_uuid: str,
    run_uuid: str,
    run_name: str,
    run_path: str,
    compiled_operation: V1CompiledOperation,
    artifacts_store: V1ConnectionType,
    connection_by_names: Dict[str, V1ConnectionType],
    iteration: int,
) -> Dict:
    resolved_contexts = {
        "globals": {
            "owner_name": owner_name,
            "project_name": project_name,
            "project_unique_name": "{}.{}".format(owner_name, project_name),
            "project_uuid": project_uuid,
            "run_info": "{}.{}.runs.{}".format(owner_name, project_name, run_uuid),
            "name": run_name,
            "uuid": run_uuid,
            "namespace": namespace,
            "iteration": iteration,
        },
        "init": {},
        "connections": {},
    }

    contexts_spec = PluginsContextsSpec.from_config(compiled_operation.plugins)
    if contexts_spec.collect_artifacts:
        resolved_contexts["globals"]["artifacts_path"] = get_path(
            contexts.CONTEXT_MOUNT_ARTIFACTS, run_path
        )
    elif artifacts_store:
        resolved_contexts["globals"]["artifacts_path"] = get_path(
            artifacts_store.store_path, run_path
        )

    if compiled_operation and not compiled_operation.has_pipeline:
        init = compiled_operation.run.init or []
        init_connections = [i for i in init if i.connection]
        for init_connection in init_connections:
            if connection_by_names[init_connection.connection].schema:
                resolved_contexts["init"][
                    init_connection.connection
                ] = connection_by_names[init_connection.connection].schema.to_dict()

    if compiled_operation.run.connections:
        for connection in compiled_operation.run.connections:
            if connection_by_names[connection].schema:
                resolved_contexts["connections"][connection] = connection_by_names[
                    connection
                ].schema.to_dict()

    if compiled_operation.is_service_run:
        resolved_contexts["globals"]["ports"] = compiled_operation.run.ports
        base_url = "/{service}/{namespace}/{owner_name}/{project_name}/runs/{run_uuid}".format(
            service=REWRITE_SERVICES_V1
            if compiled_operation.run.rewrite_path
            else SERVICES_V1,
            namespace=namespace,
            owner_name=owner_name,
            project_name=project_name,
            run_uuid=run_uuid,
        )
        resolved_contexts["globals"]["base_url"] = base_url

    return resolved_contexts

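# An illustration of the base_url built at the end of resolve_contexts, with
# placeholder values; "services/v1" stands in for the SERVICES_V1 constant,
# whose real value lives in the project:
base_url = "/{service}/{namespace}/{owner_name}/{project_name}/runs/{run_uuid}".format(
    service="services/v1",
    namespace="polyaxon",
    owner_name="acme",
    project_name="classification",
    run_uuid="8aac02e3a62a4f0aaa257c59da5eab80",
)
# -> "/services/v1/polyaxon/acme/classification/runs/8aac02e3a62a4f0aaa257c59da5eab80"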