class DatcoreClient(object):
    def __init__(self, api_token=None, api_secret=None, host=None, streaming_host=None):
        # WARNING: construction raises an exception if the service is not available.
        # Use datacore_wrapper for safe calls
        # TODO: can use https://developer.blackfynn.io/python/latest/configuration.html#environment-variables
        self._bf = Blackfynn(
            profile=None,
            api_token=api_token,
            api_secret=api_secret,
            host=host,
            streaming_host=streaming_host,
        )

    def profile(self):
        """
        Returns profile of current User
        """
        return self._bf.profile

    def _collection_from_destination(self, destination: str):
        destination_path = Path(destination)
        parts = destination_path.parts

        dataset_name = parts[0]
        dataset = self.get_dataset(dataset_name)
        if dataset is None:
            return None, None

        collection_id = dataset.id
        collection = dataset
        collections = []
        if len(parts) > 1:
            object_path = Path(*parts[1:])
            collections = list(object_path.parts)
            collection_id = ""
            collection_id = _get_collection_id(dataset, collections, collection_id)
            collection = self._bf.get(collection_id)

        return collection, collection_id

    def _destination_from_id(self, destination_id: str):
        # NOTE: .get(*) logs
        #   INFO:blackfynn.client.Blackfynn:Unable to retrieve object
        # if destination_id refers to a Dataset
        destination: Union[DataPackage, Collection] = self._bf.get(destination_id)
        if destination is None:
            destination: Dataset = self._bf.get_dataset(destination_id)

        return destination

    def list_files_recursively(self, dataset_filter: str = ""):
        files = []
        for dataset in self._bf.datasets():
            if not dataset_filter or dataset_filter in dataset.name:
                self.list_dataset_files_recursively(files, dataset, Path(dataset.name))

        return files

    def list_files_raw_dataset(self, dataset_id: str) -> List[FileMetaDataEx]:
        files = []  # raw packages
        _files = []  # fmds
        data = {}  # map to keep track of parents-child

        cursor = ""
        page_size = 1000
        api = self._bf._api.datasets

        dataset = self._bf.get_dataset(dataset_id)
        if dataset is not None:
            # fetch all packages of the dataset, page by page
            while True:
                resp = api._get(
                    api._uri(
                        "/{id}/packages?cursor={cursor}&pageSize={pageSize}&includeSourceFiles={includeSourceFiles}",
                        id=dataset_id,
                        cursor=cursor,
                        pageSize=page_size,
                        includeSourceFiles=False,
                    )
                )
                for package in resp.get("packages", list()):
                    id = package["content"]["id"]
                    data[id] = package
                    files.append(package)
                cursor = resp.get("cursor")
                if cursor is None:
                    break

            for f in files:
                if f["content"]["packageType"] != "Collection":
                    filename = f["content"]["name"]
                    file_path = ""
                    file_id = f["content"]["nodeId"]
                    _f = f
                    # walk up the parent collections to rebuild the file path
                    while "parentId" in _f["content"].keys():
                        parentid = _f["content"]["parentId"]
                        _f = data[parentid]
                        file_path = _f["content"]["name"] + "/" + file_path

                    bucket_name = dataset.name
                    file_name = filename
                    file_size = 0
                    object_name = str(Path(file_path) / file_name)
                    file_uuid = str(Path(bucket_name) / object_name)
                    created_at = f["content"]["createdAt"]
                    last_modified = f["content"]["updatedAt"]
                    parent_id = dataset_id
                    if "parentId" in f["content"]:
                        parentId = f["content"]["parentId"]
                        parent_id = data[parentId]["content"]["nodeId"]

                    fmd = FileMetaData(
                        bucket_name=bucket_name,
                        file_name=file_name,
                        object_name=object_name,
                        location=DATCORE_STR,
                        location_id=DATCORE_ID,
                        file_uuid=file_uuid,
                        file_id=file_id,
                        raw_file_path=file_uuid,
                        display_file_path=file_uuid,
                        created_at=created_at,
                        last_modified=last_modified,
                        file_size=file_size,
                    )
                    fmdx = FileMetaDataEx(fmd=fmd, parent_id=parent_id)
                    _files.append(fmdx)

        return _files

    def list_files_raw(self, dataset_filter: str = "") -> List[FileMetaDataEx]:
        _files = []
        for dataset in self._bf.datasets():
            _files = _files + self.list_files_raw_dataset(dataset.id)

        return _files

    def list_dataset_files_recursively(
        self, files: List[FileMetaData], base: BaseCollection, current_root: Path
    ):
        for item in base:
            if isinstance(item, Collection):
                _current_root = current_root / Path(item.name)
                self.list_dataset_files_recursively(files, item, _current_root)
            else:
                parts = current_root.parts
                bucket_name = parts[0]
                file_name = item.name
                file_size = 0
                # lets assume we have only one file
                if item.files:
                    file_name = Path(item.files[0].as_dict()["content"]["s3key"]).name
                    file_size = item.files[0].as_dict()["content"]["size"]
                # if this is in the root directory, the object_name is the filename only
                if len(parts) > 1:
                    object_name = str(Path(*list(parts)[1:]) / Path(file_name))
                else:
                    object_name = str(Path(file_name))

                file_uuid = str(Path(bucket_name) / Path(object_name))
                file_id = item.id
                created_at = item.created_at
                last_modified = item.updated_at
                fmd = FileMetaData(
                    bucket_name=bucket_name,
                    file_name=file_name,
                    object_name=object_name,
                    location=DATCORE_STR,
                    location_id=DATCORE_ID,
                    file_uuid=file_uuid,
                    file_id=file_id,
                    raw_file_path=file_uuid,
                    display_file_path=file_uuid,
                    created_at=created_at,
                    last_modified=last_modified,
                    file_size=file_size,
                )
                files.append(fmd)

    def create_dataset(self, ds_name, *, force_delete=False):
        """
        Creates a new dataset for the current user and returns it. Returns the existing
        one if there is already a dataset with the given name.

        Args:
            ds_name (str): Name for the dataset (_,-,' ' and capitalization are ignored)
            force_delete (bool, optional): Delete first if dataset already exists
        """
        ds = None
        with suppress(Exception):
            ds = self._bf.get_dataset(ds_name)
            if force_delete:
                ds.delete()
                ds = None

        if ds is None:
            ds = self._bf.create_dataset(ds_name)

        return ds

    def get_dataset(self, ds_name, create_if_not_exists=False):
        """
        Returns dataset with the given name. Creates it if required.

        Args:
            ds_name (str): Name for the dataset
            create_if_not_exists (bool, optional): Create the dataset if it does not exist
        """
        ds = None
        with suppress(Exception):
            ds = self._bf.get_dataset(ds_name)

        if ds is None and create_if_not_exists:
            ds = self._bf.create_dataset(ds_name)

        return ds

    def delete_dataset(self, ds_name):
        """
        Deletes dataset with the given name.

        Args:
            ds_name (str): Name for the dataset
        """
        # this is not supported
        ds = self.get_dataset(ds_name)
        if ds is not None:
            self._bf.delete(ds.id)

    def exists_dataset(self, ds_name):
        """
        Returns True if dataset with the given name exists.

        Args:
            ds_name (str): Name for the dataset
        """
        ds = self.get_dataset(ds_name)
        return ds is not None

    def upload_file(self, destination: str, filepath: str, meta_data=None) -> bool:
        """
        Uploads a file to a given dataset/collection given its filepath on the host.
        Optionally adds some meta data

        Args:
            destination (str): Path to the dataset/collection into which the file shall be uploaded
            filepath (path): Full path to the file
            meta_data (dict, optional): Dictionary of meta data

        Note:
            Blackfynn postprocesses data based on file endings. If it can do that,
            the filenames on the server change.
        """
        # parse the destination and try to find the package_id to upload to
        collection, collection_id = self._collection_from_destination(destination)

        if collection is None:
            return False

        files = [filepath]
        self._bf._api.io.upload_files(collection, files, display_progress=True, use_agent=False)
        collection.update()

        if meta_data is not None:
            for f in files:
                filename = os.path.basename(f)
                package = self.get_package(collection, filename)
                if package is not None:
                    self._update_meta_data(package, meta_data)

        return True

    def _update_meta_data(self, package, meta_data):
        """
        Updates or replaces metadata for a package

        Args:
            package (package): The package for which the meta data needs update
            meta_data (dict): Dictionary of meta data
        """
        for key in meta_data.keys():
            package.set_property(key, meta_data[key], category="simcore")

        package.update()

    def download_file(self, source, filename, destination_path):
        """
        Downloads a file from a source dataset/collection given its filename. Stores
        it under destination_path

        Args:
            source (dataset/collection): The dataset or collection to download from
            filename (str): Name of the file
            destination_path (str): Path on host for storing file
        """
        url = self.download_link(source, filename)
        if url:
            _file = urllib.request.URLopener()  # nosec
            _file.retrieve(url, destination_path)
            return True
        return False

    def download_link(self, destination, filename):
        """
        returns presigned url for download, destination is a dataset or collection
        """
        collection, collection_id = self._collection_from_destination(destination)

        for item in collection:
            if isinstance(item, DataPackage):
                if Path(item.files[0].as_dict()["content"]["s3key"]).name == filename:
                    file_desc = self._bf._api.packages.get_sources(item.id)[0]
                    url = self._bf._api.packages.get_presigned_url_for_file(item.id, file_desc.id)
                    return url

        return ""

    def download_link_by_id(self, file_id):
        """
        returns presigned url for download of a file given its file_id
        """
        url = ""
        filename = ""
        package = self._bf.get(file_id)
        if package is not None:
            filename = Path(package.files[0].as_dict()["content"]["s3key"]).name

        file_desc = self._bf._api.packages.get_sources(file_id)[0]
        url = self._bf._api.packages.get_presigned_url_for_file(file_id, file_desc.id)

        return url, filename

    def get_package(self, source, filename):
        """
        Returns package from source by name if it exists

        Args:
            source (dataset/collection): The dataset or collection to search in
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                return item

        return None

    def delete_file(self, destination, filename):
        """
        Deletes file by name from destination

        Args:
            destination (dataset/collection): The dataset or collection to delete from
            filename (str): Name of the file
        """
        collection, collection_id = self._collection_from_destination(destination)
        if collection is None:
            return False

        collection.update()
        for item in collection:
            if isinstance(item, DataPackage):
                if Path(item.files[0].as_dict()["content"]["s3key"]).name == filename:
                    self._bf.delete(item)
                    return True

        return False

    def delete_file_by_id(self, id: str) -> bool:
        """
        Deletes file by id

        Args:
            id (str): datcore id for the file
        """
        package: DataPackage = self._bf.get(id)
        package.delete()
        return not package.exists

    def delete_files(self, destination):
        """
        Deletes all files in destination

        Args:
            destination (dataset/collection): The dataset or collection to delete from
        """
        collection, collection_id = self._collection_from_destination(destination)

        if collection is None:
            return False

        collection.update()
        for item in collection:
            self._bf.delete(item)

    def update_meta_data(self, dataset, filename, meta_data):
        """
        Updates metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            meta_data (dict): Dictionary of meta data
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            self._update_meta_data(package, meta_data)

    def get_meta_data(self, dataset, filename):
        """
        Returns metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
        """
        meta_data = {}
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            meta_list = package.properties
            for m in meta_list:
                meta_data[m.key] = m.value

        return meta_data

    def delete_meta_data(self, dataset, filename, keys=None):
        """
        Deletes specified keys in meta data for source/filename.

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            keys (list of str, optional): Deletes specified keys, deletes all meta data if None
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            if keys is None:
                for p in package.properties:
                    package.remove_property(p.key, category="simcore")
            else:
                for k in keys:
                    package.remove_property(k, category="simcore")

    def search(self, what, max_count):
        """
        Searches a thing in the database. Returns max_count results

        Args:
            what (str): query
            max_count (int): Max number of results to return
        """
        return self._bf.search(what, max_count)

    def upload_file_to_id(self, destination_id: str, filepath: str):
        """
        Uploads a file to a given dataset/collection by id, given its filepath on the
        host. Returns the id of the newly created resource.

        Note: filepath could be an array

        Args:
            destination_id: The dataset/collection id into which the file shall be uploaded
            filepath (path): Full path to the file
        """
        _id = ""
        destination = self._destination_from_id(destination_id)
        if destination is None:
            return _id

        files = [filepath]
        try:
            # TODO: PC->MAG: should protected API
            # TODO: add new agent SEE https://developer.blackfynn.io/python/latest/CHANGELOG.html#id31
            result = self._bf._api.io.upload_files(
                destination, files, display_progress=True, use_agent=False
            )
            if result and result[0] and "package" in result[0][0]:
                _id = result[0][0]["package"]["content"]["id"]
        except Exception:
            logger.exception("Error uploading file to datcore")

        return _id

    def create_collection(self, destination_id: str, collection_name: str):
        """
        Creates an empty collection within destination

        Args:
            destination_id: The dataset/collection id in which the collection shall be created
            collection_name (str): Name of the new collection
        """
        destination = self._destination_from_id(destination_id)
        _id = ""

        if destination is None:
            return _id

        new_collection = Collection(collection_name)
        destination.add(new_collection)
        new_collection.update()
        destination.update()
        _id = new_collection.id

        return _id

    def list_datasets(self) -> DatasetMetaDataVec:
        data = []
        for dataset in self._bf.datasets():
            dmd = DatasetMetaData(dataset_id=dataset.id, display_name=dataset.name)
            data.append(dmd)

        return data
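
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only) for the client above: listing files and
# uploading/downloading via "<dataset>/<collection>" destination paths.
# The BF_API_KEY/BF_API_SECRET variables and the dataset name "my-dataset"
# are assumptions made for this example, not part of this module.
#
#   client = DatcoreClient(
#       api_token=os.environ["BF_API_KEY"],
#       api_secret=os.environ["BF_API_SECRET"],
#   )
#
#   # every file of every dataset as FileMetaDataEx entries
#   for fmdx in client.list_files_raw():
#       print(fmdx.fmd.file_uuid, fmdx.parent_id)
#
#   # upload into a collection addressed by path, then fetch a presigned URL
#   client.upload_file("my-dataset/raw", "/tmp/measurement.csv")
#   url = client.download_link("my-dataset/raw", "measurement.csv")
# ---------------------------------------------------------------------------
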
class DatcoreClient(object):
    def __init__(self, api_token=None, api_secret=None, host=None, streaming_host=None):
        self.client = Blackfynn(
            profile=None,
            api_token=api_token,
            api_secret=api_secret,
            host=host,
            streaming_host=streaming_host,
        )

    def _context(self):
        """
        Returns current organizational context
        """
        return self.client.context

    def profile(self):
        """
        Returns profile of current User
        """
        return self.client.profile

    def organization(self):
        """
        Returns organization name
        """
        return self.client.context.name

    def list_datasets(self):
        ds = []
        for item in self.client.datasets():
            ds.append(item.name)

        return ds

    def list_files(self):
        files = []
        for ds in self.client.datasets():
            for item in ds:
                files.append(os.path.join(ds.name, item.name))

        return files

    def create_dataset(self, ds_name, force_delete=False):
        """
        Creates a new dataset for the current user and returns it. Returns the existing
        one if there is already a dataset with the given name.

        Args:
            ds_name (str): Name for the dataset (_,-,' ' and capitalization are ignored)
            force_delete (bool, optional): Delete first if dataset already exists
        """
        ds = None
        try:
            ds = self.client.get_dataset(ds_name)
            if force_delete:
                self.delete_dataset(ds_name)
                ds = None
        except Exception:  # pylint: disable=W0703
            pass

        if ds is None:
            ds = self.client.create_dataset(ds_name)

        return ds

    def get_dataset(self, ds_name, create_if_not_exists=False):
        """
        Returns dataset with the given name. Creates it if required.

        Args:
            ds_name (str): Name for the dataset
            create_if_not_exists (bool, optional): Create the dataset if it does not exist
        """
        ds = None
        try:
            ds = self.client.get_dataset(ds_name)
        except Exception:  # pylint: disable=W0703
            pass

        if ds is None and create_if_not_exists:
            ds = self.client.create_dataset(ds_name)

        return ds

    def delete_dataset(self, ds_name):
        """
        Deletes dataset with the given name.

        Args:
            ds_name (str): Name for the dataset
        """
        # this is not supported
        ds = self.get_dataset(ds_name)
        if ds is not None:
            self.client.delete(ds.id)

    def exists_dataset(self, ds_name):
        """
        Returns True if dataset with the given name exists.

        Args:
            ds_name (str): Name for the dataset
        """
        ds = self.get_dataset(ds_name)
        return ds is not None

    def upload_file(self, dataset, filepaths, meta_data=None):
        """
        Uploads one or more files to a given dataset given their filepaths on the host.
        Optionally adds some meta data

        Args:
            dataset (dataset): The dataset into which the files shall be uploaded
            filepaths (path or list of paths): Full path(s) to the file(s)
            meta_data (dict, optional): Dictionary of meta data

        Note:
            Blackfynn postprocesses data based on file endings. If it can do that,
            the filenames on the server change. This makes it difficult to retrieve
            them back by name (see get_sources below). Also, for now we assume we
            have only single file data.
        """
        if isinstance(filepaths, list):
            files = filepaths
        else:
            files = [filepaths]

        # pylint: disable = E1101
        self.client._api.io.upload_files(dataset, files, display_progress=True)
        dataset.update()

        if meta_data is not None:
            for filepath in files:
                filename = os.path.basename(filepath)
                package = self.get_package(dataset, filename)
                if package is not None:
                    self._update_meta_data(package, meta_data)

    def _update_meta_data(self, package, meta_data):
        """
        Updates or replaces metadata for a package

        Args:
            package (package): The package for which the meta data needs update
            meta_data (dict): Dictionary of meta data
        """
        for key in meta_data.keys():
            package.set_property(key, meta_data[key], category='simcore')

        package.update()

    def download_file(self, source, filename, destination_path):
        """
        Downloads a file from a source dataset/collection given its filename. Stores
        it under destination_path

        Args:
            source (dataset/collection): The dataset or collection to download from
            filename (str): Name of the file
            destination_path (str): Path on host for storing file
        """
        # pylint: disable = E1101
        url = self.download_link(source, filename)
        if url:
            _file = urllib.request.URLopener()
            _file.retrieve(url, destination_path)
            return True
        return False

    def download_link(self, source, filename):
        """
        returns presigned url for download, source is a dataset
        """
        # pylint: disable = E1101
        for item in source:
            if item.name == filename:
                file_desc = self.client._api.packages.get_sources(item.id)[0]
                url = self.client._api.packages.get_presigned_url_for_file(item.id, file_desc.id)
                return url

        return ""

    def exists_file(self, source, filename):
        """
        Checks if file exists in source

        Args:
            source (dataset/collection): The dataset or collection to check
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                return True

        return False

    def get_package(self, source, filename):
        """
        Returns package from source by name if it exists

        Args:
            source (dataset/collection): The dataset or collection to search in
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                return item

        return None

    def delete_file(self, source, filename):
        """
        Deletes file by name from source

        Args:
            source (dataset/collection): The dataset or collection to delete from
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                self.client.delete(item)

    def delete_files(self, source):
        """
        Deletes all files in source

        Args:
            source (dataset/collection): The dataset or collection to delete from
        """
        source.update()
        for item in source:
            self.client.delete(item)

    def update_meta_data(self, dataset, filename, meta_data):
        """
        Updates metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            meta_data (dict): Dictionary of meta data
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            self._update_meta_data(package, meta_data)

    def get_meta_data(self, dataset, filename):
        """
        Returns metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
        """
        meta_data = {}
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            meta_list = package.properties
            for m in meta_list:
                meta_data[m.key] = m.value

        return meta_data

    def delete_meta_data(self, dataset, filename, keys=None):
        """
        Deletes specified keys in meta data for source/filename.

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            keys (list of str, optional): Deletes specified keys, deletes all meta data if None
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            if keys is None:
                for p in package.properties:
                    package.remove_property(p.key, category='simcore')
            else:
                for k in keys:
                    package.remove_property(k, category='simcore')

    def search(self, what, max_count):
        """
        Searches a thing in the database. Returns max_count results

        Args:
            what (str): query
            max_count (int): Max number of results to return
        """
        return self.client.search(what, max_count)
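
# ---------------------------------------------------------------------------
# Minimal manual smoke test for the client above (a sketch, not part of the
# service code). It assumes BF_API_KEY / BF_API_SECRET point at a reachable
# Blackfynn deployment; the dataset name and file path below are hypothetical.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    client = DatcoreClient(
        api_token=os.environ.get("BF_API_KEY"),
        api_secret=os.environ.get("BF_API_SECRET"),
    )
    print("datasets:", client.list_datasets())

    # create (or reuse) a scratch dataset, upload a file and read back its metadata
    ds = client.get_dataset("demo-dataset", create_if_not_exists=True)
    client.upload_file(ds, "/tmp/demo.txt", meta_data={"origin": "smoke-test"})
    print("meta:", client.get_meta_data(ds, "/tmp/demo.txt"))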