class DatcoreClient(object):
    def __init__(self, api_token=None, api_secret=None, host=None, streaming_host=None):
        # WARNING: construction raises an exception if the service is not available.
        # Use datacore_wrapper for safe calls
        # TODO: can use https://developer.blackfynn.io/python/latest/configuration.html#environment-variables
        self._bf = Blackfynn(
            profile=None,
            api_token=api_token,
            api_secret=api_secret,
            host=host,
            streaming_host=streaming_host,
        )

    def profile(self):
        """
        Returns profile of current User
        """
        return self._bf.profile

    def _collection_from_destination(self, destination: str):
        destination_path = Path(destination)
        parts = destination_path.parts

        dataset_name = parts[0]
        dataset = self.get_dataset(dataset_name)
        if dataset is None:
            return None, None

        collection_id = dataset.id
        collection = dataset
        collections = []
        if len(parts) > 1:
            object_path = Path(*parts[1:])
            collections = list(object_path.parts)
            collection_id = ""
            collection_id = _get_collection_id(dataset, collections, collection_id)
            collection = self._bf.get(collection_id)

        return collection, collection_id

    def _destination_from_id(self, destination_id: str):
        # NOTE: .get(*) logs
        #   INFO:blackfynn.client.Blackfynn:Unable to retrieve object
        # if destination_id refers to a Dataset
        destination: Union[DataPackage, Collection] = self._bf.get(destination_id)
        if destination is None:
            destination: Dataset = self._bf.get_dataset(destination_id)

        return destination

    def list_files_recursively(self, dataset_filter: str = ""):
        files = []

        for dataset in self._bf.datasets():
            if not dataset_filter or dataset_filter in dataset.name:
                self.list_dataset_files_recursively(files, dataset, Path(dataset.name))

        return files

    def list_files_raw_dataset(self, dataset_id: str) -> List[FileMetaDataEx]:
        files = []   # raw packages
        _files = []  # fmds
        data = {}    # map to keep track of parents-child

        cursor = ""
        page_size = 1000

        api = self._bf._api.datasets
        dataset = self._bf.get_dataset(dataset_id)
        if dataset is not None:
            while True:
                resp = api._get(
                    api._uri(
                        "/{id}/packages?cursor={cursor}&pageSize={pageSize}&includeSourceFiles={includeSourceFiles}",
                        id=dataset_id,
                        cursor=cursor,
                        pageSize=page_size,
                        includeSourceFiles=False,
                    )
                )
                for package in resp.get("packages", list()):
                    id = package["content"]["id"]
                    data[id] = package
                    files.append(package)
                cursor = resp.get("cursor")
                if cursor is None:
                    break

            for f in files:
                if f["content"]["packageType"] != "Collection":
                    filename = f["content"]["name"]
                    file_path = ""
                    file_id = f["content"]["nodeId"]
                    _f = f
                    while "parentId" in _f["content"].keys():
                        parentid = _f["content"]["parentId"]
                        _f = data[parentid]
                        file_path = _f["content"]["name"] + "/" + file_path

                    bucket_name = dataset.name
                    file_name = filename
                    file_size = 0
                    object_name = str(Path(file_path) / file_name)

                    file_uuid = str(Path(bucket_name) / object_name)
                    created_at = f["content"]["createdAt"]
                    last_modified = f["content"]["updatedAt"]
                    parent_id = dataset_id
                    if "parentId" in f["content"]:
                        parentId = f["content"]["parentId"]
                        parent_id = data[parentId]["content"]["nodeId"]

                    fmd = FileMetaData(
                        bucket_name=bucket_name,
                        file_name=file_name,
                        object_name=object_name,
                        location=DATCORE_STR,
                        location_id=DATCORE_ID,
                        file_uuid=file_uuid,
                        file_id=file_id,
                        raw_file_path=file_uuid,
                        display_file_path=file_uuid,
                        created_at=created_at,
                        last_modified=last_modified,
                        file_size=file_size,
                    )
                    fmdx = FileMetaDataEx(fmd=fmd, parent_id=parent_id)
                    _files.append(fmdx)

        return _files

    def list_files_raw(self, dataset_filter: str = "") -> List[FileMetaDataEx]:
        _files = []

        for dataset in self._bf.datasets():
            _files = _files + self.list_files_raw_dataset(dataset.id)

        return _files

    def list_dataset_files_recursively(self, files: List[FileMetaData], base: BaseCollection, current_root: Path):
        for item in base:
            if isinstance(item, Collection):
                _current_root = current_root / Path(item.name)
                self.list_dataset_files_recursively(files, item, _current_root)
            else:
                parts = current_root.parts
                bucket_name = parts[0]
                file_name = item.name
                file_size = 0
                # let's assume we have only one file
                if item.files:
                    file_name = Path(item.files[0].as_dict()["content"]["s3key"]).name
                    file_size = item.files[0].as_dict()["content"]["size"]
                # if this is in the root directory, the object_name is the filename only
                if len(parts) > 1:
                    object_name = str(Path(*list(parts)[1:]) / Path(file_name))
                else:
                    object_name = str(Path(file_name))

                file_uuid = str(Path(bucket_name) / Path(object_name))
                file_id = item.id
                created_at = item.created_at
                last_modified = item.updated_at
                fmd = FileMetaData(
                    bucket_name=bucket_name,
                    file_name=file_name,
                    object_name=object_name,
                    location=DATCORE_STR,
                    location_id=DATCORE_ID,
                    file_uuid=file_uuid,
                    file_id=file_id,
                    raw_file_path=file_uuid,
                    display_file_path=file_uuid,
                    created_at=created_at,
                    last_modified=last_modified,
                    file_size=file_size,
                )
                files.append(fmd)

    def create_dataset(self, ds_name, *, force_delete=False):
        """
        Creates a new dataset for the current user and returns it. Returns the
        existing one if there is already a dataset with the given name.

        Args:
            ds_name (str): Name for the dataset (_, -, ' ' and capitalization are ignored)
            force_delete (bool, optional): Delete first if dataset already exists
        """
        ds = None
        with suppress(Exception):
            ds = self._bf.get_dataset(ds_name)
            if force_delete:
                ds.delete()
                ds = None

        if ds is None:
            ds = self._bf.create_dataset(ds_name)

        return ds

    def get_dataset(self, ds_name, create_if_not_exists=False):
        """
        Returns dataset with the given name. Creates it if required.

        Args:
            ds_name (str): Name for the dataset
            create_if_not_exists (bool, optional): Create the dataset if it does not exist yet
        """
        ds = None
        with suppress(Exception):
            ds = self._bf.get_dataset(ds_name)

        if ds is None and create_if_not_exists:
            ds = self._bf.create_dataset(ds_name)

        return ds

    def delete_dataset(self, ds_name):
        """
        Deletes dataset with the given name.

        Args:
            ds_name (str): Name for the dataset
        """
        # this is not supported
        ds = self.get_dataset(ds_name)
        if ds is not None:
            self._bf.delete(ds.id)

    def exists_dataset(self, ds_name):
        """
        Returns True if dataset with the given name exists.

        Args:
            ds_name (str): Name for the dataset
        """
        ds = self.get_dataset(ds_name)
        return ds is not None

    def upload_file(self, destination: str, filepath: str, meta_data=None) -> bool:
        """
        Uploads a file to a given dataset/collection given its filepath on the host.
        Optionally adds some meta data.

        Args:
            destination (str): Path to the dataset/collection into which the file shall be uploaded
            filepath (path): Full path to the file
            meta_data (dict, optional): Dictionary of meta data

        Note:
            Blackfynn postprocesses data based on file endings. If it can do that,
            the filenames on the server change.
        """
        # parse the destination and try to find the package_id to upload to
        collection, collection_id = self._collection_from_destination(destination)

        if collection is None:
            return False

        files = [filepath]
        self._bf._api.io.upload_files(collection, files, display_progress=True, use_agent=False)
        collection.update()

        if meta_data is not None:
            for f in files:
                filename = os.path.basename(f)
                package = self.get_package(collection, filename)
                if package is not None:
                    self._update_meta_data(package, meta_data)

        return True

    def _update_meta_data(self, package, meta_data):
        """
        Updates or replaces metadata for a package

        Args:
            package (package): The package for which the meta data needs an update
            meta_data (dict): Dictionary of meta data
        """
        for key in meta_data.keys():
            package.set_property(key, meta_data[key], category="simcore")

        package.update()

    def download_file(self, source, filename, destination_path):
        """
        Downloads a file from a source dataset/collection given its filename.
        Stores it under destination_path.

        Args:
            source (dataset/collection): The dataset or collection to download from
            filename (str): Name of the file
            destination_path (str): Path on host for storing the file
        """
        url = self.download_link(source, filename)
        if url:
            _file = urllib.URLopener()  # nosec
            _file.retrieve(url, destination_path)
            return True
        return False

    def download_link(self, destination, filename):
        """
        Returns a presigned url for download; destination is a dataset or collection.
        """
        collection, collection_id = self._collection_from_destination(destination)

        for item in collection:
            if isinstance(item, DataPackage):
                if Path(item.files[0].as_dict()["content"]["s3key"]).name == filename:
                    file_desc = self._bf._api.packages.get_sources(item.id)[0]
                    url = self._bf._api.packages.get_presigned_url_for_file(item.id, file_desc.id)
                    return url

        return ""

    def download_link_by_id(self, file_id):
        """
        Returns a presigned url for download of a file given its file_id.
        """
        url = ""
        filename = ""
        package = self._bf.get(file_id)
        if package is not None:
            filename = Path(package.files[0].as_dict()["content"]["s3key"]).name
            file_desc = self._bf._api.packages.get_sources(file_id)[0]
            url = self._bf._api.packages.get_presigned_url_for_file(file_id, file_desc.id)

        return url, filename

    def get_package(self, source, filename):
        """
        Returns package from source by name if it exists

        Args:
            source (dataset/collection): The dataset or collection to search in
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                return item

        return None

    def delete_file(self, destination, filename):
        """
        Deletes file by name from destination by name

        Args:
            destination (dataset/collection): The dataset or collection to delete from
            filename (str): Name of the file
        """
        collection, collection_id = self._collection_from_destination(destination)

        if collection is None:
            return False

        collection.update()
        for item in collection:
            if isinstance(item, DataPackage):
                if Path(item.files[0].as_dict()["content"]["s3key"]).name == filename:
                    self._bf.delete(item)
                    return True

        return False

    def delete_file_by_id(self, id: str) -> bool:
        """
        Deletes file by id

        Args:
            id (str): datcore id for the file
        """
        package: DataPackage = self._bf.get(id)
        package.delete()
        return not package.exists

    def delete_files(self, destination):
        """
        Deletes all files in destination

        Args:
            destination (dataset/collection): The dataset or collection to delete from
        """
        collection, collection_id = self._collection_from_destination(destination)

        if collection is None:
            return False

        collection.update()
        for item in collection:
            self._bf.delete(item)

    def update_meta_data(self, dataset, filename, meta_data):
        """
        Updates metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            meta_data (dict): Dictionary of meta data
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            self._update_meta_data(package, meta_data)

    def get_meta_data(self, dataset, filename):
        """
        Returns metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
        """
        meta_data = {}
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            meta_list = package.properties
            for m in meta_list:
                meta_data[m.key] = m.value

        return meta_data

    def delete_meta_data(self, dataset, filename, keys=None):
        """
        Deletes specified keys in meta data for source/filename.

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            keys (list of str, optional): Deletes specified keys, deletes all meta data if None
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            if keys is None:
                for p in package.properties:
                    package.remove_property(p.key, category="simcore")
            else:
                for k in keys:
                    package.remove_property(k, category="simcore")

    def search(self, what, max_count):
        """
        Searches a thing in the database. Returns max_count results

        Args:
            what (str): query
            max_count (int): Max number of results to return
        """
        return self._bf.search(what, max_count)

    def upload_file_to_id(self, destination_id: str, filepath: str):
        """
        Uploads a file to a given dataset/collection by id given its filepath on the host
        and adds some meta data. Returns the id for the newly created resource.

        Note: filepath could be an array

        Args:
            destination_id: The dataset/collection id into which the file shall be uploaded
            filepath (path): Full path to the file
        """
        _id = ""
        destination = self._destination_from_id(destination_id)
        if destination is None:
            return _id

        files = [filepath]

        try:
            # TODO: PC->MAG: should use protected API
            # TODO: add new agent SEE https://developer.blackfynn.io/python/latest/CHANGELOG.html#id31
            result = self._bf._api.io.upload_files(destination, files, display_progress=True, use_agent=False)
            if result and result[0] and "package" in result[0][0]:
                _id = result[0][0]["package"]["content"]["id"]
        except Exception:
            logger.exception("Error uploading file to datcore")

        return _id

    def create_collection(self, destination_id: str, collection_name: str):
        """
        Creates an empty collection within destination

        Args:
            destination_id: The dataset/collection id in which the collection shall be created
            collection_name (str): Name of the new collection
        """
        destination = self._destination_from_id(destination_id)
        _id = ""

        if destination is None:
            return _id

        new_collection = Collection(collection_name)
        destination.add(new_collection)
        new_collection.update()
        destination.update()
        _id = new_collection.id

        return _id

    def list_datasets(self) -> DatasetMetaDataVec:
        data = []
        for dataset in self._bf.datasets():
            dmd = DatasetMetaData(dataset_id=dataset.id, display_name=dataset.name)
            data.append(dmd)

        return data
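# A minimal usage sketch for the DatcoreClient above, assuming the same module-level
# imports are in scope, that FileMetaData exposes its fields as attributes, and that the
# credentials, dataset name, and file path below are purely hypothetical placeholders.
if __name__ == "__main__":
    client = DatcoreClient(api_token="<API_TOKEN>", api_secret="<API_SECRET>")
    print(client.profile())

    # list every file visible to this token, optionally filtered by dataset name
    for fmd in client.list_files_recursively(dataset_filter="my-dataset"):
        print(fmd.file_uuid)

    # upload into a dataset (destination is a "<dataset>[/<collection>...]" path) and
    # attach some metadata, then fetch a presigned download link by filename
    client.create_dataset("my-dataset")
    client.upload_file("my-dataset", "/tmp/measurement.csv", meta_data={"node": "demo"})
    print(client.download_link("my-dataset", "measurement.csv"))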
except getopt.GetoptError:
    printf("%s\n", syntax())
    sys.exit()

dsets, dsdict = get_datasets()

for opt, arg in opts:
    if opt == '-h':
        printf("%s\n", syntax())
        sys.exit()
    elif opt in '-l':
        for ds in dsets:
            printf("%s\n", ds[0])
        sys.exit()
    elif opt in '-d':
        dset = bf.get_dataset(dsdict[arg])
    elif opt in '-n':
        if db_exists(arg, dsets):
            printf("Dataset %s already exists. Can not continue.\n", arg)
            EXISTS = True
            sys.exit()
        else:
            printf("Creating new dataset: %s\n", arg)
            bf.create_dataset(arg)
            newdset = bf.get_dataset(arg)

create_duplicate(dset, newdset)
from blackfynn import Blackfynn
from ncs2bfts2 import ncs2bfts
import datetime

bf = Blackfynn()

fileNum = [57, 59, 61, 62, 63]  # numbers of the channels being recorded on
dirName = r'C:\Users\Placid\Dropbox\PTE_Data_Litt_Blackfinn\JAW_53_17_110217_2006_001'  # directory where data is stored
bfFile = 'datacheck.bfts'
startDateTime = ((datetime.datetime(2017, 11, 6, 14, 38, 15)
                  - datetime.datetime(1970, 1, 1)).total_seconds()) * (1e6)  # microseconds

ncs2bfts(startDateTime, dirName, fileNum, bfFile)

ds = bf.create_dataset('John Wolf Data')
ds.upload(bfFile)
def blackfynn_cli():
    args = docopt(__doc__)

    if args['version']:
        print "version: {}".format(blackfynn.__version__)

    email = args['--user'] if args['--user'] is not None else settings.api_user
    passw = args['--pass'] if args['--pass'] is not None else settings.api_pass
    host = args['--host'] if args['--host'] is not None else settings.api_host
    org = args['--org']

    try:
        bf = Blackfynn(email=email, password=passw, host=host)
    except:
        print "Unable to connect to Blackfynn using specified user/password."
        return

    if args['orgs']:
        for o in bf.organizations():
            print " * {} (id: {})".format(o.name, o.id)

    if org is not None:
        try:
            bf.set_context(org)
        except:
            print 'Error: Unable to set context to "{}"'.format(org)
            return

    if args['show']:
        item = bf.get(args['<item>'])
        print item
        if hasattr(item, 'items'):
            print "CONTENTS:"
            for i in item.items:
                print " * {}".format(i)
        if hasattr(item, 'channels'):
            print "CHANNELS:"
            for ch in item.channels:
                print " * {} (id: {})".format(ch.name, ch.id)

    elif args['search']:
        terms = ' '.join(args['<term>'])
        results = bf._api.search.query(terms)
        if len(results) == 0:
            print "No Results."
        else:
            for r in results:
                print " * {}".format(r)

    elif args['create']:
        if args['collection']:
            dest = args['<destination>']
            name = args['<name>']
            c = Collection(name)
            parent = bf.get(dest)
            parent.add(c)
            print c
        elif args['dataset']:
            name = args['<name>']
            ds = bf.create_dataset(name)
            print ds
        else:
            print "Error: creation for object not supported."
            return

    elif args['delete']:
        item = bf.get(args['<item>'])
        if isinstance(item, Dataset):
            print "Error: cannot delete dataset"
            return
        elif not isinstance(item, BaseNode):
            print "Error: cannot delete item"
            return
        bf.delete(item)

    elif args['upload']:
        files = args['<file>']
        dest = args['<destination>']
        recursively_upload(bf, dest, files)

    elif args['append']:
        files = args['<file>']
        dest = args['<destination>']
        bf._api.io.upload_files(dest, files, append=True, display_progress=True)

    elif args['datasets']:
        print "Datasets: "
        for ds in bf.datasets():
            print " - {} (id: {})".format(ds.name, ds.id)

    elif args['dataset']:
        ds = bf.get(args['<dataset>'])
        if args['collaborators']:
            if args['<action>'] == 'ls':
                resp = ds.collaborators()
                print " - Users"
                for u in resp['users']:
                    print " - email:{} id:{}".format(u.email, u.id)
                print " - Groups"
                for g in resp['groups']:
                    print " - name:{} id:{}".format(g.name, g.id)
            elif args['<action>'] == 'add':
                ids = args['<action-args>']
                if len(ids) == 0:
                    print "Error: No ids specified"
                    sys.exit(1)
                resp = ds.add_collaborators(*ids)
                print_collaborator_edit_resp(resp)
            elif args['<action>'] == 'rm':
                ids = args['<action-args>']
                if len(ids) == 0:
                    print "Error: No ids specified"
                    sys.exit(1)
                resp = ds.remove_collaborators(*ids)
                print_collaborator_edit_resp(resp)
            else:
                print "Error: invalid dataset collaborators command. Valid commands are 'ls', 'add' or 'rm'"
        else:
            print "Error: invalid dataset command. Valid commands are 'collaborators'"

    elif args['env']:
        print "# Blackfynn environment"
        print "API Location: {}".format(host)
        print "Streaming API: {}".format(settings.streaming_api_host)
        print "User: {}".format(email)
        print "Organization: {} (id: {})".format(bf.context.name, bf.context.id)
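# The docopt handlers above imply command-line invocations roughly like the ones below.
# The executable name `bf` and the exact argument order come from the CLI's usage string,
# which is not shown here, so treat these as illustrative guesses only:
#
#   bf datasets
#   bf show <item>
#   bf create dataset <name>
#   bf upload <destination> <file>...
#   bf dataset <dataset> collaborators ls
#   bf env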
class BlackfynnDataModel(object):

    def __init__(self):
        self._settings = {'active-profile': ''}
        self._cache = {}
        self._bf = None

    def addProfile(self, profile):
        self._settings[profile['name']] = {'api_token': profile['token'],
                                           'api_secret': profile['secret']}

    def setActiveProfile(self, profile_name):
        self._settings['active-profile'] = profile_name

    def getActiveProfile(self):
        return self._settings['active-profile']

    def getExistingProfileNames(self):
        profile_names = [*self._settings]
        profile_names.remove('active-profile')
        return profile_names

    def _getBlackfynn(self, profile_name):
        api_key = self._settings[profile_name]['api_token']
        api_secret = self._settings[profile_name]['api_secret']
        print('[{0}]:[{1}]'.format(api_key, api_secret))
        self._bf = Blackfynn(api_token=api_key, api_secret=api_secret)
        return self._bf

    def getDatasets(self, profile_name, refresh=False):
        if profile_name in self._cache and not refresh:
            datasets = self._cache[profile_name]['datasets']
        elif refresh:
            bf = self._getBlackfynn(profile_name)
            datasets = bf.datasets()
            if profile_name in self._cache:
                self._cache[profile_name]['datasets'] = datasets
            else:
                self._cache[profile_name] = {'datasets': datasets}
        else:
            datasets = []

        return datasets

    def getDataset(self, profile_name, dataset_name, refresh=False):
        if profile_name in self._cache and dataset_name in self._cache[profile_name] and not refresh:
            dataset = self._cache[profile_name][dataset_name]
        elif refresh:
            bf = self._getBlackfynn(profile_name)
            dataset = bf.get_dataset(dataset_name)
            self._cache[profile_name][dataset_name] = dataset
        else:
            dataset = []

        return dataset

    def getTimeseriesData(self, profile_name, dataset_name, timeseries_name):
        for stored_dataset in self._cache[profile_name][dataset_name]:
            if stored_dataset.name == timeseries_name:
                timeseries_dframe = stored_dataset.get_data(length='16s')
                cache_output = self._create_file_cache(timeseries_dframe)
                absolute_timeseries_values = timeseries_dframe.axes[0]
                relative_times = []
                for time in absolute_timeseries_values:
                    relative_times.append(
                        round(time.timestamp() - absolute_timeseries_values[0].timestamp(), 6))
                return [cache_output, relative_times]

    def _create_file_cache(self, data_frame):
        cache_dictionary = {}
        keys = natsorted(data_frame.keys())  # Sort the keys in 'natural' order
        for key in keys:
            cache_dictionary[key] = data_frame[key].values.tolist()
        return cache_dictionary

    def uploadRender(self, filePath):
        # uploadRender: Takes a given file path and uploads it to Blackfynn in a folder
        # called 'Zinc Exports' for the user currently logged in.
        try:
            ds = self._bf.get_dataset('Zinc Exports')
        except:
            self._bf.create_dataset('Zinc Exports')
            ds = self._bf.get_dataset('Zinc Exports')
        ds.upload(filePath)

    def getSettings(self):
        return self._settings

    def setSettings(self, settings):
        print('set settings {0}'.format(settings))
        self._settings.update(settings)
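# A minimal sketch of how the BlackfynnDataModel above might be driven, assuming the
# profile name, token, and secret are hypothetical placeholders and that the named
# dataset and timeseries actually exist in the account:
if __name__ == "__main__":
    model = BlackfynnDataModel()
    model.addProfile({'name': 'demo', 'token': '<API_TOKEN>', 'secret': '<API_SECRET>'})
    model.setActiveProfile('demo')

    # refresh=True forces a round trip to Blackfynn and populates the cache
    datasets = model.getDatasets('demo', refresh=True)
    print([ds.name for ds in datasets])

    # fetch one dataset, then pull 16 s of a timeseries in it through the cache
    model.getDataset('demo', 'My EEG Dataset', refresh=True)
    cache, relative_times = model.getTimeseriesData('demo', 'My EEG Dataset', 'My Timeseries')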
class DatcoreClient(object):
    def __init__(self, api_token=None, api_secret=None, host=None, streaming_host=None):
        self.client = Blackfynn(profile=None, api_token=api_token, api_secret=api_secret,
                                host=host, streaming_host=streaming_host)

    def _context(self):
        """
        Returns current organizational context
        """
        return self.client.context

    def profile(self):
        """
        Returns profile of current User
        """
        return self.client.profile

    def organization(self):
        """
        Returns organization name
        """
        return self.client.context.name

    def list_datasets(self):
        ds = []
        for item in self.client.datasets():
            ds.append(item.name)

        return ds

    def list_files(self):
        files = []
        for ds in self.client.datasets():
            for item in ds:
                files.append(os.path.join(ds.name, item.name))

        return files

    def create_dataset(self, ds_name, force_delete=False):
        """
        Creates a new dataset for the current user and returns it. Returns the
        existing one if there is already a dataset with the given name.

        Args:
            ds_name (str): Name for the dataset (_, -, ' ' and capitalization are ignored)
            force_delete (bool, optional): Delete first if dataset already exists
        """
        ds = None
        try:
            ds = self.client.get_dataset(ds_name)
            if force_delete:
                ds.delete()
                ds = None
        except Exception:  # pylint: disable=W0703
            pass

        if ds is None:
            ds = self.client.create_dataset(ds_name)

        return ds

    def get_dataset(self, ds_name, create_if_not_exists=False):
        """
        Returns dataset with the given name. Creates it if required.

        Args:
            ds_name (str): Name for the dataset
            create_if_not_exists (bool, optional): Create the dataset if it does not exist yet
        """
        ds = None
        try:
            ds = self.client.get_dataset(ds_name)
        except Exception:  # pylint: disable=W0703
            pass

        if ds is None and create_if_not_exists:
            ds = self.client.create_dataset(ds_name)

        return ds

    def delete_dataset(self, ds_name):
        """
        Deletes dataset with the given name.

        Args:
            ds_name (str): Name for the dataset
        """
        # this is not supported
        ds = self.get_dataset(ds_name)
        if ds is not None:
            self.client.delete(ds.id)

    def exists_dataset(self, ds_name):
        """
        Returns True if dataset with the given name exists.

        Args:
            ds_name (str): Name for the dataset
        """
        ds = self.get_dataset(ds_name)
        return ds is not None

    def upload_file(self, dataset, filepaths, meta_data=None):
        """
        Uploads one or more files to a given dataset given their filepaths on the host.
        Optionally adds some meta data.

        Args:
            dataset (dataset): The dataset into which the files shall be uploaded
            filepaths (path or list of paths): Full path(s) to the file(s)
            meta_data (dict, optional): Dictionary of meta data

        Note:
            Blackfynn postprocesses data based on file endings. If it can do that,
            the filenames on the server change. This makes it difficult to retrieve
            them back by name (see get_sources below). Also, for now we assume we
            have only single file data.
        """
        if isinstance(filepaths, list):
            files = filepaths
        else:
            files = [filepaths]

        # pylint: disable = E1101
        self.client._api.io.upload_files(dataset, files, display_progress=True)
        dataset.update()

        if meta_data is not None:
            for filepath in files:
                filename = os.path.basename(filepath)
                package = self.get_package(dataset, filename)
                if package is not None:
                    self._update_meta_data(package, meta_data)

    def _update_meta_data(self, package, meta_data):
        """
        Updates or replaces metadata for a package

        Args:
            package (package): The package for which the meta data needs an update
            meta_data (dict): Dictionary of meta data
        """
        for key in meta_data.keys():
            package.set_property(key, meta_data[key], category='simcore')

        package.update()

    def download_file(self, source, filename, destination_path):
        """
        Downloads a file from a source dataset/collection given its filename.
        Stores it under destination_path.

        Args:
            source (dataset/collection): The dataset or collection to download from
            filename (str): Name of the file
            destination_path (str): Path on host for storing the file
        """
        # pylint: disable = E1101
        url = self.download_link(source, filename)
        if url:
            _file = urllib.URLopener()
            _file.retrieve(url, destination_path)
            return True
        return False

    def download_link(self, source, filename):
        """
        Returns a presigned url for download; source is a dataset.
        """
        # pylint: disable = E1101
        for item in source:
            if item.name == filename:
                file_desc = self.client._api.packages.get_sources(item.id)[0]
                url = self.client._api.packages.get_presigned_url_for_file(item.id, file_desc.id)
                return url

        return ""

    def exists_file(self, source, filename):
        """
        Checks if file exists in source

        Args:
            source (dataset/collection): The dataset or collection to check
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                return True

        return False

    def get_package(self, source, filename):
        """
        Returns package from source by name if it exists

        Args:
            source (dataset/collection): The dataset or collection to search in
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                return item

        return None

    def delete_file(self, source, filename):
        """
        Deletes file by name from source by name

        Args:
            source (dataset/collection): The dataset or collection to delete from
            filename (str): Name of the file
        """
        source.update()
        for item in source:
            if item.name == filename:
                self.client.delete(item)

    def delete_files(self, source):
        """
        Deletes all files in source

        Args:
            source (dataset/collection): The dataset or collection to delete from
        """
        source.update()
        for item in source:
            self.client.delete(item)

    def update_meta_data(self, dataset, filename, meta_data):
        """
        Updates metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            meta_data (dict): Dictionary of meta data
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            self._update_meta_data(package, meta_data)

    def get_meta_data(self, dataset, filename):
        """
        Returns metadata for a file

        Args:
            dataset (package): Which dataset
            filename (str): Which file
        """
        meta_data = {}
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            meta_list = package.properties
            for m in meta_list:
                meta_data[m.key] = m.value

        return meta_data

    def delete_meta_data(self, dataset, filename, keys=None):
        """
        Deletes specified keys in meta data for source/filename.

        Args:
            dataset (package): Which dataset
            filename (str): Which file
            keys (list of str, optional): Deletes specified keys, deletes all meta data if None
        """
        filename = os.path.basename(filename)
        package = self.get_package(dataset, filename)
        if package is not None:
            if keys is None:
                for p in package.properties:
                    package.remove_property(p.key, category='simcore')
            else:
                for k in keys:
                    package.remove_property(k, category='simcore')

    def search(self, what, max_count):
        """
        Searches a thing in the database. Returns max_count results

        Args:
            what (str): query
            max_count (int): Max number of results to return
        """
        return self.client.search(what, max_count)
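# A small sketch of the metadata round trip supported by this older DatcoreClient,
# assuming hypothetical credentials, a hypothetical 'simcore-testing' dataset name,
# and that '/tmp/measurement.csv' exists locally:
if __name__ == "__main__":
    client = DatcoreClient(api_token="<API_TOKEN>", api_secret="<API_SECRET>")
    ds = client.get_dataset("simcore-testing", create_if_not_exists=True)

    client.upload_file(ds, "/tmp/measurement.csv", meta_data={"node": "demo", "run": "1"})
    # may come back empty if Blackfynn renamed the package during post-processing
    print(client.get_meta_data(ds, "measurement.csv"))
    client.delete_meta_data(ds, "measurement.csv", keys=["run"])
    client.delete_file(ds, "measurement.csv")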