def _checkProvenace(item, path): """Determines if provenance item is valid""" if item is None: return item item_path_normalized = os.path.abspath( os.path.expandvars(os.path.expanduser(item))) if os.path.isfile(item_path_normalized): # Add full path item = item_path_normalized if item not in df.index: # If it is a file and it is not being uploaded try: bundle = syn._getFromFile(item) return bundle except SynapseFileNotFoundError: # TODO absence of a raise here appears to be a bug and yet tests fail if this is raised SynapseProvenanceError(( "The provenance record for file: %s is incorrect.\n" "Specifically %s is not being uploaded and is not in Synapse." % (path, item))) elif not utils.is_url(item) and (utils.is_synapse_id(item) is None): raise SynapseProvenanceError( ("The provenance record for file: %s is incorrect.\n" "Specifically %s, is neither a valid URL or synapseId.") % (path, item)) return item
def sync(self, entity, path, ifcollision, followLink):
    """Download ``entity`` (a File, or a container and its contents).

    :param entity: a Synapse ID or an entity object; an ID is first resolved
                   through ``self._syn.get``
    :param path: forwarded as ``downloadLocation`` / to ``self._sync_root``
    :param ifcollision: forwarded unchanged to the download calls
    :param followLink: forwarded unchanged to the download calls

    :returns: the list of files, sorted in place by their ``'path'`` value
              (a missing or falsy path sorts as the empty string)

    :raises ValueError: if the resolved entity is neither a container nor a
                        File
    """
    progress = CumulativeTransferProgress('Downloaded')

    if is_synapse_id(entity):
        # Seed the sync with an actual entity rather than a bare ID.
        entity = self._syn.get(
            entity,
            downloadLocation=path,
            ifcollision=ifcollision,
            followLink=followLink,
        )

    if not is_container(entity):
        if not isinstance(entity, File):
            raise ValueError(
                "Cannot initiate a sync from an entity that is not a File or Folder"
            )
        files = [entity]
    else:
        root_sync = self._sync_root(entity, path, ifcollision, followLink, progress)
        # The whole hierarchy has been traversed at this point; this entrant
        # thread now waits for every file download to complete.
        files = root_sync.wait_until_finished()

    def _path_key(synced_file):
        return synced_file.get('path') or ''

    # Sub folders can complete out of submission order, so sort by path (which
    # includes the local folder) for a predictable ordering. Not required, but
    # convenient for testing.
    files.sort(key=_path_key)
    return files
def cleanup(items):
    """Clean up junk created during testing.

    Items are processed in reverse order. Each item is handled by kind:

    * an ``Entity``, a Synapse ID, or any object with a ``deleteURI``
      attribute is deleted through ``syn.delete``;
    * a string that names an existing filesystem path is removed
      (``shutil.rmtree`` for directories, ``os.remove`` otherwise);
    * anything else produces a message on stderr.

    The function is best effort: failures are printed, never raised.

    :param items: a reversible sequence of things to clean up
    """
    for item in reversed(items):
        if isinstance(item, Entity) or utils.is_synapse_id(item) or hasattr(item, 'deleteURI'):
            try:
                syn.delete(item)
            except Exception as ex:
                # An exception may carry ``response = None`` (e.g. a connection
                # failure with no HTTP reply), so read the status defensively
                # instead of dereferencing ``ex.response`` directly.
                response = getattr(ex, 'response', None)
                status_code = getattr(response, 'status_code', None)
                # 404 / 403 mean the entity is already gone or not ours to
                # delete; stay silent for those.
                if status_code not in (404, 403):
                    print("Error cleaning up entity: " + str(ex))
        elif isinstance(item, str):
            if os.path.exists(item):
                try:
                    if os.path.isdir(item):
                        shutil.rmtree(item)
                    else:
                        # Assume that remove will work on anything besides folders
                        os.remove(item)
                except Exception as ex:
                    print(ex)
        else:
            sys.stderr.write('Don\'t know how to clean: %s' % str(item))
def used(self, target=None, targetVersion=None, wasExecuted=None, url=None, name=None):
    """
    Add a resource used by the activity.

    This method tries to be as permissive as possible. It accepts a string which might be a synapse ID or a URL,
    a synapse entity, a UsedEntity or UsedURL dictionary or a list containing any combination of these.

    In addition, named parameters can be used to specify the fields of either a UsedEntity or a UsedURL.
    If target and optionally targetVersion are specified, create a UsedEntity.
    If url and optionally name are specified, create a UsedURL.

    It is an error to specify both target/targetVersion parameters and url/name parameters in the same call.
    To add multiple UsedEntities and UsedURLs, make a separate call for each or pass in a list.

    In case of conflicting settings for wasExecuted both inside an object and with a parameter, the parameter wins.
    For example, this UsedURL will have wasExecuted set to False::

        activity.used({'url':'http://google.com', 'name':'Goog', 'wasExecuted':True}, wasExecuted=False)

    Entity examples::

        activity.used('syn12345')
        activity.used(entity)
        activity.used(target=entity, targetVersion=2)
        activity.used(codeEntity, wasExecuted=True)
        activity.used({'reference':{'target':'syn12345', 'targetVersion':1}, 'wasExecuted':False})

    URL examples::

        activity.used('http://mydomain.com/my/awesome/data.RData')
        activity.used(url='http://mydomain.com/my/awesome/data.RData', name='Awesome Data')
        activity.used(url='https://github.com/joe_hacker/code_repo', name='Gnarly hacks', wasExecuted=True)
        activity.used({'url':'https://github.com/joe_hacker/code_repo', 'name':'Gnarly hacks'}, wasExecuted=True)

    List example::

        activity.used(['syn12345', 'syn23456', entity, \
                      {'reference':{'target':'syn100009', 'targetVersion':2}, 'wasExecuted':True}, \
                      'http://mydomain.com/my/awesome/data.RData'])

    :param target: the used resource: a list, a UsedEntity / UsedURL dictionary, a Synapse entity, a URL string
                   or a Synapse ID string (optionally of the form ``syn234.4`` to carry a version)
    :param targetVersion: version of the target entity; converted with ``int()`` when truthy
    :param wasExecuted: when not None, overrides any ``wasExecuted`` already present on the resource;
                        when None, the resource defaults to False unless it already has a value
    :param url: URL of a used resource, as an alternative to ``target``
    :param name: display name for a URL resource

    :returns: None. The resource is appended to ``self['used']``. When ``target`` is a UsedEntity / UsedURL
              dictionary it is used as-is, so the caller's dictionary is modified in place.

    :raises ValueError: if a string target does not start with a valid Synapse ID, or if ``targetVersion``
                        conflicts with the version embedded in the ID
    :raises SynapseError: if no supported combination of parameters was given

    Argument combinations are validated through ``_get_any_bad_args`` / ``_raise_incorrect_used_usage``,
    which are defined elsewhere (presumably raising when a disallowed argument is set -- confirm there).
    """
    # NOTE: the branches below are tried in order and the first match wins.

    # -- A list of targets
    if isinstance(target, list):
        badargs = _get_any_bad_args(['targetVersion', 'url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'list of used resources')
        # Each element is added through a recursive call; only wasExecuted is forwarded.
        for item in target:
            self.used(item, wasExecuted=wasExecuted)
        return

    # -- UsedEntity
    elif is_used_entity(target):
        badargs = _get_any_bad_args(['targetVersion', 'url', 'name'], locals())
        _raise_incorrect_used_usage(
            badargs, 'dictionary representing a used resource')

        # The dictionary is used directly (not copied); fill in concreteType if absent.
        resource = target
        if 'concreteType' not in resource:
            resource['concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedEntity'

    # -- Used URL
    elif is_used_url(target):
        badargs = _get_any_bad_args(['targetVersion', 'url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'URL')

        # The dictionary is used directly (not copied); fill in concreteType if absent.
        resource = target
        if 'concreteType' not in resource:
            resource['concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedURL'

    # -- Synapse Entity
    elif is_synapse_entity(target):
        badargs = _get_any_bad_args(['url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'Synapse entity')

        reference = {'targetId': target['id']}
        if 'versionNumber' in target:
            reference['targetVersionNumber'] = target['versionNumber']
        # An explicit (truthy) targetVersion takes precedence over the entity's own versionNumber.
        if targetVersion:
            reference['targetVersionNumber'] = int(targetVersion)
        resource = {
            'reference': reference,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'
        }

    # -- URL parameter
    elif url:
        badargs = _get_any_bad_args(['target', 'targetVersion'], locals())
        _raise_incorrect_used_usage(badargs, 'URL')

        # NOTE(review): the name falls back to `target`, which 'target' being listed as a bad
        # argument suggests is None here; falling back to `url` may have been intended -- confirm.
        resource = {
            'url': url,
            'name': name if name else target,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedURL'
        }

    # -- URL as a string
    elif is_url(target):
        badargs = _get_any_bad_args(['targetVersion'], locals())
        _raise_incorrect_used_usage(badargs, 'URL')
        # Without an explicit name, the URL itself is used as the name.
        resource = {
            'url': target,
            'name': name if name else target,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedURL'
        }

    # -- Synapse Entity ID (assuming the string is an ID)
    elif isinstance(target, str):
        badargs = _get_any_bad_args(['url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'Synapse entity')
        vals = target.split('.')  # Handle Synapse IDs of the form syn234.4
        if not is_synapse_id(vals[0]):
            raise ValueError('%s is not a valid Synapse id' % target)
        if len(vals) == 2:
            # A version embedded in the ID must agree with an explicit targetVersion.
            if targetVersion and int(targetVersion) != int(vals[1]):
                raise ValueError(
                    'Two conflicting versions for %s were specified' % target)
            targetVersion = int(vals[1])
        reference = {'targetId': vals[0]}
        if targetVersion:
            reference['targetVersionNumber'] = int(targetVersion)
        resource = {
            'reference': reference,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'
        }

    else:
        raise SynapseError(
            'Unexpected parameters in call to Activity.used().')

    # Set wasExecuted
    if wasExecuted is None:
        # Default to False
        if 'wasExecuted' not in resource:
            resource['wasExecuted'] = False
    else:
        # wasExecuted parameter overrides setting in an object
        resource['wasExecuted'] = wasExecuted

    # Add the used resource to the activity
    self['used'].append(resource)
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Download a File, or every File under a folder/project, from Synapse.

    Containers are walked recursively. When ``path`` is given, each
    sub-container is mirrored as a directory below it and a manifest named
    ``MANIFEST_FILENAME`` is written into ``path`` through
    ``generateManifest``. When ``path`` is None no directories are created
    and no manifest is written.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()
    :param entity:      A Synapse ID, or a Synapse Entity object of type file, folder or project.
    :param path:        Optional directory in which the hierarchy is reproduced; passed to ``syn.get``
                        as ``downloadLocation``.
    :param ifcollision: Passed to ``syn.get``. Defaults to "overwrite.local".
    :param allFiles:    Accumulator list shared across the recursive calls; a new list is created when None.
    :param followLink:  Passed to ``syn.get``. Defaults to False.

    :returns: ``allFiles``, holding every retrieved entity that is a ``File`` instance

    :raises ValueError: if ``entity`` is neither a File nor a container

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)
    """
    if allFiles is None:
        allFiles = list()

    # Resolve a bare ID into an entity before inspecting its type.
    if is_synapse_id(entity):
        entity = syn.get(entity, downloadLocation=path, ifcollision=ifcollision, followLink=followLink)

    # A single File needs no traversal.
    if isinstance(entity, File):
        allFiles.append(entity)
        return allFiles

    entity_id = id_of(entity)
    if not is_container(entity):
        raise ValueError(
            "The provided id: %s is neither a container nor a File" % entity_id)

    for child in syn.getChildren(entity_id):
        if not is_container(child):
            fetched = syn.get(child['id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            if isinstance(fetched, File):
                allFiles.append(fetched)
            continue

        # Sub-container: mirror it on disk only when downloading outside the cache.
        sub_path = None
        if path is not None:
            sub_path = os.path.join(path, child['name'])
            try:
                os.makedirs(sub_path)
            except OSError as err:
                # An already existing directory is fine; anything else is not.
                if err.errno != errno.EEXIST:
                    raise

        # Recurse; results are collected in the shared accumulator.
        syncFromSynapse(syn, child['id'], sub_path, ifcollision, allFiles, followLink=followLink)

    # If path is None files are stored in cache and no manifest is written.
    if path is not None:
        manifest_path = os.path.normcase(os.path.join(path, MANIFEST_FILENAME))
        manifest_path = os.path.expanduser(manifest_path)
        generateManifest(syn, allFiles, manifest_path)

    return allFiles