Exemple #1
0
    def _checkProvenace(item, path):
        """Determines if provenance item is valid"""
        if item is None:
            return item

        item_path_normalized = os.path.abspath(
            os.path.expandvars(os.path.expanduser(item)))
        if os.path.isfile(item_path_normalized):
            # Add full path
            item = item_path_normalized
            if item not in df.index:  # If it is a file and it is not being uploaded
                try:
                    bundle = syn._getFromFile(item)
                    return bundle
                except SynapseFileNotFoundError:
                    # TODO absence of a raise here appears to be a bug and yet tests fail if this is raised
                    SynapseProvenanceError((
                        "The provenance record for file: %s is incorrect.\n"
                        "Specifically %s is not being uploaded and is not in Synapse."
                        % (path, item)))

        elif not utils.is_url(item) and (utils.is_synapse_id(item) is None):
            raise SynapseProvenanceError(
                ("The provenance record for file: %s is incorrect.\n"
                 "Specifically %s, is neither a valid URL or synapseId.") %
                (path, item))
        return item
Exemple #2
0
    def sync(self, entity, path, ifcollision, followLink):
        """Download ``entity`` (a File, a container, or a Synapse id of one) and return the resulting files.

        :param entity:      A Synapse id or an entity object. An id is first resolved through ``self._syn.get``.
        :param path:        Passed as ``downloadLocation`` when resolving an id and forwarded to ``self._sync_root``.
        :param ifcollision: Collision setting forwarded to ``self._syn.get`` / ``self._sync_root``.
        :param followLink:  Link-following setting forwarded to ``self._syn.get`` / ``self._sync_root``.

        :returns: A list of files, sorted in place by their ``'path'`` value (a missing or falsy path sorts as ``''``).

        :raises ValueError: if the entity is neither a container nor a ``File``.
        """
        progress = CumulativeTransferProgress('Downloaded')

        def _path_key(synced_file):
            # Files without a path are ordered as if their path were empty.
            return synced_file.get('path') or ''

        if is_synapse_id(entity):
            # Seed the sync with a real entity rather than a bare id.
            entity = self._syn.get(entity,
                                   downloadLocation=path,
                                   ifcollision=ifcollision,
                                   followLink=followLink)

        if is_container(entity):
            # The calling thread blocks in wait_until_finished() until the root folder sync reports completion.
            files = self._sync_root(
                entity, path, ifcollision, followLink, progress,
            ).wait_until_finished()
        elif isinstance(entity, File):
            files = [entity]
        else:
            raise ValueError(
                "Cannot initiate a sync from an entity that is not a File or Folder"
            )

        # Sub folders may finish in any order, so sort by path for a predictable result.
        # Not required, but convenient for testing.
        files.sort(key=_path_key)
        return files
Exemple #3
0
def cleanup(items):
    """Clean up junk created during testing.

    Items are processed in reverse order. Each item is handled by type:

    - an ``Entity``, anything ``utils.is_synapse_id`` accepts, or any object with a ``deleteURI`` attribute is
      deleted through ``syn.delete``;
    - a string naming an existing path is removed from the local file system (directories recursively);
    - anything else is reported on stderr.

    Cleanup is best-effort: failures are printed and never propagated, so one bad item does not stop the
    remaining items from being cleaned.

    :param items: A reversible sequence of things to clean up.
    """
    for item in reversed(items):
        if (isinstance(item, Entity) or utils.is_synapse_id(item)
                or hasattr(item, 'deleteURI')):
            try:
                syn.delete(item)
            except Exception as ex:
                # An exception can carry a ``response`` attribute that is None, so read the
                # status code defensively instead of dereferencing ``ex.response`` directly.
                response = getattr(ex, 'response', None)
                status_code = getattr(response, 'status_code', None)
                # 404 and 403 responses are ignored silently; everything else is reported.
                if status_code not in (404, 403):
                    print("Error cleaning up entity: " + str(ex))
        elif isinstance(item, str):
            if os.path.exists(item):
                try:
                    if os.path.isdir(item):
                        shutil.rmtree(item)
                    else:  # Assume that remove will work on anything besides folders
                        os.remove(item)
                except Exception as ex:
                    print(ex)
        else:
            # Terminate the line so that consecutive diagnostics do not run together.
            sys.stderr.write('Don\'t know how to clean: %s\n' % str(item))
    def used(self,
             target=None,
             targetVersion=None,
             wasExecuted=None,
             url=None,
             name=None):
        """
        Add a resource used by the activity.

        This method tries to be as permissive as possible. It accepts a string which might be a synapse ID or a URL,
        a synapse entity, a UsedEntity or UsedURL dictionary or a list containing any combination of these.

        In addition, named parameters can be used to specify the fields of either a UsedEntity or a UsedURL.
        If target and optionally targetVersion are specified, create a UsedEntity.
        If url and optionally name are specified, create a UsedURL. When name is omitted, the URL itself is used as
        the name.

        It is an error to specify both target/targetVersion parameters and url/name parameters in the same call.
        To add multiple UsedEntities and UsedURLs, make a separate call for each or pass in a list.

        In case of conflicting settings for wasExecuted both inside an object and with a parameter, the parameter wins.
        For example, this UsedURL will have wasExecuted set to False::

            activity.used({'url':'http://google.com', 'name':'Goog', 'wasExecuted':True}, wasExecuted=False)

        Entity examples::

            activity.used('syn12345')
            activity.used(entity)
            activity.used(target=entity, targetVersion=2)
            activity.used(codeEntity, wasExecuted=True)
            activity.used({'reference':{'target':'syn12345', 'targetVersion':1}, 'wasExecuted':False})

        URL examples::

            activity.used('http://mydomain.com/my/awesome/data.RData')
            activity.used(url='http://mydomain.com/my/awesome/data.RData', name='Awesome Data')
            activity.used(url='https://github.com/joe_hacker/code_repo', name='Gnarly hacks', wasExecuted=True)
            activity.used({'url':'https://github.com/joe_hacker/code_repo', 'name':'Gnarly hacks'}, wasExecuted=True)

        List example::

            activity.used(['syn12345', 'syn23456', entity,
                          {'reference':{'target':'syn100009', 'targetVersion':2}, 'wasExecuted':True},
                          'http://mydomain.com/my/awesome/data.RData'])

        The UsedEntity records built by this method store the entity under the reference keys ``targetId`` and
        ``targetVersionNumber``. A dictionary passed in as ``target`` is stored as-is (not copied), so the
        ``concreteType`` and ``wasExecuted`` values set here are visible to the caller.

        :param target:        A Synapse ID string (optionally of the form ``syn234.4``), a URL string, a Synapse
                              entity, a UsedEntity/UsedURL dictionary, or a list of any of these.
        :param targetVersion: Version of the target entity; only meaningful for entity targets.
        :param wasExecuted:   Whether the resource was executed. ``None`` keeps a value already present in a
                              dictionary target and otherwise defaults to False.
        :param url:           URL of a used resource, as an alternative to ``target``.
        :param name:          Display name for a URL resource.

        :raises ValueError:   if a string target is not a valid Synapse id, or if two conflicting versions are given.
        :raises SynapseError: if the parameters match none of the supported forms.
        """
        # NOTE: the argument checks below inspect locals(), so the parameter names must stay
        # bound under their original names in this frame.

        # -- A list of targets
        if isinstance(target, list):
            badargs = _get_any_bad_args(['targetVersion', 'url', 'name'],
                                        locals())
            _raise_incorrect_used_usage(badargs, 'list of used resources')

            # Each element is added through its own call; wasExecuted applies to all of them.
            for item in target:
                self.used(item, wasExecuted=wasExecuted)
            return

        # -- UsedEntity
        elif is_used_entity(target):
            badargs = _get_any_bad_args(['targetVersion', 'url', 'name'],
                                        locals())
            _raise_incorrect_used_usage(
                badargs, 'dictionary representing a used resource')

            resource = target
            if 'concreteType' not in resource:
                resource[
                    'concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedEntity'

        # -- Used URL
        elif is_used_url(target):
            badargs = _get_any_bad_args(['targetVersion', 'url', 'name'],
                                        locals())
            _raise_incorrect_used_usage(badargs, 'URL')

            resource = target
            if 'concreteType' not in resource:
                resource[
                    'concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedURL'

        # -- Synapse Entity
        elif is_synapse_entity(target):
            badargs = _get_any_bad_args(['url', 'name'], locals())
            _raise_incorrect_used_usage(badargs, 'Synapse entity')

            reference = {'targetId': target['id']}
            if 'versionNumber' in target:
                reference['targetVersionNumber'] = target['versionNumber']
            # An explicit targetVersion overrides the entity's own version number.
            if targetVersion:
                reference['targetVersionNumber'] = int(targetVersion)
            resource = {
                'reference':
                reference,
                'concreteType':
                'org.sagebionetworks.repo.model.provenance.UsedEntity'
            }
        # -- URL parameter
        elif url:
            badargs = _get_any_bad_args(['target', 'targetVersion'], locals())
            _raise_incorrect_used_usage(badargs, 'URL')

            resource = {
                'url': url,
                # target may not be combined with url, so an unnamed URL is named after
                # itself, matching the "URL as a string" branch below.
                'name': name if name else url,
                'concreteType':
                'org.sagebionetworks.repo.model.provenance.UsedURL'
            }

        # -- URL as a string
        elif is_url(target):
            badargs = _get_any_bad_args(['targetVersion'], locals())
            _raise_incorrect_used_usage(badargs, 'URL')
            resource = {
                'url': target,
                'name': name if name else target,
                'concreteType':
                'org.sagebionetworks.repo.model.provenance.UsedURL'
            }

        # -- Synapse Entity ID (assuming the string is an ID)
        elif isinstance(target, str):
            badargs = _get_any_bad_args(['url', 'name'], locals())
            _raise_incorrect_used_usage(badargs, 'Synapse entity')
            vals = target.split('.')  # Handle synapseIds of the form syn234.4
            if not is_synapse_id(vals[0]):
                raise ValueError('%s is not a valid Synapse id' % target)
            if len(vals) == 2:
                # A version embedded in the id must agree with an explicit targetVersion.
                if targetVersion and int(targetVersion) != int(vals[1]):
                    raise ValueError(
                        'Two conflicting versions for %s were specified' %
                        target)
                targetVersion = int(vals[1])
            reference = {'targetId': vals[0]}
            if targetVersion:
                reference['targetVersionNumber'] = int(targetVersion)
            resource = {
                'reference':
                reference,
                'concreteType':
                'org.sagebionetworks.repo.model.provenance.UsedEntity'
            }
        else:
            raise SynapseError(
                'Unexpected parameters in call to Activity.used().')

        # Set wasExecuted
        if wasExecuted is None:
            # Default to False
            if 'wasExecuted' not in resource:
                resource['wasExecuted'] = False
        else:
            # wasExecuted parameter overrides setting in an object
            resource['wasExecuted'] = wasExecuted

        # Add the used resource to the activity
        self['used'].append(resource)
Exemple #5
0
def syncFromSynapse(syn,
                    entity,
                    path=None,
                    ifcollision='overwrite.local',
                    allFiles=None,
                    followLink=False):
    """Recursively fetch every file below a Synapse container and collect the resulting File entities.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, a Synapse Entity object of type file, folder or project.

    :param path:        An optional path where the file hierarchy will be reproduced. If not specified the files will by
                        default be placed in the synapseCache.

    :param ifcollision: Determines how to handle file collisions. Maybe "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :param allFiles:    Optional list that the downloaded File entities are appended to. The recursive calls share
                        this list; a new list is created when it is None.

    :param followLink:  Determines whether the link returns the target Entity.
                        Defaults to False

    :returns: the ``allFiles`` list holding every File entity collected so far

    :raises ValueError: if ``entity`` is neither a File nor a container

    A Synapse ID is first resolved with ``syn.get``. A File is appended to the result and returned immediately.
    For a container, every child is visited: sub-containers are synced recursively (into a matching sub-directory
    of ``path`` when a path is given) and every other child is fetched with ``syn.get`` and kept if it is a File.

    If a ``path`` is given, a manifest named by ``MANIFEST_FILENAME`` is written into it with ``generateManifest``
    after the children of the container have been processed.

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    if allFiles is None:
        allFiles = []

    # Every syn.get call in this function uses the same download settings.
    fetch_options = {
        'downloadLocation': path,
        'ifcollision': ifcollision,
        'followLink': followLink,
    }

    # Turn a bare Synapse ID into an actual entity before inspecting it.
    if is_synapse_id(entity):
        entity = syn.get(entity, **fetch_options)

    # A single file needs no traversal.
    if isinstance(entity, File):
        allFiles.append(entity)
        return allFiles

    entity_id = id_of(entity)
    if not is_container(entity):
        raise ValueError(
            "The provided id: %s is neither a container nor a File" %
            entity_id)

    for child in syn.getChildren(entity_id):
        if not is_container(child):
            # Leaf entity: fetch it and keep it only if it turned out to be a File.
            fetched = syn.get(child['id'], **fetch_options)
            if isinstance(fetched, File):
                allFiles.append(fetched)
            continue

        if path is None:
            # Downloading into the cache: no local directory to mirror.
            sub_path = None
        else:
            # Downloading outside the cache: mirror the container as a directory.
            sub_path = os.path.join(path, child['name'])
            try:
                os.makedirs(sub_path)
            except OSError as err:
                # An already existing path is fine; anything else is a real error.
                if err.errno != errno.EEXIST:
                    raise

        # Descend into the sub-container, sharing the result list.
        syncFromSynapse(syn,
                        child['id'],
                        sub_path,
                        ifcollision,
                        allFiles,
                        followLink=followLink)

    # With path=None everything lives in the cache and no manifest is written.
    if path is not None:
        manifest_path = os.path.expanduser(
            os.path.normcase(os.path.join(path, MANIFEST_FILENAME)))
        generateManifest(syn, allFiles, manifest_path)

    return allFiles