def test_is_container__getChildren_results():
    """Check is_container against child-listing style dicts.

    A dict whose 'type' is the FileEntity class name must not be reported as
    a container, while one whose 'type' is the Folder class name must be.
    """
    def child_row(name, benefactor_id, concrete_type, syn_id):
        # Build one child entry; only the four varying fields are parameters.
        return {'versionLabel': '1',
                'name': name,
                'versionNumber': 1,
                'benefactorId': benefactor_id,
                'type': concrete_type,
                'id': syn_id}

    file_row = child_row('firstPageResult', 987,
                         'org.sagebionetworks.repo.model.FileEntity', 'syn123')
    assert_false(is_container(file_row))

    folder_row = child_row('secondPageResult', 654,
                           'org.sagebionetworks.repo.model.Folder', 'syn456')
    assert_true(is_container(folder_row))
Ejemplo n.º 2
0
    def sync(self, entity, path, ifcollision, followLink):
        """Sync a File or a whole container and return the resulting files.

        :param entity:      a Synapse id or an entity; an id is first resolved through ``self._syn.get``
        :param path:        local download location handed to the underlying calls
        :param ifcollision: collision policy handed to the underlying calls
        :param followLink:  link-following flag handed to the underlying calls

        :returns: the files, sorted by their 'path' value (missing/empty paths sort as '')

        :raises ValueError: if the entity is neither a container nor a File
        """
        progress = CumulativeTransferProgress('Downloaded')

        if is_synapse_id(entity):
            # a bare id carries no type information, so fetch the real entity to seed the sync
            entity = self._syn.get(
                entity,
                downloadLocation=path,
                ifcollision=ifcollision,
                followLink=followLink,
            )

        if not is_container(entity):
            if not isinstance(entity, File):
                raise ValueError(
                    "Cannot initiate a sync from an entity that is not a File or Folder"
                )
            synced = [entity]
        else:
            # the calling thread walks the folder hierarchy and then blocks until
            # every scheduled file download has completed
            synced = self._sync_root(
                entity, path, ifcollision, followLink, progress
            ).wait_until_finished()

        def _path_key(item):
            # entries without a path are ordered as if their path were ''
            return item.get('path') or ''

        # sub folders may finish out of submission order; sorting by path gives
        # a predictable result (not required, but convenient for tests)
        synced.sort(key=_path_key)
        return synced
def updateFormatTypeByFileName(syn,synId,annoKey,annoDict):
    """
    Update entity file type annotations based on file name.

    Every File reached from ``synId`` is fetched without downloading its
    content and handed to ``_helperUpdateFormatTypeByFileName`` (defined
    elsewhere), which performs the actual per-File work.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File OR a list of Synapse IDs of File
    :param annoKey:        The annotation key for file type. (i.e. "fileType", "fileFormat", or "formatType")
    :param annoDict        A dict where key is the extension of the filename,
                           value is the corresponding file type value in entity annotations

    :returns: None

    Example:

       updateFormatTypeByFileName(syn,"syn12345","fileType",{".bam":"bam", ".doc":"word", "bw":"bigwig"})
       OR
       updateFormatTypeByFileName(syn,["syn1","syn2"],"fileType",{".bam":"bam", ".doc":"word", "bw":"bigwig"})

    """
    # print(...) with a single argument behaves the same on Python 2 and 3
    if isinstance(synId, list):
        print("Input is a list of Synapse IDs \n")
        for synID in synId:
            print("Getting File %s ..." % synID)
            temp = syn.get(synID,downloadFile = False)
            _helperUpdateFormatTypeByFileName(syn,temp,annoKey,annoDict)
        return

    print("Input is a Synpase ID \n")
    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        print("%s is a File \n" % synId)
        _helperUpdateFormatTypeByFileName(syn,starting,annoKey,annoDict)
        return

    # Project/Folder: visit every file yielded by the walk; each file entry
    # is indexed so that position 1 holds the Synapse ID
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting File %s ..." % i[1])
            _helperUpdateFormatTypeByFileName(syn,temp,annoKey,annoDict)
def updateAnnoByMetadata(syn, synId, metaDf, refCol, cols2Add,fileExts):
    """
    Update entity annotations from metadata.

    Every File reached from ``synId`` is fetched without downloading its
    content and handed to ``_helperUpdateAnnoByMetadata`` (defined elsewhere),
    which performs the actual per-File work.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File OR a list of Synapse IDs of File
    :param metaDf          A pandas data frame of entity metadata
    :param refCol          A name of the column in metaDf that matching one of the entity attributes
    :param cols2Add        A list of columns in metaDf need to be added as entity annotations
    :param fileExts        A list of all file extensions (PsychENCODE ONLY!!!)

    :returns: None

    Example:

        updateAnnoByMetadata(syn,"syn12345",metadata,"id",["dataType","tester"],[".bam",".csv"])

    """
    # print(...) with a single argument behaves the same on Python 2 and 3
    if isinstance(synId, list):
        print("Input is a list of Synapse IDs \n")
        for synID in synId:
            print("Getting File %s ..." % synID)
            temp = syn.get(synID,downloadFile = False)
            _helperUpdateAnnoByMetadata(syn,temp,metaDf,refCol,cols2Add,fileExts)
        return

    print("Input is a Synpase ID \n")
    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        print("%s is a File \n" % synId)
        _helperUpdateAnnoByMetadata(syn,starting,metaDf,refCol,cols2Add,fileExts)
        return

    # Project/Folder: visit every file yielded by the walk; each file entry
    # is indexed so that position 1 holds the Synapse ID
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting File %s ..." % i[1])
            _helperUpdateAnnoByMetadata(syn,temp,metaDf,refCol,cols2Add,fileExts)
def searchInMetadata(syn, synId, metaDf, refCol,col2Check,values2Check,fileExts):
    """
    Search for a list of Synapse IDs with a given column and expected values
    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder
    :param metaDf          A pandas data frame of entity metadata
    :param refCol          A name of the column in metaDf that matching one of the entity attributes
    :param col2Check       A name of the column in metaDf you are seaching
    :param values2Check    A list of values you are searching
    :param fileExts        A list of all file extensions (PsychENCODE ONLY!!!)

    Return:
      A list of Synapse IDs. The list is empty when synId is not a
      Project/Folder (an error message is printed in that case).

    Example:
       IDs = searchInMetadata(syn,"syn123",metadata,"id","tester",["foo","bar"],[".bam",".csv"])

    """
    synapseIds = []
    # print(...) with a single argument behaves the same on Python 2 and 3
    print("Search in metadata for key: %s \n" % col2Check)
    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        print("ERROR: %s is not a Synapse ID of Project or Folder" % synId)
        # keep the documented return type even on the error path
        return synapseIds

    # each file entry from the walk is indexed so that position 1 holds the Synapse ID;
    # the helper (defined elsewhere) receives synapseIds as its accumulator
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting File %s ..." % i[1])
            _helperSearchInMetadata(syn,temp,metaDf,refCol,col2Check,values2Check,fileExts,synapseIds)

    return synapseIds
def updateAnnoByDict(syn,synId,annoDict):
    """
    Update annotations by giving a dict.

    Every File reached from ``synId`` is fetched without downloading its
    content and handed to ``_helperUpdateAnnoByDict`` (defined elsewhere),
    which performs the actual per-File work.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File OR a list of Synapse IDs of File
    :param annoDict        A dict of annotations

    :returns: None

    Example:

       updateAnnoByDict(syn,"syn12345",{"dataType":"testing","projectName":"foo"})
       OR
       updateAnnoByDict(syn,["syn1","syn2"],{"dataType":"testing","projectName":"foo"})

    """
    # print(...) with a single argument behaves the same on Python 2 and 3
    if isinstance(synId, list):
        print("Input is a list of Synapse IDs \n")
        for synID in synId:
            print("Getting File %s ..." % synID)
            temp = syn.get(synID,downloadFile = False)
            _helperUpdateAnnoByDict(syn,temp,annoDict)
        return

    print("Input is a Synpase ID \n")
    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        print("%s is a File \n" % synId)
        _helperUpdateAnnoByDict(syn,starting,annoDict)
        return

    # Project/Folder: visit every file yielded by the walk; each file entry
    # is indexed so that position 1 holds the Synapse ID
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting File %s ..." % i[1])
            _helperUpdateAnnoByDict(syn,temp,annoDict)
Ejemplo n.º 7
0
def test_is_container():
    """Check is_container against query-result style dicts and entity objects."""
    # Dicts keyed like entity annotation query results.
    # Note: the key prefix may be capitalized or not, depending on the from clause of the query
    project_row = {'entity.versionNumber': 1,
                   'entity.nodeType': 'project',
                   'entity.concreteType': ['org.sagebionetworks.repo.model.Project'],
                   'entity.createdOn': 1451512703905,
                   'entity.id': 'syn5570912',
                   'entity.name': 'blah'}
    capitalized_project_row = {'Entity.nodeType': 'project',
                               'Entity.id': 'syn5570912',
                               'Entity.name': 'blah'}
    folder_row = {'entity.concreteType': ['org.sagebionetworks.repo.model.Folder'],
                  'entity.id': 'syn5570914',
                  'entity.name': 'flapdoodle'}
    file_row = {'File.concreteType': ['org.sagebionetworks.repo.model.FileEntity'],
                'File.id': 'syn5570914',
                'File.name': 'flapdoodle'}

    assert is_container(project_row)
    assert is_container(capitalized_project_row)
    assert is_container(folder_row)
    assert not is_container(file_row)

    # Entity objects: Folder and Project are containers, File is not
    assert is_container(Folder("Stuff", parentId="syn12345"))
    assert is_container(Project("My Project", parentId="syn12345"))
    assert not is_container(File("asdf.png", parentId="syn12345"))
def test_is_container__getChildren_results():
    """Check is_container against child-listing style dicts.

    The FileEntity-typed entry must not be a container; the Folder-typed
    entry must be.
    """
    fields = ('versionLabel', 'name', 'versionNumber', 'benefactorId', 'type', 'id')

    file_child = dict(zip(fields, (
        '1', 'firstPageResult', 1, 987,
        'org.sagebionetworks.repo.model.FileEntity', 'syn123',
    )))
    assert not is_container(file_child)

    folder_child = dict(zip(fields, (
        '1', 'secondPageResult', 1, 654,
        'org.sagebionetworks.repo.model.Folder', 'syn456',
    )))
    assert is_container(folder_child)
Ejemplo n.º 9
0
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles = None):
    """Recursively fetch everything below a Synapse folder or project.

    The children of ``entity`` are listed with a query on its id. Container
    children are descended into recursively (creating a matching local
    directory when ``path`` is given); every other child is fetched with
    ``syn.get`` and collected.

    :param syn:    A synapse object as obtained with syn = synapseclient.login()

    :param entity:  A Synapse ID, a Synapse Entity object of type folder or project.

    :param path: An optional path where the file hierarchy will be
                 reproduced.  If not specified no directories are created
                 and ``syn.get`` is called with downloadLocation=None.

    :param ifcollision:   Determines how to handle file collisions.
                          May be "overwrite.local", "keep.local", or "keep.both".
                          Defaults to "overwrite.local".

    :param allFiles:  Accumulator list used by the recursion; fetched entities
                      are appended to it. A new list is created when omitted.

    :returns: list of the fetched entities (the ``allFiles`` accumulator)

    Example::
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)
    """
    if allFiles is None:
        allFiles = []
    parent_id = id_of(entity)
    query = "select id, name, nodeType from entity where entity.parentId=='%s'" % parent_id
    for row in syn.chunkedQuery(query):
        if not is_container(row):
            fetched = syn.get(row['entity.id'], downloadLocation = path, ifcollision = ifcollision)
            allFiles.append(fetched)
            continue

        sub_path = None
        if path is not None:
            # downloading outside the cache: mirror the folder locally
            sub_path = os.path.join(path, row['entity.name'])
            try:
                os.mkdir(sub_path)
            except OSError as err:
                # an already-existing directory is fine, anything else is not
                if err.errno != errno.EEXIST:
                    raise
            print('making dir', sub_path)
        syncFromSynapse(syn, row['entity.id'], sub_path, ifcollision, allFiles)
    return allFiles
Ejemplo n.º 10
0
def _helpWalk(syn,synId,newpath=None):
    """Generator behind the walk: yield (dirpath, dirs, nondirs) per container.

    ``dirpath`` is a (path, synId) tuple; ``dirs`` and ``nondirs`` are lists of
    (name, id) tuples for the container and non-container children returned
    by ``syn.getChildren``. Each entry of ``dirs`` is then walked recursively
    with its path joined onto the parent's path.

    :param syn:      Synapse client object
    :param synId:    id of the entity to walk
    :param newpath:  path prefix used during recursion; None on the initial call
    """
    starting = syn.get(synId,downloadFile=False)
    if newpath is not None:
        dirpath = (newpath, synId)
    elif is_container(starting):
        dirpath = (starting.name, synId)
    else:
        # initial call on something that is not a container: nothing to walk
        return

    dirs = []
    nondirs = []
    for child in syn.getChildren(synId):
        bucket = dirs if is_container(child) else nondirs
        bucket.append((child['name'], child['id']))

    yield dirpath, dirs, nondirs

    # dirs is iterated after the yield, so it is read as it stands once the consumer resumes
    for entry in dirs:
        child_path = os.path.join(dirpath[0], entry[0])
        for item in _helpWalk(syn, entry[1], newpath=child_path):
            yield item
Ejemplo n.º 11
0
def _helpWalk(syn,synId,newpath=None):
    """Generator behind the walk: yield (dirpath, dirs, nondirs) per container.

    Children are listed with a chunked query on ``parentId``. ``dirpath`` is a
    (path, synId) tuple; ``dirs`` and ``nondirs`` are lists of (name, id)
    tuples for the container and non-container children. Each entry of
    ``dirs`` is then walked recursively with its path joined onto the parent's.

    :param syn:      Synapse client object
    :param synId:    id of the entity to walk
    :param newpath:  path prefix used during recursion; None on the initial call
    """
    starting = syn.get(synId,downloadFile=False)
    if newpath is not None:
        dirpath = (newpath, synId)
    elif is_container(starting):
        dirpath = (starting.name, synId)
    else:
        # initial call on something that is not a container: nothing to walk
        return

    dirs = []
    nondirs = []
    children = syn.chunkedQuery('select id, name, nodeType from entity where parentId == "%s"'%synId)
    for row in children:
        bucket = dirs if is_container(row) else nondirs
        bucket.append((row['entity.name'], row['entity.id']))

    yield dirpath, dirs, nondirs

    # dirs is iterated after the yield, so it is read as it stands once the consumer resumes
    for entry in dirs:
        child_path = os.path.join(dirpath[0], entry[0])
        for item in _helpWalk(syn, entry[1], child_path):
            yield item
Ejemplo n.º 12
0
def _helpWalk(syn, synId, newpath=None):
    """Generator behind the walk: yield (dirpath, dirs, nondirs) per container.

    ``dirpath`` is a (path, synId) tuple; ``dirs`` and ``nondirs`` are lists of
    (name, id) tuples for the container and non-container children returned
    by ``syn.getChildren``. Each entry of ``dirs`` is then walked recursively
    with its path joined onto the parent's path.

    :param syn:      Synapse client object
    :param synId:    id of the entity to walk
    :param newpath:  path prefix used during recursion; None on the initial call
    """
    starting = syn.get(synId, downloadFile=False)
    is_initial_call = newpath is None
    # Only the initial call checks that the starting entity is a container
    if is_initial_call and not is_container(starting):
        return

    root_label = starting.name if is_initial_call else newpath
    dirpath = (root_label, synId)

    dirs, nondirs = [], []
    for child in syn.getChildren(synId):
        if is_container(child):
            dirs.append((child['name'], child['id']))
        else:
            nondirs.append((child['name'], child['id']))

    yield dirpath, dirs, nondirs

    # dirs is iterated after the yield, so it is read as it stands once the consumer resumes
    for entry in dirs:
        for item in _helpWalk(syn, entry[1], newpath=os.path.join(dirpath[0], entry[0])):
            yield item
def auditAgainstMetadata(syn, synId, metaDf, refCol, cols2Check,fileExts):
    """
    Audit entity annotations against metadata.

    The per-File check is delegated to ``_helperAuditMetadata`` (defined
    elsewhere), which receives the three result accumulators.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File
    :param metaDf          A pandas data frame of entity metadata
    :param refCol          A name of the column in metaDf that matching one of the entity attributes
    :param cols2Check      A list of columns in metaDf need to be audited with entity annotations
    :param fileExts        A list of all file extensions (PsychENCODE ONLY!!!)

    Return:
      If synId is an ID of a Project/Folder
        entityMissMetadata:    A list of Synapse IDs that have no matching metadata
        incorrectAnnoated:     A dict object where key is the annotation key and value is a list of entities
                               that were annotated incorrectly (i.e. {"sampleId":["syn1","syn2"]})
        missedAnno:            A dict object where key is the annotation key and value is a list of entities
                               that were missing the annotation (i.e. {"dataType":["syn3","syn4"]})
      If synId is an ID of a File
        entityMissMetadata:    A boolean. True when the entity has no matching metadata
        incorrectAnnoated:     A list of keys that were annotated incorrectly
        missedAnno:            A list of keys were not annotated

    Example:
       entityMissMetadata,incorrectAnnotated, missingAnno =
               auditAgainstMetadata(syn,"syn12345",metadata,"id",["dataType","tester"],[".bam",".csv"])

    """
    entityMissMetadata = []
    incorrectAnnotated = {}
    missingAnno = {}
    # print(...) with a single argument behaves the same on Python 2 and 3
    print("Check annotations against metadata.\n")
    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        print("%s is a File \n" % synId)
        _helperAuditMetadata(syn,starting,metaDf,refCol,cols2Check,fileExts,
                             entityMissMetadata,incorrectAnnotated,missingAnno)
        noMeta = bool(entityMissMetadata)
        # list(d) yields a real list of keys on both Python 2 and 3,
        # whereas d.keys() is a view object on Python 3
        return noMeta,list(incorrectAnnotated),list(missingAnno)

    # each file entry from the walk is indexed so that position 1 holds the Synapse ID
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting File %s ..." % i[1])
            _helperAuditMetadata(syn,temp,metaDf,refCol,cols2Check,fileExts,
                                 entityMissMetadata,incorrectAnnotated,missingAnno)
    return entityMissMetadata,incorrectAnnotated,missingAnno
def auditFormatTypeByFileName(syn,synId,annoKey,annoDict):
    """
    Audit entity file type annotations by checking file name with file type annotation.

    The per-File check is delegated to ``_helperAuditFormatTypeByFileName``
    (defined elsewhere), which receives the result dict as its accumulator.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File
    :param annoKey:        The annotation key for file type. (i.e. "fileType", "fileFormat", or "formatType")
    :param annoDict        A dict where key is the extension of the filename,
                           value is the corresponding file type value in entity annotations

    Return:

      If synId is an ID of a Project/Folder
        A dict with 3 keys and each value is a list of File Synapse ID
        (i.e. {"incorrect":[], "missingInAnno":["syn1"], "missingInDict":["syn2","syn3"]})
      If synId is an ID of a File
        A string with 4 possible answers:
            1. "correct"
            2. "incorrect"
            3. "missingInAnno" - no file type in entity annotations
            4. "missingInDict" - file type is not found in file type annoDict

    Example:

       result = auditFormatTypeByFileName(syn,"syn12345","fileType",{".bam":"bam", ".doc":"word", "bw":"bigwig"})

    """

    needAnnotated = {"incorrect":[],
                     "missingInAnno":[],
                     "missingInDict":[]}

    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        # print(...) with a single argument behaves the same on Python 2 and 3
        print("%s is a File \n" % synId)
        _helperAuditFormatTypeByFileName(syn,starting,annoKey,annoDict,needAnnotated)
        # report the category that the helper filled in; "correct" when none was
        result = "correct"
        for key in needAnnotated:
            if needAnnotated[key]:
                result = key
        return result

    # each file entry from the walk is indexed so that position 1 holds the Synapse ID
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting File %s ..." % i[1])
            _helperAuditFormatTypeByFileName(syn,temp,annoKey,annoDict,needAnnotated)
    return needAnnotated
def auditCommonDict(syn, synId, annoDict):
    """
    Audit entity annotations against common dictionary shared among all entities.

    The per-File check is delegated to ``_helperAuditCommonDict`` (defined
    elsewhere), which receives the three result accumulators.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File
    :param annoDict        A dict of annotations shared among entities

    Return:
      If synId is an ID of a Project/Folder
        entityMissAllAnno:     A list of Synapse IDs that have not been annotated
        incorrectAnnoated:     A dict where key is the annotation key and value is a list of entities
                               that were annotated incorrectly (i.e. {"sampleId":["syn1","syn2"]})
        missedAnno:            A dict where key is the annotation key and value is a list of entities
                               that were missing the annotation (i.e. {"dataType":["syn3","syn4"]})
      If synId is an ID of a File
        entityMissAllAnno:     A boolean if synID is a ID of a File
        incorrectAnnoated:     A list of keys that were annotated incorrectly
        missedAnno:            A list of keys were missing the entity annotation

    Example:

       entityMissAllAnno, incorrectAnnoated, missingAnno =
               auditCommonDict(syn,"syn12345",{"dataType":"testing","projectName":"foo"})

    """

    entityMissAllAnno = []
    incorrectAnnotated = {}
    missingAnno = {}
    # print(...) with a single argument behaves the same on Python 2 and 3
    print("Check annotations against common dictionary. \n")
    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        print("%s is a File \n" % synId)
        _helperAuditCommonDict(syn,starting,annoDict,entityMissAllAnno,incorrectAnnotated,missingAnno)
        noAnno = bool(entityMissAllAnno)
        # list(d) yields a real list of keys on both Python 2 and 3,
        # whereas d.keys() is a view object on Python 3
        return noAnno,list(incorrectAnnotated),list(missingAnno)

    # each file entry from the walk is indexed so that position 1 holds the Synapse ID
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting file %s ..." % i[1])
            _helperAuditCommonDict(syn,temp,annoDict,entityMissAllAnno,incorrectAnnotated,missingAnno)
    return entityMissAllAnno,incorrectAnnotated,missingAnno
Ejemplo n.º 16
0
def _recursiveGet(id, path, syn):
    """Traverse a hierarchy, downloading files and creating subfolders as necessary.

    :param id:    Synapse id of the parent whose children are listed by query
    :param path:  local directory that receives the children of ``id``
    :param syn:   Synapse client object

    :returns: None
    """
    import errno

    from synapseclient.entity import is_container

    results = syn.chunkedQuery("select id, name, concreteType from entity where entity.parentId=='%s'" %id)
    for result in results:
        if is_container(result):
            new_path = os.path.join(path, result['entity.name'])
            try:
                os.mkdir(new_path)
            except OSError as err:
                # an already-existing directory is fine; anything else is a real failure
                if err.errno != errno.EEXIST:
                    raise
            print('making dir', new_path)
            _recursiveGet(result['entity.id'], new_path, syn)
        else:
            syn.get(result['entity.id'], downloadLocation=path)
Ejemplo n.º 17
0
def _recursiveGet(id, path, syn):
    """Traverse a hierarchy, downloading files and creating subfolders as necessary.

    :param id:    Synapse id of the parent whose children are listed by query
    :param path:  local directory that receives the children of ``id``
    :param syn:   Synapse client object

    :returns: None
    """
    import errno

    from synapseclient.entity import is_container

    results = syn.chunkedQuery("select id, name, nodeType from entity where entity.parentId=='%s'" %id)
    for result in results:
        if is_container(result):
            new_path = os.path.join(path, result['entity.name'])
            try:
                os.mkdir(new_path)
            except OSError as err:
                # an already-existing directory is fine; anything else is a real failure
                if err.errno != errno.EEXIST:
                    raise
            print('making dir', new_path)
            _recursiveGet(result['entity.id'], new_path, syn)
        else:
            syn.get(result['entity.id'], downloadLocation=path)
Ejemplo n.º 18
0
def delAnnoByKey(syn, synId, keyList):
    """
    Delete annotations by key for a Synapse object.

    Every File reached from ``synId`` is fetched without downloading its
    content and handed to ``_helperDelAnnoByKey`` (defined elsewhere), which
    performs the actual per-File work.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File or a list of Synapse IDs
    :param keyList         A list of annotations keys that needs to be deleted

    :returns: None

    Example:

       delAnnoByKey(syn,"syn12345",["dataType","projectName","sampleId"])
       OR
       delAnnoByKey(syn,["syn1","syn2","syn3"],["dataType"])

    """

    # print(...) with a single argument behaves the same on Python 2 and 3
    print("Delte entity annotations by key(s) - \n %s" % "\n".join(keyList))

    if isinstance(synId, list):
        print("Input is a list of Synapse IDs \n")
        for synID in synId:
            print("Getting File %s ..." % synID)
            temp = syn.get(synID, downloadFile=False)
            _helperDelAnnoByKey(syn, temp, keyList)
        return

    print("Input is a Synpase ID \n")
    starting = syn.get(synId, downloadFile=False)
    if not is_container(starting):
        print("%s is a File \n" % synId)
        _helperDelAnnoByKey(syn, starting, keyList)
        return

    # Project/Folder: visit every file yielded by the walk; each file entry
    # is indexed so that position 1 holds the Synapse ID
    for dirpath, dirname, filename in synu.walk(syn, synId):
        for i in filename:
            temp = syn.get(i[1], downloadFile=False)
            print("Getting File %s ..." % i[1])
            _helperDelAnnoByKey(syn, temp, keyList)
Ejemplo n.º 19
0
def delAnnoByKey(syn,synId,keyList):
    """
    Delete annotations by key for a Synapse object.

    Every File reached from ``synId`` is fetched without downloading its
    content and handed to ``_helperDelAnnoByKey`` (defined elsewhere), which
    performs the actual per-File work.

    :param syn:            A Synapse object: syn = synapseclient.login()- Must be logged into synapse
    :param synId:          A Synapse ID of Project, Folder, or File or a list of Synapse IDs
    :param keyList         A list of annotations keys that needs to be deleted

    :returns: None

    Example:

       delAnnoByKey(syn,"syn12345",["dataType","projectName","sampleId"])
       OR
       delAnnoByKey(syn,["syn1","syn2","syn3"],["dataType"])

    """

    # print(...) with a single argument behaves the same on Python 2 and 3
    print("Delte entity annotations by key(s) - \n %s" % "\n".join(keyList))

    if isinstance(synId, list):
        print("Input is a list of Synapse IDs \n")
        for synID in synId:
            print("Getting File %s ..." % synID)
            temp = syn.get(synID,downloadFile = False)
            _helperDelAnnoByKey(syn,temp,keyList)
        return

    print("Input is a Synpase ID \n")
    starting = syn.get(synId,downloadFile = False)
    if not is_container(starting):
        print("%s is a File \n" % synId)
        _helperDelAnnoByKey(syn,starting,keyList)
        return

    # Project/Folder: visit every file yielded by the walk; each file entry
    # is indexed so that position 1 holds the Synapse ID
    for dirpath,dirname,filename in synu.walk(syn,synId):
        for i in filename:
            temp = syn.get(i[1],downloadFile = False)
            print("Getting File %s ..." % i[1])
            _helperDelAnnoByKey(syn,temp,keyList)
def test_is_container():
    """Check is_container against query-result style dicts and entity objects."""
    # Dicts keyed like entity annotation query results.
    # Note: the key prefix may be capitalized or not, depending on the from clause of the query
    container_rows = (
        {
            'entity.versionNumber': 1,
            'entity.nodeType': 'project',
            'entity.concreteType': ['org.sagebionetworks.repo.model.Project'],
            'entity.createdOn': 1451512703905,
            'entity.id': 'syn5570912',
            'entity.name': 'blah',
        },
        {
            'Entity.nodeType': 'project',
            'Entity.id': 'syn5570912',
            'Entity.name': 'blah',
        },
        {
            'entity.concreteType': ['org.sagebionetworks.repo.model.Folder'],
            'entity.id': 'syn5570914',
            'entity.name': 'flapdoodle',
        },
    )
    for row in container_rows:
        assert is_container(row)

    file_row = {
        'File.concreteType': ['org.sagebionetworks.repo.model.FileEntity'],
        'File.id': 'syn5570914',
        'File.name': 'flapdoodle',
    }
    assert not is_container(file_row)

    # Entity objects: Folder and Project are containers, File is not
    assert is_container(Folder("Stuff", parentId="syn12345"))
    assert is_container(Project("My Project", parentId="syn12345"))
    assert not is_container(File("asdf.png", parentId="syn12345"))
Ejemplo n.º 21
0
def readManifestFile(syn, manifestFile):
    """Verifies a file manifest and returns a reordered dataframe ready for upload.

    :param syn:             A synapse object as obtained with syn = synapseclient.login()

    :param manifestFile:    A tsv file with file locations and metadata to be pushed to Synapse.
                            See below for details

    :returns A pandas dataframe if the manifest is validated.

    :raises ValueError:        if a required column is missing or the paths are not unique
    :raises SynapseHTTPError:  if a parent cannot be retrieved or is not a container

    See also for a description of the file format:
        - :py:func:`synapseutils.sync.syncToSynapse`
    """
    table.test_import_pandas()
    import pandas as pd

    sys.stdout.write('Validation and upload of: %s\n' % manifestFile)
    # Read manifest file into pandas dataframe
    df = pd.read_csv(manifestFile, sep='\t')
    if 'synapseStore' not in df:
        df = df.assign(synapseStore=None)
    # Use .loc so the assignment is applied to df itself; chained indexing
    # (df.synapseStore[mask] = ...) may write to a temporary copy instead.
    # override synapseStore values to False when path is a url
    df.loc[df['path'].apply(is_url), 'synapseStore'] = False
    # remaining unset values default to True
    df.loc[df['synapseStore'].isnull(), 'synapseStore'] = True
    df.synapseStore = df.synapseStore.astype(bool)
    df = df.fillna('')

    sys.stdout.write('Validating columns of manifest...')
    for field in REQUIRED_FIELDS:
        sys.stdout.write('.')
        if field not in df.columns:
            sys.stdout.write('\n')
            raise ValueError("Manifest must contain a column of %s" % field)
    sys.stdout.write('OK\n')

    sys.stdout.write('Validating that all paths exist')
    df.path = df.path.apply(_check_path_and_normalize)

    sys.stdout.write('OK\n')

    sys.stdout.write('Validating that all files are unique...')
    if len(df.path) != len(set(df.path)):
        raise ValueError(
            "All rows in manifest must contain a unique file to upload")
    sys.stdout.write('OK\n')

    sys.stdout.write('Validating provenance...')
    df = _sortAndFixProvenance(syn, df)
    sys.stdout.write('OK\n')

    sys.stdout.write('Validating that parents exist and are containers...')
    parents = set(df.parent)
    for synId in parents:
        try:
            container = syn.get(synId, downloadFile=False)
        except SynapseHTTPError:
            # report which parent failed, then let the original error propagate
            sys.stdout.write(
                '\n%s in the parent column is not a valid Synapse Id\n' %
                synId)
            raise
        if not is_container(container):
            sys.stdout.write(
                '\n%s in the parent column is is not a Folder or Project\n' %
                synId)
            raise SynapseHTTPError
    sys.stdout.write('OK\n')
    return df
Ejemplo n.º 22
0
    def _sync_root(self,
                   root,
                   root_path,
                   ifcollision,
                   followLink,
                   progress,
                   downloadFile,
                   manifest="all"):
        """Traverse the folder hierarchy below ``root`` and submit every file for syncing.

        Folders are processed from an explicit stack. For each folder a
        ``_FolderSync`` is created, its non-container children are submitted to
        ``self._executor`` and its container children are pushed onto the stack.

        :param root:          the folder entity/dict the sync starts from
        :param root_path:     local path for the root's children, or None
        :param ifcollision:   forwarded to ``self._sync_file``
        :param followLink:    forwarded to ``self._sync_file``
        :param progress:      forwarded to ``self._sync_file``
        :param downloadFile:  forwarded to ``self._sync_file``
        :param manifest:      "suppress" disables the root manifest; only "all"
                              enables manifests for child folders

        :returns: the ``_FolderSync`` created for ``root``

        :raises ValueError: if the root ``_FolderSync`` reports an exception
                            while the traversal is still in progress
        """
        # pending entries are a 4-tuple of:
        # 1. the folder entity/dict
        # 2. the local path of the parent to download into
        # 3. the _FolderSync of the parent of the folder (None at the root)
        # 4. whether a manifest should be created for the folder
        manifest_for_root = manifest != "suppress"
        manifest_for_children = manifest == "all"
        pending = [(root, root_path, None, manifest_for_root)]

        top_sync = None
        while pending:
            if top_sync:
                # any failure during the sync is communicated up to the root,
                # at which point the traversal should abort
                failure = top_sync.get_exception()
                if failure:
                    raise ValueError(
                        "File download failed during sync") from failure

            folder, parent_path, parent_sync, wants_manifest = pending.pop()
            entity_id = id_of(folder)

            if parent_path is None:
                folder_path = None
            else:
                if top_sync:
                    folder_path = os.path.join(parent_path, folder['name'])
                else:
                    # syncFromSynapse behavior is that we do NOT create a folder for the root of the sync:
                    # the given local path is treated as the root and the root's children are
                    # written directly into it
                    folder_path = parent_path
                os.makedirs(folder_path, exist_ok=True)

            child_ids, file_ids, subfolders = [], [], []
            for child in self._syn.getChildren(entity_id):
                child_id = id_of(child)
                child_ids.append(child_id)
                if is_container(child):
                    subfolders.append(child)
                else:
                    file_ids.append(child_id)

            folder_sync = _FolderSync(
                self._syn,
                entity_id,
                folder_path,
                child_ids,
                parent_sync,
                create_manifest=wants_manifest,
            )
            if not top_sync:
                top_sync = folder_sync

            if not child_ids:
                # this folder has no children, so it is immediately finished
                folder_sync.update()
                continue

            for file_id in file_ids:
                # NOTE(review): the semaphore is acquired before each submit and not released here;
                # presumably it is released when the file task ends - confirm in _sync_file
                self._file_semaphore.acquire()
                self._executor.submit(
                    self._sync_file,
                    file_id,
                    folder_sync,
                    folder_path,
                    ifcollision,
                    followLink,
                    progress,
                    downloadFile,
                )

            pending.extend(
                (subfolder, folder_path, folder_sync, manifest_for_children)
                for subfolder in subfolders
            )

        return top_sync
Ejemplo n.º 23
0
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Recursively download the children of a Synapse folder/project and write a metadata manifest.

    The direct children of `entity` are looked up with a ``chunkedQuery`` on ``entity.parentId``.
    Every child that is a container is descended into recursively; every other child is fetched
    with ``syn.get`` and appended to the result list.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, or a Synapse Entity object of type folder or project.

    :param path:        An optional local directory under which the folder hierarchy is reproduced.
                        If None, no directories are created here and ``syn.get`` is called with
                        ``downloadLocation=None`` (the original documentation states the files are
                        then placed in the synapseCache).

    :param ifcollision: Determines how to handle file collisions; forwarded unchanged to ``syn.get``.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :param allFiles:    Accumulator list shared across the recursive calls. Callers normally leave
                        this as None, in which case a fresh list is created.

    :param followLink:  Determines whether the link returns the target Entity; forwarded to
                        ``syn.get`` at every level of the recursion.
                        Defaults to False

    :returns: `allFiles`, the list of every non-container entity fetched so far

    If `path` is not None, a manifest named MANIFEST_FILENAME is written into `path` by
    ``generateManifest`` once the children of `entity` have been processed. Because the same
    accumulator is shared by the recursive calls, the manifest written into a sub-directory lists
    every entity collected up to that point, not only that directory's own children.

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    if allFiles is None:
        allFiles = list()

    # named entity_id rather than `id` so the builtin is not shadowed
    entity_id = id_of(entity)
    results = syn.chunkedQuery("select id, name, nodeType from entity where entity.parentId=='%s'" % entity_id)
    for result in results:
        if is_container(result):
            if path is not None:  # If we are downloading outside cache create directory.
                new_path = os.path.join(path, result['entity.name'])
                try:
                    # makedirs (not mkdir) so that a not-yet-existing root `path` is created too
                    os.makedirs(new_path)
                except OSError as err:
                    # an already existing directory is fine; anything else is a real error
                    if err.errno != errno.EEXIST:
                        raise
                print('making dir', new_path)
            else:
                new_path = None
            # followLink must be passed down explicitly, otherwise nested folders silently
            # fall back to the default (False) regardless of what the caller asked for
            syncFromSynapse(syn, result['entity.id'], new_path, ifcollision, allFiles,
                            followLink=followLink)
        else:
            ent = syn.get(result['entity.id'], downloadLocation=path, ifcollision=ifcollision,
                          followLink=followLink)
            allFiles.append(ent)

    if path is not None:  # If path is None files are stored in cache.
        filename = os.path.join(path, MANIFEST_FILENAME)
        filename = os.path.expanduser(os.path.normcase(filename))
        generateManifest(syn, allFiles, filename)
    return allFiles
Ejemplo n.º 24
0
def syncFromSynapse(syn,
                    entity,
                    path=None,
                    ifcollision='overwrite.local',
                    allFiles=None,
                    followLink=False):
    """Download a Synapse file, or every file below a folder/project, and write a metadata manifest.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, or a Synapse Entity object of type file, folder or project.

    :param path:        An optional local directory under which the folder hierarchy is reproduced.
                        If not specified the files will by default be placed in the synapseCache.

    :param ifcollision: Determines how to handle file collisions. May be "overwrite.local", "keep.local", or
                        "keep.both". Defaults to "overwrite.local".

    :param allFiles:    Accumulator list shared across the recursive calls; a new list is created when None.

    :param followLink:  Determines whether the link returns the target Entity.
                        Defaults to False

    :returns: `allFiles`, extended with every File entity that was fetched

    :raises ValueError: if `entity` resolves to something that is neither a File nor a container

    When `entity` is (or resolves to) a File it is appended to the result and returned right away; no manifest
    is written in that case. Otherwise the children of the container are listed with ``syn.getChildren``:
    child containers are descended into recursively, and every other child is fetched with ``syn.get`` and
    kept only if it is a File.

    If `path` is given, a manifest named MANIFEST_FILENAME is written into it by ``generateManifest`` after
    the children have been processed. It describes every file accumulated so far.

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    if allFiles is None:
        allFiles = []

    # a bare Synapse id is resolved to a real entity before its type is inspected
    if is_synapse_id(entity):
        entity = syn.get(entity, downloadLocation=path, ifcollision=ifcollision, followLink=followLink)

    # single file: nothing to traverse
    if isinstance(entity, File):
        allFiles.append(entity)
        return allFiles

    entity_id = id_of(entity)
    if not is_container(entity):
        raise ValueError("The provided id: %s is neither a container nor a File" % entity_id)

    for child in syn.getChildren(entity_id):
        if not is_container(child):
            # leaf entity: fetch it, but only Files are reported back to the caller
            fetched = syn.get(child['id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            if isinstance(fetched, File):
                allFiles.append(fetched)
            continue

        # child container: mirror it on disk only when downloading outside the cache
        if path is None:
            sub_path = None
        else:
            sub_path = os.path.join(path, child['name'])
            try:
                os.makedirs(sub_path)
            except OSError as err:
                # a directory left over from a previous sync is expected
                if err.errno != errno.EEXIST:
                    raise
        syncFromSynapse(syn, child['id'], sub_path, ifcollision, allFiles, followLink=followLink)

    # with path=None there is no local directory to put a manifest into
    if path is not None:
        manifest_path = os.path.expanduser(os.path.normcase(os.path.join(path, MANIFEST_FILENAME)))
        generateManifest(syn, allFiles, manifest_path)

    return allFiles
Ejemplo n.º 25
0
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Recursively download the files below a Synapse folder/project and write a metadata manifest.

    The direct children of `entity` are listed with ``syn.getChildren``. Child containers are
    descended into recursively; every other child is fetched with ``syn.get`` and kept only if it
    is a File. When the listing is empty, `entity` itself is fetched: a File is added to the
    result, an (empty) container is accepted as is, and anything else is rejected.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, or a Synapse Entity object of type file, folder or project.

    :param path:        An optional local directory under which the folder hierarchy is reproduced.
                        If not specified the files will by default be placed in the synapseCache.

    :param ifcollision: Determines how to handle file collisions; forwarded unchanged to ``syn.get``.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :param allFiles:    Accumulator list shared across the recursive calls. Callers normally leave
                        this as None, in which case a fresh list is created.

    :param followLink:  Determines whether the link returns the target Entity; forwarded to
                        ``syn.get`` at every level of the recursion.
                        Defaults to False

    :returns: `allFiles`, extended with every File entity that was fetched

    :raises ValueError: if `entity` has no children and is neither a File nor a container

    If `path` is not None, a manifest named MANIFEST_FILENAME is written into `path` by
    ``generateManifest`` once `entity` has been processed. Because the accumulator is shared by
    the recursive calls, a manifest written into a sub-directory lists every file collected up to
    that point.

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    if allFiles is None:
        allFiles = list()

    # named entity_id rather than `id` so the builtin is not shadowed
    entity_id = id_of(entity)
    zero_results = True
    for result in syn.getChildren(entity_id):
        zero_results = False
        if is_container(result):
            if path is not None:  # If we are downloading outside cache create directory.
                new_path = os.path.join(path, result['name'])
                try:
                    os.makedirs(new_path)
                except OSError as err:
                    # an already existing directory is fine; anything else is a real error
                    if err.errno != errno.EEXIST:
                        raise
                print('making dir', new_path)
            else:
                new_path = None
            # followLink must be passed down explicitly, otherwise nested folders silently
            # fall back to the default (False) regardless of what the caller asked for
            syncFromSynapse(syn, result['id'], new_path, ifcollision, allFiles,
                            followLink=followLink)
        else:
            ent = syn.get(result['id'], downloadLocation=path, ifcollision=ifcollision,
                          followLink=followLink)
            if isinstance(ent, File):
                allFiles.append(ent)

    if zero_results:
        # No children: `entity` is either a non-container or an empty container.
        # a http error would be raised if the synapse Id was not valid (404) or no permission (403)
        # so at this point the entity should be get-able
        stderr.write("The synapse id %s is not a container (Project/Folder), attempting to get the entity anyways"
                     % entity_id)
        ent = syn.get(entity_id, downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
        if isinstance(ent, File):
            allFiles.append(ent)
        elif not is_container(ent):
            # An empty Folder/Project is a legitimate sync target (and is routinely reached
            # through the recursion above), so only non-container, non-File entities are rejected.
            raise ValueError("The provided id: %s is was neither a container nor a File" % entity_id)

    if path is not None:  # If path is None files are stored in cache.
        filename = os.path.join(path, MANIFEST_FILENAME)
        filename = os.path.expanduser(os.path.normcase(filename))
        generateManifest(syn, allFiles, filename)

    return allFiles