Example #1
0
def remove_team_member(syn, team, user):
    """Remove a user from a team.

    Issues a REST DELETE against the team-member endpoint built from the
    resolved team and user ids.

    Args:
        syn: Synapse object
        team: synapseclient.Team or its id
        user: synapseclient.UserProfile or its id
    """
    team_id, user_id = id_of(team), id_of(user)
    syn.restDELETE(f"/team/{team_id}/member/{user_id}")
Example #2
0
def test_entity_view_add_annotation_columns(syn, project,
                                            schedule_for_cleanup):
    """Store entity views with annotation columns over two annotated folders.

    Two folders with overlapping annotation keys are stored under ``project``
    and used as the scope of three views, each built with
    ``addAnnotationColumns=True`` and ``addDefaultViewColumns=False``.
    """
    first_name = (str(uuid.uuid4()) +
                  'test_entity_view_add_annotation_columns_proj1')
    first_folder = syn.store(
        Folder(name=first_name,
               parent=project,
               annotations={
                   'strAnno': 'str1',
                   'intAnno': 1,
                   'floatAnno': 1.1
               }))
    second_name = (str(uuid.uuid4()) +
                   'test_entity_view_add_annotation_columns_proj2')
    second_folder = syn.store(
        Folder(name=second_name,
               parent=project,
               annotations={
                   'dateAnno': datetime.now(),
                   'strAnno': 'str2',
                   'intAnno': 2
               }))
    for stored_folder in (first_folder, second_folder):
        schedule_for_cleanup(stored_folder)
    scope_ids = [utils.id_of(first_folder), utils.id_of(second_folder)]

    # The first two variants ensure that user code which uses the deprecated
    # field `type` (alone, or combined with `includeEntityTypes`) continues
    # to work.
    # TODO: remove those two variants in Synapse Python client 2.0
    # The last variant uses `includeEntityTypes` only.
    view_variants = (
        {'type': 'project'},
        {'type': 'file', 'includeEntityTypes': [EntityViewType.PROJECT]},
        {'includeEntityTypes': [EntityViewType.PROJECT]},
    )
    for variant in view_variants:
        view_schema = EntityViewSchema(name=str(uuid.uuid4()),
                                       scopeIds=scope_ids,
                                       addDefaultViewColumns=False,
                                       addAnnotationColumns=True,
                                       **variant,
                                       parent=project)
        syn.store(view_schema)
Example #3
0
def get_forum_threads(syn,
                      ent,
                      query_filter='EXCLUDE_DELETED',
                      limit=20,
                      offset=0):
    """
    Fetch threads from the forum attached to a project.

    Args:
        syn: synapse object
        ent: Synapse Project entity or id
        query_filter: which threads to return. Can be NO_FILTER,
                      DELETED_ONLY, EXCLUDE_DELETED.
                      Defaults to EXCLUDE_DELETED.
        limit: forwarded unchanged to DiscussionApi.get_forum_threads
               (default 20)
        offset: forwarded unchanged to DiscussionApi.get_forum_threads
                (default 0)

    Returns:
        The result of DiscussionApi.get_forum_threads for the project's forum
    """
    discussion = DiscussionApi(syn)
    forum = discussion.get_project_forum(id_of(ent))
    return discussion.get_forum_threads(forum.id,
                                        query_filter=query_filter,
                                        limit=limit,
                                        offset=offset)
Example #4
0
def test_id_of():
    """Check utils.id_of against ints, strings, dicts and property holders."""
    # Plain values and dict-like inputs resolve to the string form of the id.
    assert utils.id_of(1) == '1'
    assert utils.id_of('syn12345') == 'syn12345'
    assert utils.id_of({'foo': 1, 'id': 123}) == '123'
    with pytest.raises(ValueError):
        utils.id_of({'foo': 1, 'idzz': 123})
    assert utils.id_of({'properties': {'id': 123}}) == '123'
    with pytest.raises(ValueError):
        utils.id_of({'properties': {'qq': 123}})
    with pytest.raises(ValueError):
        utils.id_of(object())

    class _PropertyHolder:
        """Object exposing a single id-like key through ``properties``."""

        def __init__(self, key, value):
            self.properties = {key: value}

    # Each of these property names must be recognised as carrying the id.
    for key in ('id', 'ownerId', 'tableId'):
        assert utils.id_of(_PropertyHolder(key, 123)) == '123'
Example #5
0
def create_challenge(syn, entity, team):
    """Create the Challenge associated with a Project.

    POSTs a JSON body with ``participantTeamId`` and ``projectId`` to
    ``/challenge`` and wraps the response in a Challenge.

    See the definition of a Challenge object here:
    https://docs.synapse.org/rest/org/sagebionetworks/repo/model/Challenge.html

    Args:
        syn: Synapse connection
        entity: An Entity or Synapse ID of a Project.
        team: A Team or Team ID.

    Returns:
        Challenge object
    """
    project_id, team_id = id_of(entity), id_of(team)
    request_body = json.dumps({
        'participantTeamId': team_id,
        'projectId': project_id
    })
    return Challenge(**syn.restPOST('/challenge', request_body))
Example #6
0
def test_store_table_datetime(syn, project):
    """Round-trip a datetime through a DATE column and compare it on read."""
    # The timestamp is rounded to 3 decimal places (milliseconds) before
    # being stored; presumably that is the precision the table keeps.
    now_ms = datetime.fromtimestamp(round(time.time(), 3))
    date_column = Column(name="testerino", columnType='DATE')
    schema = syn.store(Schema("testTable", [date_column], project))
    single_row = RowSet(rows=[Row([now_ms])], schema=schema)
    syn.store(Table(schema, single_row))

    result = syn.tableQuery("select * from %s" % utils.id_of(schema),
                            resultsAs="rowset")
    stored_value = result.rowset['rows'][0]['values'][0]
    assert now_ms == stored_value
def create_challenge(syn: Synapse, project: Union[Project, str],
                     team: Union[Team, str]) -> Challenge:
    """Create the Challenge associated with a Project.

    Resolves both ids, then delegates to ChallengeApi.create_challenge.

    Args:
        syn: Synapse connection
        project: A synapseclient.Project or its id
        team: A synapseclient.Team or its id

    Returns:
        Challenge object

    """
    project_id, team_id = id_of(project), id_of(team)
    return ChallengeApi(syn=syn).create_challenge(projectid=project_id,
                                                  teamid=team_id)
Example #8
0
    def _test_method(self, syn, schema, resultsAs, partial_changes,
                     expected_results):
        """Apply partial row changes to a table and wait for the new content.

        Queries the table, keys each entry of ``partial_changes`` by the
        ROW_ID at the same position in the query result, stores the resulting
        PartialRowset, and queries again with ``expected_results``.
        """
        before = self._query_with_retry(
            syn, "SELECT * FROM %s" % utils.id_of(schema), resultsAs, 2, None,
            QUERY_TIMEOUT_SEC)
        assert before is not None
        frame = before.asDataFrame(rowIdAndVersionInIndex=False)

        # The i-th change set applies to the i-th row returned by the query.
        changes_by_row_id = {}
        for position, row_changes in enumerate(partial_changes):
            changes_by_row_id[frame['ROW_ID'][position]] = row_changes

        syn.store(PartialRowset.from_mapping(changes_by_row_id, before))

        after = self._query_with_retry(
            syn, "SELECT * FROM %s" % utils.id_of(schema), resultsAs, None,
            expected_results, QUERY_TIMEOUT_SEC)
        assert after is not None
Example #9
0
 def __init__(self,
              targetId=None,
              targetVersion=None,
              parent=None,
              properties=None,
              annotations=None,
              local_state=None,
              **kwargs):
     """Build a Link pointing at a target entity.

     The target comes either from ``targetId`` (optionally pinned with
     ``targetVersion``) or from a ``linksTo`` entry already present in
     ``properties``. If neither is supplied, SynapseMalformedEntityError
     is raised.
     """
     if targetId is not None:
         links_to = dict(targetId=utils.id_of(targetId))
         # Only pin a version when the caller supplied one.
         if targetVersion is not None:
             links_to['targetVersionNumber'] = targetVersion
         kwargs['linksTo'] = links_to
     elif properties is None or 'linksTo' not in properties:
         raise SynapseMalformedEntityError("Must provide a target id")
     super(Link, self).__init__(concreteType=Link._synapse_entity_type,
                                properties=properties,
                                annotations=annotations,
                                local_state=local_state,
                                parent=parent,
                                **kwargs)
def get_user_entity_permissions(syn, entity):
    """Fetch the permissions the caller has on a given Entity.

    https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/auth/UserEntityPermissions.html

    Args:
        syn: Synapse connection
        entity: Synapse id or Entity

    Returns:
        UserEntityPermissions, as returned by the REST GET call
    """
    endpoint = "/entity/{}/permissions".format(id_of(entity))
    return syn.restGET(endpoint)
Example #11
0
def get_challenge(syn, entity):
    """Fetch the Challenge associated with a Project.

    See the definition of a Challenge object here:
    https://docs.synapse.org/rest/org/sagebionetworks/repo/model/Challenge.html

    Args:
        syn: Synapse connection
        entity: An Entity or Synapse ID of a Project.

    Returns:
        Challenge object
    """
    response = syn.restGET("/entity/%s/challenge" % id_of(entity))
    return Challenge(**response)
def get_challenge(syn: Synapse, project: Union[Project, str]) -> Challenge:
    """Fetch the Challenge associated with a Project.

    Resolves the project id, then delegates to ChallengeApi.get_challenge.

    See the definition of a Challenge object here:
    https://docs.synapse.org/rest/org/sagebionetworks/repo/model/Challenge.html

    Args:
        syn: Synapse connection
        project: A synapseclient.Project or its id

    Returns:
        Challenge object

    """
    project_id = id_of(project)
    return ChallengeApi(syn=syn).get_challenge(projectid=project_id)
Example #13
0
def copy_reply(syn, reply, thread):
    """Copy a discussion thread reply into another thread.

    The new reply's text is the original text prefixed with an
    "On behalf of @<userName>" line naming the original author.

    Args:
        syn: synapse object
        reply: Synapse Reply
        thread: Synapse thread or threadid to copy reply to

    Returns:
        dict: Reply bundle
    """
    target_thread_id = id_of(thread)
    author_profile = syn.getUserProfile(reply.createdby)
    attribution = "On behalf of @{user}\n\n".format(
        user=author_profile['userName'])
    original_text = get_thread_reply_text(syn, reply)
    return create_thread_reply(syn, target_thread_id,
                               attribution + original_text)
Example #14
0
def create_thread(syn, ent, title, message):
    """
    Post a new thread to a project's forum

    Args:
        syn: synapse object
        ent: Synapse Project entity or id
        title: title of thread
        message: message in thread

    Returns:
        dict: Thread bundle
    """
    discussion = DiscussionApi(syn)
    forum = discussion.get_project_forum(id_of(ent))
    return discussion.post_thread(forum.id, title, message)
Example #15
0
def get_forum_participants(syn, ent):
    """
    Collect the profiles of every user active in a project's forum

    The ``activeAuthors`` of each thread returned by get_forum_threads are
    de-duplicated before their profiles are fetched.

    Args:
        syn: synapse object
        ent: Synapse Project entity or id

    Return:
        list: user profiles active in forum
    """
    project_id = id_of(ent)
    participant_ids = set()
    for thread in get_forum_threads(syn, project_id):
        participant_ids.update(set(thread['activeAuthors']))
    return [syn.getUserProfile(user_id) for user_id in participant_ids]
Example #16
0
def copy_thread(syn, thread, project):
    """Copy a discussion thread into another project

    The new thread keeps the original title; its text is the original text
    prefixed with an "On behalf of @<userName>" line naming the creator.

    Args:
        syn: synapse object
        thread: Synapse Thread
        project: Synapse Project or its id to copy thread to

    Returns:
        dict: Thread bundle
    """
    target_project_id = id_of(project)
    thread_title = thread['title']
    creator_profile = syn.getUserProfile(thread['createdBy'])
    attribution = "On behalf of @{user}\n\n".format(
        user=creator_profile['userName'])
    original_text = get_thread_text(syn, thread['messageKey'])
    return create_thread(syn, target_project_id, thread_title,
                         attribution + original_text)
Example #17
0
def get_thread_replies(syn: Synapse, thread: Thread,
                       **kwargs) -> Iterator[Reply]:
    """Fetch the replies of a thread

    Args:
        syn: synapse object
        thread: Synapse thread or id
        **kwargs: forwarded unchanged to DiscussionApi.get_thread_replies:
                  query_filter - filter replies returned. Can be,
                  NO_FILTER, DELETED_ONLY, EXCLUDE_DELETED.
                  Defaults to EXCLUDE_DELETED.
                  limit - Number of query results
                  offset -  Page of query result

    Returns:
        The synapseservices.Reply objects produced by
        DiscussionApi.get_thread_replies

    """
    discussion = DiscussionApi(syn)
    return discussion.get_thread_replies(id_of(thread), **kwargs)
Example #18
0
def _copy_thread(syn, thread: Thread, project: Union[Project, str]) -> Thread:
    """Copy a discussion thread into another project

    The new thread keeps the original title; its text is the original text
    prefixed with an "On behalf of @<userName>" line naming the creator.

    Args:
        syn: synapse object
        thread: Synapse Thread
        project: Synapse Project or its id to copy thread to

    Returns:
        synapseservices.Thread
    """
    target_project_id = id_of(project)
    thread_title = thread.title
    creator_name = syn.getUserProfile(thread.createdby)['userName']
    attribution = f"On behalf of @{creator_name}\n\n"
    original_text = get_thread_text(syn, thread)
    return create_thread(syn, target_project_id, thread_title,
                         attribution + original_text)
Example #19
0
    def __init__(self, **kwargs):
        """Build a Wiki from keyword arguments.

        ``owner`` is required; it is resolved to ``ownerId`` and then removed.
        ``markdown`` / ``markdownFile`` are consumed by update_markdown, and
        any ``fileHandles`` are appended to ``attachmentFileHandleIds``.

        Raises:
            ValueError: if no ``owner`` keyword is given
        """
        # An owner is mandatory
        if 'owner' not in kwargs:
            raise ValueError('Wiki constructor must have an owner specified')

        # Make sure there is an attachment list to collect handles into
        attachment_ids = kwargs.setdefault('attachmentFileHandleIds', [])

        # Hand the markdown-related arguments over before the rest is stored
        markdown_text = kwargs.pop('markdown', None)
        markdown_file = kwargs.pop('markdownFile', None)
        self.update_markdown(markdown_text, markdown_file)

        # 'fileHandles' is the short alias for 'attachmentFileHandleIds'
        for file_handle in kwargs.pop('fileHandles', ()):
            attachment_ids.append(file_handle)

        super(Wiki, self).__init__(kwargs)
        self.ownerId = id_of(self.owner)
        del self['owner']
Example #20
0
def get_forum_threads(syn: Synapse, ent: Union[Project, str],
                      **kwargs) -> Iterator[Thread]:
    """
    Fetch threads from the forum attached to a project

    Args:
        syn: synapse object
        ent: Synapse Project entity or id
        **kwargs: forwarded unchanged to DiscussionApi.get_forum_threads:
                  query_filter - filter forum threads returned. Can be,
                  NO_FILTER, DELETED_ONLY, EXCLUDE_DELETED.
                  Defaults to EXCLUDE_DELETED.
                  limit - Number of query results
                  offset -  Page of query result

    Returns:
        The synapseservices.Thread objects produced by
        DiscussionApi.get_forum_threads

    """
    discussion = DiscussionApi(syn)
    forum = discussion.get_project_forum(id_of(ent))
    return discussion.get_forum_threads(forum.id, **kwargs)
Example #21
0
def get_thread_replies(syn,
                       thread,
                       query_filter='EXCLUDE_DELETED',
                       limit=20,
                       offset=0):
    """Fetch the replies of a thread

    Args:
        syn: synapse object
        thread: Synapse thread or id
        query_filter: which replies to return. Can be NO_FILTER,
                      DELETED_ONLY, EXCLUDE_DELETED.
                      Defaults to EXCLUDE_DELETED.
        limit: forwarded unchanged to DiscussionApi.get_thread_replies
               (default 20)
        offset: forwarded unchanged to DiscussionApi.get_thread_replies
                (default 0)

    Returns:
        The result of DiscussionApi.get_thread_replies for the thread
    """
    discussion = DiscussionApi(syn)
    return discussion.get_thread_replies(id_of(thread),
                                         query_filter=query_filter,
                                         limit=limit,
                                         offset=offset)
 def syn_get_side_effect(entity, *args, **kwargs):
     """Look up ``entity`` by id in ``entities``; extra arguments are ignored."""
     entity_id = id_of(entity)
     return entities[entity_id]
Example #23
0
def syncFromSynapse(syn,
                    entity,
                    path=None,
                    ifcollision='overwrite.local',
                    allFiles=None,
                    followLink=False):
    """Downloads every file below a Synapse container (recursing into sub-containers) and, when a download path is
    given, writes a manifest with the file metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, a Synapse Entity object of type file, folder or project.

    :param path:        An optional path where the file hierarchy will be reproduced. If not specified the files will by
                        default be placed in the synapseCache.

    :param ifcollision: Determines how to handle file collisions. Maybe "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :param allFiles:    Accumulator list that downloaded File entities are appended to; it is passed down through the
                        recursive calls. A new list is created when None.

    :param followLink:  Determines whether the link returns the target Entity.
                        Defaults to False

    :returns: list of entities (files, tables, links)

    :raises ValueError: if `entity` is neither a File nor a container

    Each child container is visited with a recursive call; every other child is fetched with `syn.get` and kept if it
    is a File. Whether an already-downloaded or locally modified file is replaced is governed by "ifcollision".

    When `path` is given, each sub-container gets a directory of the same name below it, and a manifest file
    (MANIFEST_FILENAME) covering the files collected so far is generated in `path` once its children are processed.

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    # start a fresh accumulator on the outermost call
    if allFiles is None:
        allFiles = []

    # a bare Synapse id is resolved to its entity first
    if is_synapse_id(entity):
        entity = syn.get(entity,
                         downloadLocation=path,
                         ifcollision=ifcollision,
                         followLink=followLink)

    # a single File needs no traversal
    if isinstance(entity, File):
        allFiles.append(entity)
        return allFiles

    entity_id = id_of(entity)
    if not is_container(entity):
        raise ValueError(
            "The provided id: %s is neither a container nor a File" %
            entity_id)

    for child in syn.getChildren(entity_id):
        if not is_container(child):
            # leaf: fetch it and keep it only when it is a File
            fetched = syn.get(child['id'],
                              downloadLocation=path,
                              ifcollision=ifcollision,
                              followLink=followLink)
            if isinstance(fetched, File):
                allFiles.append(fetched)
            continue

        # container: mirror it on disk when downloading outside the cache
        if path is None:
            child_path = None
        else:
            child_path = os.path.join(path, child['name'])
            try:
                os.makedirs(child_path)
            except OSError as err:
                # an already existing directory is fine
                if err.errno != errno.EEXIST:
                    raise
        # descend into this container's children
        syncFromSynapse(syn,
                        child['id'],
                        child_path,
                        ifcollision,
                        allFiles,
                        followLink=followLink)

    # With path None the files live in the cache and no manifest is written.
    if path is not None:
        manifest_path = os.path.expanduser(
            os.path.normcase(os.path.join(path, MANIFEST_FILENAME)))
        generateManifest(syn, allFiles, manifest_path)

    return allFiles
Example #24
0
    def __init__(self,
                 properties=None,
                 annotations=None,
                 local_state=None,
                 parent=None,
                 **kwargs):
        """Populate the entity from mappings and keyword arguments.

        NOTE(review): this method updates ``self.__dict__['properties']`` and
        ``self.__dict__['annotations']`` in place, so both must already exist
        when it runs (presumably created in ``__new__`` -- confirm).

        Args:
            properties: optional mapping of entity properties. A nested
                ``'annotations'`` mapping is moved into ``annotations`` (the
                key is deleted from the passed-in mapping).
            annotations: optional mapping of annotations, or a string which
                is stored as the ``annotations`` property.
            local_state: optional mapping handed to ``self.local_state``.
            parent: optional parent entity or id; used to derive ``parentId``
                unless ``parentId`` is given explicitly in ``kwargs``.
            **kwargs: further values, each stored through ``__setitem__``.

        Raises:
            SynapseMalformedEntityError: if ``properties``, ``annotations``
                or ``local_state`` has an unsupported type, if the id of
                ``parent`` cannot be determined, or if a non-Project,
                non-generic entity ends up without a ``parentId``.
        """
        if properties:
            if not isinstance(properties, collections.abc.Mapping):
                raise SynapseMalformedEntityError(
                    'Unknown argument type: properties is a %s' %
                    str(type(properties)))
            if 'annotations' in properties and isinstance(
                    properties['annotations'], collections.abc.Mapping):
                # ``annotations`` defaults to None; without this guard the
                # merge below raised AttributeError whenever annotations were
                # supplied only through ``properties``.
                if annotations is None:
                    annotations = {}
                annotations.update(properties['annotations'])
                del properties['annotations']
            self.__dict__['properties'].update(properties)

        if annotations:
            if isinstance(annotations, collections.abc.Mapping):
                self.__dict__['annotations'].update(annotations)
            elif isinstance(annotations, str):
                self.properties['annotations'] = annotations
            else:
                raise SynapseMalformedEntityError(
                    'Unknown argument type: annotations is a %s' %
                    str(type(annotations)))

        if local_state:
            if isinstance(local_state, collections.abc.Mapping):
                self.local_state(local_state)
            else:
                raise SynapseMalformedEntityError(
                    'Unknown argument type: local_state is a %s' %
                    str(type(local_state)))

        # Every declared local key exists on the instance, defaulting to None
        for key in self.__class__._local_keys:
            if key not in self.__dict__:
                self.__dict__[key] = None

        # Extract parentId from parent
        if 'parentId' not in kwargs and parent:
            try:
                kwargs['parentId'] = id_of(parent)
            except Exception:
                # Any failure to resolve the id is reported as a malformed
                # entity; an Entity without 'id' gets a more specific hint.
                if isinstance(parent, Entity) and 'id' not in parent:
                    raise SynapseMalformedEntityError(
                        "Couldn't find 'id' of parent."
                        " Has it been stored in Synapse?")
                raise SynapseMalformedEntityError(
                    "Couldn't find 'id' of parent.")

        # Note: that this will work properly if derived classes declare their internal state variable *before* invoking
        # super(...).__init__(...)
        for key, value in kwargs.items():
            self.__setitem__(key, value)

        if 'concreteType' not in self:
            self['concreteType'] = self.__class__._synapse_entity_type

        # Only project can be top-level. All other entity types require parentId don't enforce this for generic Entity
        if 'parentId' not in self \
                and not isinstance(self, Project) \
                and type(self) is not Entity:
            raise SynapseMalformedEntityError(
                "Entities of type %s must have a parentId." % type(self))
Example #25
0
def test_migrate_project(request, syn, schedule_for_cleanup,
                         storage_location_id):
    """Index and migrate a whole project, then verify the results.

    Builds a project holding four file entities (one directly in the project,
    three in a folder, with file 3 reusing file 1's file handle) and a table
    with two FILEHANDLEID columns and two rows. The project is indexed and
    migrated to ``storage_location_id``; the test then checks the storage
    location of every file handle, the status counts recorded in the sqlite
    index at ``db_path``, and the lines of the CSV report.
    """
    test_name = request.node.name
    project_name = "{}-{}".format(test_name, uuid.uuid4())
    project = synapseclient.Project(name=project_name)
    project_entity = syn.store(project)

    # file 0 lives directly in the project; its file handle supplies the
    # storage location that all files are expected to migrate away from
    file_0_path = _create_temp_file()
    schedule_for_cleanup(file_0_path)
    file_0_name = "{}-{}".format(test_name, 1)
    file_0 = synapseclient.File(name=file_0_name,
                                path=file_0_path,
                                parent=project_entity)
    file_0_entity = syn.store(file_0)
    default_storage_location_id = file_0_entity._file_handle[
        'storageLocationId']

    folder_1_name = "{}-{}-{}".format(test_name, 1, uuid.uuid4())
    folder_1 = synapseclient.Folder(parent=project_entity, name=folder_1_name)
    folder_1_entity = syn.store(folder_1)

    # files 1-3 live in folder 1 (file 1 has the same name as file 0 but a
    # different parent)
    file_1_path = _create_temp_file()
    schedule_for_cleanup(file_1_path)
    file_1_name = "{}-{}".format(test_name, 1)
    file_1 = synapseclient.File(name=file_1_name,
                                path=file_1_path,
                                parent=folder_1_entity)
    file_1_entity = syn.store(file_1)

    file_2_path = _create_temp_file()
    schedule_for_cleanup(file_2_path)
    file_2_name = "{}-{}".format(test_name, 2)
    file_2 = synapseclient.File(name=file_2_name,
                                path=file_2_path,
                                parent=folder_1_entity)
    file_2_entity = syn.store(file_2)

    # file 3 shares the same file handle id as file 1
    file_3_path = file_1_path
    file_3_name = "{}-{}".format(test_name, 3)
    file_3 = synapseclient.File(name=file_3_name,
                                path=file_3_path,
                                parent=folder_1_entity)
    file_3.dataFileHandleId = file_1_entity.dataFileHandleId
    file_3_entity = syn.store(file_3)

    # a table in folder 1 with two file handle columns around an integer one
    table_1_cols = [
        synapseclient.Column(name='file_col_1', columnType='FILEHANDLEID'),
        synapseclient.Column(name='num', columnType='INTEGER'),
        synapseclient.Column(name='file_col_2', columnType='FILEHANDLEID'),
    ]
    table_1 = syn.store(
        synapseclient.Schema(name=test_name,
                             columns=table_1_cols,
                             parent=folder_1_entity))
    # NOTE(review): unlike the entity files above, these four temp files are
    # not passed to schedule_for_cleanup -- confirm whether that is intended
    table_1_file_col_1_1 = _create_temp_file()
    table_1_file_handle_1 = syn.uploadFileHandle(table_1_file_col_1_1, table_1)
    table_1_file_col_1_2 = _create_temp_file()
    table_1_file_handle_2 = syn.uploadFileHandle(table_1_file_col_1_2, table_1)
    table_1_file_col_2_1 = _create_temp_file()
    table_1_file_handle_3 = syn.uploadFileHandle(table_1_file_col_2_1, table_1)
    table_1_file_col_2_2 = _create_temp_file()
    table_1_file_handle_4 = syn.uploadFileHandle(table_1_file_col_2_2, table_1)

    # two rows: handles 1 and 2 in the first, handles 3 and 4 in the second
    data = [
        [table_1_file_handle_1['id'], 1, table_1_file_handle_2['id']],
        [table_1_file_handle_3['id'], 2, table_1_file_handle_4['id']],
    ]

    table_1_entity = syn.store(
        synapseclient.RowSet(schema=table_1,
                             rows=[synapseclient.Row(r) for r in data]))

    # sqlite database file used as the migration index
    db_path = tempfile.NamedTemporaryFile(delete=False).name
    schedule_for_cleanup(db_path)

    index_result = synapseutils.index_files_for_migration(
        syn,
        project_entity,
        storage_location_id,
        db_path,
        file_version_strategy='new',
        include_table_files=True,
    )

    # 8 = 4 file entities + 4 table attached files
    counts_by_status = index_result.get_counts_by_status()
    assert counts_by_status['INDEXED'] == 8
    assert counts_by_status['ERRORED'] == 0

    migration_result = synapseutils.migrate_indexed_files(syn,
                                                          db_path,
                                                          force=True)

    # re-fetch the file entities (metadata only) to see their new file handles
    file_0_entity_updated = syn.get(utils.id_of(file_0_entity),
                                    downloadFile=False)
    file_1_entity_updated = syn.get(utils.id_of(file_1_entity),
                                    downloadFile=False)
    file_2_entity_updated = syn.get(utils.id_of(file_2_entity),
                                    downloadFile=False)
    file_3_entity_updated = syn.get(utils.id_of(file_3_entity),
                                    downloadFile=False)
    file_handles = [
        f['_file_handle'] for f in (
            file_0_entity_updated,
            file_1_entity_updated,
            file_2_entity_updated,
            file_3_entity_updated,
        )
    ]

    # collect the current file handle behind every file cell of the table
    table_1_id = utils.id_of(table_1_entity)
    results = syn.tableQuery("select file_col_1, file_col_2 from {}".format(
        utils.id_of(table_1_entity)))
    table_file_handles = []
    for row in results:
        # the first two entries of each row are skipped -- presumably the
        # row id and row version that precede the selected columns; confirm
        for file_handle_id in row[2:]:
            file_handle = syn._getFileHandleDownload(
                file_handle_id, table_1_id,
                objectType='TableEntity')['fileHandle']
            table_file_handles.append(file_handle)
    file_handles.extend(table_file_handles)

    _assert_storage_location(file_handles, storage_location_id)
    assert storage_location_id != default_storage_location_id

    # inspect the migration index directly
    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        query_result = cursor.execute(
            "select status, count(*) from migrations where type in (?, ?) group by status",
            (_MigrationType.FILE.value,
             _MigrationType.TABLE_ATTACHED_FILE.value)).fetchall()

        counts = {r[0]: r[1] for r in query_result}

        # should only be one status and they should all be migrated
        # should be 4 migrated file entities + 4 migrated table attached files
        assert len(counts) == 1
        assert counts[_MigrationStatus.MIGRATED.value] == 8

    csv_file = tempfile.NamedTemporaryFile(delete=False)
    schedule_for_cleanup(csv_file.name)
    migration_result.as_csv(csv_file.name)
    with open(csv_file.name, 'r') as csv_file_in:
        csv_contents = csv_file_in.read()

    # table_1_id is rebound here to the 'tableId' of the stored row set; this
    # is the value used in the expected table lines of the csv below
    table_1_id = table_1_entity['tableId']

    # assert the content of the csv. we don't assert any particular order of the lines
    # but the presence of the expected lines and the correct # of lines
    csv_lines = csv_contents.split('\n')
    assert "id,type,version,row_id,col_name,from_storage_location_id,from_file_handle_id,to_file_handle_id,status,exception" in csv_lines  # noqa
    assert f"{file_0_entity.id},file,,,,{default_storage_location_id},{file_0_entity.dataFileHandleId},{file_0_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_1_entity.id},file,,,,{default_storage_location_id},{file_1_entity.dataFileHandleId},{file_1_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_2_entity.id},file,,,,{default_storage_location_id},{file_2_entity.dataFileHandleId},{file_2_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_3_entity.id},file,,,,{default_storage_location_id},{file_3_entity.dataFileHandleId},{file_3_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_1,{default_storage_location_id},{table_1_file_handle_1['id']},{table_file_handles[0]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_2,{default_storage_location_id},{table_1_file_handle_2['id']},{table_file_handles[1]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_1,{default_storage_location_id},{table_1_file_handle_3['id']},{table_file_handles[2]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_2,{default_storage_location_id},{table_1_file_handle_4['id']},{table_file_handles[3]['id']},MIGRATED," in csv_lines  # noqa
    assert "" in csv_lines  # expect trailing newline in a csv
Example #26
0
 def id(self, value):
     """Store the id resolved from ``value``; None is rejected with ValueError."""
     if value is not None:
         self._id = id_of(value)
         return
     raise ValueError("id must not be None")
Example #27
0
    def _sync_root(self,
                   root,
                   root_path,
                   ifcollision,
                   followLink,
                   progress,
                   downloadFile,
                   manifest="all"):
        """Walk the container tree below ``root`` and schedule file syncs.

        The tree is traversed iteratively with an explicit stack. For every
        folder a ``_FolderSync`` is created, each non-container child is
        submitted to ``self._executor`` as a ``self._sync_file`` call, and
        each container child is pushed onto the stack.

        Args:
            root: the folder entity/dict to start from
            root_path: local directory to download into, or None; when None
                no local directories are created
            ifcollision: forwarded to ``self._sync_file``
            followLink: forwarded to ``self._sync_file``
            progress: forwarded to ``self._sync_file``
            downloadFile: forwarded to ``self._sync_file``
            manifest: "all" requests a manifest for the root and for every
                child folder, "suppress" requests none, and any other value
                requests one for the root only

        Returns:
            The ``_FolderSync`` created for ``root``.

        Raises:
            ValueError: if the root ``_FolderSync`` reports an exception;
                this is checked at the start of each traversal step.
        """
        # stack elements are a 4-tuple of:
        # 1. the folder entity/dict
        # 2. the local path to the folder to download to
        # 3. the FolderSync of the parent to the folder (None at the root)
        # 4. whether a manifest should be created for the folder

        create_root_manifest = True if manifest != "suppress" else False
        folder_stack = [(root, root_path, None, create_root_manifest)]
        create_child_manifest = True if manifest == "all" else False

        root_folder_sync = None
        while folder_stack:
            if root_folder_sync:
                # if at any point the sync encounters an exception it will
                # be communicated up to the root at which point we should abort
                exception = root_folder_sync.get_exception()
                if exception:
                    raise ValueError(
                        "File download failed during sync") from exception

            folder, parent_path, parent_folder_sync, create_manifest = folder_stack.pop(
            )

            entity_id = id_of(folder)
            folder_path = None
            if parent_path is not None:
                folder_path = parent_path
                if root_folder_sync:
                    # syncFromSynapse behavior is that we do NOT create a folder for the root folder of the sync.
                    # we treat the download local path folder as the root and write the children of the sync
                    # directly into that local folder
                    folder_path = os.path.join(folder_path, folder['name'])
                os.makedirs(folder_path, exist_ok=True)

            # split the children into sub-folders (visited later via the
            # stack) and everything else (synced as files); child_ids keeps
            # the ids of both kinds
            child_ids = []
            child_file_ids = []
            child_folders = []
            for child in self._syn.getChildren(entity_id):
                child_id = id_of(child)
                child_ids.append(child_id)
                if is_container(child):
                    child_folders.append(child)
                else:
                    child_file_ids.append(child_id)

            folder_sync = _FolderSync(
                self._syn,
                entity_id,
                folder_path,
                child_ids,
                parent_folder_sync,
                create_manifest=create_manifest,
            )
            # the first folder processed is the root of the sync
            if not root_folder_sync:
                root_folder_sync = folder_sync

            if not child_ids:
                # this folder has no children, so it is immediately finished
                folder_sync.update()

            else:
                for child_file_id in child_file_ids:
                    # the semaphore is acquired before each submission;
                    # presumably it is released when the file sync finishes,
                    # bounding the number of pending syncs -- confirm in
                    # _sync_file
                    self._file_semaphore.acquire()
                    self._executor.submit(
                        self._sync_file,
                        child_file_id,
                        folder_sync,
                        folder_path,
                        ifcollision,
                        followLink,
                        progress,
                        downloadFile,
                    )

                for child_folder in child_folders:
                    folder_stack.append((child_folder, folder_path,
                                         folder_sync, create_child_manifest))

        return root_folder_sync
 def getProvenance_side_effect(entity, *args, **kwargs):
     """Look up ``entity`` by id in ``provenance``; extra arguments are ignored."""
     entity_id = id_of(entity)
     return provenance[entity_id]
def test_syncFromSynase__manifest(syn):
    """Check that syncFromSynapse writes manifest files when given an explicit download path.

    The mocked hierarchy is a project containing ``file1`` and a folder, with the folder
    containing ``file2``. One manifest is expected at the sync root (both files) and one in
    the sub folder (only ``file2``).
    """
    project = Project(name="the project", parent="whatever", id="syn123")

    root_file_path = '/tmp/foo'
    file1 = File(name="file1", parent=project, id="syn456", path=root_file_path)

    nested_file_path = '/tmp/afolder/bar'
    file2 = File(
        name="file2",
        parent=project,
        id="syn789",
        parentId='syn098',
        path=nested_file_path,
    )
    folder = Folder(name="afolder", parent=project, id="syn098")

    # lookup tables backing the mocked syn.get / syn.getProvenance calls
    entities_by_id = {entity.id: entity for entity in (file1, file2, folder)}
    provenance_by_id = {
        file1.id: Activity(data={
            'used': '',
            'executed': '',
        }),
        file2.id: Activity(data={
            'used': '',
            'executed': '',
            'name': 'foo',
            'description': 'bar',
        }),
    }

    def _lookup_entity(entity, *args, **kwargs):
        return entities_by_id[id_of(entity)]

    def _lookup_provenance(entity, *args, **kwargs):
        return provenance_by_id[id_of(entity)]

    # both manifests share the header row, and the file2 row appears in each of them
    manifest_header = (
        "path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription\n"
    )
    file1_row = f"{root_file_path}\tsyn123\tfile1\tTrue\t\t\t\t\t\n"
    file2_row = f"{nested_file_path}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar\n"
    expected_project_manifest = manifest_header + file1_row + file2_row
    expected_folder_manifest = manifest_header + file2_row

    expected_synced_files = [file2, file1]

    # getChildren is consumed in order: first the project's children, then the folder's
    with tempfile.TemporaryDirectory() as sync_dir, \
            patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]), \
            patch.object(syn, "get", side_effect=_lookup_entity), \
            patch.object(syn, "getProvenance", side_effect=_lookup_provenance) as mock_get_provenance:

        synced_files = synapseutils.syncFromSynapse(syn, project, path=sync_dir)

        expected_ids = sorted(id_of(entity) for entity in expected_synced_files)
        synced_ids = sorted(id_of(entity) for entity in synced_files)
        assert expected_ids == synced_ids

        # the manifests hold three provenance rows in total (two in the project manifest,
        # one in the folder manifest), but file2 is the same file in both, so provenance
        # should be fetched only once per synced file
        assert len(expected_synced_files) == mock_get_provenance.call_count

        # one manifest rooted at the project and one rooted in the sub folder
        project_manifest_path = os.path.join(sync_dir, synapseutils.sync.MANIFEST_FILENAME)
        folder_manifest_path = os.path.join(
            sync_dir,
            folder.name,
            synapseutils.sync.MANIFEST_FILENAME,
        )
        _compareCsv(expected_project_manifest, project_manifest_path)
        _compareCsv(expected_folder_manifest, folder_manifest_path)
Example #30
0
def upload_file_handle(
        syn,
        parent_entity,
        path,
        synapseStore=True,
        md5=None,
        file_size=None,
        mimetype=None,
        max_threads=None,
):
    """Create a file handle for ``path``, uploading it to the parent's default upload destination.

    When ``synapseStore`` is False nothing is uploaded and an external file handle referencing
    ``path`` is created instead. Otherwise the upload routine is chosen from the
    ``concreteType`` of the parent's default upload destination.

    :param syn:             Synapse client object; used to look up the upload destination and passed to
                            every upload helper.
    :param parent_entity:   Entity object or id of the parent entity.
    :param path:            file path to the file being uploaded
    :param synapseStore:    If False, will not upload the file, but instead create an external file handle that
                            references ``path``.
                            If True, will upload the file based on the upload destination of the parent entity.
    :param md5:             The MD5 checksum for the file, if known. Only forwarded when creating an external
                            file handle (``synapseStore=False``).
    :param file_size:       The size of the file, if known. Only forwarded when creating an external file
                            handle (``synapseStore=False``).
    :param mimetype:        The MIME type of the file, if known. Forwarded to whichever helper is selected.
    :param max_threads:     Forwarded to the Synapse/external S3 upload helper; not used by the other
                            upload paths.

    :returns: the result of the selected helper
              (NOTE(review): presumably a dict describing the new FileHandle -- confirm against the helpers)

    :raises ValueError: if ``path`` is None
    :raises NotImplementedError: if the destination is an external upload destination whose ``uploadType``
                                 is not ``'SFTP'``
    """
    if path is None:
        raise ValueError('path can not be None')

    # external file handle only: nothing is uploaded, so the path is passed through untouched
    if not synapseStore:
        return create_external_file_handle(syn, path, mimetype=mimetype, md5=md5, file_size=file_size)

    # from here on an upload is required; expand "~" and environment variables first
    local_path = os.path.expandvars(os.path.expanduser(path))
    parent_id = id_of(parent_entity)

    # the upload helper is picked from the concrete type of the default upload destination
    destination = syn._getDefaultUploadDestination(parent_id)
    destination_type = destination['concreteType']

    rule = '#' * 50

    # STS-enabled external S3 storage is uploaded directly through boto3
    use_sts_boto = (
        sts_transfer.is_boto_sts_transfer_enabled(syn)
        and sts_transfer.is_storage_location_sts_enabled(syn, parent_id, destination)
        and destination_type == concrete_types.EXTERNAL_S3_UPLOAD_DESTINATION
    )
    if use_sts_boto:
        log_upload_message(
            syn,
            f'\n{rule}\n Uploading file to external S3 storage using boto3 \n{rule}\n',
        )
        return upload_synapse_sts_boto_s3(
            syn,
            parent_id,
            destination,
            local_path,
            mimetype=mimetype,
        )

    # Synapse-managed S3 or (non-STS) external S3
    if destination_type in (
        concrete_types.SYNAPSE_S3_UPLOAD_DESTINATION,
        concrete_types.EXTERNAL_S3_UPLOAD_DESTINATION,
    ):
        if destination_type == concrete_types.SYNAPSE_S3_UPLOAD_DESTINATION:
            storage_label = 'Synapse'
        else:
            storage_label = 'your external S3'
        log_upload_message(
            syn,
            f'\n{rule}\n Uploading file to {storage_label} storage \n{rule}\n',
        )
        return upload_synapse_s3(
            syn,
            local_path,
            destination['storageLocationId'],
            mimetype=mimetype,
            max_threads=max_threads,
        )

    # external file handle (sftp)
    if destination_type == concrete_types.EXTERNAL_UPLOAD_DESTINATION:
        if destination['uploadType'] != 'SFTP':
            raise NotImplementedError('Can only handle SFTP upload locations.')
        banner = destination.get('banner', '')
        host = urllib_parse.urlparse(destination['url']).netloc
        log_upload_message(
            syn,
            f'\n{rule}\n{banner}\nUploading to: {host}\n{rule}\n',
        )
        return upload_external_file_handle_sftp(syn, local_path, destination['url'], mimetype=mimetype)

    # client authenticated S3
    if destination_type == concrete_types.EXTERNAL_OBJECT_STORE_UPLOAD_DESTINATION:
        banner = destination.get('banner', '')
        endpoint = destination.get('endpointUrl')
        bucket = destination.get('bucket')
        log_upload_message(
            syn,
            f'\n{rule}\n{banner}\nUploading to endpoint: [{endpoint}] bucket: [{bucket}]\n{rule}\n',
        )
        return upload_client_auth_s3(
            syn,
            local_path,
            destination['bucket'],
            destination['endpointUrl'],
            destination['keyPrefixUUID'],
            destination['storageLocationId'],
            mimetype=mimetype,
        )

    # unknown storage location: warn and fall back to the default Synapse upload
    warning_rule = '!' * 50
    banner = destination.get('banner', '')
    log_upload_message(
        syn,
        f'\n{warning_rule}\n{banner}\nUNKNOWN STORAGE LOCATION. Defaulting upload to Synapse.\n{warning_rule}\n',
    )
    return upload_synapse_s3(syn, local_path, None, mimetype=mimetype, max_threads=max_threads)