def remove_team_member(syn, team, user):
    """Remove a member from a team.

    Issues a REST DELETE against ``/team/<team id>/member/<user id>``.

    Args:
        syn: Synapse object
        team: synapseclient.Team or its id
        user: synapseclient.UserProfile or its id

    """
    teamid = id_of(team)
    userid = id_of(user)
    member_uri = f"/team/{teamid}/member/{userid}"
    syn.restDELETE(member_uri)
def test_entity_view_add_annotation_columns(syn, project, schedule_for_cleanup):
    """Store entity views with ``addAnnotationColumns=True`` over two annotated folders.

    Three views are stored in turn: one using only the deprecated ``type``
    field, one combining ``type`` with ``includeEntityTypes``, and one using
    only ``includeEntityTypes``.
    """
    first_folder = syn.store(Folder(
        name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj1',
        parent=project,
        annotations={'strAnno': 'str1', 'intAnno': 1, 'floatAnno': 1.1},
    ))
    second_folder = syn.store(Folder(
        name=str(uuid.uuid4()) + 'test_entity_view_add_annotation_columns_proj2',
        parent=project,
        annotations={'dateAnno': datetime.now(), 'strAnno': 'str2', 'intAnno': 2},
    ))
    for stored_folder in (first_folder, second_folder):
        schedule_for_cleanup(stored_folder)

    scope = [utils.id_of(first_folder), utils.id_of(second_folder)]
    shared_view_args = dict(
        scopeIds=scope,
        addDefaultViewColumns=False,
        addAnnotationColumns=True,
        parent=project,
    )

    view_variants = (
        # These two variants ensure that user code which uses the deprecated field `type` continues to work
        # TODO: remove these cases in Synapse Python client 2.0
        {'type': 'project'},
        {'type': 'file', 'includeEntityTypes': [EntityViewType.PROJECT]},
        # Non-deprecated form: only includeEntityTypes is given
        {'includeEntityTypes': [EntityViewType.PROJECT]},
    )
    for variant in view_variants:
        view = EntityViewSchema(name=str(uuid.uuid4()), **shared_view_args, **variant)
        syn.store(view)
def get_forum_threads(syn, ent, query_filter='EXCLUDE_DELETED', limit=20, offset=0):
    """Get the threads of a project's forum.

    Args:
        syn: synapse object
        ent: Synapse Project entity or id
        query_filter: filter forum threads returned. Can be NO_FILTER,
                      DELETED_ONLY, EXCLUDE_DELETED.
                      Defaults to EXCLUDE_DELETED.
        limit: forwarded unchanged to DiscussionApi.get_forum_threads
        offset: forwarded unchanged to DiscussionApi.get_forum_threads

    Returns:
        The forum threads, exactly as produced by
        DiscussionApi.get_forum_threads

    """
    discussion_api = DiscussionApi(syn)
    project_id = id_of(ent)
    forum = discussion_api.get_project_forum(project_id)
    return discussion_api.get_forum_threads(
        forum.id,
        query_filter=query_filter,
        limit=limit,
        offset=offset,
    )
def test_id_of():
    """Check utils.id_of against scalars, dicts, nested properties and objects."""
    # scalars and plain ids are stringified
    assert utils.id_of(1) == '1'
    assert utils.id_of('syn12345') == 'syn12345'

    # dicts: an 'id' key is required
    assert utils.id_of({'foo': 1, 'id': 123}) == '123'
    with pytest.raises(ValueError):
        utils.id_of({'foo': 1, 'idzz': 123})

    # dicts with a nested 'properties' mapping
    assert utils.id_of({'properties': {'id': 123}}) == '123'
    with pytest.raises(ValueError):
        utils.id_of({'properties': {'qq': 123}})

    # arbitrary objects without any id are rejected
    with pytest.raises(ValueError):
        utils.id_of(object())

    class Foo:
        def __init__(self, id_attr_name, id):
            self.properties = {id_attr_name: id}

    # objects exposing the id under any of the recognised property names
    for attr_name in ('id', 'ownerId', 'tableId'):
        assert utils.id_of(Foo(attr_name, 123)) == '123'
def create_challenge(syn, entity, team):
    """Create a Challenge associated with a Project.

    POSTs a JSON body holding the participant team id and project id to
    ``/challenge`` and wraps the response in a Challenge.

    See the definition of a Challenge object here:
    https://docs.synapse.org/rest/org/sagebionetworks/repo/model/Challenge.html

    Args:
        syn: Synapse connection
        entity: An Entity or Synapse ID of a Project.
        team: A Team or Team ID.

    Returns:
        Challenge object

    """
    project_id = id_of(entity)
    team_id = id_of(team)
    request_body = json.dumps({'participantTeamId': team_id,
                               'projectId': project_id})
    response = syn.restPOST('/challenge', request_body)
    return Challenge(**response)
def test_store_table_datetime(syn, project):
    """Store a datetime in a DATE column and check it is read back unchanged."""
    # round the timestamp to 3 decimal places before building the datetime
    current_datetime = datetime.fromtimestamp(round(time.time(), 3))

    date_columns = [Column(name="testerino", columnType='DATE')]
    schema = syn.store(Schema("testTable", date_columns, project))

    single_row = RowSet(rows=[Row([current_datetime])], schema=schema)
    syn.store(Table(schema, single_row))

    query = "select * from %s" % utils.id_of(schema)
    query_result = syn.tableQuery(query, resultsAs="rowset")
    stored_value = query_result.rowset['rows'][0]['values'][0]
    assert current_datetime == stored_value
def create_challenge(syn: Synapse, project: Union[Project, str],
                     team: Union[Team, str]) -> Challenge:
    """Create a Challenge associated with a Project.

    Delegates to ChallengeApi.create_challenge with the resolved ids.

    Args:
        syn: Synapse connection
        project: A synapseclient.Project or its id
        team: A synapseclient.Team or its id

    Returns:
        Challenge object

    """
    project_id = id_of(project)
    team_id = id_of(team)
    api = ChallengeApi(syn=syn)
    return api.create_challenge(projectid=project_id, teamid=team_id)
def _test_method(self, syn, schema, resultsAs, partial_changes, expected_results):
    """Apply a partial rowset update to a table and verify the outcome.

    Queries all rows of ``schema``, maps each entry of ``partial_changes``
    (by position) onto the ROW_ID at the same index of the query result,
    stores the resulting PartialRowset, then queries again expecting
    ``expected_results``.
    """
    initial_results = self._query_with_retry(
        syn,
        "SELECT * FROM %s" % utils.id_of(schema),
        resultsAs,
        2,
        None,
        QUERY_TIMEOUT_SEC,
    )
    assert initial_results is not None

    frame = initial_results.asDataFrame(rowIdAndVersionInIndex=False)

    # key every change by the row id found at the same index in the frame
    changes_by_row_id = {}
    for index, row_changes in enumerate(partial_changes):
        changes_by_row_id[frame['ROW_ID'][index]] = row_changes

    syn.store(PartialRowset.from_mapping(changes_by_row_id, initial_results))

    final_results = self._query_with_retry(
        syn,
        "SELECT * FROM %s" % utils.id_of(schema),
        resultsAs,
        None,
        expected_results,
        QUERY_TIMEOUT_SEC,
    )
    assert final_results is not None
def __init__(self, targetId=None, targetVersion=None, parent=None, properties=None, annotations=None,
             local_state=None, **kwargs):
    """Build a Link pointing at ``targetId`` (optionally at a fixed version).

    A target must be supplied either through ``targetId`` or through an
    existing 'linksTo' entry in ``properties``; otherwise
    SynapseMalformedEntityError is raised.
    """
    if targetId is not None:
        links_to = dict(targetId=utils.id_of(targetId))
        if targetVersion is not None:
            links_to['targetVersionNumber'] = targetVersion
        kwargs['linksTo'] = links_to
    elif properties is None or 'linksTo' not in properties:
        # no explicit target and nothing usable in properties
        raise SynapseMalformedEntityError("Must provide a target id")

    super(Link, self).__init__(
        concreteType=Link._synapse_entity_type,
        properties=properties,
        annotations=annotations,
        local_state=local_state,
        parent=parent,
        **kwargs
    )
def get_user_entity_permissions(syn, entity):
    """Get the permissions that the caller has on a given Entity.

    Performs a REST GET on ``/entity/<id>/permissions``.

    https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/auth/UserEntityPermissions.html

    Args:
        syn: Synapse connection
        entity: Synapse id or Entity

    Returns:
        UserEntityPermissions

    """
    permissions_uri = "/entity/{}/permissions".format(id_of(entity))
    return syn.restGET(permissions_uri)
def get_challenge(syn, entity):
    """Get the Challenge associated with a Project.

    Performs a REST GET on ``/entity/<id>/challenge`` and wraps the
    response in a Challenge.

    See the definition of a Challenge object here:
    https://docs.synapse.org/rest/org/sagebionetworks/repo/model/Challenge.html

    Args:
        syn: Synapse connection
        entity: An Entity or Synapse ID of a Project.

    Returns:
        Challenge object

    """
    project_id = id_of(entity)
    response = syn.restGET("/entity/%s/challenge" % project_id)
    return Challenge(**response)
def get_challenge(syn: Synapse, project: Union[Project, str]) -> Challenge:
    """Get the Challenge associated with a Project.

    Delegates to ChallengeApi.get_challenge with the resolved project id.

    See the definition of a Challenge object here:
    https://docs.synapse.org/rest/org/sagebionetworks/repo/model/Challenge.html

    Args:
        syn: Synapse connection
        project: A synapseclient.Project or its id

    Returns:
        Challenge object

    """
    project_id = id_of(project)
    api = ChallengeApi(syn=syn)
    return api.get_challenge(projectid=project_id)
def copy_reply(syn, reply, thread):
    """Copy a discussion thread reply into another thread.

    The new reply's text is the original reply text prefixed with an
    "On behalf of @<username>" line naming the original author.

    Args:
        syn: synapse object
        reply: Synapse Reply
        thread: Synapse thread or threadid to copy reply to

    Returns:
        dict: Reply bundle

    """
    target_thread_id = id_of(thread)
    original_author = syn.getUserProfile(reply.createdby)['userName']
    attribution = "On behalf of @{user}\n\n".format(user=original_author)
    original_text = get_thread_reply_text(syn, reply)
    return create_thread_reply(syn, target_thread_id, attribution + original_text)
def create_thread(syn, ent, title, message):
    """Create a thread in a project's forum.

    Args:
        syn: synapse object
        ent: Synapse Project entity or id
        title: title of thread
        message: message in thread

    Returns:
        dict: Thread bundle

    """
    discussion_api = DiscussionApi(syn)
    project_id = id_of(ent)
    forum = discussion_api.get_project_forum(project_id)
    return discussion_api.post_thread(forum.id, title, message)
def get_forum_participants(syn, ent):
    """Get all forum participants.

    Collects the distinct 'activeAuthors' of every thread returned by
    get_forum_threads and fetches a user profile for each one.

    Args:
        syn: synapse object
        ent: Synapse Project entity or id

    Return:
        list: user profiles active in forum

    """
    project_id = id_of(ent)
    author_ids = set()
    for thread in get_forum_threads(syn, project_id):
        author_ids.update(set(thread['activeAuthors']))

    profiles = []
    for author_id in author_ids:
        profiles.append(syn.getUserProfile(author_id))
    return profiles
def copy_thread(syn, thread, project):
    """Copy a discussion thread to a project.

    The new thread keeps the original title; its text is the original
    text prefixed with an "On behalf of @<username>" line naming the
    original author.

    Args:
        syn: synapse object
        thread: Synapse Thread
        project: Synapse Project or its id to copy thread to

    Returns:
        dict: Thread bundle

    """
    target_project_id = id_of(project)
    thread_title = thread['title']
    original_author = syn.getUserProfile(thread['createdBy'])['userName']
    attribution = "On behalf of @{user}\n\n".format(user=original_author)
    original_text = get_thread_text(syn, thread['messageKey'])
    return create_thread(syn, target_project_id, thread_title,
                         attribution + original_text)
def get_thread_replies(syn: Synapse, thread: Thread, **kwargs) -> Iterator[Reply]:
    """Get the replies of a thread.

    Args:
        syn: synapse object
        thread: Synapse thread or id
        **kwargs: forwarded unchanged to DiscussionApi.get_thread_replies:
                  query_filter - filter forum threads returned. Can be
                                 NO_FILTER, DELETED_ONLY, EXCLUDE_DELETED.
                                 Defaults to EXCLUDE_DELETED.
                  limit - Number of query results
                  offset - Page of query result

    Returns:
        The synapseservices.Reply results, exactly as produced by
        DiscussionApi.get_thread_replies

    """
    discussion_api = DiscussionApi(syn)
    thread_id = id_of(thread)
    return discussion_api.get_thread_replies(thread_id, **kwargs)
def _copy_thread(syn, thread: Thread, project: Union[Project, str]) -> Thread:
    """Copy a discussion thread to a project.

    The new thread keeps the original title; its text is the original
    text prefixed with an "On behalf of @<username>" line naming the
    original author.

    Args:
        syn: synapse object
        thread: Synapse Thread
        project: Synapse Project or its id to copy thread to

    Returns:
        synapseservices.Thread

    """
    target_project_id = id_of(project)
    thread_title = thread.title
    username = syn.getUserProfile(thread.createdby)['userName']
    attribution = f"On behalf of @{username}\n\n"
    original_text = get_thread_text(syn, thread)
    return create_thread(syn, target_project_id, thread_title,
                         attribution + original_text)
def __init__(self, **kwargs):
    """Build a Wiki from keyword arguments.

    Requires an 'owner' keyword, whose id is stored as ``ownerId`` (the
    'owner' entry itself is removed afterwards). 'markdown' and
    'markdownFile' are consumed by update_markdown, and any 'fileHandles'
    are appended to 'attachmentFileHandleIds'.

    Raises:
        ValueError: if no 'owner' keyword is given
    """
    # An owner is mandatory
    if 'owner' not in kwargs:
        raise ValueError('Wiki constructor must have an owner specified')

    # Start with an empty file handle list unless the caller supplied one
    kwargs.setdefault('attachmentFileHandleIds', [])

    # Pull the markdown arguments out of kwargs and apply them
    markdown_text = kwargs.pop('markdown', None)
    markdown_path = kwargs.pop('markdownFile', None)
    self.update_markdown(markdown_text, markdown_path)

    # 'fileHandles' is a shorthand for the (wordier) 'attachmentFileHandleIds'
    if 'fileHandles' in kwargs:
        attachment_ids = kwargs['attachmentFileHandleIds']
        for handle in kwargs.pop('fileHandles'):
            attachment_ids.append(handle)

    super(Wiki, self).__init__(kwargs)

    # Replace the 'owner' entry with its id
    self.ownerId = id_of(self.owner)
    del self['owner']
def get_forum_threads(syn: Synapse, ent: Union[Project, str], **kwargs) -> Iterator[Thread]:
    """Get the threads of a project's forum.

    Args:
        syn: synapse object
        ent: Synapse Project entity or id
        **kwargs: forwarded unchanged to DiscussionApi.get_forum_threads:
                  query_filter - filter forum threads returned. Can be
                                 NO_FILTER, DELETED_ONLY, EXCLUDE_DELETED.
                                 Defaults to EXCLUDE_DELETED.
                  limit - Number of query results
                  offset - Page of query result

    Returns:
        The synapseservices.Thread results, exactly as produced by
        DiscussionApi.get_forum_threads

    """
    discussion_api = DiscussionApi(syn)
    project_id = id_of(ent)
    forum = discussion_api.get_project_forum(project_id)
    return discussion_api.get_forum_threads(forum.id, **kwargs)
def get_thread_replies(syn, thread, query_filter='EXCLUDE_DELETED', limit=20, offset=0):
    """Get the replies of a thread.

    Args:
        syn: synapse object
        thread: Synapse thread or id
        query_filter: filter forum threads returned. Can be NO_FILTER,
                      DELETED_ONLY, EXCLUDE_DELETED.
                      Defaults to EXCLUDE_DELETED.
        limit: forwarded unchanged to DiscussionApi.get_thread_replies
        offset: forwarded unchanged to DiscussionApi.get_thread_replies

    Returns:
        The thread replies, exactly as produced by
        DiscussionApi.get_thread_replies

    """
    discussion_api = DiscussionApi(syn)
    thread_id = id_of(thread)
    return discussion_api.get_thread_replies(
        thread_id,
        query_filter=query_filter,
        limit=limit,
        offset=offset,
    )
def syn_get_side_effect(entity, *args, **kwargs):
    """Return the entry of ``entities`` keyed by the id of ``entity``.

    Extra positional and keyword arguments are accepted and ignored.
    """
    entity_id = id_of(entity)
    return entities[entity_id]
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Synchronizes all the files in a folder (including subfolders) from Synapse and adds a readme manifest with file
    metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()
    :param entity:      A Synapse ID, a Synapse Entity object of type file, folder or project.
    :param path:        An optional path where the file hierarchy will be reproduced. If not specified the files will
                        by default be placed in the synapseCache.
    :param ifcollision: Determines how to handle file collisions. Maybe "overwrite.local", "keep.local", or
                        "keep.both". Defaults to "overwrite.local".
    :param allFiles:    Optional list that downloaded File entities are appended to; a new list is created when
                        omitted. The recursive calls pass the same list down so results accumulate in it.
    :param followLink:  Determines whether the link returns the target Entity. Defaults to False

    :returns: list of entities (files, tables, links)

    This function walks every subfolder of the project/folder given by `entity` and downloads all files that have not
    already been downloaded. When Synapse holds a newer file (or a local file has been edited outside of the cache)
    since the last download, the local file is replaced by the new one unless "ifcollision" is changed.

    When downloading to a specific location outside of the Synapse cache, a file (SYNAPSE_METADATA_MANIFEST.tsv)
    containing the metadata (annotations, storage location and provenance of all downloaded files) is also written
    into that path.

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    # start a fresh accumulator on the outermost call
    if allFiles is None:
        allFiles = []

    # resolve a bare Synapse id into an entity
    if is_synapse_id(entity):
        entity = syn.get(entity, downloadLocation=path, ifcollision=ifcollision, followLink=followLink)

    # a single file needs no traversal
    if isinstance(entity, File):
        allFiles.append(entity)
        return allFiles

    entity_id = id_of(entity)
    if not is_container(entity):
        raise ValueError("The provided id: %s is neither a container nor a File" % entity_id)

    for child in syn.getChildren(entity_id):
        if not is_container(child):
            # leaf: fetch it and keep it only when it is a File
            fetched = syn.get(child['id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            if isinstance(fetched, File):
                allFiles.append(fetched)
            continue

        if path is None:
            # downloading into the cache: no directory to mirror
            child_path = None
        else:
            # downloading outside the cache: mirror the container as a directory
            child_path = os.path.join(path, child['name'])
            try:
                os.makedirs(child_path)
            except OSError as mkdir_error:
                # an already existing directory is fine, anything else is not
                if mkdir_error.errno != errno.EEXIST:
                    raise

        # descend into the container
        syncFromSynapse(syn, child['id'], child_path, ifcollision, allFiles, followLink=followLink)

    # a manifest is only written when a download location was given
    # (with path None the files live in the cache)
    if path is not None:
        manifest_path = os.path.join(path, MANIFEST_FILENAME)
        manifest_path = os.path.expanduser(os.path.normcase(manifest_path))
        generateManifest(syn, allFiles, manifest_path)

    return allFiles
def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
    """Populate the entity from properties, annotations, local state and keyword arguments.

    :param properties:  Optional mapping merged into the entity's properties. A nested 'annotations' mapping, if
                        present, is merged into ``annotations`` and removed from ``properties`` (the caller's
                        mapping is modified).
    :param annotations: Optional mapping merged into the entity's annotations, or a str which is stored under the
                        'annotations' property instead.
    :param local_state: Optional mapping handed to ``self.local_state``.
    :param parent:      Optional parent whose id becomes 'parentId' unless 'parentId' is already given in kwargs.
    :param kwargs:      Remaining key/value pairs, each set on the entity through ``__setitem__``.

    :raises SynapseMalformedEntityError: if an argument has an unsupported type, if the id of ``parent`` cannot be
                                         determined, or if an entity that is neither a Project nor a plain Entity
                                         ends up without a parentId.
    """
    mapping_type = collections.abc.Mapping

    if properties:
        if isinstance(properties, mapping_type):
            if 'annotations' in properties and isinstance(properties['annotations'], mapping_type):
                # Previously this raised AttributeError when the annotations argument was left at its default
                # of None; start from an empty dict so the nested annotations are still picked up.
                if annotations is None:
                    annotations = {}
                annotations.update(properties['annotations'])
                del properties['annotations']
            self.__dict__['properties'].update(properties)
        else:
            raise SynapseMalformedEntityError('Unknown argument type: properties is a %s' % str(type(properties)))

    if annotations:
        if isinstance(annotations, mapping_type):
            self.__dict__['annotations'].update(annotations)
        elif isinstance(annotations, str):
            self.properties['annotations'] = annotations
        else:
            raise SynapseMalformedEntityError('Unknown argument type: annotations is a %s' % str(type(annotations)))

    if local_state:
        if isinstance(local_state, mapping_type):
            self.local_state(local_state)
        else:
            raise SynapseMalformedEntityError('Unknown argument type: local_state is a %s' % str(type(local_state)))

    # Make sure every local key exists as an attribute, defaulting to None
    for key in self.__class__._local_keys:
        if key not in self.__dict__:
            self.__dict__[key] = None

    # Extract parentId from parent
    if 'parentId' not in kwargs:
        if parent:
            try:
                kwargs['parentId'] = id_of(parent)
            except Exception:
                if isinstance(parent, Entity) and 'id' not in parent:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent."
                                                      " Has it been stored in Synapse?")
                else:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent.")

    # Note: that this will work properly if derived classes declare their internal state variable *before* invoking
    # super(...).__init__(...)
    for key, value in kwargs.items():
        self.__setitem__(key, value)

    if 'concreteType' not in self:
        self['concreteType'] = self.__class__._synapse_entity_type

    # Only project can be top-level. All other entity types require parentId
    # don't enforce this for generic Entity
    if 'parentId' not in self \
            and not isinstance(self, Project) \
            and type(self) is not Entity:
        raise SynapseMalformedEntityError("Entities of type %s must have a parentId." % type(self))
def test_migrate_project(request, syn, schedule_for_cleanup, storage_location_id):
    """Index and migrate a project's files and table-attached files, then verify the result.

    Builds a project holding one file at the root, a folder with three more
    files (the last one re-using the file handle of the first), and a table
    with two FILEHANDLEID columns and two rows. After indexing and migrating
    to ``storage_location_id`` it checks the storage location of every file
    handle, the per-status counts recorded in the sqlite index, and the
    lines of the CSV report.
    """
    test_name = request.node.name
    project_name = "{}-{}".format(test_name, uuid.uuid4())
    project = synapseclient.Project(name=project_name)
    project_entity = syn.store(project)

    # file 0 lives directly under the project; its storage location is taken
    # as the "default" one that the migration moves away from
    file_0_path = _create_temp_file()
    schedule_for_cleanup(file_0_path)
    file_0_name = "{}-{}".format(test_name, 1)
    file_0 = synapseclient.File(name=file_0_name, path=file_0_path, parent=project_entity)
    file_0_entity = syn.store(file_0)
    default_storage_location_id = file_0_entity._file_handle['storageLocationId']

    folder_1_name = "{}-{}-{}".format(test_name, 1, uuid.uuid4())
    folder_1 = synapseclient.Folder(parent=project_entity, name=folder_1_name)
    folder_1_entity = syn.store(folder_1)

    file_1_path = _create_temp_file()
    schedule_for_cleanup(file_1_path)
    file_1_name = "{}-{}".format(test_name, 1)
    file_1 = synapseclient.File(name=file_1_name, path=file_1_path, parent=folder_1_entity)
    file_1_entity = syn.store(file_1)

    file_2_path = _create_temp_file()
    schedule_for_cleanup(file_2_path)
    file_2_name = "{}-{}".format(test_name, 2)
    file_2 = synapseclient.File(name=file_2_name, path=file_2_path, parent=folder_1_entity)
    file_2_entity = syn.store(file_2)

    # file 3 shares the same file handle id as file 1
    file_3_path = file_1_path
    file_3_name = "{}-{}".format(test_name, 3)
    file_3 = synapseclient.File(name=file_3_name, path=file_3_path, parent=folder_1_entity)
    file_3.dataFileHandleId = file_1_entity.dataFileHandleId
    file_3_entity = syn.store(file_3)

    # table with two file handle columns separated by an integer column
    table_1_cols = [
        synapseclient.Column(name='file_col_1', columnType='FILEHANDLEID'),
        synapseclient.Column(name='num', columnType='INTEGER'),
        synapseclient.Column(name='file_col_2', columnType='FILEHANDLEID'),
    ]
    table_1 = syn.store(synapseclient.Schema(name=test_name, columns=table_1_cols, parent=folder_1_entity))

    # NOTE(review): unlike the entity files above, these four temp files are
    # not passed to schedule_for_cleanup — confirm whether that is intended
    table_1_file_col_1_1 = _create_temp_file()
    table_1_file_handle_1 = syn.uploadFileHandle(table_1_file_col_1_1, table_1)

    table_1_file_col_1_2 = _create_temp_file()
    table_1_file_handle_2 = syn.uploadFileHandle(table_1_file_col_1_2, table_1)

    table_1_file_col_2_1 = _create_temp_file()
    table_1_file_handle_3 = syn.uploadFileHandle(table_1_file_col_2_1, table_1)

    table_1_file_col_2_2 = _create_temp_file()
    table_1_file_handle_4 = syn.uploadFileHandle(table_1_file_col_2_2, table_1)

    # two rows, each: (file_col_1, num, file_col_2)
    data = [
        [table_1_file_handle_1['id'], 1, table_1_file_handle_2['id']],
        [table_1_file_handle_3['id'], 2, table_1_file_handle_4['id']],
    ]

    table_1_entity = syn.store(synapseclient.RowSet(schema=table_1, rows=[synapseclient.Row(r) for r in data]))

    # only the name of the temp file is used, as the path of the sqlite index
    db_path = tempfile.NamedTemporaryFile(delete=False).name
    schedule_for_cleanup(db_path)

    index_result = synapseutils.index_files_for_migration(
        syn,
        project_entity,
        storage_location_id,
        db_path,
        file_version_strategy='new',
        include_table_files=True,
    )

    counts_by_status = index_result.get_counts_by_status()
    assert counts_by_status['INDEXED'] == 8
    assert counts_by_status['ERRORED'] == 0

    migration_result = synapseutils.migrate_indexed_files(syn, db_path, force=True)

    # re-fetch the file entities (metadata only) to see their post-migration file handles
    file_0_entity_updated = syn.get(utils.id_of(file_0_entity), downloadFile=False)
    file_1_entity_updated = syn.get(utils.id_of(file_1_entity), downloadFile=False)
    file_2_entity_updated = syn.get(utils.id_of(file_2_entity), downloadFile=False)
    file_3_entity_updated = syn.get(utils.id_of(file_3_entity), downloadFile=False)
    file_handles = [
        f['_file_handle'] for f in (
            file_0_entity_updated,
            file_1_entity_updated,
            file_2_entity_updated,
            file_3_entity_updated,
        )
    ]

    table_1_id = utils.id_of(table_1_entity)
    results = syn.tableQuery("select file_col_1, file_col_2 from {}".format(utils.id_of(table_1_entity)))
    table_file_handles = []
    for row in results:
        # presumably the first two values of each row are the row id and row
        # version, hence the [2:] slice — confirm against tableQuery's output
        for file_handle_id in row[2:]:
            file_handle = syn._getFileHandleDownload(
                file_handle_id, table_1_id, objectType='TableEntity')['fileHandle']
            table_file_handles.append(file_handle)
    file_handles.extend(table_file_handles)

    _assert_storage_location(file_handles, storage_location_id)
    assert storage_location_id != default_storage_location_id

    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        query_result = cursor.execute(
            "select status, count(*) from migrations where type in (?, ?) group by status",
            (_MigrationType.FILE.value, _MigrationType.TABLE_ATTACHED_FILE.value)).fetchall()

        counts = {r[0]: r[1] for r in query_result}

        # should only be one status and they should all be migrated
        # NOTE(review): four file entities (file 0 through file 3) and four table
        # attached files are created above, which presumably make up the 8 — confirm
        assert len(counts) == 1
        assert counts[_MigrationStatus.MIGRATED.value] == 8

    csv_file = tempfile.NamedTemporaryFile(delete=False)
    schedule_for_cleanup(csv_file.name)
    migration_result.as_csv(csv_file.name)
    with open(csv_file.name, 'r') as csv_file_in:
        csv_contents = csv_file_in.read()

    # from here on the table id is read from the stored rowset's 'tableId' entry
    table_1_id = table_1_entity['tableId']

    # assert the content of the csv. we don't assert any particular order of the lines
    # but the presence of the expected lines and the correct # of lines
    csv_lines = csv_contents.split('\n')
    assert "id,type,version,row_id,col_name,from_storage_location_id,from_file_handle_id,to_file_handle_id,status,exception" in csv_lines  # noqa
    assert f"{file_0_entity.id},file,,,,{default_storage_location_id},{file_0_entity.dataFileHandleId},{file_0_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_1_entity.id},file,,,,{default_storage_location_id},{file_1_entity.dataFileHandleId},{file_1_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_2_entity.id},file,,,,{default_storage_location_id},{file_2_entity.dataFileHandleId},{file_2_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_3_entity.id},file,,,,{default_storage_location_id},{file_3_entity.dataFileHandleId},{file_3_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_1,{default_storage_location_id},{table_1_file_handle_1['id']},{table_file_handles[0]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_2,{default_storage_location_id},{table_1_file_handle_2['id']},{table_file_handles[1]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_1,{default_storage_location_id},{table_1_file_handle_3['id']},{table_file_handles[2]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_2,{default_storage_location_id},{table_1_file_handle_4['id']},{table_file_handles[3]['id']},MIGRATED," in csv_lines  # noqa
    assert "" in csv_lines  # expect trailing newline in a csv
def id(self, value):
    """Store the id of ``value`` on ``self._id``.

    Raises:
        ValueError: if ``value`` is None
    """
    if value is not None:
        self._id = id_of(value)
        return
    raise ValueError("id must not be None")
def _sync_root(self, root, root_path, ifcollision, followLink, progress, downloadFile, manifest="all"):
    """Walk the folder tree below ``root`` iteratively and schedule every file for syncing.

    Folders are visited through an explicit stack. For each folder a
    _FolderSync is created and each non-container child is submitted to
    ``self._executor`` as a ``self._sync_file`` call.

    ``manifest`` selects the ``create_manifest`` flag handed to each
    _FolderSync: "suppress" passes False everywhere, "all" passes True
    everywhere, and any other value passes True for the root only.

    :returns: the _FolderSync created for ``root``
    :raises ValueError: chained from the exception reported by the root
                        _FolderSync, checked at the start of each iteration
    """
    # stack elements are a 4-tuple of:
    # 1. the folder entity/dict
    # 2. the local path to the folder to download to
    # 3. the FolderSync of the parent to the folder (None at the root)
    # 4. the create_manifest flag to pass to this folder's FolderSync
    create_root_manifest = True if manifest != "suppress" else False
    folder_stack = [(root, root_path, None, create_root_manifest)]
    create_child_manifest = True if manifest == "all" else False

    root_folder_sync = None
    while folder_stack:
        if root_folder_sync:
            # if at any point the sync encounters an exception it will
            # be communicated up to the root at which point we should abort
            exception = root_folder_sync.get_exception()
            if exception:
                raise ValueError("File download failed during sync") from exception

        folder, parent_path, parent_folder_sync, create_manifest = folder_stack.pop()

        entity_id = id_of(folder)
        folder_path = None
        if parent_path is not None:
            folder_path = parent_path
            # root_folder_sync is still unset while the root itself is being processed
            if root_folder_sync:
                # syncFromSynapse behavior is that we do NOT create a folder for the root folder of the sync.
                # we treat the download local path folder as the root and write the children of the sync
                # directly into that local folder
                folder_path = os.path.join(folder_path, folder['name'])
            os.makedirs(folder_path, exist_ok=True)

        # split the children into files (synced now) and containers (pushed on the stack)
        child_ids = []
        child_file_ids = []
        child_folders = []
        for child in self._syn.getChildren(entity_id):
            child_id = id_of(child)
            child_ids.append(child_id)
            if is_container(child):
                child_folders.append(child)
            else:
                child_file_ids.append(child_id)

        folder_sync = _FolderSync(
            self._syn,
            entity_id,
            folder_path,
            child_ids,
            parent_folder_sync,
            create_manifest=create_manifest,
        )
        if not root_folder_sync:
            # the first folder processed is the root
            root_folder_sync = folder_sync

        if not child_ids:
            # this folder has no children, so it is immediately finished
            folder_sync.update()

        else:
            for child_file_id in child_file_ids:
                # NOTE(review): the semaphore is acquired before each submit and is
                # presumably released by _sync_file when the download ends — confirm
                self._file_semaphore.acquire()
                self._executor.submit(
                    self._sync_file,
                    child_file_id,
                    folder_sync,
                    folder_path,
                    ifcollision,
                    followLink,
                    progress,
                    downloadFile,
                )

            for child_folder in child_folders:
                folder_stack.append((child_folder, folder_path, folder_sync, create_child_manifest))

    return root_folder_sync
def getProvenance_side_effect(entity, *args, **kwargs):
    """Return the entry of ``provenance`` keyed by the id of ``entity``.

    Extra positional and keyword arguments are accepted and ignored.
    """
    entity_id = id_of(entity)
    return provenance[entity_id]
def test_syncFromSynase__manifest(syn):
    """Verify that we generate manifest files when syncing to a location outside of the cache.

    ``syn.getChildren``, ``syn.get`` and ``syn.getProvenance`` are patched so
    that a project containing a folder and a file, with a second file inside
    the folder, is synced into a temporary directory without touching Synapse.
    """

    project = Project(name="the project", parent="whatever", id="syn123")
    path1 = '/tmp/foo'
    file1 = File(name="file1", parent=project, id="syn456", path=path1)
    path2 = '/tmp/afolder/bar'
    file2 = File(name="file2", parent=project, id="syn789", parentId='syn098', path=path2)
    folder = Folder(name="afolder", parent=project, id="syn098")
    # lookup table backing the patched syn.get
    entities = {
        file1.id: file1,
        file2.id: file2,
        folder.id: folder,
    }

    def syn_get_side_effect(entity, *args, **kwargs):
        # resolve whatever is requested through the lookup table above
        return entities[id_of(entity)]

    file_1_provenance = Activity(data={
        'used': '',
        'executed': '',
    })
    file_2_provenance = Activity(data={
        'used': '',
        'executed': '',
        'name': 'foo',
        'description': 'bar',
    })

    # lookup table backing the patched syn.getProvenance
    provenance = {
        file1.id: file_1_provenance,
        file2.id: file_2_provenance,
    }

    def getProvenance_side_effect(entity, *args, **kwargs):
        return provenance[id_of(entity)]

    # NOTE(review): the two manifest literals below presumably held one manifest
    # row per line (header, then one line per file) before this file's
    # whitespace was collapsed — confirm and restore the line breaks if so
    expected_project_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription {path1}\tsyn123\tfile1\tTrue\t\t\t\t\t {path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar """

    expected_folder_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription {path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar """

    expected_synced_files = [file2, file1]

    with tempfile.TemporaryDirectory() as sync_dir:
        # getChildren yields the project's children on the first call and the
        # folder's children on the second
        with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
                patch.object(syn, "get", side_effect=syn_get_side_effect),\
                patch.object(syn, "getProvenance") as patch_syn_get_provenance:

            patch_syn_get_provenance.side_effect = getProvenance_side_effect

            synced_files = synapseutils.syncFromSynapse(syn, project, path=sync_dir)
            # compare by id, ignoring the order in which files were synced
            assert sorted([id_of(e) for e in expected_synced_files]) == sorted([id_of(e) for e in synced_files])

            # we only expect two calls to provenance even though there are three rows of provenance data
            # in the manifests (two in the outer project, one in the folder)
            # since one of the files is repeated in both manifests we expect only the single get provenance call
            assert len(expected_synced_files) == patch_syn_get_provenance.call_count

            # we should have two manifest files, one rooted at the project and one rooted in the sub folder

            _compareCsv(
                expected_project_manifest,
                os.path.join(sync_dir, synapseutils.sync.MANIFEST_FILENAME))
            _compareCsv(
                expected_folder_manifest,
                os.path.join(sync_dir, folder.name, synapseutils.sync.MANIFEST_FILENAME))
def upload_file_handle(
    syn,
    parent_entity,
    path,
    synapseStore=True,
    md5=None,
    file_size=None,
    mimetype=None,
    max_threads=None,
):
    """Uploads the file in the provided path (if necessary) to a storage location based on project settings.
    Returns a new FileHandle as a dict to represent the stored file.

    :param syn:             the Synapse object used for all service calls
    :param parent_entity:   Entity object or id of the parent entity.
    :param path:            file path to the file being uploaded
    :param synapseStore:    If False, will not upload the file, but instead create an ExternalFileHandle that
                            references the file on the local machine.
                            If True, will upload the file based on the upload destination of the parent entity
    :param md5:             The MD5 checksum for the file, if known. Only forwarded when creating an external
                            file handle (synapseStore=False).
    :param file_size:       The size the file, if known. Only forwarded when creating an external file handle
                            (synapseStore=False).
    :param mimetype:        The MIME type the file, if known. Forwarded to whichever upload function is chosen.
    :param max_threads:     Forwarded to upload_synapse_s3; the other upload functions do not receive it.

    :returns: a dict of a new FileHandle as a dict that represents the uploaded file

    :raises ValueError:           if path is None
    :raises NotImplementedError:  if the destination is an external upload destination whose uploadType
                                  is not 'SFTP'
    """
    if path is None:
        raise ValueError('path can not be None')

    # external file handle only: nothing is uploaded
    if not synapseStore:
        return create_external_file_handle(syn, path, mimetype=mimetype, md5=md5, file_size=file_size)

    # past this point an upload is required and some upload functions require an absolute path
    expanded_upload_path = os.path.expandvars(os.path.expanduser(path))

    entity_parent_id = id_of(parent_entity)

    # the UploadDestination decides which upload function is used
    location = syn._getDefaultUploadDestination(entity_parent_id)
    upload_destination_type = location['concreteType']

    hash_rule = '#' * 50

    use_sts_boto = (
        sts_transfer.is_boto_sts_transfer_enabled(syn)
        and sts_transfer.is_storage_location_sts_enabled(syn, entity_parent_id, location)
        and upload_destination_type == concrete_types.EXTERNAL_S3_UPLOAD_DESTINATION
    )
    if use_sts_boto:
        log_upload_message(
            syn,
            '\n' + hash_rule + '\n Uploading file to external S3 storage using boto3 \n' + hash_rule + '\n'
        )
        return upload_synapse_sts_boto_s3(
            syn,
            entity_parent_id,
            location,
            expanded_upload_path,
            mimetype=mimetype,
        )

    s3_destination_types = (
        concrete_types.SYNAPSE_S3_UPLOAD_DESTINATION,
        concrete_types.EXTERNAL_S3_UPLOAD_DESTINATION,
    )
    if upload_destination_type in s3_destination_types:
        if upload_destination_type == concrete_types.SYNAPSE_S3_UPLOAD_DESTINATION:
            storageString = 'Synapse'
        else:
            storageString = 'your external S3'
        log_upload_message(
            syn,
            '\n' + hash_rule + '\n Uploading file to ' + storageString + ' storage \n' + hash_rule + '\n'
        )
        return upload_synapse_s3(
            syn,
            expanded_upload_path,
            location['storageLocationId'],
            mimetype=mimetype,
            max_threads=max_threads
        )

    # external file handle (sftp)
    if upload_destination_type == concrete_types.EXTERNAL_UPLOAD_DESTINATION:
        if location['uploadType'] != 'SFTP':
            raise NotImplementedError('Can only handle SFTP upload locations.')
        log_upload_message(
            syn,
            '\n%s\n%s\nUploading to: %s\n%s\n' % (
                hash_rule,
                location.get('banner', ''),
                urllib_parse.urlparse(location['url']).netloc,
                hash_rule,
            )
        )
        return upload_external_file_handle_sftp(syn, expanded_upload_path, location['url'], mimetype=mimetype)

    # client authenticated S3
    if upload_destination_type == concrete_types.EXTERNAL_OBJECT_STORE_UPLOAD_DESTINATION:
        log_upload_message(
            syn,
            '\n%s\n%s\nUploading to endpoint: [%s] bucket: [%s]\n%s\n' % (
                hash_rule,
                location.get('banner', ''),
                location.get('endpointUrl'),
                location.get('bucket'),
                hash_rule,
            )
        )
        return upload_client_auth_s3(syn, expanded_upload_path, location['bucket'], location['endpointUrl'],
                                     location['keyPrefixUUID'], location['storageLocationId'], mimetype=mimetype)

    # unknown storage location: fall back to uploading to Synapse
    bang_rule = '!' * 50
    log_upload_message(
        syn,
        '\n%s\n%s\nUNKNOWN STORAGE LOCATION. Defaulting upload to Synapse.\n%s\n' % (
            bang_rule,
            location.get('banner', ''),
            bang_rule,
        )
    )
    return upload_synapse_s3(syn, expanded_upload_path, None, mimetype=mimetype, max_threads=max_threads)