def write(syn: Synapse, center_mapping_df: pd.DataFrame,
          error_tracker_synid: str):
    """Write center errors to a file

    Args:
        syn: Synapse connection
        center_mapping_df: Center mapping dataframe
        error_tracker_synid: Error tracking synapse id

    """
    center_errors = get_center_invalid_errors(syn, error_tracker_synid)
    for center in center_mapping_df["center"]:
        logger.info(center)
        # Use positional indexing; the filtered Series keeps its original
        # label index, so a plain `[0]` could raise a KeyError here.
        staging_synid = center_mapping_df["stagingSynId"][
            center_mapping_df["center"] == center].iloc[0]
        with open(center + "_errors.txt", "w") as errorfile:
            if center not in center_errors:
                errorfile.write("No errors!")
            else:
                errorfile.write(center_errors[center])
        ent = synapseclient.File(center + "_errors.txt",
                                 parentId=staging_synid)
        syn.store(ent)
        os.remove(center + "_errors.txt")
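# A minimal usage sketch for write(), assuming hypothetical Synapse table IDs
# (syn111 for the center mapping table, syn222 for the error tracker); the
# real IDs come from the project configuration.
import synapseclient

syn = synapseclient.login()
center_mapping = syn.tableQuery("select * from syn111")
write(syn, center_mapping.asDataFrame(), "syn222")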
def test_teams(syn, project, schedule_for_cleanup):
    name = "My Uniquely Named Team " + str(uuid.uuid4())
    team = syn.store(Team(name=name, description="A fake team for testing..."))
    schedule_for_cleanup(team)

    # not logged in, teams are public
    anonymous_syn = Synapse()

    found_team = anonymous_syn.getTeam(team.id)
    assert team == found_team

    p = syn.getUserProfile()
    found = None
    for m in anonymous_syn.getTeamMembers(team):
        if m.member.ownerId == p.ownerId:
            found = m
            break

    assert found is not None, "Couldn't find user {} in team".format(
        p.userName)

    # needs to be retried 'cause appending to the search index is asynchronous
    tries = 8
    sleep_time = 1
    found_team = None
    while tries > 0:
        try:
            found_team = anonymous_syn.getTeam(name)
            break
        except ValueError:
            tries -= 1
            if tries > 0:
                time.sleep(sleep_time)
                sleep_time *= 2
    assert team == found_team
def syn():
    """Create a Synapse instance that can be shared by all tests
    in the session.
    """
    syn = Synapse(debug=False, skip_checks=True)
    syn.logger = logging.getLogger(SILENT_LOGGER_NAME)
    return syn
def set_evaluation_quota(syn: Synapse, evalid: int, **kwargs):
    """Sets evaluation submission limit quota.  This WILL erase any old quota
    you had previously set. Note - round_start must be specified with either
    round_end or round_duration, and number_of_rounds must be defined for the
    time limits to work.  submission_limit will work without number_of_rounds.

    Args:
        syn: Synapse object
        evalid: Evaluation id
        **kwargs:
            round_start: Start of round (local time) in YEAR-MM-DDTHH:MM:SS
                         format (i.e. 2020-02-21T17:00:00)
            round_end: End of round (local time) in YEAR-MM-DDTHH:MM:SS
                       format (i.e. 2020-02-21T19:00:00)
            number_of_rounds: Number of rounds
            round_duration: Round duration in milliseconds
            submission_limit: Number of submissions allowed per team

    Returns:
        A synapseclient.Evaluation

    Examples:
        >>> set_evaluation_quota(syn, 12345,
                                 round_start="2020-02-21T17:00:00",
                                 round_end="2020-02-23T17:00:00",
                                 number_of_rounds=1,
                                 submission_limit=3)

    """
    quota = _create_quota(**kwargs)
    evaluation = syn.getEvaluation(evalid)
    evaluation.quota = vars(quota)
    evaluation = syn.store(evaluation)
    return evaluation
def create_new_fileformat_table(
    syn: Synapse,
    file_format: str,
    newdb_name: str,
    projectid: str,
    archive_projectid: str,
) -> dict:
    """Creates new database table based on old database table and archives
    old database table

    Args:
        syn: Synapse object
        file_format: File format to update
        newdb_name: Name of new database table
        projectid: Project id where new database should live
        archive_projectid: Project id where old database should be moved

    Returns:
        {"newdb_ent": New database synapseclient.Table,
         "newdb_mappingdf": new database pd.DataFrame,
         "moved_ent": old database synapseclient.Table}
    """
    db_info = get_dbmapping(syn, projectid)
    database_mappingdf = db_info["df"]
    dbmapping_synid = db_info["synid"]

    olddb_synid = getDatabaseSynId(syn, file_format,
                                   databaseToSynIdMappingDf=database_mappingdf)
    olddb_ent = syn.get(olddb_synid)
    olddb_columns = list(syn.getTableColumns(olddb_synid))

    newdb_ent = _create_schema(
        syn,
        table_name=newdb_name,
        columns=olddb_columns,
        parentid=projectid,
        annotations=olddb_ent.annotations,
    )

    newdb_mappingdf = _update_database_mapping(syn, database_mappingdf,
                                               dbmapping_synid, file_format,
                                               newdb_ent.id)
    # Automatically rename the archived entity with ARCHIVED
    # This will attempt to resolve any issues if the table already exists at
    # location
    new_table_name = f"ARCHIVED {time.time()}-{olddb_ent.name}"
    moved_ent = _move_entity(syn, olddb_ent, archive_projectid,
                             name=new_table_name)
    return {
        "newdb_ent": newdb_ent,
        "newdb_mappingdf": newdb_mappingdf,
        "moved_ent": moved_ent,
    }
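# Minimal usage sketch for create_new_fileformat_table(), assuming
# hypothetical project IDs; the live project must carry the "dbMapping"
# annotation that get_dbmapping() reads.
import synapseclient

syn = synapseclient.login()
result = create_new_fileformat_table(
    syn,
    file_format="maf",
    newdb_name="narrow_maf_2021",
    projectid="syn123",          # project holding the database tables
    archive_projectid="syn456",  # project receiving the archived table
)
print(result["newdb_ent"].id)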
def store_full_maf(syn: Synapse, filepath: str, parentid: str):
    """Stores full maf file

    Args:
        syn: Synapse connection
        filepath: Path to file
        parentid: Synapse container id
    """
    syn.store(synapseclient.File(filepath, parentId=parentid))
def push_wiki(syn: Synapse, projectid: str,
              workdir: str = "./") -> typing.List[dict]:
    """Pushes Wiki from configuration

    Args:
        syn: Synapse connection
        projectid: synapseclient.Project id
        workdir: Location of markdown files and wiki_config.json.
                 Defaults to location of where code is being executed.

    Returns:
        Wiki Configuration::

            [
                {
                    "id": "111",
                    "title": "title",
                    "parentId": "33333",
                    "markdown_path": "home.md"
                },
                {...}
            ]

    """
    wiki_config = validate_config(workdir)
    for wiki_header in wiki_config:
        # no markdown path, nothing to update
        markdown_path = wiki_header.get('markdown_path')
        if not markdown_path:
            print(f"Markdown not specified: {wiki_header['title']}")
            continue

        markdown_path = os.path.join(workdir, markdown_path)
        with open(markdown_path, 'r') as md_f:
            markdown = md_f.read()

        # Update the existing wiki page if an id is specified;
        # otherwise create a new page
        if wiki_header.get('id') is not None:
            wiki = syn.getWiki(projectid, subpageId=wiki_header['id'])
            # Don't store if the wiki pages are the same
            if wiki.markdown == markdown:
                print(f"no updates: {wiki_header['title']}")
                continue
            print(f"Wiki updated: {wiki_header['title']}")
        else:
            wiki = synapseclient.Wiki(owner=projectid,
                                      title=wiki_header['title'],
                                      parentWikiId=wiki_header['parentId'])
            print(f"Wiki added: {wiki_header['title']}")
        wiki.markdown = markdown
        wiki = syn.store(wiki)
        # If new wiki page is added, must add to wiki_config.json
        wiki_header['id'] = wiki['id']
    return wiki_config
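# Usage sketch for push_wiki(), assuming a hypothetical project ID and a
# working directory that already contains wiki_config.json plus the markdown
# files it references (e.g. produced by pull_wiki further below).
import json
import synapseclient

syn = synapseclient.login()
new_config = push_wiki(syn, "syn123", workdir="./wiki_backup")
# Persist the config so newly created pages keep their assigned wiki ids
with open("./wiki_backup/wiki_config.json", "w") as config_f:
    json.dump(new_config, config_f, indent=4)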
def _download_from_synapse(self, synid, target_dir):
    try:
        from synapseclient import Synapse
    except ImportError:
        raise ImportError(
            "Please install synapseclient using 'pip install synapseclient'")
    try:
        self._synapse.get(synid, downloadLocation=target_dir)
    except Exception:
        # No valid cached client/session; create one, log in, and retry
        self._synapse = Synapse()
        self._synapse.login()
        self._synapse.get(synid, downloadLocation=target_dir)
def testCustomConfigFile(syn, schedule_for_cleanup):
    if os.path.isfile(client.CONFIG_FILE):
        configPath = './CONFIGFILE'
        shutil.copyfile(client.CONFIG_FILE, configPath)
        schedule_for_cleanup(configPath)

        syn2 = Synapse(configPath=configPath)
        syn2.login()
    else:
        raise ValueError(
            "Please supply a username and password in the configuration file.")
def create_site_bundle(syn: Synapse, question: int, site: str):
    """Creates workflow and entity bundles for the internal site submissions

    Args:
        syn: Synapse connection
        question: Question number
        site: Site
    """
    shutil.copyfile(
        os.path.join(SCRIPT_DIR, "../infrastructure/internal_workflow.cwl"),
        os.path.join(SCRIPT_DIR,
                     f"../infrastructure/{question}_internal_workflow.cwl"))
    internal = create_evaluation_queue(
        syn, f"COVID-19 DREAM {site} - Question {question}")
    syn.setPermissions(internal,
                       accessType=[
                           'DELETE_SUBMISSION', 'DELETE', 'SUBMIT', 'UPDATE',
                           'CREATE', 'READ', 'UPDATE_SUBMISSION',
                           'READ_PRIVATE_SUBMISSION', 'CHANGE_PERMISSIONS'
                       ],
                       principalId=3407544)
    internal_test = create_evaluation_queue(
        syn, f"COVID-19 DREAM {site} - Question {question} TEST")
    syn.setPermissions(internal_test,
                       accessType=[
                           'DELETE_SUBMISSION', 'DELETE', 'SUBMIT', 'UPDATE',
                           'CREATE', 'READ', 'UPDATE_SUBMISSION',
                           'READ_PRIVATE_SUBMISSION', 'CHANGE_PERMISSIONS'
                       ],
                       principalId=3407544)
    prod_wf = os.path.join("covid19-challenge-master/infrastructure",
                           f"{question}_internal_workflow.cwl")
    test_wf = os.path.join("covid19-challenge-develop/infrastructure",
                           f"{question}_internal_workflow.cwl")
    ent = create_entity(syn, name=f"COVID-19 {site} Q{question}",
                        link=MASTER,
                        annotations={'ROOT_TEMPLATE': prod_wf})
    test_ent = create_entity(syn, name=f"COVID-19 {site} Q{question} TEST",
                             link=DEV,
                             annotations={'ROOT_TEMPLATE': test_wf})
    print(f"Add to {site}'s docker-compose .env")
    print({internal.id: ent.id, internal_test.id: test_ent.id})
    return {
        "internal_queueid": internal.id,
        "internal_queue_testid": internal_test.id
    }
def _set_acl(syn: Synapse, entity: Union[File, Folder, Project],
             acl_config: dict):
    """Sets ACLs to Synapse entity

    Args:
        syn: Synapse connection
        entity: Synapse Folder or Project
        acl_config: ACL template json configuration
    """
    for acl in acl_config:
        syn.setPermissions(entity=entity,
                           principalId=acl['principal_id'],
                           accessType=acl['access_type'])
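# Sketch of the ACL configuration _set_acl() expects: a list of dicts keyed
# by principal_id and access_type. The principal IDs below are hypothetical
# placeholders, and a logged-in `syn` is assumed.
acl_config = [
    {"principal_id": 1111111, "access_type": ["READ", "DOWNLOAD"]},
    {"principal_id": 2222222, "access_type": ["READ"]},
]
_set_acl(syn, syn.get("syn123"), acl_config)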
def test_create_Link_to_entity_with_the_same_parent():
    parent = "syn123"
    file = File("new file", parent=parent, id="syn456")
    file_bundle = {
        'accessControlList': '/repo/v1/entity/syn456/acl',
        'entityType': 'org.sagebionetworks.repo.model.FileEntity',
        'annotations': '/repo/v1/entity/syn456/annotations',
        'uri': '/repo/v1/entity/syn456',
        'createdOn': '2018-08-27T20:48:43.562Z',
        'parentId': 'syn123',
        'versionNumber': 1,
        'dataFileHandleId': '3594',
        'modifiedOn': '2018-08-27T20:48:44.938Z',
        'versionLabel': '1',
        'createdBy': '1',
        'versions': '/repo/v1/entity/syn456/version',
        'name': 'new file',
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
        'etag': '62fd1a76-ed9c-425a-b4a8-1c4e6aad7fc6',
        'modifiedBy': '1',
        'id': 'syn456',
        'versionUrl': '/repo/v1/entity/syn456/version/1'
    }
    link = Link(targetId=file, parent=parent)
    syn = Synapse(skip_checks=True)
    with patch.object(syn, "_getEntity", return_value=file_bundle):
        pytest.raises(ValueError, syn.store, link)
def syn():
    """Create a logged in Synapse instance that can be shared by all tests
    in the session.
    """
    print("Python version:", sys.version)

    syn = Synapse(debug=False, skip_checks=True)
    print("Testing against endpoints:")
    print(" " + syn.repoEndpoint)
    print(" " + syn.authEndpoint)
    print(" " + syn.fileHandleEndpoint)
    print(" " + syn.portalEndpoint + "\n")

    syn.logger = logging.getLogger(SILENT_LOGGER_NAME)
    syn.login()
    return syn
def get_dbmapping(syn: Synapse, projectid: str) -> dict:
    """Gets database mapping information

    Args:
        syn: Synapse connection
        projectid: Project id where new data lives

    Returns:
        {'synid': database mapping syn id,
         'df': database mapping pd.DataFrame}
    """
    project_ent = syn.get(projectid)
    dbmapping_synid = project_ent.annotations.get("dbMapping", "")[0]
    database_mapping = syn.tableQuery(f"select * from {dbmapping_synid}")
    database_mappingdf = database_mapping.asDataFrame()
    return {"synid": dbmapping_synid, "df": database_mappingdf}
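# Usage sketch for get_dbmapping(), assuming a hypothetical project (syn123)
# annotated with "dbMapping" pointing at the mapping table.
import synapseclient

syn = synapseclient.login()
db_info = get_dbmapping(syn, "syn123")
print(db_info["synid"])
print(db_info["df"].head())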
def pull_wiki(syn: Synapse, project: str,
              workdir: str = "./") -> typing.List[dict]:
    """Downloads each wikipage's content into a markdown file and
    stores a configuration file

    Args:
        syn: Synapse connection
        project: synapseclient.Project or its id
        workdir: Location to download markdown files and wiki_config.json
                 into. Defaults to location of where code is being executed.

    Returns:
        Wiki Configuration::

            [
                {
                    "id": "111",
                    "title": "homepage",
                    "markdown_path": "111-homepage.md"
                },
                {...}
            ]

    """
    projectid = synapseclient.core.utils.id_of(project)
    wiki_headers = syn.getWikiHeaders(projectid)
    for wiki_header in wiki_headers:
        wiki = syn.getWiki(projectid, subpageId=wiki_header['id'])
        # Strip all special characters so markdown paths don't contain them
        # and the json file can be written without special encoding
        clean_title = ''.join(letter for letter in wiki['title']
                              if letter.isalnum())
        # Home page title is always blank
        if clean_title == '':
            clean_title = 'homepage'
        wiki_header['title'] = clean_title
        # The wiki id is added to the markdown path because wiki ids are
        # unique, but wiki titles don't have to be
        markdown_path = os.path.join(workdir, f"{wiki.id}-{clean_title}.md")
        with open(markdown_path, 'w') as md_file:
            md_file.write(wiki['markdown'])
        wiki_header['markdown_path'] = f"{wiki.id}-{clean_title}.md"
    return wiki_headers
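# Usage sketch for pull_wiki(), assuming a hypothetical project ID. Writing
# the returned headers to wiki_config.json mirrors the layout push_wiki()
# reads back after the markdown files are edited.
import json
import synapseclient

syn = synapseclient.login()
config = pull_wiki(syn, "syn123", workdir="./wiki_backup")
with open("./wiki_backup/wiki_config.json", "w") as config_f:
    json.dump(config, config_f, indent=4)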
def create_evaluation_queue(syn: Synapse, name: str) -> Evaluation:
    """Creates evaluation queue

    Args:
        syn: Synapse connection
        name: Name of queue

    Returns:
        a synapseclient.Evaluation
    """
    queue = Evaluation(name=name, contentSource="syn21849255")
    try:
        queue = syn.store(queue)
    except Exception:
        # Queue names are unique; if the store fails because the queue
        # already exists, fetch it by name instead
        url_name = quote(name)
        queue = syn.restGET(f"/evaluation/name/{url_name}")
        queue = Evaluation(**queue)
    return queue
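# Sketch: create_evaluation_queue() is effectively idempotent, since storing
# an existing name falls through to a lookup by name. The queue name is
# hypothetical and a logged-in `syn` is assumed.
queue = create_evaluation_queue(syn, "COVID-19 DREAM Challenge - Question 9")
print(queue.id)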
def create_main_bundle(syn: Synapse, question: int):
    """Creates workflow and entity bundles for the main submission

    Args:
        syn: Synapse connection
        question: Question number
    """
    shutil.copyfile(
        os.path.join(SCRIPT_DIR, "../infrastructure/main_workflow.cwl"),
        os.path.join(SCRIPT_DIR,
                     f"../infrastructure/{question}_workflow.cwl"))
    main_queue = create_evaluation_queue(
        syn, f"COVID-19 DREAM Challenge - Question {question}")
    # Global view
    syn.setPermissions(main_queue, accessType=['READ'])
    # Participant team
    syn.setPermissions(main_queue, accessType=['READ'], principalId=3407543)
    # Admin team
    syn.setPermissions(main_queue,
                       accessType=[
                           'DELETE_SUBMISSION', 'DELETE', 'SUBMIT', 'UPDATE',
                           'CREATE', 'READ', 'UPDATE_SUBMISSION',
                           'READ_PRIVATE_SUBMISSION', 'CHANGE_PERMISSIONS'
                       ],
                       principalId=3407544)
    main_queue_test = create_evaluation_queue(
        syn, f"COVID-19 DREAM Challenge - Question {question} TEST")
    syn.setPermissions(main_queue_test,
                       accessType=[
                           'DELETE_SUBMISSION', 'DELETE', 'SUBMIT', 'UPDATE',
                           'CREATE', 'READ', 'UPDATE_SUBMISSION',
                           'READ_PRIVATE_SUBMISSION', 'CHANGE_PERMISSIONS'
                       ],
                       principalId=3407544)
    prod_wf = os.path.join("covid19-challenge-master/infrastructure",
                           f"{question}_workflow.cwl")
    test_wf = os.path.join("covid19-challenge-develop/infrastructure",
                           f"{question}_workflow.cwl")
    main_ent = create_entity(syn, name=f"COVID-19 Q{question}",
                             link=MASTER,
                             annotations={'ROOT_TEMPLATE': prod_wf})
    main_test_ent = create_entity(syn, name=f"COVID-19 Q{question} TEST",
                                  link=DEV,
                                  annotations={'ROOT_TEMPLATE': test_wf})
    print("Add to NCAT's docker-compose .env")
    print({main_queue.id: main_ent.id, main_queue_test.id: main_test_ent.id})
    return {
        "main_queueid": main_queue.id,
        "main_queue_testid": main_queue_test.id
    }
def _get_wikipages_and_mapping(
        syn: Synapse, entity: Union[File, Folder, Project],
        destination: Union[File, Folder, Project]) -> dict:
    """Get entity/destination pages and mapping of wiki pages

    Args:
        syn: Synapse connection
        entity: Synapse File, Project, Folder Entity or Id with
                Wiki you want to copy
        destination: Synapse File, Project, Folder Entity or Id
                     with Wiki that matches entity

    Returns:
        {'entity_wiki_pages': {'title': synapseclient.Wiki}
         'destination_wiki_pages': {'title': synapseclient.Wiki}
         'wiki_mapping': {'wiki_id': 'dest_wiki_id'}}
    """
    # Use distinct names for the headers so the iterables aren't rebound
    # by the page lookups inside the loops below
    entity_wiki_headers = _get_headers(syn, entity)
    destination_wiki_headers = _get_headers(syn, destination)

    entity_wiki_pages = {}
    for wiki in entity_wiki_headers:
        entity_wiki_page = syn.getWiki(entity, wiki['id'])
        entity_wiki_pages[wiki['title']] = entity_wiki_page

    # Mapping dictionary containing wiki page mapping between
    # entity and destination
    wiki_mapping = {}
    destination_wiki_pages = {}
    for wiki in destination_wiki_headers:
        destination_wiki_page = syn.getWiki(destination, wiki['id'])
        destination_wiki_pages[wiki['title']] = destination_wiki_page
        # Only map wiki pages that exist in `entity` (source)
        if entity_wiki_pages.get(wiki['title']) is not None:
            wiki_mapping[entity_wiki_pages[wiki['title']].id] = wiki['id']
        else:
            logger.info("Title exists at destination but not in "
                        f"entity: {wiki['title']}")
    return {
        'entity_wiki_pages': entity_wiki_pages,
        'destination_wiki_pages': destination_wiki_pages,
        'wiki_mapping': wiki_mapping
    }
def test_synapse_client__discrete_sts_token_stores(self, mock_fetch_token):
    """Verify that two Synapse objects will not share the same cached tokens"""
    syn1 = Synapse(skip_checks=True)
    syn2 = Synapse(skip_checks=True)

    expected_token = {
        'awsAccessKeyId': 'ABC',
        'awsSecretAccessKey': '456',
        'expiration': datetime_to_iso(datetime.datetime.utcnow() +
                                      datetime.timedelta(hours=12))
    }
    mock_fetch_token.return_value = expected_token

    synapse_id = 'syn_123'
    permission = 'read_write'

    token = syn1.get_sts_storage_token(synapse_id, permission)
    assert expected_token == token
    assert mock_fetch_token.call_count == 1

    token = syn1.get_sts_storage_token(synapse_id, permission)
    assert expected_token == token
    # should have been satisfied from cache, not fetched again
    assert mock_fetch_token.call_count == 1

    # but now fetching from a separate synapse object should not be
    # satisfied from a common cache
    token = syn2.get_sts_storage_token(synapse_id, permission)
    assert expected_token == token
    assert mock_fetch_token.call_count == 2
def _get_team_count(syn: Synapse, teamid: int) -> dict:
    """Rest call wrapper for getting team member count

    Args:
        syn: Synapse object
        teamid: Synapse team id

    Returns:
        {'count': number of team members}
    """
    count = syn.restGET(f"/teamMembers/count/{teamid}")
    return count
def get_team_count(syn: Synapse, team: Union[int, str, Team]) -> int:
    """Get number of team members

    Args:
        syn: Synapse object
        team: synapseclient.Team, its id, or name.
    """
    team_obj = syn.getTeam(team)
    count = _get_team_count(syn, team_obj.id)
    return count['count']
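# Usage sketch for get_team_count(); the team name is hypothetical and a
# logged-in `syn` is assumed. Accepts a Team object, id, or name.
n_members = get_team_count(syn, "My Uniquely Named Team")
print(f"{n_members} members")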
def store_narrow_maf(syn: Synapse, filepath: str, maf_tableid: str):
    """Stores the narrow maf in Synapse Table

    Args:
        syn: Synapse connection
        filepath: Path to maf file
        maf_tableid: database synid
    """
    logger.info(f"STORING {filepath}")
    # database = syn.get(maf_tableid)
    try:
        update_table = synapseclient.Table(maf_tableid, filepath,
                                           separator="\t")
        syn.store(update_table)
    except SynapseTimeoutError:
        # This error occurs because of waiting for table to index.
        # Don't worry about this.
        pass
def syn_connect(args):
    "connect and log in based on options"
    syn_conf = LrgaspSynConfig.factory(args)
    syn = Synapse()
    syn.logger.setLevel(getLrgaspLogger().level)  # cut down on noise
    # None user/password uses cache or prompts.  Command line overrides conf
    user = args.synapseUser if args.synapseUser is not None else syn_conf.user
    password = (args.synapsePassword if args.synapsePassword is not None
                else syn_conf.password)
    login_with_prompt(syn, user, password, rememberMe=args.rememberMe)
    getLrgaspLogger().debug(f"logged in as synapse user '{syn.username}'")
    return syn
def annotate_submission(syn: Synapse, submissionid: str,
                        annotation_dict: dict = None,
                        status: str = None, is_private: bool = True,
                        force: bool = False) -> MockResponse:
    """Annotate submission with annotation values from a dict

    Args:
        syn: Synapse object
        submissionid: Submission id
        annotation_dict: Annotation dict
        status: Submission Status
        is_private: Set annotations acl to private (default is True)
        force: Force change the annotation from
               private to public and vice versa.

    Returns:
        MockResponse
    """
    sub_status = syn.getSubmissionStatus(submissionid)
    # Update the status as well
    if status is not None:
        sub_status.status = status
    if annotation_dict is None:
        annotation_dict = {}
    # Don't add any annotations that are None
    annotation_dict = {key: annotation_dict[key] for key in annotation_dict
                       if annotation_dict[key] is not None}
    sub_status = update_single_submission_status(sub_status, annotation_dict,
                                                 is_private=is_private,
                                                 force=force)
    sub_status = update_submission_status(sub_status, annotation_dict)
    syn.store(sub_status)
    return MockResponse
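# Usage sketch for annotate_submission(); the submission id and annotation
# values are hypothetical, and a logged-in `syn` is assumed.
annotate_submission(
    syn,
    "9999999",
    annotation_dict={"auc": 0.87, "notes": None},  # None values are dropped
    status="SCORED",
    is_private=False,
)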
def mirror(syn: Synapse, entity: Union[File, Folder, Project],
           destination: Union[File, Folder, Project],
           force: bool = False, dryrun: bool = False):
    """Mirrors (sync) wiki pages by using the wikipage titles between two
    Synapse Entities.  This function only works if `entity` and `destination`
    are the same type and both must have wiki pages.  Only wiki pages with
    the same titles will be copied from `entity` to `destination` - if there
    is a wiki page that you want to add, you will have to create a wiki page
    first in the `destination` with the same name.

    Args:
        syn: Synapse connection
        entity: Synapse File, Project, Folder Entity or Id with
                Wiki you want to copy
        destination: Synapse File, Project, Folder Entity or Id
                     with Wiki that matches entity
        force: Update a page even if it's the same. Default to False.
        dryrun: Show the pages that have changed but don't update.
                Default is False.
    """
    entity = syn.get(entity, downloadFile=False)
    destination = syn.get(destination, downloadFile=False)
    if type(entity) is not type(destination):
        raise ValueError("Can only mirror wiki pages between similar "
                         "entity types")
    # Get entity/destination pages and mapping of wiki pages
    pages_and_mappings = _get_wikipages_and_mapping(syn, entity, destination)

    if dryrun:
        logger.info("Your wiki pages will not be mirrored. `dryrun` is True")
    _update_wiki(syn, **pages_and_mappings, force=force, dryrun=dryrun,
                 entity=entity, destination=destination)
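# Usage sketch for mirror(): preview changes with dryrun, then apply. The
# source/destination project IDs are hypothetical and a logged-in `syn`
# is assumed.
mirror(syn, "syn111", "syn222", dryrun=True)  # log what would change
mirror(syn, "syn111", "syn222")               # copy changed pages only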
def get_dbmapping(syn: Synapse, project_id: str) -> dict:
    """Gets database mapping information

    Args:
        syn: Synapse connection
        project_id: Project id where new data lives

    Returns:
        {'synid': database mapping syn id,
         'df': database mapping pd.DataFrame}
    """
    project_ent = syn.get(project_id)
    dbmapping_synid = project_ent.annotations.get("dbMapping", "")[0]
    database_mappingdf = get_syntabledf(syn,
                                        f'select * from {dbmapping_synid}')
    return {'synid': dbmapping_synid, 'df': database_mappingdf}
def test_EntityViewSchema__before_synapse_store(syn):
    syn = Synapse(debug=True, skip_checks=True)
    with patch.object(syn, '_get_default_view_columns') as mocked_get_default,\
            patch.object(syn, '_get_annotation_view_columns') as mocked_get_annotations,\
            patch.object(SchemaBase, "_before_synapse_store"):
        submission_view = EntityViewSchema(scopes=['syn123'], parent="idk")
        submission_view._before_synapse_store(syn)
        mocked_get_default.assert_called_once_with("entityview",
                                                   view_type_mask=1)
        mocked_get_annotations.assert_called_once_with(['syn123'],
                                                       "entityview",
                                                       view_type_mask=1)
def test_EntityViewSchema__repeated_columnName_same_type(syn):
    syn = Synapse(debug=True, skip_checks=True)
    entity_view = EntityViewSchema("someName", parent="syn123")

    columns = [Column(name='annoName', columnType='INTEGER'),
               Column(name='annoName', columnType='INTEGER')]

    with patch.object(syn, 'getColumns') as mocked_get_columns:
        filtered_results = entity_view._filter_duplicate_columns(syn, columns)
        mocked_get_columns.assert_called_once_with([])
        assert 1 == len(filtered_results)
        assert Column(name='annoName',
                      columnType='INTEGER') == filtered_results[0]
def get_registered_challenges(syn: Synapse,
                              userid: str = None) -> Iterator[Project]:
    """Get the Synapse Challenge Projects a user is registered to.
    Defaults to the logged in synapse user.

    Args:
        syn: Synapse connection
        userid: Specify userid if you want to know the challenges
                another Synapse user is registered to.

    Yields:
        A synapseclient.Project
    """
    challenge_api = ChallengeApi(syn=syn)
    # This will return the logged in user profile if None is passed in
    profile = syn.getUserProfile(userid)
    userid = profile.ownerId
    registered = challenge_api.get_registered_challenges(participantId=userid)
    for challenge in registered:
        challenge_ent = syn.get(challenge.projectId)
        print(challenge_ent.name)
        yield challenge_ent
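# Usage sketch for get_registered_challenges(); it is a generator, so iterate
# to fetch each challenge project. A logged-in `syn` is assumed.
for project in get_registered_challenges(syn):
    print(project.id, project.name)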
def test_login(test_state):
    alt_syn = Synapse()
    username = "******"
    password = "******"
    with patch.object(alt_syn, "login") as mock_login, \
            patch.object(alt_syn, "getUserProfile",
                         return_value={"userName": "******",
                                       "ownerId": "ownerId"}) \
            as mock_get_user_profile:
        run(test_state,
            'synapse'
            '--skip-checks',
            'login',
            '-u', username,
            '-p', password,
            '--rememberMe',
            syn=alt_syn)
        mock_login.assert_called_once_with(username, password,
                                           forced=True, rememberMe=True,
                                           silent=False)
        mock_get_user_profile.assert_called_once_with()
def submit_to_challenge(filename, challenge, label, retry=True):
    try:
        client = Synapse()
        client.login()
        evaluation = client.getEvaluation(CODES[challenge])
        filename = filename + '.gct' if challenge == 'sc1' else filename + '.zip'
        myfile = File(RESULTS_FOLDER + filename, parent=PROJECT_ID)
        myfile = client.store(myfile)
        client.submit(evaluation, myfile, name=label, teamName=TEAM)
    except Exception:
        # Retry once before giving up
        if retry:
            submit_to_challenge(filename, challenge, label, retry=False)
        else:
            print('failed to submit', label, 'to', challenge)
    if (aws_access_key_id is None) or (aws_secret_access_key is None):
        break
    else:
        s3Connections.append(S3Connection(aws_access_key_id,
                                          aws_secret_access_key))
        iamConnections.append(
            IAMConnection(aws_access_key_id=aws_access_key_id,
                          aws_secret_access_key=aws_secret_access_key)
        )
    i = i + 1

if len(s3Connections) == 0:
    raise ValueError("No AWS credentials provided")

MAXIMUM_USER_NAME_LENGTH = 63

## connect to Synapse
syn = Synapse()
syn.login(synapseUserId, synapseUserPw)
ownUserProfile = syn.getUserProfile()
ownPrincipalId = ownUserProfile["ownerId"]

## get all Participants for Evaluation
participants = syn.restGET(
    "/evaluation/" + evaluationId + "/participant?limit=99999")["results"]
print("total number of results: " + str(len(participants)))

## For each participant
participantList = []
anyNewUsers = False
for i, part in enumerate(participants):
    ## add to a list the user's first name, last name, email address,
    ## user name and principal ID
    ## "user name" is defined as <firstName>.<lastName>.<principalId>.wcpe.sagebase.org
    partId = part["userId"]
class KrakenDownload(object):
    """Utility to download Kraken DB and place them in a local directory

    ::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download('toydb')
        kd.download('minikraken')

    A large database (8Gb) is available on synapse and has the following
    DOI::

        doi:10.7303/syn6171000

    It can be downloaded manually or if you have a Synapse login
    (https://www.synapse.org), you can use::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download("sequana_db1")

    """
    dv = DevTools()

    def download(self, name, verbose=True):
        if name == "minikraken":
            self._download_minikraken(verbose=verbose)
        elif name == "toydb":
            self._download_kraken_toydb(verbose=verbose)
        elif name == "sequana_db1":
            self._download_sequana_db1(verbose=verbose)
        else:
            raise ValueError("name must be toydb or minikraken, or sequana_db1")

    def _download_kraken_toydb(self, verbose=True):
        """Download the kraken DB toy example from sequana_data into
        .config/sequana directory

        Checks the md5 checksums. About 32Mb of data
        """
        dv = DevTools()
        base = sequana_config_path + os.sep + "kraken_toydb"
        taxondir = base + os.sep + "taxonomy"
        dv.mkdir(base)
        dv.mkdir(taxondir)

        baseurl = "https://github.com/sequana/data/raw/master/"

        # download only if required
        logger.info("Downloading the database into %s" % base)

        md5sums = [
            "28661f8baf0514105b0c6957bec0fc6e",
            "97a39d44ed86cadea470352d6f69748d",
            "d91a0fcbbc0f4bbac918755b6400dea6",
            "c8bae69565af2170ece194925b5fdeb9"]
        filenames = [
            "database.idx",
            "database.kdb",
            "taxonomy/names.dmp",
            "taxonomy/nodes.dmp"]

        for filename, md5sum in zip(filenames, md5sums):
            url = baseurl + "kraken_toydb/%s" % filename
            filename = base + os.sep + filename
            if os.path.exists(filename) and md5(filename) == md5sum:
                logger.warning("%s already present" % filename)
            else:
                logger.info("Downloading %s" % url)
                wget(url, filename)

    def _download_minikraken(self, verbose=True):
        dv = DevTools()
        base = sequana_config_path
        taxondir = base + os.sep + "taxonomy"
        dv.mkdir(base)
        dv.mkdir(taxondir)

        logger.info("Downloading minikraken (4Gb)")

        filename = base + os.sep + "minikraken.tgz"
        if os.path.exists(filename) and md5(filename) == "30eab12118158d0b31718106785195e2":
            logger.warning("%s already present" % filename)
        else:
            wget("https://ccb.jhu.edu/software/kraken/dl/minikraken.tgz",
                 filename)
        # unzipping requires tar and gzip

    def _download_from_synapse(self, synid, target_dir):
        try:
            from synapseclient import Synapse
        except ImportError:
            raise ImportError(
                "Please install synapseclient using 'pip install synapseclient'")
        try:
            self._synapse.get(synid, downloadLocation=target_dir)
        except Exception:
            self._synapse = Synapse()
            self._synapse.login()
            self._synapse.get(synid, downloadLocation=target_dir)

    def _download_sequana_db1(self, verbose=True):
        dbname = "sequana_db1"
        from easydev import md5
        dir1 = sequana_config_path + os.sep + dbname
        dir2 = dir1 + os.sep + "taxonomy"
        self.dv.mkdir(dir1)
        self.dv.mkdir(dir2)

        logger.info("Downloading about 8Gb of data (if not already "
                    "downloaded) from Synapse into %s" % dir1)

        from os.path import exists
        filename = dir1 + os.sep + "ena_list.txt"
        if exists(filename) and md5(filename) == "a9cc6268f3338d1632c4712a412593f2":
            pass
        else:
            self._download_from_synapse('syn6171700', dir1)

        # database.idx
        filename = dir1 + os.sep + "database.idx"
        if exists(filename) and md5(filename) == "2fa4a99a4f52f2f04c5a965adb1534ac":
            pass
        else:
            self._download_from_synapse('syn6171017', dir1)

        # database.kdb ; this one is large (8Gb)
        filename = dir1 + os.sep + "database.kdb"
        if exists(filename) and md5(filename) == "ff698696bfc88fe83bc201937cd9cbdf":
            pass
        else:
            self._download_from_synapse('syn6171107', dir1)

        # Then, the taxonomy directory (these files are downloaded into
        # dir2, so check for them there)
        filename = dir2 + os.sep + "names.dmp"
        if exists(filename) and md5(filename) == "10bc7a63c579de02112d125a51fd65d0":
            pass
        else:
            self._download_from_synapse('syn6171286', dir2)

        filename = dir2 + os.sep + "nodes.dmp"
        if exists(filename) and md5(filename) == "a68af5a60434e2067c4a0a16df873980":
            pass
        else:
            self._download_from_synapse('syn6171289', dir2)

        filename = dir2 + os.sep + "taxons.txt"
        if exists(filename) and md5(filename) == "e78fbb43b3b41cbf4511d6af16c0287f":
            pass
        else:
            self._download_from_synapse('syn6171290', dir2)

        logger.info('done. You should have a kraken DB in %s' % dir1)

        # The annotations
        wget("https://github.com/sequana/data/raw/master/sequana_db1/annotations.csv",
             dir1 + os.sep + "annotations.csv")