def create_new_fileformat_table(
    syn: Synapse,
    file_format: str,
    newdb_name: str,
    projectid: str,
    archive_projectid: str,
) -> dict:
    """Create a replacement database table for a file format and archive
    the old one.

    Args:
        syn: Synapse object
        file_format: File format to update
        newdb_name: Name of new database table
        projectid: Project id where new database should live
        archive_projectid: Project id where old database should be moved

    Returns:
        dict with keys:
            "newdb_ent": new database synapseclient.Table
            "newdb_mappingdf": updated database mapping pd.DataFrame
            "moved_ent": archived (old) database synapseclient.Table
    """
    mapping_info = get_dbmapping(syn, projectid)
    mappingdf = mapping_info["df"]
    mapping_synid = mapping_info["synid"]

    old_synid = getDatabaseSynId(
        syn, file_format, databaseToSynIdMappingDf=mappingdf
    )
    old_ent = syn.get(old_synid)
    old_columns = list(syn.getTableColumns(old_synid))

    # Recreate the schema (same columns/annotations) under the project,
    # then point the database mapping at the new table.
    new_ent = _create_schema(
        syn,
        table_name=newdb_name,
        columns=old_columns,
        parentid=projectid,
        annotations=old_ent.annotations,
    )
    new_mappingdf = _update_database_mapping(
        syn, mappingdf, mapping_synid, file_format, new_ent.id
    )

    # Prefix a timestamp so the archived copy never collides with a table
    # that already exists in the archive project.
    archived_name = f"ARCHIVED {time.time()}-{old_ent.name}"
    moved_ent = _move_entity(syn, old_ent, archive_projectid, name=archived_name)
    return {
        "newdb_ent": new_ent,
        "newdb_mappingdf": new_mappingdf,
        "moved_ent": moved_ent,
    }
def mirror(
    syn: Synapse,
    entity: Union[File, Folder, Project],
    destination: Union[File, Folder, Project],
    force: bool = False,
    dryrun: bool = False,
):
    """Mirror (sync) wiki pages between two Synapse entities by wiki title.

    Works only when `entity` and `destination` are the same entity type and
    both already have wiki pages.  Only pages whose titles match are copied
    from `entity` to `destination`; to add a brand-new page, first create a
    page with the same name in the `destination`.

    Args:
        entity: Synapse File, Project, Folder Entity or Id with Wiki you
            want to copy
        destination: Synapse File, Project, Folder Entity or Id with Wiki
            that matches entity
        force: Update a page even if its the same. Default to False.
        dryrun: Show the pages that have changed but don't update.
            Default is False.
    """
    entity = syn.get(entity, downloadFile=False)
    destination = syn.get(destination, downloadFile=False)
    # Mirroring is only defined between entities of the same concrete type.
    if type(entity) is not type(destination):
        raise ValueError(
            "Can only mirror wiki pages between similar entity types"
        )

    # Resolve both wikis and the title-based page mapping up front.
    pages_and_mappings = _get_wikipages_and_mapping(syn, entity, destination)

    if dryrun:
        logger.info("Your wiki pages will not be mirrored. `dryrun` is True")
    _update_wiki(
        syn,
        **pages_and_mappings,
        force=force,
        dryrun=dryrun,
        entity=entity,
        destination=destination,
    )
def get_dbmapping(syn: Synapse, project_id: str) -> dict:
    """Gets database mapping information

    Args:
        syn: Synapse connection
        project_id: Project id where new data lives

    Returns:
        {'synid': database mapping syn id,
         'df': database mapping pd.DataFrame}
    """
    project = syn.get(project_id)
    # The project stores the mapping table's synapse id as the first value
    # of its "dbMapping" annotation.
    mapping_synid = project.annotations.get("dbMapping", "")[0]
    mappingdf = get_syntabledf(syn, f'select * from {mapping_synid}')
    return {'synid': mapping_synid, 'df': mappingdf}
def get_dbmapping(syn: Synapse, projectid: str) -> dict:
    """Gets database mapping information

    Args:
        syn: Synapse connection
        projectid: Project id where new data lives

    Returns:
        {'synid': database mapping syn id,
         'df': database mapping pd.DataFrame}
    """
    project = syn.get(projectid)
    # The project stores the mapping table's synapse id as the first value
    # of its "dbMapping" annotation.
    mapping_synid = project.annotations.get("dbMapping", "")[0]
    mappingdf = syn.tableQuery(f"select * from {mapping_synid}").asDataFrame()
    return {"synid": mapping_synid, "df": mappingdf}
def _combine_center_file_errors(syn: Synapse, center_errorsdf: pd.DataFrame) -> str: """Combine all center errors into one printable string Args: syn: Synapse connection center_errorsdf: Center errors dataframe Returns: Center errors in a pretty formatted string """ center_errors = "" for _, row in center_errorsdf.iterrows(): ent = syn.get(row["id"], downloadFile=False) file_errors = row["errors"].replace("|", "\n") error_text = f"\t{ent.name} ({ent.id}):\n\n{file_errors}\n\n" center_errors += error_text return center_errors
def main():
    """Command-line entry point: grant a team access to a Synapse entity."""
    args = parse_arguments()

    # Authenticate against Synapse
    syn = Synapse()
    syn.login(args.username, args.password, rememberMe=args.remember)

    # Look up the target entity (e.g., project, folder) and the team
    entity = syn.get(args.synid, downloadFile=False)
    log("Entity", entity)
    team = syn.getTeam(args.team)  # TODO: Handle users with try-catch
    log("Team", team)

    # Grant the requested access level on the entity to the team
    permissions = syn.setPermissions(entity, team.id, accessType=args.permissions)
    log("Permissions", permissions)

    # Celebrate
    print("Success!")
def get_registered_challenges(syn: Synapse, userid: str = None) -> Iterator[Project]:
    """Get the Synapse Challenge Projects a user is registered to.
    Defaults to the logged in synapse user.

    Args:
        syn: Synapse connection
        userid: Specify userid if you want to know the challenges
                another Synapse user is registered to.

    Yields:
        A synapseclient.Project
    """
    challenge_api = ChallengeApi(syn=syn)
    # getUserProfile(None) returns the logged-in user's own profile.
    userid = syn.getUserProfile(userid).ownerId
    for challenge in challenge_api.get_registered_challenges(participantId=userid):
        project = syn.get(challenge.projectId)
        print(project.name)
        yield project
class KrakenDownload(object):
    """Utility to download Kraken DB and place them in a local directory

    ::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download('toydb')
        kd.download('minikraken')

    A large database (8Gb) is available on synapse and has the following
    DOI::

        doi:10.7303/syn6171000

    It can be downloaded manually or if you have a Synapse login
    (https://www.synapse.org), you can use::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download("sequana_db1")
    """
    dv = DevTools()

    def download(self, name, verbose=True):
        """Download one of the supported databases by name.

        Args:
            name: one of ``toydb``, ``minikraken``, ``sequana_db1``
            verbose: kept for interface compatibility

        Raises:
            ValueError: if *name* is not a known database
        """
        if name == "minikraken":
            self._download_minikraken(verbose=verbose)
        elif name == "toydb":
            self._download_kraken_toydb(verbose=verbose)
        elif name == "sequana_db1":
            self._download_sequana_db1(verbose=verbose)
        else:
            raise ValueError(
                "name must be toydb or minikraken, or sequana_db1")

    def _download_kraken_toydb(self, verbose=True):
        """Download the kraken DB toy example from sequana_data into
        .config/sequana directory

        Checks the md5 checksums. About 32Mb of data
        """
        dv = DevTools()
        base = sequana_config_path + os.sep + "kraken_toydb"
        taxondir = base + os.sep + "taxonomy"
        dv.mkdir(base)
        dv.mkdir(taxondir)

        baseurl = "https://github.com/sequana/data/raw/master/"

        # download only if required
        logger.info("Downloading the database into %s" % base)

        md5sums = [
            "28661f8baf0514105b0c6957bec0fc6e",
            "97a39d44ed86cadea470352d6f69748d",
            "d91a0fcbbc0f4bbac918755b6400dea6",
            "c8bae69565af2170ece194925b5fdeb9"]
        filenames = [
            "database.idx",
            "database.kdb",
            "taxonomy/names.dmp",
            "taxonomy/nodes.dmp"]
        for filename, md5sum in zip(filenames, md5sums):
            url = baseurl + "kraken_toydb/%s" % filename
            filename = base + os.sep + filename
            # skip the download when the cached copy verifies
            if os.path.exists(filename) and md5(filename) == md5sum:
                logger.warning("%s already present" % filename)
            else:
                logger.info("Downloading %s" % url)
                wget(url, filename)

    def _download_minikraken(self, verbose=True):
        """Download the 4Gb minikraken archive, skipping if md5 verifies."""
        dv = DevTools()
        base = sequana_config_path + os.sep + ""
        taxondir = base + os.sep + "taxonomy"
        dv.mkdir(base)
        dv.mkdir(taxondir)

        logger.info("Downloading minikraken (4Gb)")

        filename = base + os.sep + "minikraken.tgz"
        if os.path.exists(filename) and md5(
                filename) == "30eab12118158d0b31718106785195e2":
            logger.warning("%s already present" % filename)
        else:
            wget("https://ccb.jhu.edu/software/kraken/dl/minikraken.tgz",
                 filename)
        # unzipping. requires tar and gzip

    def _download_from_synapse(self, synid, target_dir):
        """Fetch a Synapse entity into *target_dir*, logging in lazily.

        A Synapse session is created and logged in on first use (or when the
        cached session fails) and reused afterwards via ``self._synapse``.
        """
        try:
            from synapseclient import Synapse
        except ImportError:
            raise ImportError(
                "Please install synapseclient using 'pip install synapseclient'"
            )
        try:
            self._synapse.get(synid, downloadLocation=target_dir)
        except Exception:
            # FIX: was a bare ``except:`` which also swallowed
            # KeyboardInterrupt/SystemExit. ``self._synapse`` may not exist
            # yet (AttributeError) or the session may be stale; build a
            # fresh client, log in, and retry once.
            self._synapse = Synapse()
            self._synapse.login()
            self._synapse.get(synid, downloadLocation=target_dir)

    def _download_sequana_db1(self, verbose=True):
        """Download the 8Gb sequana_db1 Kraken DB from Synapse.

        Each file is skipped when a local copy already exists with the
        expected md5 checksum.
        """
        dbname = "sequana_db1"
        from easydev import md5
        from os.path import exists
        dir1 = sequana_config_path + os.sep + dbname
        dir2 = dir1 + os.sep + "taxonomy"
        self.dv.mkdir(dir1)
        self.dv.mkdir(dir2)

        logger.info(
            "Downloading about 8Gb of data (if not already downloaded) from"
            " Synapse into %s" % dir1)

        # FIX: the original concatenated paths without os.sep
        # (e.g. ``dir1 + "ena_list.txt"``), so the exists/md5 cache check
        # never matched and every file was re-downloaded on each call.
        # The taxonomy files are downloaded into *dir2*, so they are now
        # also checked there.
        downloads = [
            ("ena_list.txt", "a9cc6268f3338d1632c4712a412593f2",
             'syn6171700', dir1),
            ("database.idx", "2fa4a99a4f52f2f04c5a965adb1534ac",
             'syn6171017', dir1),
            # database.kdb ; this one is large (8Gb)
            ("database.kdb", "ff698696bfc88fe83bc201937cd9cbdf",
             'syn6171107', dir1),
            # Then, the taxonomy directory
            ("names.dmp", "10bc7a63c579de02112d125a51fd65d0",
             'syn6171286', dir2),
            ("nodes.dmp", "a68af5a60434e2067c4a0a16df873980",
             'syn6171289', dir2),
            ("taxons.txt", "e78fbb43b3b41cbf4511d6af16c0287f",
             'syn6171290', dir2),
        ]
        for basename, md5sum, synid, target in downloads:
            filename = target + os.sep + basename
            if exists(filename) and md5(filename) == md5sum:
                pass  # cached copy verified; nothing to do
            else:
                self._download_from_synapse(synid, target)

        logger.info('done. You should have a kraken DB in %s' % dir1)

        # The annotations
        wget(
            "https://github.com/sequana/data/raw/master/sequana_db1/annotations.csv",
            dir1 + os.sep + "annotations.csv")
from msda import process_phospho_ms as pm
from msda import preprocessing as pr
from msda import pca
from msda import kmeans
from msda.clustering import plot_clustermap as pc
from msda import enrichr_api as ai
from msda import mapping

# Load individual 10-plexes, process and
# normalize data into a single dataset.
# --------------------------------------
syn = Synapse()
syn.login()

# Set 1: download the four 10-plex excel files from Synapse.
# FIX: the comprehension variable was named ``id``, shadowing the builtin.
set1_ids = ['syn10534323', 'syn10534325', 'syn10534331', 'syn10534329']
set1_df_list = [pd.read_excel(syn.get(synid).path) for synid in set1_ids]
# no recorded value for syn1053432
df_set1, _ = pm.merge(set1_df_list[:-1])
# Filter peptides with localization score less than 13
df_set1 = pm.filter_max_score(df_set1, max_score_cutoff=13.0)
# Disambiguate set-1 column names before the two sets are combined.
set1_columns = [
    str(s).replace('default', 'Set1') for s in df_set1.columns.tolist()
]
set1_columns = [s.replace('max_score', 'set1_max_score') for s in set1_columns]
df_set1.columns = set1_columns

# Set 2: same processing for the second batch of 10-plexes.
set2_ids = ['syn10534326', 'syn10534328', 'syn10534332', 'syn10534333']
set2_df_list = [pd.read_excel(syn.get(synid).path) for synid in set2_ids]
df_set2, _ = pm.merge(set2_df_list)
# Filter peptides with localization score less than 13
df_set2 = pm.filter_max_score(df_set2, max_score_cutoff=13.0)
class KrakenDownload(object):
    """Utility to download Kraken DB and place them in a local directory

    ::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download('toydb')
        kd.download('minikraken')

    A large database (8Gb) is available on synapse and has the following
    DOI::

        doi:10.7303/syn6171000

    It can be downloaded manually or if you have a Synapse login
    (https://www.synapse.org), you can use::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download("sequana_db1")
    """
    dv = DevTools()

    def download(self, name, verbose=True):
        """Download one of the supported databases by name.

        Args:
            name: one of ``toydb``, ``minikraken``, ``sequana_db1``
            verbose: kept for interface compatibility

        Raises:
            ValueError: if *name* is not a known database
        """
        if name == "minikraken":
            self._download_minikraken(verbose=verbose)
        elif name == "toydb":
            self._download_kraken_toydb(verbose=verbose)
        elif name == "sequana_db1":
            self._download_sequana_db1(verbose=verbose)
        else:
            raise ValueError("name must be toydb or minikraken, or sequana_db1")

    def _download_kraken_toydb(self, verbose=True):
        """Download the kraken DB toy example from sequana_data into
        .config/sequana directory

        Checks the md5 checksums. About 32Mb of data
        """
        dv = DevTools()
        base = sequana_config_path + os.sep + "kraken_toydb"
        taxondir = base + os.sep + "taxonomy"
        dv.mkdir(base)
        dv.mkdir(taxondir)

        baseurl = "https://github.com/sequana/data/raw/master/"

        # download only if required
        logger.info("Downloading the database into %s" % base)

        md5sums = [
            "28661f8baf0514105b0c6957bec0fc6e",
            "97a39d44ed86cadea470352d6f69748d",
            "d91a0fcbbc0f4bbac918755b6400dea6",
            "c8bae69565af2170ece194925b5fdeb9"]
        filenames = [
            "database.idx",
            "database.kdb",
            "taxonomy/names.dmp",
            "taxonomy/nodes.dmp"]
        for filename, md5sum in zip(filenames, md5sums):
            url = baseurl + "kraken_toydb/%s" % filename
            filename = base + os.sep + filename
            # skip the download when the cached copy verifies
            if os.path.exists(filename) and md5(filename) == md5sum:
                logger.warning("%s already present" % filename)
            else:
                logger.info("Downloading %s" % url)
                wget(url, filename)

    def _download_minikraken(self, verbose=True):
        """Download the 4Gb minikraken archive, skipping if md5 verifies."""
        dv = DevTools()
        base = sequana_config_path + os.sep + ""
        taxondir = base + os.sep + "taxonomy"
        dv.mkdir(base)
        dv.mkdir(taxondir)

        logger.info("Downloading minikraken (4Gb)")

        filename = base + os.sep + "minikraken.tgz"
        if os.path.exists(filename) and md5(filename) == "30eab12118158d0b31718106785195e2":
            logger.warning("%s already present" % filename)
        else:
            wget("https://ccb.jhu.edu/software/kraken/dl/minikraken.tgz", filename)
        # unzipping. requires tar and gzip

    def _download_from_synapse(self, synid, target_dir):
        """Fetch a Synapse entity into *target_dir*, logging in lazily.

        A Synapse session is created and logged in on first use (or when the
        cached session fails) and reused afterwards via ``self._synapse``.
        """
        try:
            from synapseclient import Synapse
        except ImportError:
            raise ImportError("Please install synapseclient using 'pip install synapseclient'")
        try:
            self._synapse.get(synid, downloadLocation=target_dir)
        except Exception:
            # FIX: was a bare ``except:`` which also swallowed
            # KeyboardInterrupt/SystemExit. ``self._synapse`` may not exist
            # yet (AttributeError) or the session may be stale; build a
            # fresh client, log in, and retry once.
            self._synapse = Synapse()
            self._synapse.login()
            self._synapse.get(synid, downloadLocation=target_dir)

    def _download_sequana_db1(self, verbose=True):
        """Download the 8Gb sequana_db1 Kraken DB from Synapse.

        Each file is skipped when a local copy already exists with the
        expected md5 checksum.
        """
        dbname = "sequana_db1"
        from easydev import md5
        from os.path import exists
        dir1 = sequana_config_path + os.sep + dbname
        dir2 = dir1 + os.sep + "taxonomy"
        self.dv.mkdir(dir1)
        self.dv.mkdir(dir2)

        logger.info("Downloading about 8Gb of data (if not already downloaded) from"
                    " Synapse into %s" % dir1)

        # FIX: the original concatenated paths without os.sep
        # (e.g. ``dir1 + "ena_list.txt"``), so the exists/md5 cache check
        # never matched and every file was re-downloaded on each call.
        # The taxonomy files are downloaded into *dir2*, so they are now
        # also checked there.
        downloads = [
            ("ena_list.txt", "a9cc6268f3338d1632c4712a412593f2", 'syn6171700', dir1),
            ("database.idx", "2fa4a99a4f52f2f04c5a965adb1534ac", 'syn6171017', dir1),
            # database.kdb ; this one is large (8Gb)
            ("database.kdb", "ff698696bfc88fe83bc201937cd9cbdf", 'syn6171107', dir1),
            # Then, the taxonomy directory
            ("names.dmp", "10bc7a63c579de02112d125a51fd65d0", 'syn6171286', dir2),
            ("nodes.dmp", "a68af5a60434e2067c4a0a16df873980", 'syn6171289', dir2),
            ("taxons.txt", "e78fbb43b3b41cbf4511d6af16c0287f", 'syn6171290', dir2),
        ]
        for basename, md5sum, synid, target in downloads:
            filename = target + os.sep + basename
            if exists(filename) and md5(filename) == md5sum:
                pass  # cached copy verified; nothing to do
            else:
                self._download_from_synapse(synid, target)

        logger.info('done. You should have a kraken DB in %s' % dir1)

        # The annotations
        wget("https://github.com/sequana/data/raw/master/sequana_db1/annotations.csv",
             dir1 + os.sep + "annotations.csv")
# Download Synapse reference and example data using inputs.json
# into "./data"
#
# FIX: the original was Python 2 (``print`` statements, ``raw_input``),
# which is a SyntaxError under Python 3 and inconsistent with the rest of
# the codebase; also ``json.load(open(...))`` leaked the file handle.
from getpass import getpass
import json

from synapseclient import Synapse

print('''
This script helps download the reference and example data from synapse.org.
You need a Synapse user account.  https://www.synapse.org/
Data will be downloaded to ./data
''')

username = input("Enter your Synapse account email: ")
password = getpass()

syn = Synapse()
print('Logging in to synapse.org...')
syn.login(username, password, rememberMe=True)

print('Loading synapse_inputs.json')
with open('synapse_inputs.json') as fh:
    inputs = json.load(fh)

for k, v in inputs.items():
    print('Downloading "{}"'.format(k))
    syn.get(v, downloadLocation='data')
def process_mutation_workflow(
    syn: Synapse,
    center: str,
    validfiles: pd.DataFrame,
    genie_annotation_pkg: str,
    database_mappingdf: pd.DataFrame,
    workdir: str,
) -> str:
    """Process vcf/maf workflow

    Args:
        syn: Synapse connection
        center: Center name
        validfiles: Center validated files
        genie_annotation_pkg: Genome Nexus annotation tools
        database_mappingdf: Database to synapse id mapping dataframe
            (FIX: was wrongly annotated ``str``; it is indexed as a
            DataFrame below)
        workdir: Working directory

    Returns:
        Annotated Maf Path, or None when the center has no valid
        vcf/maf files.
    """
    # Get valid files
    mutation_mask = validfiles["fileType"].isin(["maf", "vcf"])
    valid_mutation_files = validfiles["path"][mutation_mask].tolist()
    # If there are no valid mutation files, return (made explicit)
    if not valid_mutation_files:
        logger.info("No mutation data")
        return None

    # Certificate to use GENIE Genome Nexus
    syn.get(
        "syn22053204",
        ifcollision="overwrite.local",
        downloadLocation=genie_annotation_pkg,
    )
    # Genome Nexus Jar file
    syn.get(
        "syn22084320",
        ifcollision="overwrite.local",
        downloadLocation=genie_annotation_pkg,
    )

    annotated_maf_path = annotate_mutation(
        center=center,
        mutation_files=valid_mutation_files,
        genie_annotation_pkg=genie_annotation_pkg,
        workdir=workdir,
    )

    # Resolve the target table / folder synapse ids from the mapping.
    maf_tableid = database_mappingdf.Id[
        database_mappingdf["Database"] == "vcf2maf"
    ].iloc[0]
    flatfiles_synid = database_mappingdf.Id[
        database_mappingdf["Database"] == "centerMaf"
    ].iloc[0]

    # Split into narrow maf and store into db / flat file
    split_and_store_maf(
        syn=syn,
        center=center,
        maf_tableid=maf_tableid,
        annotated_maf_path=annotated_maf_path,
        flatfiles_synid=flatfiles_synid,
        workdir=workdir,
    )

    return annotated_maf_path