def synapse_login(self):
    """Interactively log into Synapse and cache the client on this instance.

    Credentials are resolved in order: values already set on the instance,
    the SYNAPSE_USERNAME / SYNAPSE_PASSWORD environment variables, then an
    interactive prompt.

    Returns:
        bool: True if a Synapse client was created and logged in.

    NOTE(review): the middle of this function was redacted in the source
    (credential scrubbing replaced code with '******'); the prompt/login
    sequence below is reconstructed from the sibling login helpers in this
    file -- confirm against the original implementation.
    """
    import getpass  # local import keeps the reconstruction self-contained

    print('Logging into Synapse...')
    self._username = self._username or os.getenv('SYNAPSE_USERNAME')
    self._password = self._password or os.getenv('SYNAPSE_PASSWORD')

    if not self._username:
        self._username = input('Synapse username: ')
    if not self._password:
        # getpass avoids echoing the password to the terminal
        self._password = getpass.getpass('Synapse password: ')

    try:
        self._synapse_client = synapseclient.Synapse()
        self._synapse_client.login(self._username, self._password, silent=True)
    except Exception as ex:
        self._synapse_client = None
        print('Synapse login failed: {0}'.format(str(ex)))

    return self._synapse_client is not None
def submit(folder, id, password):
    """Submit every file in *folder* to the appropriate Synapse evaluation.

    Files whose names start with 'L' go to evaluation 7071644, all others to
    7212779.  Each file is stored under the Nabla project and submitted on
    behalf of team 'Nabla'.

    Args:
        folder: local directory containing submission files.
        id: Synapse account e-mail (name kept for caller compatibility even
            though it shadows the builtin ``id``).
        password: Synapse account password.
    """
    syn = synapseclient.Synapse()
    syn.login(email=id, password=password)

    # NOTE(review): syn.get() normally takes a 'syn...'-prefixed string id;
    # the bare integer is preserved from the original -- verify it works.
    project = syn.get(7118431)   # Nabla
    # project = syn.get(7341111)  # Stackd (alternative target project)

    # os.listdir() already returns a list; the old identity comprehension
    # was redundant.
    for filename in os.listdir(folder):
        if filename.startswith('L'):
            evaluation = syn.getEvaluation(7071644)
        else:
            evaluation = syn.getEvaluation(7212779)

        # print() works under both Python 2 and 3 (the original used a
        # Python 2 print statement, unlike the rest of this file)
        print("Submitting " + filename)

        filepath = os.path.join(folder, filename)
        f_handler = synapseclient.File(filepath, parent=project)
        entity = syn.store(f_handler)
        syn.submit(evaluation, entity, name='test', team='Nabla')
def main(wf_name, to_upload, synapse_parent_id):
    """Mirror a local directory tree into a Synapse folder hierarchy.

    Walks ``to_upload``, creating remote folders as needed and uploading any
    non-empty file not already present remotely.  Stored files are annotated
    with the workflow name.
    """
    client = synapseclient.Synapse()
    client.login(os.environ["SYNAPSE_LOGIN"],
                 apiKey=os.environ["SYNAPSE_API_KEY"])
    base_folder, remotes = _accumulate_remotes(synapse_parent_id, client)

    for local_dir, _, names in os.walk(to_upload):
        if not names:
            continue  # nothing to upload from directories without files

        remote_dir = os.path.join(base_folder.name, local_dir)
        folder, remotes = _remote_folder(remote_dir, remotes, client)

        for name in names:
            remote_path = os.path.join(remote_dir, name)
            if remote_path in remotes:
                continue  # already uploaded on a previous run

            local_path = os.path.join(local_dir, name)
            if os.path.getsize(local_path) <= 0:
                continue  # skip empty files

            print("Uploading %s" % local_path)
            entity = synapseclient.File(local_path, parent=folder)
            entity.workflow = wf_name
            entity.workflowOption = "all"
            stored = client.store(entity)
            remotes[remote_path] = stored.id
def main():
    """Read bootstrapped PCBC annotation rows from a TSV and apply them.

    The input file defaults to stdin; --dryrun previews changes without
    writing them, --overwrite replaces existing annotations.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    # BUG FIX: the fallback for this *readable* positional was sys.stdout in
    # the original; reading should fall back to stdin.
    parser.add_argument('inputfile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument(
        '--overwrite', action="store_true", default=False,
        help="Overwrite existing annotations [default: %(default)s]")
    parser.add_argument(
        '--dryrun', action="store_true", default=False,
        help="Run without making changes [default: %(default)s]")
    parser.add_argument('--verbose', action="store_true", default=False,
                        help="Output status [default: %(default)s]")
    args = parser.parse_args()

    syn = synapseclient.Synapse(debug=False, skip_checks=True)
    syn.login(silent=True)

    # the bootstrap file is tab-separated; each row is one annotation record
    bootstrapped_data = list(csv.DictReader(args.inputfile, delimiter='\t'))
    updater = UpdatePCBCAnnotations(syn, bootstrapped_data)
    updater.update_annotations(overwrite=args.overwrite, dryrun=args.dryrun,
                               verbose=args.verbose)
def main():
    """Plot experiment results for a given mutation classifier."""
    parser = argparse.ArgumentParser(
        description='Plot experiment results for given mutation classifier.')
    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')
    # NOTE(review): a positional without nargs='?' ignores its default --
    # this argument is effectively required.
    parser.add_argument('mut_levels', default='Form_base__Exon')
    # BUG FIX: the cutoff is now parsed as an int (matching its sibling
    # scripts) and actually threaded through to the cohort below, where the
    # original hard-coded 20.
    parser.add_argument('--samp_cutoff', type=int, default=20)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(os.path.join(plot_dir, args.cohort, args.gene), exist_ok=True)

    prob_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif,
                     'samps_{}'.format(args.samp_cutoff), args.mut_levels)
        ).applymap(np.mean)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=None,
                           samp_cutoff=args.samp_cutoff,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    # restrict plotting to subtypes defined by a single mutation property
    singl_mtypes = [mtype for mtype in prob_df.index
                    if len(mtype.subkeys()) == 1]

    for singl_mtype in singl_mtypes:
        plot_mtype_positions(prob_df.loc[singl_mtype, :], args, cdata)
def main():
    """Plot inferred vs. actual CNA scores for a cohort's samples."""
    parser = argparse.ArgumentParser(
        "Plot the inferred CNA scores for a cohort's samples against their "
        "actual CNA scores for a given set of cutoffs.")

    # the three positional arguments, declared data-driven
    for arg_lbl, arg_doc in (('cohort', 'a TCGA cohort'),
                             ('gene', 'a mutated gene'),
                             ('classif', 'a mutation classifier')):
        parser.add_argument(arg_lbl, help=arg_doc)

    # parse command-line arguments, create directory where plots will be saved
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=[args.gene],
                           mut_levels=['Gene'], expr_source='Firehose',
                           var_source='mc3', expr_dir=firehose_dir,
                           copy_source='Firehose', copy_dir=copy_dir,
                           copy_discrete=False, syn=syn, cv_prop=1.0)

    iso_df = load_infer_output(
        os.path.join(base_dir, 'output', args.cohort, args.gene, args.classif))
    loss_df, gain_df = get_aucs(iso_df, args, cdata)

    # plot, in order: the subtype with the best loss-CNA AUC, the best
    # gain-CNA AUC, and the largest CNA-over-mutation AUC margin
    for best_idx in (loss_df['CNA'].idxmax(),
                     gain_df['CNA'].idxmax(),
                     (loss_df['CNA'] - loss_df['Mut']).idxmax()):
        plot_cna_scores(iso_df.loc[best_idx, :], args, cdata)
def main(argv):
    """Runs the experiment.

    argv layout: [0] TCGA cohort label, [1] gene, [2] classifier name,
    [3] cross-validation id.
    """
    # gets the directory where output will be saved and the name of the TCGA
    # cohort under consideration
    print(argv)
    out_dir = os.path.join(base_dir, 'output', argv[0], argv[1], argv[2])
    coh_lbl = 'TCGA-{}'.format(argv[0])

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    # loads the expression data and gene mutation data for the given TCGA
    # cohort, with the training/testing cohort split defined by the
    # cross-validation id for this task
    cdata = VariantCohort(cohort=coh_lbl, mut_genes=[argv[1]],
                          mut_levels=('Gene', 'Form', 'Exon',
                                      'Location', 'Protein'),
                          syn=syn, cv_seed=(int(argv[3]) + 3) * 17)

    base_mtype = MuType({('Gene', argv[1]): None})

    # SECURITY NOTE: eval() turns argv[2] into a classifier class -- argv
    # must come from a trusted scheduler, never from untrusted input
    optim = PartitionOptim(cdata, base_mtype, eval(argv[2]),
                           ('Form', 'Exon', 'Location', 'Protein'))

    # exhaustively traverse the partition search space (the original bound
    # best_optim()'s result to an unused local; the call is kept as-is)
    while optim.traverse_branch():
        optim.best_optim()

    # saves classifier results to file
    # BUG FIX: use a context manager so the output handle is closed (the
    # original leaked the handle returned by open(..., 'wb'))
    out_file = os.path.join(out_dir, 'results',
                            'out__cv-{}.p'.format(argv[3]))
    with open(out_file, 'wb') as fh:
        pickle.dump({'best': optim.best_mtypes,
                     'hist': optim.mtype_scores,
                     'pred': optim.pred_scores,
                     'optim': optim.best_optim()},
                    fh)
def main():
    """Dispatch the table subcommands (new_table / overwrite_table)."""
    shared = argparse.ArgumentParser(add_help=False)
    shared.add_argument("--config_file", type=argparse.FileType("r"),
                        help="Full pathname for the YAML config file")
    shared.add_argument("--consortium", type=str, default=None,
                        help="Consortium to create the table for")

    parser = argparse.ArgumentParser(parents=[shared], add_help=True)
    subparsers = parser.add_subparsers()

    # 'new_table' creates a table under a parent project
    new_table = subparsers.add_parser("new_table", help="New table help")
    new_table.add_argument("--parent_synapse_id", type=str,
                           help="Synapse ID of the parent project")
    new_table.add_argument("--synapse_table_name", type=str,
                           help="Name of the Synapse table")
    new_table.set_defaults(func=process_new_table)

    # 'overwrite_table' replaces an existing table in place
    overwrite_table = subparsers.add_parser("overwrite_table",
                                            help="Overwrite table help")
    overwrite_table.add_argument(
        "--table_synapse_id", type=str,
        help="Synapse ID of the table to be overwritten")
    overwrite_table.set_defaults(func=process_overwrite_table)

    args = parser.parse_args()

    client = synapseclient.Synapse()
    client.login(silent=True)
    args.func(args, client)
def main():
    """Plot distributions of gene-weight coefficients from a Stan model."""
    parser = argparse.ArgumentParser(
        "Plot the distributions of gene weight coefficients inferred by a "
        "given Stan classifier trained to predict the mutation status of a "
        "gene in a given TCGA cohort."
        )

    parser.add_argument('model_name', type=str, help="label of a Stan model")
    parser.add_argument('solve_method', type=str,
                        help=("method used to obtain estimates for the "
                              "parameters of the model"))
    parser.add_argument('cohort', type=str, help="a TCGA cohort")
    parser.add_argument('gene', type=str, help="a mutated gene")

    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    fitted_vars = load_vars(args.model_name, args.solve_method,
                            args.cohort, args.gene)

    # bail out early if this model did not expose per-gene weights
    if 'gn_wghts' not in fitted_vars:
        raise ValueError("Can only plot inferred gene weights for a model "
                         "that includes them as variables!")

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ('/home/exacloud/lustre1/CompBio'
                                '/mgrzad/input-data/synapse')
    syn.login()

    cdata = MutationCohort(
        cohort=args.cohort, mut_genes=[args.gene], mut_levels=['Gene'],
        expr_source='Firehose', expr_dir=firehose_dir, var_source='mc3',
        syn=syn, cv_prop=1.0
        )

    # one row per background gene (the target gene itself is excluded)
    weight_frame = pd.DataFrame(fitted_vars['gn_wghts'],
                                index=sorted(cdata.genes - {args.gene}))
    plot_weights_cov(weight_frame, args, cdata)
def main():
    """Plot unsupervised clustering of a TCGA cohort with gene subtypes."""
    parser = argparse.ArgumentParser(
        "Plots the clustering done by an unsupervised learning method on a "
        "TCGA cohort with subtypes of particular genes highlighted.")

    parser.add_argument('cohort', type=str, help='a cohort in TCGA')
    parser.add_argument('transform', type=str,
                        help='an unsupervised learning method')
    parser.add_argument('mut_levels', type=str,
                        help='a set of mutation annotation levels')
    parser.add_argument('--genes', type=str, nargs='+', default=['TP53'],
                        help='a list of mutated genes')

    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=args.genes,
                           mut_levels=['Gene'] + args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           cv_prop=1.0, syn=syn)

    # SECURITY NOTE: eval() maps the transform name to a class -- assumes
    # trusted command-line input
    transformer = eval(args.transform)()
    trans_expr = transformer.fit_transform_coh(cdata)

    # each requested gene gets its own highlighted clustering plot
    for cur_gene in args.genes:
        plot_subtype_clustering(trans_expr.copy(), args, cdata, cur_gene)
def test_login():
    """The CLI 'login' command should forward credentials to Synapse.login."""
    stub_syn = synapseclient.Synapse()
    user = "******"
    pw = "******"
    profile = {"userName": "******", "ownerId": "ownerId"}

    with patch.object(stub_syn, "login") as login_mock:
        with patch.object(stub_syn, "getUserProfile",
                          return_value=profile) as profile_mock:
            run('synapse', '--skip-checks', 'login',
                '-u', user, '-p', pw, '--rememberMe',
                syn=stub_syn)

            # credentials and flags must reach the client unchanged
            login_mock.assert_called_once_with(user, pw, forced=True,
                                               rememberMe=True, silent=False)
            profile_mock.assert_called_once_with()
def synapse_login(self):
    """Log into Synapse and cache the client on this instance (logging variant).

    Credentials are resolved in order: values already set on the instance,
    the SYNAPSE_USERNAME / SYNAPSE_PASSWORD environment variables, then an
    interactive prompt.

    Returns:
        bool: True if a Synapse client was created and logged in.

    NOTE(review): the middle of this function was redacted in the source
    (credential scrubbing replaced code with '******'); the prompt/login
    sequence below is reconstructed from the sibling login helpers in this
    file -- confirm against the original implementation.
    """
    import getpass  # local import keeps the reconstruction self-contained

    logging.info('Logging into Synapse...')
    self._username = self._username or os.getenv('SYNAPSE_USERNAME')
    self._password = self._password or os.getenv('SYNAPSE_PASSWORD')

    if not self._username:
        self._username = input('Synapse username: ')
    if not self._password:
        # getpass avoids echoing the password to the terminal
        self._password = getpass.getpass('Synapse password: ')

    try:
        self._synapse_client = synapseclient.Synapse()
        self._synapse_client.login(self._username, self._password, silent=True)
    except Exception as ex:
        self._synapse_client = None
        logging.error('Synapse login failed: {0}'.format(str(ex)))

    return self._synapse_client is not None
def login(cls, username=None, password=None):
    """Create and cache a Synapse client for this class.

    Credentials fall back from the arguments to the SYNAPSE_USERNAME /
    SYNAPSE_PASSWORD environment variables, then to interactive prompts.

    Returns:
        bool: True if the client was created and logged in.

    NOTE(review): the prompting section was redacted in the source
    (credential scrubbing replaced code with '******'); it is reconstructed
    here -- confirm against the original implementation.
    """
    import getpass  # local import keeps the reconstruction self-contained

    username = username or os.getenv('SYNAPSE_USERNAME')
    password = password or os.getenv('SYNAPSE_PASSWORD')

    if not username:
        username = input('Synapse username: ')
    if not password:
        # getpass avoids echoing the password to the terminal
        password = getpass.getpass('Synapse password: ')

    logging.info('Logging into Synapse as: {0}'.format(username))
    try:
        # Disable the synapseclient progress output.
        syn.core.utils.printTransferProgress = lambda *a, **k: None

        cls._synapse_client = syn.Synapse(skip_checks=True)
        cls._synapse_client.login(username, password, silent=True)
    except Exception as ex:
        cls._synapse_client = None
        logging.error('Synapse login failed: {0}'.format(str(ex)))

    return cls._synapse_client is not None
def synapse_login(synapse_config=synapseclient.client.CONFIG_FILE):
    """Login to Synapse

    Args:
        synapse_config: Path to synapse configuration file.
                        Defaults to ~/.synapseConfig

    Returns:
        Synapse connection
    """
    try:
        connection = synapseclient.Synapse(configPath=synapse_config)
        connection.login(silent=True)
    except (SynapseNoCredentialsError, SynapseAuthenticationError):
        # missing or bad credentials -> surface actionable setup guidance
        raise ValueError(
            "Login error: please make sure you have correctly "
            "configured your client. Instructions here: "
            "https://help.synapse.org/docs/Client-Configuration.1985446156.html. "
            "You can also create a Synapse Personal Access Token and set it "
            "as an environmental variable: "
            "SYNAPSE_AUTH_TOKEN='<my_personal_access_token>'")
    else:
        return connection
def __init__(
    self,
    main_confs,
    user,
    push_config,
):
    """Initialize and validate basic information for a Push.

    Args:
        main_confs: main configuration object; must expose a USERS
            collection used to validate ``user``.
        user: identity of the user performing the push; validated against
            main_confs.USERS via _process_user().
        push_config: path to the push configuration, parsed by
            _process_push_config().
    """
    log.debug("Initializing Push obj.")
    self.main_confs = main_confs
    # resolve and validate the pushing user against the configured users
    self.user = self._process_user(user=user, users=self.main_confs.USERS)
    # id/time are populated later, once the push actually runs
    self.push_id = None
    self.push_time = None
    self.push_config_path = push_config
    self.push_config = self._process_push_config(push_config=push_config)
    log.info("Initializing Synapse client.")
    self.syn = synapse.Synapse()
    # DAG of push steps; built elsewhere after construction
    self.dag = None
    log.info("Creating interaction instances.")
    self._create_interactions()
def main():
    """Download GENIE files and tabulate per-sample mutations per cancer."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--username", dest="username",
                        help="User name", required=True)
    parser.add_argument("-p", "--password", dest="password", help="Password",
                        required=True)
    parser.add_argument("-c", "--cancer", nargs="*", type=str,
                        dest="cancers", help="Cancer Types",
                        required=True)
    args = parser.parse_args()

    # normalize the requested cancer types
    args.cancers = [cancer.strip().upper() for cancer in args.cancers]

    # use the synapse client to download relevant files
    syn = synapseclient.Synapse()
    syn.login(args.username, args.password)
    combinedbed = syn.get('syn13251251')
    mutationsextended = syn.get('syn13251247')
    clinicalpatient = syn.get('syn13251229')

    # parse all files and extract relevant information
    panels = make_panel_dictionary(combinedbed.path)      # panel to gene
    tumors = find_tumor_ids(clinicalpatient.path, args.cancers)
    sample_data = pull_sample_mutations(mutationsextended.path, tumors)
    mutations = make_mutations_list(sample_data)          # find all mutations
    panel_to_muts = make_panel_to_muts(panels, mutations)  # panel to mutation
    tumor_mutations = make_tumor_mutations(sample_data, mutations,
                                           panel_to_muts, tumors)

    # write the per-tumor mutation matrix out as a tab-separated table
    outname = "complete_mutations_table_%s.txt" % "_".join(args.cancers)
    with open(outname, "w") as output:
        output.write("\t".join(["Sample"] + mutations) + "\n")
        for tumor in tumor_mutations:
            row = [tumor] + list(map(str, tumor_mutations[tumor]))
            output.write("\t".join(row) + "\n")
def test_login():
    """Exercise the CLI login command with the test-authentication user."""
    creds = other_user
    if not creds['username']:
        raise SkipTest(
            "Skipping test for login command: No [test-authentication] in %s"
            % client.CONFIG_FILE)

    with patch("synapseclient.client.Synapse._writeSessionCache"
               ) as session_cache_mock:
        alt_syn = synapseclient.Synapse()
        output = run('synapse', '--skip-checks', 'login',
                     '-u', creds['username'],
                     '-p', creds['password'],
                     '--rememberMe',
                     syn=alt_syn)

        # the session cache written out must record this user as most recent
        sessions = session_cache_mock.call_args[0][0]
        assert sessions["<mostRecent>"] == creds['username']
        assert creds['username'] in sessions
        assert alt_syn.username == creds['username']
        assert alt_syn.apiKey is not None
def __init__(
    self,
    storageFileview: str,
    syn: synapseclient = None,
) -> None:
    """Instantiates a SynapseStorage object

    Args:
        syn: synapse client; if not provided instantiate one
        storageFileview: synapse ID of fileview containing administrative
            storage metadata
    """
    # reuse a caller-provided client when given; otherwise build one and
    # log in with locally configured credentials
    if syn:
        self.syn = syn
    else:
        self.syn = synapseclient.Synapse()
        self.syn.login()

    self.storageFileview = storageFileview

    # get data in administrative fileview for this pipeline
    self.setStorageFileviewTable()
def test_login():
    """Exercise the CLI login command, skipping if no test credentials."""
    try:
        config = ConfigParser.ConfigParser()
        config.read(client.CONFIG_FILE)
        # raises ConfigParser.Error when the section/options are absent,
        # which is treated as "skip this test"
        creds = {
            'username': config.get('test-authentication', 'username'),
            'password': config.get('test-authentication', 'password'),
        }

        with patch("synapseclient.client.Synapse._writeSessionCache"
                   ) as session_cache_mock:
            alt_syn = synapseclient.Synapse()
            output = run('synapse', '--skip-checks', 'login',
                         '-u', creds['username'],
                         '-p', creds['password'],
                         '--rememberMe',
                         syn=alt_syn)

            # the written session cache must record this user as most recent
            sessions = session_cache_mock.call_args[0][0]
            assert sessions["<mostRecent>"] == creds['username']
            assert creds['username'] in sessions
            assert alt_syn.username == creds['username']
            assert alt_syn.apiKey is not None

    except ConfigParser.Error:
        print("Skipping test for login command: No [test-authentication] in %s"
              % client.CONFIG_FILE)
def main():
    """Plot how well isolated subtype signatures separate mutation classes."""
    parser = argparse.ArgumentParser(
        "Plot how well expression signatures separate isolated mutation "
        "subtypes from non-mutated samples relative to how they separate "
        "mutated samples not belonging to the subtype.")

    parser.add_argument('cohort', help='a TCGA cohort')
    parser.add_argument('gene', help='a mutated gene')
    parser.add_argument('classif', help='a mutation classifier')
    parser.add_argument('mut_levels', default='Form_base__Exon',
                        help='a set of mutation annotation levels')
    parser.add_argument('--samp_cutoff', type=int, default=20)

    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = syn_root
    syn.login()

    cdata = MutationCohort(cohort=args.cohort, mut_genes=[args.gene],
                           mut_levels=args.mut_levels.split('__'),
                           expr_source='Firehose', expr_dir=firehose_dir,
                           syn=syn, cv_prop=1.0)

    # load the inference output produced for this cohort/gene/classifier run
    infer_path = os.path.join(base_dir, 'output',
                              args.cohort, args.gene, args.classif,
                              'samps_{}'.format(args.samp_cutoff),
                              args.mut_levels)
    infer_df = load_infer_output(infer_path)

    auc_vals, sep_vals, prop_vals = get_separation(infer_df, args, cdata)
    plot_separation(auc_vals, sep_vals, prop_vals, args, cdata)
def main():
    """Cluster a TCGA cohort with several unsupervised methods and plot."""
    parser = argparse.ArgumentParser()
    parser.add_argument('cohort', type=str, help='a cohort in TCGA')
    args = parser.parse_args()
    os.makedirs(plot_dir, exist_ok=True)

    # logs into Synapse using locally-stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    cdata = MutFreqCohort(cohort=args.cohort, expr_source='Firehose',
                          expr_dir=firehose_dir, cv_prop=1.0, syn=syn)

    # apply each dimensionality-reduction method to the expression data
    trans_dict = []
    for trans_lbl, transformer in (('PCA', OmicPCA()),
                                   ('t-SNE', OmicTSNE()),
                                   ('UMAP', OmicUMAP())):
        trans_dict.append((trans_lbl, transformer.fit_transform_coh(cdata)))

    plot_freq_clustering(trans_dict, args, cdata)
def main():
    """Update PCBC annotations on Synapse, optionally refreshing the table."""
    import argparse
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("--configPath", type=str,
                        default=synapseclient.client.CONFIG_FILE)
    parser.add_argument("--filename", type=str, default=None)
    parser.add_argument('--dryrun', action="store_true", default=False,
                        help="Run without making changes [default: %(default)s]")
    parser.add_argument('--parentId', type=str)
    parser.add_argument('--storetable', action="store_true", default=False)
    parser.add_argument('--projectId', type=str)
    args = parser.parse_args()

    syn = synapseclient.Synapse(skip_checks=True, configPath=args.configPath)
    syn.login(silent=True)

    annots = PCBCAnnotations.PCBCAllAnnotations(syn)
    table_update = PCBCAnnotations.PCBCAllAnnotationTableUpdate(syn, annots)

    # provenance id intentionally left unset (code-in-Synapse lookup disabled)
    my_synapse_id = None

    table_update.update_annots_synapse(filename=args.filename,
                                       parentId=args.parentId,
                                       executed=my_synapse_id,
                                       dryrun=args.dryrun)
    if args.storetable:
        tbl = table_update.update_annots_table_synapse(
            projectId=args.projectId, dryrun=args.dryrun)
def main(argv):
    """Fit a Stan model to a transfer-learning DREAM challenge cohort.

    argv layout: [0] cohort label, [1] interaction type, [2] cv seed id.
    """
    # use your own Synapse cache and credentials here
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = ("/home/exacloud/lustre1/CompBio/"
                                "mgrzad/input-data/synapse")
    syn.login()

    # loads the challenge data
    cdata = TransferDreamCohort(syn, argv[0], intx_types=[argv[1]],
                                cv_seed=(int(argv[2]) * 41) + 1, cv_prop=0.8)

    # initializes the model and fits it using all of the genes in the
    # `inter`section of the RNA genes, CNA genes, and proteome genes
    clf = mpt.StanDefault(argv[1])

    # hyper-parameter tuning is deliberately disabled; re-enable by calling
    # clf.tune_coh(cdata, pheno='inter', tune_splits=4, test_count=4,
    #              parallel_jobs=16) before fitting
    clf.fit_coh(cdata, pheno='inter')

    out_file = os.path.join(base_dir, 'output', 'intx', argv[0], 'results',
                            'out_{}_{}.p'.format(argv[1], argv[2]))

    # saves the classifier performance, and the fitted posterior means of
    # the model variables and their names to file
    # BUG FIX: close the output handle deterministically (the original
    # passed an anonymous open(..., 'wb') to pickle.dump and leaked it)
    with open(out_file, 'wb') as fh:
        pickle.dump({'Eval': clf.eval_coh(cdata, pheno='inter'),
                     'PostMeans': clf.named_steps['fit'].post_means,
                     'VarNames': clf.named_steps['fit'].var_names},
                    fh)
def download_data_synapse(list_datasets):
    """Download each dataset/annotation pair in *list_datasets* from Synapse.

    The Synapse IDs stored in each per-cancer [dataset_id, annotation_id]
    list are replaced in place by the downloaded entity objects, and the
    mutated mapping is returned.

    SECURITY NOTE(review): the Synapse credentials are hard-coded below;
    they should be moved to a config file or environment variables.
    """
    # Connect to Synapse server
    print("Connecting to Synapse database...\n")
    syn = synapseclient.Synapse()
    syn.login('Machine_learning_project_70', 'Group_70')

    # Obtain a pointer and download the data
    print("--- Downloading datasets ---")
    for cancer_type in list_datasets:
        # `entry` aliases the stored list, so item assignment mutates
        # list_datasets in place just as the original did
        entry = list_datasets[cancer_type]
        print(cancer_type)

        print("\tDataset ID:\t" + entry[0])
        entry[0] = syn.get(entity=entry[0])
        print("\tLocal path:\t" + str(entry[0].path))

        print("\tAnnotation ID:\t" + entry[1])
        entry[1] = syn.get(entity=entry[1])
        print("\tLocal path:\t" + str(entry[1].path))

    print("--- DONE ---\n")
    return list_datasets
def run_synapse(docstore, parent, workdir):
    """Upload snv/mnv VCFs tracked in a local doc store to a Synapse folder.

    For every visible VCF entry, derives a '<sample>.<name>.snv_mnv.vcf'
    filename, skips files already present under *parent* in Synapse, and
    otherwise copies, bgzips and stores the file with provenance metadata.

    Args:
        docstore: path to the FileDocStore directory.
        parent: Synapse ID of the destination folder/project.
        workdir: local scratch directory for staging files.
    """
    doc = FileDocStore(file_path=docstore)

    syn = synapseclient.Synapse()
    syn.login()

    for entry_id, entry in doc.filter(
            visible=True, data_type='galaxy.datatypes.tabular.Vcf'):
        if 'tags' not in entry:
            continue

        # the sample name is carried as a 'sample:<name>' tag
        # NOTE(review): if no such tag exists, `sample` stays None and the
        # concatenation below raises -- preserved from the original
        sample = None
        for tag in entry['tags']:
            parts = tag.split(":")
            if parts[0] == 'sample':
                sample = parts[1]

        base_name = re.sub(r'.vcf$', '', entry['name'])
        file_name = sample + "." + base_name + ".snv_mnv.vcf"

        target = Target(uuid=entry['uuid'])
        if doc.size(target) <= 0:
            continue

        src_file = doc.get_filename(target)
        dst_file = os.path.join(workdir, file_name)

        # only upload when no entity of this name exists under the parent
        query = ("select * from entity where parentId=='%s' and name=='%s'"
                 % (parent, file_name + ".gz"))
        results = syn.query(query)['results']

        if len(results) == 0:
            # print() calls replace the original Python 2 print statements
            print(dst_file)
            shutil.copy(src_file, dst_file)
            # SECURITY NOTE: shell=True with an interpolated path -- safe
            # only while workdir/file names are trusted
            subprocess.check_call("bgzip %s" % (dst_file), shell=True)

            f = synapseclient.File(dst_file + ".gz", parentId=parent,
                                   name=file_name + ".gz")
            f.fileType = 'vcf'
            f.pipeline = 'UCSC'
            f.variant_type = "snv"
            f = syn.store(
                f, executed="https://github.com/ucsccancer/pcawg_tools")
        else:
            print("Skipping " + file_name)
def genie_fusion_counts(synapse_credentials_file, synapse_genie_release_id,
                        filename=None):
    """Count fusion-bearing samples per GENIE sequencing assay.

    Logs into Synapse using a JSON credentials file of the form
    {"email": "", "password": ""}, reads the fusion, clinical-sample and
    assay-information tables of the given GENIE release folder, tallies how
    many samples carry fusion calls per SEQ_ASSAY_ID, and flags whether each
    assay declares 'structural_variants' coverage.

    Args:
        synapse_credentials_file: path to the JSON credentials file.
        synapse_genie_release_id: Synapse ID of the release folder.
        filename: optional Excel output path; no file is written when None.
    """
    # synapse client
    syn = synapseclient.Synapse()

    # BUG FIX: close the credentials file (the original leaked the handle
    # from open(...).read())
    with open(synapse_credentials_file, 'r') as cred_fh:
        syn.login(**json.loads(cred_fh.read()))

    # make dict of entities in genie release, keyed by 'name'
    genie_syn_entities = {
        entity['name']: entity
        for entity in syn.getChildren(synapse_genie_release_id)}

    # fusion calls, clinical sample table, and sequencing assay metadata
    fusions_df = get_data(syn, genie_syn_entities['data_fusions.txt'])
    sample_df = get_data(syn, genie_syn_entities['data_clinical_sample.txt'])
    assay_df = get_data(syn, genie_syn_entities['assay_information.txt'])

    # the alteration type we check assay coverage for
    alt_type = 'structural_variants'

    # map each fusion-bearing sample to its sequencing assay
    fusion_samples = pd.merge(
        fusions_df['Tumor_Sample_Barcode'].drop_duplicates(),
        sample_df[['SAMPLE_ID', 'SEQ_ASSAY_ID']],
        left_on='Tumor_Sample_Barcode', right_on='SAMPLE_ID', how='left')

    # count fusion-bearing samples per assay
    fusion_samples = fusion_samples['SEQ_ASSAY_ID'].value_counts().reset_index()
    fusion_samples.rename(
        columns={'index': 'SEQ_ASSAY_ID',
                 'SEQ_ASSAY_ID': 'Samples with fusion calls'},
        inplace=True)

    # add flag as to whether SEQ_ASSAY_ID is described as covering alt_type
    fusion_samples['Alteration type includes "structural_variants"'] = (
        fusion_samples['SEQ_ASSAY_ID'].isin(
            assay_df['SEQ_ASSAY_ID'][
                assay_df['alteration_types'].str.contains(alt_type)]))

    # write to file
    if filename is not None:
        fusion_samples.to_excel(filename, index=False)
import os import synapseclient from synapseclient import File syn = synapseclient.Synapse(skip_checks=True) syn.login(silent=True) consortium = 'AMP-AD' study = 'MSBB' center = 'MSSM' disease = 'Alzheimers Disease' fileType = 'genomicMatrix' organism = 'human' toMove = { 'syn3157412': { 'parentId': 'syn3157740', #'traits_for_RNA-seq_age_censored.tsv' 'dataType': 'metaData', 'tissueType': ['Frontal Pole', 'Superior Temporal Gyrus', 'Parahippocampal Gyrus'], 'tissueTypeAbrv': ['FP', 'STG', 'PHG'], 'platform': '', 'fileType': 'genomicMatrix', 'name': 'AMP-AD_MSBB_MSSM_metaData_mRNA_IlluminaHiSeq2500_age_censored.tsv' }, 'syn3157409': { 'parentId': 'syn3157740', #'traits_for_array_data_age_censored.tsv'
#!/usr/bin/env python
"""Send a Synapse message to a user from the command line."""
import synapseclient
import argparse

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-c", "--synapse_config", required=True)
arg_parser.add_argument("-u", "--userid", required=True)
arg_parser.add_argument("-s", "--subject", required=True)
arg_parser.add_argument("-b", "--body", required=True)
arg_parser.add_argument("-t", "--content_type", default="text")
opts = arg_parser.parse_args()

# log in with the supplied configuration file, then deliver the message
client = synapseclient.Synapse(configPath=opts.synapse_config)
client.login()
client.sendMessage(userIds=[opts.userid],
                   messageSubject=opts.subject,
                   messageBody=opts.body,
                   contentType=opts.content_type)
def main():
    """Command-line entry point for the challenge scoring harness.

    Builds the subcommand parser (list/status/reset/validate/score/rank/
    archive/leaderboard), takes a global file lock so only one scoring run
    is active at a time, logs into Synapse, and dispatches to the chosen
    subcommand.  Returns 75 (EX_TEMPFAIL) when the lock is already held.

    NOTE: this function is Python 2 code (print statements).
    """
    # NOTE(review): this only warns -- execution continues even when the
    # challenge is unconfigured
    if conf.CHALLENGE_SYN_ID == "":
        sys.stderr.write("Please configure your challenge. See sample_challenge.py for an example.")

    # the module-level `syn` client is (re)bound below after login
    global syn

    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--user", help="UserName", default=None)
    parser.add_argument("-p", "--password", help="Password", default=None)
    parser.add_argument("--notifications", help="Send error notifications to challenge admins", action="store_true", default=False)
    parser.add_argument("--send-messages", help="Send validation and scoring messages to participants", action="store_true", default=False)
    parser.add_argument("--acknowledge-receipt", help="Send confirmation message on passing validation to participants", action="store_true", default=False)
    parser.add_argument("--dry-run", help="Perform the requested command without updating anything in Synapse", action="store_true", default=False)
    parser.add_argument("--debug", help="Show verbose error output from Synapse API calls", action="store_true", default=False)
    parser.add_argument("--threads", help="Number of parallel processes to use for validation and scoring", type=int, default=1)

    subparsers = parser.add_subparsers(title="subcommand")

    # 'list': show submissions for one evaluation, or list evaluations
    parser_list = subparsers.add_parser('list', help="List submissions to an evaluation or list evaluations")
    parser_list.add_argument("evaluation", metavar="EVALUATION-ID", nargs='?', default=None)
    parser_list.add_argument("--challenge-project", "--challenge", "--project", metavar="SYNAPSE-ID", default=None)
    parser_list.add_argument("-s", "--status", default=None)
    parser_list.add_argument("--all", action="store_true", default=False)
    parser_list.set_defaults(func=command_list)

    # 'status': check the state of a single submission
    parser_status = subparsers.add_parser('status', help="Check the status of a submission")
    parser_status.add_argument("submission")
    parser_status.set_defaults(func=command_check_status)

    # 'reset': push submissions back to RECEIVED (or another status)
    parser_reset = subparsers.add_parser('reset', help="Reset a submission to RECEIVED for re-scoring (or set to some other status)")
    parser_reset.add_argument("submission", metavar="SUBMISSION-ID", type=int, nargs='*', help="One or more submission IDs, or omit if using --rescore-all")
    parser_reset.add_argument("-s", "--status", default='RECEIVED')
    parser_reset.add_argument("--rescore-all", action="store_true", default=False)
    parser_reset.add_argument("--rescore", metavar="EVALUATION-ID", type=int, nargs='*', help="One or more evaluation IDs to rescore")
    parser_reset.set_defaults(func=command_reset)

    # 'validate' / 'score': process pending submissions for an evaluation
    parser_validate = subparsers.add_parser('validate', help="Validate all RECEIVED submissions to an evaluation")
    parser_validate.add_argument("evaluation", metavar="EVALUATION-ID", nargs='?', default=None)
    parser_validate.add_argument("--all", action="store_true", default=False)
    parser_validate.add_argument("--canCancel", action="store_true", default=False)
    parser_validate.set_defaults(func=command_validate)

    parser_score = subparsers.add_parser('score', help="Score all VALIDATED submissions to an evaluation")
    parser_score.add_argument("evaluation", metavar="EVALUATION-ID", nargs='?', default=None)
    parser_score.add_argument("--all", action="store_true", default=False)
    parser_score.add_argument("--canCancel", action="store_true", default=False)
    parser_score.set_defaults(func=command_score)

    # 'rank' / 'archive' / 'leaderboard': post-scoring utilities
    parser_rank = subparsers.add_parser('rank', help="Rank all SCORED submissions to an evaluation")
    parser_rank.add_argument("evaluation", metavar="EVALUATION-ID", default=None)
    parser_rank.set_defaults(func=command_rank)

    parser_archive = subparsers.add_parser('archive', help="Archive submissions to a challenge")
    parser_archive.add_argument("evaluation", metavar="EVALUATION-ID", default=None)
    parser_archive.add_argument("archiveType", metavar="TYPE", choices=["submission", "writeup"])
    parser_archive.add_argument("destination", metavar="FOLDER-ID", default=None)
    parser_archive.add_argument("-q", "--query", default=None)
    parser_archive.add_argument("-n", "--name", default=None)
    parser_archive.set_defaults(func=command_archive)

    parser_leaderboard = subparsers.add_parser('leaderboard', help="Print the leaderboard for an evaluation")
    parser_leaderboard.add_argument("evaluation", metavar="EVALUATION-ID", default=None)
    parser_leaderboard.add_argument("--out", default=None)
    parser_leaderboard.set_defaults(func=command_leaderboard)

    args = parser.parse_args()

    print "\n" * 2, "=" * 75
    print datetime.utcnow().isoformat()

    ## Acquire lock, don't run two scoring scripts at once
    try:
        update_lock = lock.acquire_lock_or_fail('challenge',
                                                max_age=timedelta(hours=4))
    except lock.LockedException:
        print u"Is the scoring script already running? Can't acquire lock."
        # can't acquire lock, so return error code 75 which is a
        # temporary error according to /usr/include/sysexits.h
        return 75

    try:
        syn = synapseclient.Synapse(debug=args.debug)
        # fall back to environment variables for missing credentials
        if not args.user:
            args.user = os.environ.get('SYNAPSE_USER', None)
        if not args.password:
            args.password = os.environ.get('SYNAPSE_PASSWORD', None)
        syn.login(email=args.user, password=args.password)

        ## initialize messages
        messages.syn = syn
        messages.dry_run = args.dry_run
        messages.send_messages = args.send_messages
        messages.send_notifications = args.notifications
        messages.acknowledge_receipt = args.acknowledge_receipt

        # dispatch to the selected subcommand's handler
        args.func(args)

    except Exception as ex1:
        # report the full traceback and optionally notify challenge admins
        sys.stderr.write('Error in scoring script:\n')
        st = StringIO()
        traceback.print_exc(file=st)
        sys.stderr.write(st.getvalue())
        sys.stderr.write('\n')
        if conf.ADMIN_USER_IDS:
            messages.error_notification(userIds=conf.ADMIN_USER_IDS,
                                        message=st.getvalue(),
                                        queue_name=conf.CHALLENGE_NAME)

    finally:
        # always release the cross-process lock, even after errors
        update_lock.release()

    print "\ndone: ", datetime.utcnow().isoformat()
    print "=" * 75, "\n" * 2
def main():
    """Tune and fit a classifier of a gene's mutation status, then run
    bootstrapped inference with SMMART patient samples held out as the
    forced test set, saving the inferred scores and tuning info to pickle.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('cohort', type=str, help='a TCGA cohort')
    parser.add_argument('gene', type=str, help='a mutated gene')
    # BUG FIX: help text was a copy-paste of the 'gene' argument's
    parser.add_argument('classif', type=str, help='a mutation classifier')
    parser.add_argument(
        'toil_dir', type=str,
        help='the directory where toil expression data is saved')
    parser.add_argument('syn_root', type=str,
                        help='Synapse cache root directory')
    parser.add_argument(
        'patient_dir', type=str,
        help='directory where SMMART patient RNAseq abundances are stored')

    parser.add_argument(
        '--tune_splits', type=int, default=4,
        help='how many training cohort splits to use for tuning')
    parser.add_argument(
        '--test_count', type=int, default=16,
        help='how many hyper-parameter values to test in each tuning split')
    parser.add_argument(
        '--infer_splits', type=int, default=20,
        help='how many cohort splits to use for inference bootstrapping')
    parser.add_argument(
        '--infer_folds', type=int, default=4,
        help=('how many parts to split the cohort into in each inference '
              'cross-validation run'))
    parser.add_argument(
        '--parallel_jobs', type=int, default=4,
        help='how many parallel CPUs to allocate the tuning tests across')
    parser.add_argument('--cv_id', type=int, default=0)
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='turns on diagnostic messages')

    args = parser.parse_args()
    out_dir = os.path.join(base_dir, 'output', 'gene_models',
                           args.cohort, args.gene)
    os.makedirs(out_dir, exist_ok=True)
    out_file = os.path.join(out_dir,
                            '{}__cv-{}.p'.format(args.classif, args.cv_id))

    # Stan classifiers are loaded dynamically from their module; anything
    # else is resolved by name.  SECURITY NOTE: eval() assumes the
    # classifier label comes from a trusted command line.
    if args.classif.startswith('Stan__'):
        use_module = import_module('HetMan.experiments.utilities'
                                   '.stan_models.{}'.format(
                                       args.classif.split('Stan__')[1]))
        mut_clf = getattr(use_module, 'UsePipe')
    else:
        mut_clf = eval(args.classif)

    base_mtype = MuType({('Gene', args.gene): None})
    clf = mut_clf()

    # log into Synapse using locally stored credentials
    syn = synapseclient.Synapse()
    syn.cache.cache_root_dir = args.syn_root
    syn.login()

    cdata = CancerCohort(cancer=args.cohort, mut_genes=[args.gene],
                         mut_levels=['Gene'], tcga_dir=args.toil_dir,
                         patient_dir=args.patient_dir, syn=syn,
                         collapse_txs=True, cv_seed=(args.cv_id * 59) + 121,
                         cv_prop=1.0)

    # SMMART (non-TCGA) samples are excluded from tuning and forced into
    # the inference test set
    smrt_samps = {samp for samp in cdata.samples if samp[:4] != 'TCGA'}

    clf.tune_coh(cdata, base_mtype,
                 exclude_genes={args.gene}, exclude_samps=smrt_samps,
                 tune_splits=args.tune_splits, test_count=args.test_count,
                 parallel_jobs=args.parallel_jobs)

    clf_params = clf.get_params()
    tuned_params = {par: clf_params[par] for par, _ in mut_clf.tune_priors}

    infer_mat = clf.infer_coh(cdata, base_mtype,
                              force_test_samps=smrt_samps,
                              exclude_genes={args.gene},
                              infer_splits=args.infer_splits,
                              infer_folds=args.infer_folds)

    # BUG FIX: close the pickle output handle deterministically (the
    # original leaked the handle from open(..., 'wb'))
    with open(out_file, 'wb') as fh:
        pickle.dump({'Infer': infer_mat,
                     'Info': {'TunePriors': mut_clf.tune_priors,
                              'TuneSplits': args.tune_splits,
                              'TestCount': args.test_count,
                              'TunedParams': tuned_params}},
                    fh)