def main():
    """Update PCBC annotations in Synapse; optionally (re)store the annotation table.

    Command-line flags:
        --dryrun      run without making changes
        --storetable  also update the annotations table in Synapse
        --projectId   Synapse project ID under which the table is stored
    """
    # Local import kept from the original; unused "import sys" removed.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dryrun', action="store_true", default=False,
        help="Run without making changes [default: %(default)s]")
    parser.add_argument('--storetable', action="store_true", default=False)
    parser.add_argument('--projectId', type=str)
    args = parser.parse_args()

    syn = synapseclient.login(silent=True)
    annots = PCBCAnnotations.PCBCAllAnnotations(syn)
    a = PCBCAnnotations.PCBCAllAnnotationTableUpdate(syn, annots)

    # Only push this code to Synapse for provenance when actually making changes.
    my_synapse_id = None
    if not args.dryrun:
        my_synapse_id = synapseHelpers.thisCodeInSynapse(parentId='syn2758110',
                                                         syn=syn)
    a.update_annots_synapse(executed=my_synapse_id, dryrun=args.dryrun)
    if args.storetable:
        # Original bound the result to an unused "tbl"; dropped as dead code.
        a.update_annots_table_synapse(projectId=args.projectId,
                                      dryrun=args.dryrun)
def main():
    """Update PCBC annotations in Synapse; optionally (re)store the annotation table.

    Command-line flags:
        --dryrun      run without making changes
        --storetable  also update the annotations table in Synapse
        --projectId   Synapse project ID under which the table is stored
    """
    # Local import kept from the original; unused "import sys" removed.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dryrun', action="store_true", default=False,
        help="Run without making changes [default: %(default)s]")
    parser.add_argument('--storetable', action="store_true", default=False)
    parser.add_argument('--projectId', type=str)
    args = parser.parse_args()

    syn = synapseclient.login(silent=True)
    annots = PCBCAnnotations.PCBCAnnotations(syn)
    a = PCBCAnnotations.PCBCAnnotationTableUpdate(syn, annots)

    # Only push this code to Synapse for provenance when actually making changes.
    my_synapse_id = None
    if not args.dryrun:
        my_synapse_id = synapseHelpers.thisCodeInSynapse(parentId='syn2758110',
                                                         syn=syn)
    a.update_annots_synapse(executed=my_synapse_id, dryrun=args.dryrun)
    if args.storetable:
        # Original bound the result to an unused "tbl"; dropped as dead code.
        a.update_annots_table_synapse(projectId=args.projectId,
                                      dryrun=args.dryrun)
# Fragment from inside a per-dataset merge loop (the enclosing "for" is outside
# this view -- note the bare "continue").  Logic, as visible here:
#   1. Skip datasets that isUptodate() reports as current.
#   2. For "seg"/"bed" fileTypes: read each file as TSV and stack rows
#      (pd.concat axis=0); nSamples = number of distinct df.Sample values,
#      nFeatures fixed at 0.
#   3. All other fileTypes: read TSVs with column 0 as index and join columns
#      (pd.concat axis=1); (nFeatures, nSamples) = df.shape.
#   4. Write the merged table to args.filepath + name, then store it in Synapse
#      as a File annotated with platform/dataSubType/center/etc. drawn from
#      filteredMeta, with the input files as "used" provenance.
# NOTE(review): Python 2 print statements; "mp.map" is presumably a
# multiprocessing pool's map -- confirm against the top of the file.
if isUptodate(name, files, args.parentId): print " is up to date" continue if list(set(filteredMeta.fileType))[0] in ["seg", "bed"]: dfs = mp.map(lambda f: pd.read_csv(f.path, sep="\t"), files) df = pd.concat(dfs, axis=0) df.to_csv(args.filepath + name, sep="\t", index=False) nSamples = len(set(df.Sample)) nFeatures = 0 else: # All other fileTypes dfs = mp.map(lambda f: pd.read_csv(f.path, sep="\t", index_col=0), files) df = pd.concat(dfs, axis=1) df.to_csv(args.filepath + name, sep="\t") nFeatures, nSamples = df.shape print "Created", name, df.shape # Add file to Synapse entity = synapseclient.File(args.filepath + name, parentId=args.parentId) # Set annotations entity.platform = platform entity.dataSubType = dataSubType entity.acronym = "PANCAN" entity.dataProducer = "TCGA" entity.disease = "cancer" entity.center = list(set(filteredMeta.center)) entity.centerTitle = list(set(filteredMeta.centerTitle)) entity.fileType = list(set(filteredMeta.fileType)) entity.platformTitle = list(set(filteredMeta.platformTitle)) entity.nSamples = nSamples entity.nFeatures = nFeatures entity = syn.store(entity, used=files, executed=thisCodeInSynapse(parentId=args.parentId))
# Near-duplicate of the merge fragment above, single-quoted variant and without
# the isUptodate() guard.  Visible logic:
#   - "seg"/"bed" fileTypes: read each input as TSV, stack rows (axis=0),
#     write without the index; nSamples = distinct df.Sample count, nFeatures=0.
#   - otherwise: read with column 0 as index, join columns (axis=1);
#     (nFeatures, nSamples) = df.shape.
#   - store the merged TSV in Synapse with TCGA/PANCAN annotations copied from
#     filteredMeta, input files as "used" provenance and this code as "executed".
# NOTE(review): mid-scope fragment -- the enclosing loop/def is outside this
# view, so the binding of name/platform/dataSubType must be confirmed there.
if list(set(filteredMeta.fileType))[0] in ['seg', 'bed']: dfs = mp.map(lambda f: pd.read_csv(f.path, sep='\t'), files) df = pd.concat(dfs, axis=0) df.to_csv(args.filepath + name, sep='\t', index=False) nSamples = len(set(df.Sample)) nFeatures = 0 else: #All other fileTypes dfs = mp.map(lambda f: pd.read_csv(f.path, sep='\t', index_col=0), files) df = pd.concat(dfs, axis=1) df.to_csv(args.filepath + name, sep='\t') nFeatures, nSamples = df.shape print 'Created', name, df.shape #Add file to Synapse entity = synapseclient.File(args.filepath + name, parentId=args.parentId) #Set annotations entity.platform = platform entity.dataSubType = dataSubType entity.acronym = 'PANCAN' entity.dataProducer = 'TCGA' entity.disease = 'cancer' entity.center = list(set(filteredMeta.center)) entity.centerTitle = list(set(filteredMeta.centerTitle)) entity.fileType = list(set(filteredMeta.fileType)) entity.platformTitle = list(set(filteredMeta.platformTitle)) entity.nSamples = nSamples entity.nFeatures = nFeatures entity = syn.store(entity, used=files, executed=thisCodeInSynapse(parentId=args.parentId))
to a specific platform.""" old_whitelist = syn.get(WHITELISTID, version=version) whitelist = pd.read_csv(whitelistEntity.path, sep='\t') oldToRemove = set(whitelist.ix[whitelist.Do_not_use & (whitelist.platform==platform), 'aliquot_barcode']) return oldToRemove #mp = Pool(8) syn = synapseclient.login(silent=True) whitelistEntity = syn.get(WHITELISTID) whitelist = pd.read_csv(whitelistEntity.path, sep='\t') inputFiles = synapseHelpers.query2df(syn.chunkedQuery(QUERY_STR)) code=synapseHelpers.thisCodeInSynapse(parentId='syn1774100') for i, row in inputFiles.iterrows(): print row.id, row['name'], inputFileEntity = syn.get(row.id) outFileName = row['name'][:-4]+'_whitelisted'+row['name'][-4:] toRemove = set(whitelist.ix[whitelist.Do_not_use & (whitelist.platform == row['platform']), 'aliquot_barcode']) if isUptodate(outFileName, [inputFileEntity], toRemove, row['platform']): print ' is up to date - but update provenance' e = syn.get(getFileIdFromName(outFileName), downloadFile=False) syn.store(e, used=[inputFileEntity, whitelistEntity], executed=code) continue if row.fileType =='bed5': #Do the filtering for bed files df = pd.read_csv(inputFileEntity.path, sep='\t')
# NOTE(review): mid-scope fragment.  It opens with the TAIL of a helper's
# docstring (the def and the docstring opening are outside this view) -- the
# helper loads the whitelist at a given version and returns the set of
# Do_not_use aliquot_barcodes for one platform; "old_whitelist" is bound but
# unused here (candidate dead code, confirm against full file).  The rest is
# module-level script code: log in to Synapse, fetch the whitelist and the
# query-selected input files, then per file either refresh provenance when the
# "_whitelisted" output is up to date, or (for fileType 'bed5', cut off at the
# end of this view) filter the bed file.  Python 2 prints; pandas ".ix" is
# long-deprecated (would be ".loc" on a modern pandas).