def storeFile(syn, fileName, parentId, center, fileFormat, dataSubType,
              platform=None, cBioFileFormat=None, used=None):
    """Annotate a file entity with GENIE metadata and store it via ``syn``.

    Args:
        syn: Client object whose ``store`` method persists the entity.
        fileName: Path handed to ``File`` as the entity's file.
        parentId: Identifier passed to ``File`` as ``parent``.
        center: Value stored in the ``center`` annotation.
        fileFormat: Value stored in the ``fileFormat`` annotation.
        dataSubType: Value stored in the ``dataSubType`` annotation.
        platform: Optional ``platform`` annotation; omitted when ``None``.
        cBioFileFormat: Optional ``cBioFileFormat`` annotation; omitted
            when ``None``.
        used: Forwarded unchanged to ``syn.store`` as ``used``.

    Returns:
        Whatever ``syn.store`` returns for the stored entity.
    """
    logger.info("STORING FILES")
    fileEnt = File(fileName, parent=parentId)

    # Annotations that are the same for every file stored by this helper.
    fileEnt.center = center
    fileEnt.species = "Human"
    fileEnt.consortium = 'GENIE'
    fileEnt.dataType = "genomicVariants"
    fileEnt.fundingAgency = "AACR"
    fileEnt.assay = 'targetGeneSeq'
    fileEnt.fileFormat = fileFormat
    fileEnt.dataSubType = dataSubType
    fileEnt.fileStage = "staging"

    # Optional annotations are attached only when a value was supplied, so
    # that an absent value does not end up stored as a ``None`` annotation.
    if platform is not None:
        fileEnt.platform = platform
    if cBioFileFormat is not None:
        fileEnt.cBioFileFormat = cBioFileFormat

    return syn.store(fileEnt, used=used)
def uploadToSynapse(f):
    """Given a filepath extracts metadata and uploads to Synapse.

    The center is derived from the path (two special-cased substrings,
    otherwise the fifth ``/``-separated component) and the remaining
    metadata from the dot-separated fields of the file name. Files from
    centers that are not recognised are reported on stdout and skipped.

    Args:
        f: Path string; it is appended to ``URLBASE`` to form the external
            URL of the entity.

    Returns:
        None. The entity is stored through the module-level ``syn``.
    """
    # Defaults for the fields that a given naming scheme does not provide.
    center, sample_id, workflow_name, date, call_type, dataType, fileType = [''] * 7

    if 'OICR_BL' in f:
        center = 'oicr_bl'
    elif 'CRG/clindel/somatic' in f:
        center = 'crg_clindel'
    else:
        center = f.split('/')[4]
    filename = f.split('/')[-1]

    if center in ('yale', 'wustl', 'LOHcomplete'):
        if filename == 'bd829214-f230-4331-b234-def10bbe7938CNV.vcf.gz':
            # This one file lacks the '.' between sample id and data type,
            # so its metadata is hard-coded.
            sample_id, dataType, fileType = 'bd829214-f230-4331-b234-def10bbe7938', 'cnv', 'vcf'
        else:
            sample_id, dataType = filename.lower().split('.')[:2]
            # Last extension that is not the compression suffix.
            fileType = [i for i in filename.split('.')[2:] if i != 'gz'][-1]
    elif center in ('broad', 'BSC', 'oicr_sga', 'mda_kchen', 'MDA_HGSC',
                    'mcgill_popsv', 'sfu', 'UCSC', 'oicr_bl',
                    'Synteka_pgm21', 'crg_clindel'):
        # Normalise 'indels' to 'indel' first, then take the five leading
        # dot-separated fields of the file name.
        sample_id, workflow_name, date, call_type, dataType = (
            filename.replace('indels', 'indel').split('.')[:5])
        fileType = [i for i in filename.split('.')[5:] if i != 'gz'][-1]
    else:
        # Single-argument print gives the same output on Python 2 and 3.
        print('Not uploading: %s' % f)
        return

    print(' '.join([center, workflow_name, date, call_type, dataType, fileType]))

    # synapseStore=False is passed so the entity is created from the URL
    # rather than from a local file path.
    entity = File(URLBASE + f, parentId=DIRS[center], synapseStore=False)
    entity.center = center.lower()
    entity.sample_id = sample_id
    entity.workflow_name = workflow_name
    entity.date = date
    entity.call_type = call_type
    entity.dataType = 'DNA'
    entity.disease = 'Cancer'
    entity.dataSubType = dataType
    entity.fileType = fileType
    # TODO: analysis_id_tumor is not set; its source value is undetermined.
    syn.store(entity, forceVersion=False)
def storeFile(fileName, stagingID, used, center, annotations, meta=False):
    """Annotate a file entity for GENIE staging and store it.

    Uses the module-level ``syn`` object to perform the store.

    Args:
        fileName: Path handed to ``File`` as the entity's file.
        stagingID: Identifier passed to ``File`` as ``parent``.
        used: Provenance value forwarded to ``syn.store`` as ``used``.
        center: Value stored in the ``center`` annotation.
        annotations: Mapping read with ``.get`` for the keys
            ``dataSubType``, ``dataType``, ``fileType``, ``platform`` and
            ``tissueSource``; missing keys default to ``''``.
        meta: When true, ``fileType`` and ``dataType`` are overridden with
            ``"txt"`` and ``"meta"``.

    Returns:
        Whatever ``syn.store`` returns for the stored entity.
    """
    print("STORING FILES")
    fileEnt = File(fileName, parent=stagingID)

    fileEnt.center = center
    fileEnt.dataSubType = annotations.get("dataSubType", '')
    fileEnt.dataType = annotations.get("dataType", '')
    fileEnt.disease = 'cancer'
    fileEnt.fileType = annotations.get("fileType", '')
    fileEnt.organism = 'Homo Sapiens'
    fileEnt.platform = annotations.get("platform", '')
    fileEnt.tissueSource = annotations.get("tissueSource", '')
    fileEnt.consortium = 'GENIE'

    if meta:
        # Meta files override whatever the annotations mapping supplied.
        fileEnt.fileType = "txt"
        fileEnt.dataType = "meta"
    fileEnt.fileStage = "staging"

    # Provenance must be passed as ``used``; ``annotations`` is not a store
    # option, so passing it there would not record the provenance.
    ent = syn.store(fileEnt, used=used)
    return ent
# Migration script: for entities under OLDPARENTID, rename the downloaded
# file to the AMP-AD naming scheme, annotate it, and store it under
# NEWPARENTID with provenance pointing back at the source entity.

# Maps the platform token found in the old file name to the platform name
# used in the new file name and in the ``platform`` annotation.
PLATFORM_MAP = {'133AB': 'AffymetrixU133AB',
                'Plus2': 'AffymetrixU133Plus2'}

query = 'select id, name from entity where parentId=="%s"' % OLDPARENTID
df = synapseHelpers.query2df(syn.chunkedQuery(query))

# NOTE(review): iteration starts at row 1, so the first query result is
# never migrated. Confirm whether skipping row 0 is intended.
for i in range(1, df.shape[0]):
    row = df.ix[i, :]
    ent = syn.get(row.id)

    # The old name must consist of exactly five '_'-separated fields;
    # anything else raises ValueError on unpacking.
    fStudy, fTissue, fPlatform, fDatatype, fRest = ent.name.split('_')
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform],
                                          TISSUEABRMAP[fTissue][0],
                                          fRest)
    print(name)

    # Rename the fetched file to the new name (relative to the current
    # working directory) before uploading it.
    os.rename(ent.path, name)

    # The entity name drops the leading 'AMP-AD_' (7 characters).
    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType = 'mRNA'
    f.disease = 'Alzheimers Disease'
    f.platform = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'geneExp'
    f.fileType = 'genomicMatrix'
    f.organism = 'human'
    f = syn.store(
        f,
        used=[ent],
        executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/4d7d6b78b1e73058483354a1a18bff7422966a4b/Mount-Sinai/migrateMSBBExpression.py'],
        activityName='Data migration')