Beispiel #1
0
def storeFile(syn,
              fileName,
              parentId,
              center,
              fileFormat,
              dataSubType,
              platform=None,
              cBioFileFormat=None,
              used=None):
    """Annotate a file as GENIE staging data and store it via ``syn.store``.

    Args:
        syn: Object providing ``store(entity, used=...)``.
        fileName: Passed as the first argument to ``File``.
        parentId: Passed to ``File`` as ``parent``.
        center: Value for the ``center`` annotation.
        fileFormat: Value for the ``fileFormat`` annotation.
        dataSubType: Value for the ``dataSubType`` annotation.
        platform: Value for the ``platform`` annotation; it is assigned
            even when it is ``None``.
        cBioFileFormat: Value for the ``cBioFileFormat`` annotation; the
            attribute is only assigned when this is not ``None``.
        used: Forwarded to ``syn.store`` as ``used``.

    Returns:
        Whatever ``syn.store`` returns for the annotated entity.
    """
    logger.info("STORING FILES")
    entity = File(fileName, parent=parentId)

    # Attributes are assigned in a fixed order; the constants are the same
    # for every file stored through this function.
    assignments = [
        ('center', center),
        ('species', "Human"),
        ('consortium', 'GENIE'),
        ('dataType', "genomicVariants"),
        ('fundingAgency', "AACR"),
        ('assay', 'targetGeneSeq'),
        ('fileFormat', fileFormat),
        ('dataSubType', dataSubType),
        ('fileStage', "staging"),
        ('platform', platform),
    ]
    if cBioFileFormat is not None:
        assignments.append(('cBioFileFormat', cBioFileFormat))

    for attribute, value in assignments:
        setattr(entity, attribute, value)

    stored = syn.store(entity, used=used)
    return stored
def uploadToSynapse(f):
    """Given a filepath extracts metadata and uploads to Synapse.

    The center is taken from marker substrings in the path ('OICR_BL',
    'CRG/clindel/somatic') or, failing that, from the fifth '/'-separated
    path component. Depending on the center, the file name is parsed as

    * ``<sample_id>.<dataType>.<...>.<fileType>[.gz]`` or
    * ``<sample_id>.<workflow_name>.<date>.<call_type>.<dataType>.<...>.<fileType>[.gz]``

    and the parsed values are set as annotations on a ``File`` that links
    to ``URLBASE + f`` (created with ``synapseStore=False``) under
    ``DIRS[center]``.

    Args:
        f: Path of the file relative to ``URLBASE``, using '/' separators.

    Returns:
        None. Files from unrecognised centers are reported on stdout and
        skipped.

    Raises:
        IndexError: if the path has fewer than five components, or the file
            name has no extension left after dropping 'gz'.
        ValueError: if the file name has too few '.'-separated fields.
    """
    # Centers whose file names carry only sample id and data type.
    shortNameCenters = ('yale', 'wustl', 'LOHcomplete')
    # Centers whose file names carry the full five-field prefix.
    longNameCenters = ('broad', 'BSC', 'oicr_sga', 'mda_kchen', 'MDA_HGSC',
                       'mcgill_popsv', 'sfu', 'UCSC', 'oicr_bl',
                       'Synteka_pgm21', 'crg_clindel')

    sample_id = workflow_name = date = call_type = dataType = fileType = ''

    if 'OICR_BL' in f:
        center = 'oicr_bl'
    elif 'CRG/clindel/somatic' in f:
        center = 'crg_clindel'
    else:
        center = f.split('/')[4]
    filename = f.split('/')[-1]

    if center in shortNameCenters:
        if filename == 'bd829214-f230-4331-b234-def10bbe7938CNV.vcf.gz':
            # This one file lacks the '.' between sample id and data type,
            # so its metadata is hard-coded.
            sample_id = 'bd829214-f230-4331-b234-def10bbe7938'
            dataType = 'cnv'
            fileType = 'vcf'
        else:
            sample_id, dataType = filename.lower().split('.')[:2]
            fileType = [i for i in filename.split('.')[2:] if i != 'gz'][-1]
    elif center in longNameCenters:
        # Normalise 'indels' to 'indel' BEFORE splitting; the original code
        # had the closing parenthesis in the wrong place and called an
        # undefined ``split``.
        (sample_id, workflow_name, date,
         call_type, dataType) = filename.replace('indels', 'indel').split('.')[:5]
        fileType = [i for i in filename.split('.')[5:] if i != 'gz'][-1]
    else:
        # Single-argument print() gives the same output on Python 2 and 3.
        print('Not uploading: %s' % f)
        return

    print(' '.join([center, workflow_name, date, call_type, dataType,
                    fileType]))

    url = URLBASE + f
    entity = File(url, parentId=DIRS[center], synapseStore=False)
    entity.center = center.lower()
    entity.sample_id = sample_id
    entity.workflow_name = workflow_name
    entity.date = date
    entity.call_type = call_type
    entity.dataType = 'DNA'
    entity.disease = 'Cancer'
    entity.dataSubType = dataType
    entity.fileType = fileType
    # TODO: the analysis_id_tumor annotation is not populated yet.
    syn.store(entity, forceVersion=False)
Beispiel #3
0
def storeFile(fileName, stagingID, used, center, annotations, meta=False):
    """Annotate a file as GENIE staging data and store it via ``syn.store``.

    Args:
        fileName: Passed as the first argument to ``File``.
        stagingID: Passed to ``File`` as ``parent``.
        used: Provenance, forwarded to ``syn.store`` as ``used``.
        center: Value for the ``center`` annotation.
        annotations: Mapping read with ``.get`` for the keys
            ``dataSubType``, ``dataType``, ``fileType``, ``platform`` and
            ``tissueSource``; a missing key yields an empty string.
        meta: When true, ``fileType`` and ``dataType`` are overridden with
            ``"txt"`` and ``"meta"``.

    Returns:
        Whatever ``syn.store`` returns for the annotated entity.
    """
    print("STORING FILES")
    fileEnt = File(fileName, parent=stagingID)
    fileEnt.center = center
    fileEnt.dataSubType = annotations.get("dataSubType", '')
    fileEnt.dataType = annotations.get("dataType", '')
    fileEnt.disease = 'cancer'
    fileEnt.fileType = annotations.get("fileType", '')
    # The literal was previously mangled to 'H**o Sapiens'.
    fileEnt.organism = 'Homo Sapiens'
    fileEnt.platform = annotations.get("platform", '')
    fileEnt.tissueSource = annotations.get("tissueSource", '')
    fileEnt.consortium = 'GENIE'
    if meta:
        # Metadata files always get these two values, regardless of what
        # the annotations mapping supplied.
        fileEnt.fileType = "txt"
        fileEnt.dataType = "meta"
    fileEnt.fileStage = "staging"
    # Provenance belongs in store()'s ``used`` keyword; it was previously
    # passed as ``annotations``.
    ent = syn.store(fileEnt, used=used)
    return ent
Beispiel #4
0
# Migration script: for each entity listed under OLDPARENTID, build a new
# 'AMP-AD_MSBB_MSSM_...' file name from the fields of the old entity name,
# rename the file at ent.path to that name and store it under NEWPARENTID
# with annotations and provenance.

# Maps the platform token of the old entity name to the platform label used
# in the new file name and annotation.
PLATFORM_MAP = {'133AB': 'AffymetrixU133AB', 
                'Plus2': 'AffymetrixU133Plus2'}
	
# List id and name of every entity whose parent is OLDPARENTID.
query = 'select id, name from entity where parentId=="%s"' %OLDPARENTID
df = synapseHelpers.query2df(syn.chunkedQuery(query))
# NOTE(review): the range starts at 1, so index 0 of the query result is
# never processed -- confirm this is intentional.
for i in range(1,df.shape[0]):
    row =  df.ix[i, :]
    ent = syn.get(row.id)
    # --- Section 1: expression file ---------------------------------------
    # The old name must contain exactly five '_'-separated fields here; any
    # other count makes the unpacking raise ValueError.
    fStudy, fTissue, fPlatform, fDatatype,  fRest = ent.name.split('_')
    # TISSUEABRMAP[fTissue][0] is also stored as tissueType below, and
    # TISSUEABRMAP[fTissue][1] as tissueTypeAbrv.
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform],   
                                          TISSUEABRMAP[fTissue][0], fRest)
    print name
    # `name` has no directory part added here, so the file at ent.path is
    # renamed to a path relative to the current working directory.
    os.rename(ent.path, name)

    # name[7:] drops the 7-character 'AMP-AD_' prefix for the entity name.
    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType =  'mRNA'
    f.disease = 'Alzheimers Disease'
    # NOTE(review): attribute is spelled `platfrom`; presumably `platform`
    # was meant -- confirm before changing, since the attribute name is what
    # gets stored.
    f.platfrom = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'geneExp'
    f.fileType =  'genomicMatrix'
    f.organism =  'human'
    # Provenance: the old entity is recorded as the input of the migration.
    f = syn.store(f, used = [ent], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/4d7d6b78b1e73058483354a1a18bff7422966a4b/Mount-Sinai/migrateMSBBExpression.py'], activityName='Data migration')
    

    # --- Section 2: co-expression file ------------------------------------
    # NOTE(review): the same row.id is fetched again and its name is now
    # unpacked into six fields, while section 1 unpacked it into five. A
    # single name cannot satisfy both, so these sections look like separate
    # script revisions pasted into one loop -- confirm which one applies.
    ent = syn.get(row.id)

    # fStudy, fTissue, fPlatform, fDatatype, fProcess,  fRest = ent.name.split('_')
    # name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform],   
    #                                       TISSUEABRMAP[fTissue][0], fRest
    fStudy, fTissue, fPlatform, fDatatype,  fProcess, fRest = ent.name.split('_')
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s_%s' % (PLATFORM_MAP[fPlatform],   
                                             TISSUEABRMAP[fTissue][0], 
                                             fProcess, 
                                             fRest)
    print name
    # Look up the file under EXPRFILE_PARENTID with the same tissueType and
    # take the first value of the first result (the query selects only id);
    # it is recorded as the provenance input below. Indexing `.values()`
    # requires Python 2, where it returns a list.
    used =  syn.query('select id from file where parentId=="%s" and tissueType=="%s" ' %(EXPRFILE_PARENTID, TISSUEABRMAP[fTissue][0]))['results'][0].values()[0]
    os.rename(ent.path, name)

    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType =  'mRNA'
    f.disease = 'Alzheimers Disease'
    # NOTE(review): same `platfrom` spelling as in section 1.
    f.platfrom = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'CoExpression'
    f.fileType =  'genomicMatrix'
    f.organism =  'human'
    f = syn.store(f, used = [used], executed=['syn2731322'], 
                  activityName='Weighted coexpression network modules using Coexpp v 0.1.0')
    

    # --- Section 3: same statements as section 2, reformatted -------------
    # NOTE(review): this repeats section 2 on the same `ent`; os.rename is
    # called again on ent.path, which section 2 already renamed -- presumably
    # a duplicated paste rather than intended logic; confirm.
    # name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform],
    #                                       TISSUEABRMAP[fTissue][0], fRest
    fStudy, fTissue, fPlatform, fDatatype, fProcess, fRest = ent.name.split(
        '_')
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s_%s' % (
        PLATFORM_MAP[fPlatform], TISSUEABRMAP[fTissue][0], fProcess, fRest)
    print name
    used = syn.query(
        'select id from file where parentId=="%s" and tissueType=="%s" ' %
        (EXPRFILE_PARENTID,
         TISSUEABRMAP[fTissue][0]))['results'][0].values()[0]
    os.rename(ent.path, name)

    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType = 'mRNA'
    f.disease = 'Alzheimers Disease'
    # NOTE(review): same `platfrom` spelling as in section 1.
    f.platfrom = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'CoExpression'
    f.fileType = 'genomicMatrix'
    f.organism = 'human'
    f = syn.store(f,
                  used=[used],
                  executed=['syn2731322'],
                  activityName=
                  'Weighted coexpression network modules using Coexpp v 0.1.0')