Beispiel #1
0
def storeFile(syn,
              fileName,
              parentId,
              center,
              fileFormat,
              dataSubType,
              platform=None,
              cBioFileFormat=None,
              used=None):
    """Annotate a file entity and store it through ``syn``.

    A ``File`` entity is built for ``fileName`` under ``parentId``, a fixed
    set of annotations (species, consortium, dataType, fundingAgency, assay,
    fileStage) plus the caller-supplied ones are attached, and the entity is
    stored with ``used`` as provenance.

    Args:
        syn: Object providing ``store(entity, used=...)`` (presumably a
            logged-in Synapse client -- confirm against callers).
        fileName: Path handed to ``File``.
        parentId: Value passed to ``File`` as ``parent``.
        center: Value for the ``center`` annotation.
        fileFormat: Value for the ``fileFormat`` annotation.
        dataSubType: Value for the ``dataSubType`` annotation.
        platform: Optional ``platform`` annotation; omitted when ``None``.
        cBioFileFormat: Optional ``cBioFileFormat`` annotation; omitted when
            ``None``.
        used: Forwarded unchanged to ``syn.store`` as ``used``.

    Returns:
        Whatever ``syn.store`` returns for the stored entity.
    """
    logger.info("STORING FILES")
    fileEnt = File(fileName, parent=parentId)
    fileEnt.center = center
    fileEnt.species = "Human"
    fileEnt.consortium = 'GENIE'
    fileEnt.dataType = "genomicVariants"
    fileEnt.fundingAgency = "AACR"
    fileEnt.assay = 'targetGeneSeq'
    fileEnt.fileFormat = fileFormat
    fileEnt.dataSubType = dataSubType
    fileEnt.fileStage = "staging"
    # Optional annotations are attached only when supplied, so an omitted
    # argument does not leave a None-valued annotation on the entity.
    if platform is not None:
        fileEnt.platform = platform
    if cBioFileFormat is not None:
        fileEnt.cBioFileFormat = cBioFileFormat
    return syn.store(fileEnt, used=used)
Beispiel #2
0
def upload(args, syn):
    """Store ``args.input`` in the folder that matches ``args.dataType``.

    Each supported ``args.dataType`` keyword selects a parent folder id, a
    pipeline entity id and the ``dataType`` annotation value.  When
    ``args.workflow`` is given, the pipeline entity is fetched, pointed at
    that path, re-stored, and its id is used as provenance instead.

    Args:
        args: Object exposing ``dataType``, ``workflow`` and ``input``.
        syn: Object providing ``get`` and ``store``.

    Returns:
        The ``id`` attribute of the stored file entity.

    Raises:
        ValueError: If ``args.dataType`` is not a supported keyword.
    """
    # (keyword, parent folder id, pipeline entity id, dataType annotation)
    destinations = (
        ("rnaseq", "syn6034916", "syn6126122", "RNASeq"),
        ("dnaseq", "syn6034751", "syn6126123", "TargDNASeq"),
        ("snparray", "syn6038475", "syn6126121", "SNParray"),
        ("exparray", "syn6038915", "syn6126120", "expression_microarray"),
        ("exome", "syn6115597", "", "exome"),
    )
    for keyword, parentId, pipeline, dataType in destinations:
        if args.dataType == keyword:
            break
    else:
        raise ValueError("dataType needs to be rnaseq/dnaseq/snparray/exparray/exome")

    if args.workflow is not None:
        # Point the existing pipeline entity at the supplied workflow file
        # and use the re-stored entity's id as provenance.
        workflowEnt = syn.get(pipeline, downloadFile=False)
        workflowEnt.path = args.workflow
        workflowEnt.name = os.path.basename(args.workflow)
        pipeline = syn.store(workflowEnt).id

    fileEnt = File(args.input, parent=parentId)
    fileEnt.dataType = dataType
    # NOTE(review): sampleId is not defined in this function; it has to
    # exist at module level for this line to work -- confirm.
    fileEnt.sampleId = sampleId
    stored = syn.store(fileEnt, used=pipeline)
    return stored.id
def uploadToSynapse(f):
    """Given a filepath, extract metadata from it and upload a link to Synapse.

    The center is derived from the path: paths containing ``OICR_BL`` or
    ``CRG/clindel/somatic`` map to fixed center names, otherwise the fifth
    ``/``-separated component is used.  The remaining metadata is parsed from
    the dot-separated file name, with a layout that depends on the center.
    Files from centers that are not listed are reported and skipped.

    The entity is created with ``synapseStore=False`` and the URL
    ``URLBASE + f`` and stored under ``DIRS[center]``.

    Args:
        f: File path; it is appended to ``URLBASE`` to form the stored URL.

    Returns:
        None.
    """
    sample_id, workflow_name, date, call_type, dataType, fileType = [''] * 6

    if 'OICR_BL' in f:
        center = 'oicr_bl'
    elif 'CRG/clindel/somatic' in f:
        center = 'crg_clindel'
    else:
        center = f.split('/')[4]
    filename = f.split('/')[-1]

    if center in ('yale', 'wustl', 'LOHcomplete'):
        if filename == 'bd829214-f230-4331-b234-def10bbe7938CNV.vcf.gz':
            # This one file name has no '.' between sample id and data type,
            # so its metadata is hard-coded.
            sample_id, dataType, fileType = 'bd829214-f230-4331-b234-def10bbe7938', 'cnv', 'vcf'
        else:
            sample_id, dataType = filename.lower().split('.')[:2]
            # Last extension that is not the compression suffix.
            fileType = [i for i in filename.split('.')[2:] if i != 'gz'][-1]
    elif center in ('broad', 'BSC', 'oicr_sga', 'mda_kchen', 'MDA_HGSC',
                    'mcgill_popsv', 'sfu', 'UCSC', 'oicr_bl', 'Synteka_pgm21',
                    'crg_clindel'):
        # Normalise 'indels' to 'indel' before splitting the name into its
        # first five dot-separated fields.
        sample_id, workflow_name, date, call_type, dataType = (
            filename.replace('indels', 'indel').split('.')[:5])
        fileType = [i for i in filename.split('.')[5:] if i != 'gz'][-1]
    else:
        print('Not uploading: %s' % f)
        return

    print(' '.join([center, workflow_name, date, call_type, dataType, fileType]))
    # The URL is only needed once we know the file will be uploaded.
    entity = File(URLBASE + f, parentId=DIRS[center], synapseStore=False)
    entity.center = center.lower()
    entity.sample_id = sample_id
    entity.workflow_name = workflow_name
    entity.date = date
    entity.call_type = call_type
    entity.dataType = 'DNA'
    entity.disease = 'Cancer'
    entity.dataSubType = dataType
    entity.fileType = fileType
    # TODO: an analysis_id_tumor annotation was left unset in the original.
    syn.store(entity, forceVersion=False)
Beispiel #4
0
def storeFile(fileName, stagingID, used, center, annotations, meta=False):
    """Annotate a file entity and store it under the staging folder.

    Uses the module-level ``syn`` object to perform the store.

    Args:
        fileName: Path handed to ``File``.
        stagingID: Value passed to ``File`` as ``parent``.
        used: Provenance forwarded to ``syn.store`` as ``used``.
        center: Value for the ``center`` annotation.
        annotations: Mapping read with ``.get`` for the keys ``dataSubType``,
            ``dataType``, ``fileType``, ``platform`` and ``tissueSource``;
            missing keys default to an empty string.
        meta: When true, ``fileType`` and ``dataType`` are overridden with
            ``"txt"`` and ``"meta"``.

    Returns:
        Whatever ``syn.store`` returns for the stored entity.
    """
    print("STORING FILES")
    fileEnt = File(fileName, parent=stagingID)
    fileEnt.center = center
    fileEnt.dataSubType = annotations.get("dataSubType", '')
    fileEnt.dataType = annotations.get("dataType", '')
    fileEnt.disease = 'cancer'
    fileEnt.fileType = annotations.get("fileType", '')
    fileEnt.organism = 'Homo Sapiens'
    fileEnt.platform = annotations.get("platform", '')
    fileEnt.tissueSource = annotations.get("tissueSource", '')
    fileEnt.consortium = 'GENIE'
    if meta:
        # Meta files carry fixed type annotations regardless of the mapping.
        fileEnt.fileType = "txt"
        fileEnt.dataType = "meta"
    fileEnt.fileStage = "staging"
    # `used` is provenance, so it goes to the `used` keyword of store().
    return syn.store(fileEnt, used=used)
Beispiel #5
0
def upload(args, syn):
    """Upload ``args.input`` to the folder selected by ``args.dataType``.

    The keyword in ``args.dataType`` picks the parent folder id, the pipeline
    entity id used as provenance, and the ``dataType`` annotation.  If
    ``args.workflow`` is set, the pipeline entity is retrieved without
    downloading, updated to that path and file name, stored again, and the
    id of the stored entity becomes the provenance reference.

    Args:
        args: Object with ``dataType``, ``workflow`` and ``input`` attributes.
        syn: Object providing ``get`` and ``store``.

    Returns:
        The ``id`` of the stored file entity.

    Raises:
        ValueError: When ``args.dataType`` is not one of the known keywords.
    """
    # Rows are (keyword, parentId, pipeline, dataType annotation).
    routes = (
        ("rnaseq", "syn6034916", "syn6126122", "RNASeq"),
        ("dnaseq", "syn6034751", "syn6126123", "TargDNASeq"),
        ("snparray", "syn6038475", "syn6126121", "SNParray"),
        ("exparray", "syn6038915", "syn6126120", "expression_microarray"),
        ("exome", "syn6115597", "", "exome"),
    )
    selected = next((row for row in routes if args.dataType == row[0]), None)
    if selected is None:
        raise ValueError(
            "dataType needs to be rnaseq/dnaseq/snparray/exparray/exome")
    parentId, pipeline, dataType = selected[1:]

    if args.workflow is not None:
        pipelineEnt = syn.get(pipeline, downloadFile=False)
        pipelineEnt.path = args.workflow
        pipelineEnt.name = os.path.basename(args.workflow)
        pipelineEnt = syn.store(pipelineEnt)
        pipeline = pipelineEnt.id

    uploaded = File(args.input, parent=parentId)
    uploaded.dataType = dataType
    # NOTE(review): sampleId is not assigned anywhere in this function; it
    # must come from module scope -- confirm.
    uploaded.sampleId = sampleId
    uploaded = syn.store(uploaded, used=pipeline)
    return uploaded.id
        'tissueType':
        ['Frontal Pole', 'Superior Temporal Gyrus', 'Parahippocampal Gyrus'],
        'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
        'name':
        'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_normalized-sex-race-age-RIN-PMI-batch-site.corrected.csv'
    },
}

# Re-upload every entity listed in toMove under its new name and parent,
# recording the source entity and this script as provenance.
# consortium, study, center, disease, fileType and organism are expected to
# be defined earlier in the script.
for synId, v in toMove.items():
    ent = syn.get(synId)
    print(v['name'])
    # Give the downloaded file its new name (relative to the working dir).
    os.rename(ent.path, v['name'])

    # The entity name drops the first 7 and the last 4 characters of the
    # file name (for the names listed here: the 'AMP-AD_' prefix and the
    # extension).
    f = File(v['name'], parentId=v['parentId'], name=v['name'][7:-4])
    print(f.name)
    f.consortium, f.study, f.center, f.disease = consortium, study, center, disease
    f.dataType = v['dataType']
    f.platform = v['platform']
    if 'tissueTypeAbrv' in v:
        f.tissueTypeAbrv = v['tissueTypeAbrv']
        f.tissueType = v['tissueType']
    f.fileType = fileType
    f.organism = organism
    f = syn.store(
        f,
        used=[synId],
        executed=[
            'https://github.com/Sage-Bionetworks/ampAdScripts/blob/e71bbde262625e6999ea9defd98e10fce8f3c542/Mount-Sinai/migrateMSBBMetaAndRNASeq.py'
        ],
        activityName='Data migration')
Beispiel #7
0
# Maps the platform token found in the old entity names to the platform name
# used in the new file names and in the platform annotation.
PLATFORM_MAP = {'133AB': 'AffymetrixU133AB',
                'Plus2': 'AffymetrixU133Plus2'}

# List the children of the old parent folder and re-upload each one under
# NEWPARENTID with a normalised name and annotations.
query = 'select id, name from entity where parentId=="%s"' % OLDPARENTID
df = synapseHelpers.query2df(syn.chunkedQuery(query))
# NOTE(review): the loop starts at 1, so the row at index 0 is never
# migrated -- confirm this is intentional.
# NOTE(review): DataFrame.ix only exists in older pandas releases.
for i in range(1, df.shape[0]):
    row = df.ix[i, :]
    ent = syn.get(row.id)
    # Old names must consist of exactly five '_'-separated parts; only the
    # tissue, platform and trailing part are reused.
    fStudy, fTissue, fPlatform, fDatatype, fRest = ent.name.split('_')
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform],
                                          TISSUEABRMAP[fTissue][0], fRest)
    print(name)
    # Give the downloaded file its new name (relative to the working dir).
    os.rename(ent.path, name)

    # The entity name drops the first 7 characters ('AMP-AD_') of the file name.
    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType = 'mRNA'
    f.disease = 'Alzheimers Disease'
    f.platform = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'geneExp'
    f.fileType = 'genomicMatrix'
    f.organism = 'human'
    f = syn.store(f, used=[ent], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/4d7d6b78b1e73058483354a1a18bff7422966a4b/Mount-Sinai/migrateMSBBExpression.py'], activityName='Data migration')
    

Beispiel #8
0
    meta_data = json.loads(handle.read())

import os  # needed for os.path.basename below; harmless if already imported

DST_FOLDER = 'syn3079564'  # test upload folder

# Create provenance log.
# NOTE(review): `executed` is filled from meta_data['used'], the same key as
# `used`, exactly as in the original -- confirm whether a separate key was
# intended.
provenance = Activity(name=meta_data['activity'],
                      description=meta_data['description'],
                      used=meta_data['used'],
                      executed=meta_data['used'])
# The activity is not stored on its own here (the original call was
# commented out): prov = syn.store(prov)

name = os.path.basename(input_path)
# Add metadata to the file to be uploaded.
f = File(input_path, name=name, parentId=DST_FOLDER)
f.dataType = meta_data['dataType']
# NOTE(review): fileType is read from the 'dataType' key, same as dataType
# above -- confirm whether a 'fileType' key was intended.
f.fileType = meta_data['dataType']
f.variant_workflow = meta_data['workflow']
f.variant_workflow_version = meta_data['workflowVersion']
# call_type is expected to be defined earlier in the script.
f.call_type = call_type
f.reference_build = meta_data['referenceBuild']
f.center_name = meta_data['center_name']
f.file_md5 = synapseclient.utils.md5_for_file(input_path)
f.study = 'PCAWG 2.0'
f.submitter_donor_id = meta_data['donor_id']
f.alignment_workflow_name = 'Workflow_Bundle_BWA (UCSC Implementation)'
f.alignment_workflow_source_url = 'https://github.com/kellrott/tcga_realign'
f.alignment_workflow_version = '2.6.0'


#Store metadata and file to Synapse
                  'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
                  'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_rawCounts.tsv'},
    'syn2920161':{'parentId' :'syn3157743',  #'normalized.sex_race_age_RIN_PMI_batch_site.corrected.csv'
                  'dataType': 'mRNA',                  
                  'platform': 'IlluminaHiSeq2500',
                  'tissueType':['Frontal Pole', 'Superior Temporal Gyrus','Parahippocampal Gyrus'],
                  'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
                  'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_normalized-sex-race-age-RIN-PMI-batch-site.corrected.csv'},
    }


# Migrate each entity in toMove: download it, rename the local file, and
# store it as a new annotated File under the configured parent, with the
# source entity and this script recorded as provenance.
# consortium, study, center, disease, fileType and organism are expected to
# be defined earlier in the script.
for synId, v in toMove.items():
    ent = syn.get(synId)
    print(v['name'])
    # Rename the downloaded file to its new name (relative to the working dir).
    os.rename(ent.path, v['name'])

    # Entity name = file name without its first 7 and last 4 characters
    # (for the names listed here: the 'AMP-AD_' prefix and the extension).
    f = File(v['name'], parentId=v['parentId'], name=v['name'][7:-4])
    print(f.name)
    f.consortium, f.study, f.center, f.disease = consortium, study, center, disease
    f.dataType = v['dataType']
    f.platform = v['platform']
    if 'tissueTypeAbrv' in v:
        f.tissueTypeAbrv = v['tissueTypeAbrv']
        f.tissueType = v['tissueType']
    f.fileType = fileType
    f.organism = organism
    f = syn.store(f, used=[synId], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/e71bbde262625e6999ea9defd98e10fce8f3c542/Mount-Sinai/migrateMSBBMetaAndRNASeq.py'],
                  activityName='Data migration')