def uploadToSynapse(f):
    """Extract metadata from a file path and store a matching File in Synapse.

    The center is derived from the path: 'OICR_BL' anywhere in the path maps
    to 'oicr_bl', 'CRG/clindel/somatic' maps to 'crg_clindel', otherwise the
    '/'-separated component at index 4 is used.  The remaining annotations
    are parsed from the '.'-separated file name, using one of two layouts
    chosen by center:

    * ``sample_id.dataType.<...>.fileType[.gz]`` for yale/wustl/LOHcomplete
    * ``sample_id.workflow_name.date.call_type.dataType.<...>.fileType[.gz]``
      for the other recognised centers

    Args:
        f: Path of the file; it is appended to URLBASE to form the URL the
            File entity is created with.

    Returns:
        None.  Paths whose center is not recognised (including paths with
        fewer than five '/'-separated components) are reported on stdout and
        skipped without touching Synapse.

    Raises:
        ValueError: If the file name has fewer '.'-separated fields than the
            layout for its center requires.
        IndexError: If no file-type field remains after dropping 'gz'.
    """
    path_parts = f.split('/')
    if 'OICR_BL' in f:
        center = 'oicr_bl'
    elif 'CRG/clindel/somatic' in f:
        center = 'crg_clindel'
    else:
        # A path too short to carry a center falls through to the
        # "Not uploading" branch below instead of raising IndexError.
        center = path_parts[4] if len(path_parts) > 4 else ''
    filename = path_parts[-1]

    # Fields a given layout does not provide stay as empty strings.
    sample_id, workflow_name, date, call_type, dataType, fileType = [''] * 6

    if center in ('yale', 'wustl', 'LOHcomplete'):
        if filename == 'bd829214-f230-4331-b234-def10bbe7938CNV.vcf.gz':
            # Special case: this name has no '.' between the sample id and
            # the data type, so it cannot be split like the others.
            sample_id, dataType, fileType = (
                'bd829214-f230-4331-b234-def10bbe7938', 'cnv', 'vcf')
        else:
            sample_id, dataType = filename.lower().split('.')[:2]
            # File type is the last extension that is not the gzip suffix.
            fileType = [i for i in filename.split('.')[2:] if i != 'gz'][-1]
    elif center in ('broad', 'BSC', 'oicr_sga', 'mda_kchen', 'MDA_HGSC',
                    'mcgill_popsv', 'sfu', 'UCSC', 'oicr_bl',
                    'Synteka_pgm21', 'crg_clindel'):
        # Normalise 'indels' to 'indel' first, then split into fields.
        fields = filename.replace('indels', 'indel').split('.')
        sample_id, workflow_name, date, call_type, dataType = fields[:5]
        fileType = [i for i in filename.split('.')[5:] if i != 'gz'][-1]
    else:
        # Single-argument print() emits the same text on Python 2 and 3.
        print('Not uploading: %s' % f)
        return

    print(' '.join(
        [center, workflow_name, date, call_type, dataType, fileType]))

    # Built only once we know the file will be stored.
    url = URLBASE + f
    entity = File(url, parentId=DIRS[center], synapseStore=False)
    entity.center = center.lower()
    entity.sample_id = sample_id
    entity.workflow_name = workflow_name
    entity.date = date
    entity.call_type = call_type
    entity.dataType = 'DNA'
    entity.disease = 'Cancer'
    # The data type parsed from the file name is recorded as the sub-type.
    entity.dataSubType = dataType
    entity.fileType = fileType
    #entity.analysis_id_tumor = ?????
    syn.store(entity, forceVersion=False)
# Build the provenance Activity and the annotated File entity for input_path.
# Storing to Synapse and adding the wiki description are currently disabled
# (see the commented-out calls below).

# Imported here so this section does not rely on an `os` import elsewhere.
import os

# Create provenance log.
# NOTE(review): `executed` is fed from meta_data['used'], the same value as
# `used` -- confirm whether a separate 'executed' entry was intended.
provenance = Activity(
    name=meta_data['activity'],
    description=meta_data['description'],
    used=meta_data['used'],
    executed=meta_data['used'],
)
#prov = syn.store(prov)

# The entity is named after the file itself, without its directory.
name = os.path.basename(input_path)

# Add metadata to files to be uploaded
f = File(input_path, name=name, parentId=DST_FOLDER)
f.dataType = meta_data['dataType']
# NOTE(review): fileType is read from the 'dataType' key -- confirm whether
# meta_data carries a dedicated file-type entry that should be used instead.
f.fileType = meta_data['dataType']
f.variant_workflow = meta_data['workflow']
f.variant_workflow_version = meta_data['workflowVersion']
f.call_type = call_type
f.reference_build = meta_data['referenceBuild']
f.center_name = meta_data['center_name']
f.file_md5 = synapseclient.utils.md5_for_file(input_path)
f.study = 'PCAWG 2.0'
f.submitter_donor_id = meta_data['donor_id']
f.alignment_workflow_name = 'Workflow_Bundle_BWA (UCSC Implementation)'
f.alignment_workflow_source_url = 'https://github.com/kellrott/tcga_realign'
f.alignment_workflow_version = '2.6.0'

#Store metadata and file to Synapse
#f = syn.store(f, activity = provenance)

#Add Description
#wiki = synapseclient.Wiki(TITLE, f, DESCRIPTION)