def _manifest_upload(syn, df):
    """Turn every manifest row into an upload item and upload them together.

    For each row of ``df`` a ``File`` is built from the ``path`` and
    ``parent`` columns plus any ``FILE_CONSTRUCTOR_FIELDS`` columns present.
    Every column that is not a constructor, store, required or provenance
    field becomes an annotation on that file.  The files are wrapped in
    ``_SyncUploadItem`` objects and handed to a ``_SyncUploader`` running on
    the executor obtained from ``_sync_executor(syn)``.

    :param syn: Synapse client passed to the executor and the uploader
    :param df: manifest DataFrame, one row per file
    :returns: always ``True``
    """
    pending = []
    for _, record in df.iterrows():
        entity = File(
            path=record['path'],
            parent=record['parent'],
            **{name: record[name] for name in FILE_CONSTRUCTOR_FIELDS if name in record}
        )

        # Whatever is left after removing the reserved columns is an annotation.
        reserved_columns = (
            FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS + REQUIRED_FIELDS + PROVENANCE_FIELDS
        )
        entity.annotations = dict(record.drop(reserved_columns, errors='ignore'))

        # Provenance columns are optional; fall back to an empty list.
        if 'used' in record:
            used = record['used']
        else:
            used = []
        if 'executed' in record:
            executed = record['executed']
        else:
            executed = []
        store_kwargs = {name: record[name] for name in STORE_FUNCTION_FIELDS if name in record}

        pending.append(_SyncUploadItem(entity, used, executed, store_kwargs))

    with _sync_executor(syn) as executor:
        _SyncUploader(syn, executor).upload(pending)
    return True
def upload(args, syn):
    """Store ``args.input`` as an annotated Synapse File and return its id.

    ``args.dataType`` selects the parent folder, the pipeline entity used as
    provenance and the ``dataType`` annotation.  When ``args.workflow`` is
    given, the pipeline entity is fetched without downloading, pointed at
    that local workflow file, stored, and the stored entity's id is used as
    provenance instead.

    :param args: namespace providing ``dataType``, ``workflow`` and ``input``
    :param syn: Synapse client used for ``get`` and ``store``
    :returns: Synapse id of the stored file entity
    :raises ValueError: if ``args.dataType`` is not one of the known types
    """
    # dataType -> (parent folder id, pipeline entity id, dataType annotation)
    destinations = {
        "rnaseq": ("syn6034916", "syn6126122", "RNASeq"),
        "dnaseq": ("syn6034751", "syn6126123", "TargDNASeq"),
        "snparray": ("syn6038475", "syn6126121", "SNParray"),
        "exparray": ("syn6038915", "syn6126120", "expression_microarray"),
        # NOTE(review): exome has no pipeline id, so combining it with
        # --workflow calls syn.get("") -- confirm this is intended.
        "exome": ("syn6115597", "", "exome"),
    }
    destination = destinations.get(args.dataType)
    if destination is None:
        raise ValueError("dataType needs to be rnaseq/dnaseq/snparray/exparray/exome")
    parentId, pipeline, dataType = destination

    if args.workflow is not None:
        workflow = syn.get(pipeline, downloadFile=False)
        workflow.path = args.workflow
        workflow.name = os.path.basename(args.workflow)
        workflow = syn.store(workflow)
        pipeline = workflow.id

    fileEnt = File(args.input, parent=parentId)
    # NOTE(review): `temp` and `sampleId` are not defined in this function;
    # presumably they are module-level values -- confirm against the caller.
    # next(iter(...)) takes the first row's annotations on both Python 2 and
    # Python 3; dict.values() cannot be indexed on Python 3.
    fileEnt.annotations = next(iter(temp.to_dict('index').values()))
    fileEnt.dataType = dataType
    fileEnt.sampleId = sampleId
    fileEnt = syn.store(fileEnt, used=pipeline)
    return fileEnt.id
def _manifest_upload(syn, df):
    """Store every manifest row as a Synapse File, one after another.

    For each row a ``File`` is built from ``path`` and ``parent`` plus any
    ``FILE_CONSTRUCTOR_FIELDS`` columns present.  Columns that are not
    constructor, store or required fields become annotations.  The ``used``
    and ``executed`` cells, when present, are passed through
    ``syn._convertProvenanceList`` before the row's ``STORE_FUNCTION_FIELDS``
    are forwarded to ``syn.store``.

    :param syn: Synapse client
    :param df: manifest DataFrame, one row per file
    :returns: always ``True``
    """
    for _, record in df.iterrows():
        # TODO: extract known constructor variables
        constructor_kwargs = {
            name: record[name] for name in FILE_CONSTRUCTOR_FIELDS if name in record
        }
        file_entity = File(record['path'], parent=record['parent'], **constructor_kwargs)

        reserved_columns = FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS + REQUIRED_FIELDS
        file_entity.annotations = dict(record.drop(reserved_columns, errors='ignore'))

        # Update the provenance lists again so that references to files that
        # have been uploaded in the meantime are replaced.
        for provenance_column in ('used', 'executed'):
            if provenance_column in record:
                record[provenance_column] = syn._convertProvenanceList(record[provenance_column])

        store_kwargs = {
            name: record[name] for name in STORE_FUNCTION_FIELDS if name in record
        }
        file_entity = syn.store(file_entity, **store_kwargs)
    return True
def create_entity(syn: Synapse, name: str, link: str, annotations: dict,
                  parent_id: str = "syn21897226") -> File:
    """Creates an annotated File entity and stores it in Synapse

    The entity is constructed with ``synapseStore=False``.

    Args:
        syn: Synapse connection used to store the entity
        name: Name of the File entity
        link: Path (or URL) the File entity points to
        annotations: Annotations to set on the entity
        parent_id: Synapse ID of the container the entity is created in.
            Defaults to the previously hard-coded "syn21897226".

    Returns:
        The value returned by ``syn.store`` for the new File entity
    """
    file_ent = File(name=name, path=link, parentId=parent_id,
                    synapseStore=False)
    file_ent.annotations = annotations
    return syn.store(file_ent)
def upload_fastq(fastq, bid, annot, theFilepath):
    """Upload one FASTQ file to Synapse under a templated entity name.

    The entity name comes from ``synapse_filename_template`` filled with
    ``bid``, ``annot['platform']`` and the barcode.  The file is stored in
    ``FASTQ_FOLDER_SYNAPSE`` through the module-level ``syn`` client, with
    ``annot`` as its annotations.

    :param fastq: FASTQ file name; the underscore-separated fields at
        positions 4, 5 and 6 (zero-based) are joined to form the barcode
    :param bid: identifier substituted into the name template
    :param annot: annotation dict; must contain ``'platform'``
    :param theFilepath: local path of the file to upload
    """
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print('Uploading {}'.format(fastq))
    barcode = '_'.join(fastq.split('_')[4:7])
    platform = annot['platform']
    filename = synapse_filename_template.format(bid=bid, platform=platform, barcode=barcode)
    fastq_synapse = File(theFilepath, name=filename, parent=FASTQ_FOLDER_SYNAPSE)
    # Deprecated. Will error out trying to upload_fastq
    # fastq_synapse.properties.fileNameOverride = filename
    fastq_synapse.annotations = annot
    print("Uploading")
    syn.store(fastq_synapse)
def upload_file(self, file_path, overwrite=False):
    """Create (or fetch) the Synapse File for a local file and its annotations.

    ``file_path['file_p']`` is the local path.  Its annotation file is
    located with ``_get_anno_p`` and read with ``load_json``.  The parent
    folder is obtained from ``get_or_create_folder`` and the entity from
    ``get_or_create_entity``.  With ``overwrite`` set, the local and remote
    md5 values are printed and the file is stored again if they differ.
    """
    local_path = file_path['file_p']
    anno_path = _get_anno_p(local_path)
    local_md5 = calc_content_md5(local_path)
    folder = self.get_or_create_folder(local_path.parent, self.parent_synid)
    entity = File(path=str(local_path), parent=folder)

    # Square brackets in annotation keys are swapped for placeholder tokens
    # (presumably because they are not accepted as keys -- confirm).
    sanitized = {}
    for key, value in load_json(anno_path).items():
        sanitized[key.replace('[', 'LFTB').replace(']', 'RGTB')] = value
    entity.annotations = sanitized

    remote = self.get_or_create_entity(entity, returnid=False)
    if not overwrite:
        return

    print(local_md5)
    print(remote.md5)
    if local_md5 != remote.md5:
        print('mismatch. overwriting.')
        self.syn.store(entity)
# Get parentFolder
def getFileFolder(fileName):
    """Return the folder key ("raw", "aligned", "analyzed" or "miRNA") for a file name.

    The checks run in order, so the first matching suffix group wins.
    Anything that matches no group falls through to "miRNA".
    """
    if fileName.endswith(('.fastq', '.trimmed.fq')):
        return "raw"
    if fileName.endswith(('.bam', '.bai', '.unmapped.fq')):
        return "aligned"
    if (fileName.endswith(('.readsLength.freq', '.txt', '.16_25nt.fq', '.miRanalyzer.zip'))
            or re.search(r'_vs_', fileName)):
        return "analyzed"
    return "miRNA"


f['fileFolder'] = f['fileName'].apply(getFileFolder)

# Get the folder synapse Ids
with open(args.folderInfo) as info:
    # safe_load instead of yaml.load: calling yaml.load without a Loader is
    # unsafe on arbitrary input and is rejected by PyYAML >= 6.  The file is
    # only indexed by folder key below, so plain YAML types are sufficient.
    folderIds = yaml.safe_load(info)

# Link files
for index, row in f.iterrows():
    entityFile = File(parentId=folderIds[row['fileFolder']], name=row['fileName'])
    entityFile.annotations = dict(assay=row['assay'],
                                  fileFormat=row['fileFormat'],
                                  cellLine=row['cellLine'])
    newEntity = storePSON(entityFile, row['filePath'],
                          contentSize=row['fileSize'], md5=row['md5'], syn=syn)
def _copyFile(syn, entity, destinationId, version=None, updateExisting=False, setProvenance="traceback",
              skipCopyAnnotations=False):
    """
    Copies most recent version of a file to a specified synapse ID.

    :param syn:                 A Synapse client
    :param entity:              A synapse ID of a File entity
    :param destinationId:       Synapse ID of a folder/project that the file wants to be copied to
    :param version:             Can specify version of a file.
                                Default to None
    :param updateExisting:      Can choose to update files that have the same name
                                Default to False
    :param setProvenance:       Has three values to set the provenance of the copied entity:
                                    traceback: Sets to the source entity
                                    existing: Sets to source entity's original provenance (if it exists)
                                    None: No provenance is set
    :param skipCopyAnnotations: Skips copying the annotations
                                Default is False

    :returns: The ``id`` of the stored copy
    :raises ValueError: if an entity with the same name already exists at the destination (and
                        updateExisting is False), if setProvenance is not a recognised value, or if
                        copying the file handle reports a failure code
    """
    ent = syn.get(entity, downloadFile=False, version=version, followLink=False)
    # CHECK: If File is in the same parent directory (throw an error) (Can choose to update files)
    if not updateExisting:
        existingEntity = syn.findEntityId(ent.name, parent=destinationId)
        if existingEntity is not None:
            raise ValueError(
                'An entity named "%s" already exists in this location. File could not be copied' % ent.name)
    profile = syn.getUserProfile()
    # get provenance earlier to prevent errors from being called in the end
    # If traceback, set activity to old entity
    if setProvenance == "traceback":
        act = Activity("Copied file", used=ent)
    # if existing, check if provenance exists
    elif setProvenance == "existing":
        try:
            act = syn.getProvenance(ent.id)
        except SynapseHTTPError as e:
            # A 404 only means the source has no provenance; anything else is a real error
            if e.response.status_code == 404:
                act = None
            else:
                raise
    elif setProvenance is None or setProvenance.lower() == 'none':
        act = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')

    # Grab entity bundle
    bundle = syn._getEntityBundle(ent.id, version=ent.versionNumber,
                                  requestedObjects={'includeEntity': True, 'includeFileHandles': True})
    fileHandle = synapseclient.core.utils.find_data_file_handle(bundle)
    createdBy = fileHandle['createdBy']
    # CHECK: If the user created the file, copy the file by using fileHandleId else copy the fileHandle
    if profile.ownerId == createdBy:
        newdataFileHandleId = ent.dataFileHandleId
    else:
        copiedFileHandle = copyFileHandles(syn, [fileHandle], ["FileEntity"], [bundle['entity']['id']],
                                           [fileHandle['contentType']], [fileHandle['fileName']])
        # Check if failurecodes exist
        copyResult = copiedFileHandle[0]
        if copyResult.get("failureCode") is not None:
            raise ValueError("%s dataFileHandleId: %s" % (copyResult["failureCode"],
                                                          copyResult['originalFileHandleId']))
        newdataFileHandleId = copyResult['newFileHandle']['id']

    new_ent = File(dataFileHandleId=newdataFileHandleId, name=ent.name, parentId=destinationId)
    # Set annotations here
    if not skipCopyAnnotations:
        new_ent.annotations = ent.annotations
    # Store provenance if act is not None
    if act is not None:
        new_ent = syn.store(new_ent, activity=act)
    else:
        new_ent = syn.store(new_ent)
    # Leave this return statement for test
    return new_ent['id']
# Assign each file its destination folder key, derived from its path.
f['fileFolder'] = f['filePath'].apply(getFileFolder)
# Missing cells become the string 'None' so they can be filtered out of the
# annotations below.
f.fillna('None', inplace=True)

# Get the folder synapse Ids
with open(args.folderInfo) as info:
    # safe_load instead of yaml.load: calling yaml.load without a Loader is
    # unsafe on arbitrary input and is rejected by PyYAML >= 6.  The file is
    # only indexed by folder key below, so plain YAML types are sufficient.
    folderIds = yaml.safe_load(info)

# Link files
for index, row in f.iterrows():
    entityFile = File(parentId=folderIds[row['fileFolder']], name=row['fileName'])
    annotations = dict(assay=row['assay'],
                       fileFormat=row['fileFormat'],
                       cellLine=row['cellLine'],
                       organ=row['organ'],
                       diagnosis=row['diagnosis'],
                       cellType=row['cellType'],
                       tumorType=row['tumorType'],
                       catalogNumber=row['catalogNumber'],
                       dataType="geneExpression",
                       consortium="PSON")
    # .items() works on Python 2 and 3 (iteritems() is Python 2 only).
    # Annotations whose value is the 'None' placeholder are dropped.
    entityFile.annotations = {k: v for k, v in annotations.items() if v != 'None'}
    newEntity = storePSON(entityFile, row['filePath'],
                          contentSize=row['fileSize'], md5=row['md5'], syn=syn)
def _copyFile(syn, entity, destinationId, version=None, updateExisting=False, setProvenance="traceback",
              skipCopyAnnotations=False):
    """
    Copies most recent version of a file to a specified synapse ID.

    :param syn:                 A Synapse client
    :param entity:              A synapse ID of a File entity
    :param destinationId:       Synapse ID of a folder/project that the file wants to be copied to
    :param version:             Can specify version of a file.
                                Default to None
    :param updateExisting:      Can choose to update files that have the same name
                                Default to False
    :param setProvenance:       Has three values to set the provenance of the copied entity:
                                    traceback: Sets to the source entity
                                    existing: Sets to source entity's original provenance (if it exists)
                                    None: No provenance is set
    :param skipCopyAnnotations: Skips copying the annotations
                                Default is False

    :returns: The ``id`` of the stored copy
    :raises ValueError: if an entity with the same name already exists at the destination (and
                        updateExisting is False), if setProvenance is not a recognised value, or if
                        copying the file handle reports a failure code
    """
    ent = syn.get(entity, downloadFile=False, version=version, followLink=False)
    # CHECK: If File is in the same parent directory (throw an error) (Can choose to update files)
    if not updateExisting:
        existingEntity = syn.findEntityId(ent.name, parent=destinationId)
        if existingEntity is not None:
            raise ValueError('An entity named "%s" already exists in this location. File could not be copied'
                             % ent.name)
    profile = syn.getUserProfile()
    # get provenance earlier to prevent errors from being called in the end
    # If traceback, set activity to old entity
    if setProvenance == "traceback":
        act = Activity("Copied file", used=ent)
    # if existing, check if provenance exists
    elif setProvenance == "existing":
        try:
            act = syn.getProvenance(ent.id)
        except SynapseHTTPError as e:
            # A 404 only means the source has no provenance; anything else is a real error
            if e.response.status_code == 404:
                act = None
            else:
                raise
    elif setProvenance is None or setProvenance.lower() == 'none':
        act = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')

    # Grab entity bundle
    bundle = syn._getEntityBundle(ent.id, version=ent.versionNumber, bitFlags=0x800 | 0x1)
    fileHandle = synapseclient.utils.find_data_file_handle(bundle)
    createdBy = fileHandle['createdBy']
    # CHECK: If the user created the file, copy the file by using fileHandleId else copy the fileHandle
    if profile.ownerId == createdBy:
        newdataFileHandleId = ent.dataFileHandleId
    else:
        copiedFileHandle = copyFileHandles(syn, [fileHandle], ["FileEntity"], [bundle['entity']['id']],
                                           [fileHandle['contentType']], [fileHandle['fileName']])
        # Check if failurecodes exist
        copyResult = copiedFileHandle['copyResults'][0]
        if copyResult.get("failureCode") is not None:
            raise ValueError("%s dataFileHandleId: %s" % (copyResult["failureCode"],
                                                          copyResult['originalFileHandleId']))
        newdataFileHandleId = copyResult['newFileHandle']['id']

    new_ent = File(dataFileHandleId=newdataFileHandleId, name=ent.name, parentId=destinationId)
    # Set annotations here
    if not skipCopyAnnotations:
        new_ent.annotations = ent.annotations
    # Store provenance if act is not None
    if act is not None:
        new_ent = syn.store(new_ent, activity=act)
    else:
        new_ent = syn.store(new_ent)
    # Leave this return statement for test
    return new_ent['id']
def _copyFile(syn, entity, destinationId, version=None, update=False, setProvenance="traceback"):
    """
    Copies most recent version of a file to a specified synapse ID.

    :param syn:             A Synapse client
    :param entity:          A synapse ID of a File entity
    :param destinationId:   Synapse ID of a folder/project that the file wants to be copied to
    :param version:         Can specify version of a file.
                            Default to None
    :param update:          Can choose to update files that have the same name
                            Default to False
    :param setProvenance:   Has three values to set the provenance of the copied entity:
                                traceback: Sets to the source entity
                                existing: Sets to source entity's original provenance (if it exists)
                                None: No provenance is set

    :returns: The ``id`` of the stored copy
    :raises ValueError: if an item with the same name already exists at the destination (and
                        update is False) or if setProvenance is not a recognised value
    """
    ent = syn.get(entity, downloadFile=False, version=version, followLink=False)
    # CHECK: If File is in the same parent directory (throw an error) (Can choose to update files)
    if not update:
        search = syn.query('select name from entity where parentId =="%s"' % destinationId)
        for i in search['results']:
            if i['entity.name'] == ent.name:
                raise ValueError('An item named "%s" already exists in this location. File could not be copied'
                                 % ent.name)
    profile = syn.getUserProfile()
    # get provenance earlier to prevent errors from being called in the end
    # If traceback, set activity to old entity
    if setProvenance == "traceback":
        act = Activity("Copied file", used=ent)
    # if existing, check if provenance exists
    elif setProvenance == "existing":
        try:
            act = syn.getProvenance(ent.id)
        except SynapseHTTPError as e:
            # Only a 404 (no provenance on the source) is expected here; other HTTP
            # errors are re-raised instead of being silently treated as "no provenance"
            if e.response.status_code != 404:
                raise
            act = None
    elif setProvenance is None or setProvenance.lower() == 'none':
        act = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')

    # Grab file handle createdBy annotation to see the user that created fileHandle
    fileHandleList = syn.restGET('/entity/%s/version/%s/filehandles' % (ent.id, ent.versionNumber))
    # NOTE: May not always be the first index (need to filter to make sure not PreviewFileHandle)
    # Loop through to check which dataFileHandles match and return createdBy
    # Look at convenience function
    for fileHandle in fileHandleList['list']:
        if fileHandle['id'] == ent.dataFileHandleId:
            createdBy = fileHandle['createdBy']
            break
    else:
        # No handle matched the entity's data file handle
        createdBy = None

    # CHECK: If the user created the file, copy the file by using fileHandleId else hard copy
    if profile.ownerId == createdBy:
        new_ent = File(name=ent.name, parentId=destinationId)
        new_ent.dataFileHandleId = ent.dataFileHandleId
    else:
        # CHECK: If the synapse entity is an external URL, change path and store
        # Without an external URL the file is downloaded, because an entity that
        # has never been downloaded before has no ent.path
        store = ent.externalURL is None
        ent = syn.get(entity, downloadFile=store, version=version)
        path = ent.path if store else ent.externalURL
        new_ent = File(path, name=ent.name, parentId=destinationId, synapseStore=store)
        # Set annotations here
        new_ent.annotations = ent.annotations
    # Store provenance if act is not None
    if act is not None:
        new_ent = syn.store(new_ent, activity=act)
    else:
        new_ent = syn.store(new_ent)
    # Leave this return statement for test
    return new_ent['id']
if fileName.endswith(('.readsLength.freq', '.txt', '.16_25nt.fq', '.miRanalyzer.zip')) or re.search(r'_vs_', fileName): return "analyzed" return "miRNA" f['fileFolder'] = f['fileName'].apply(lambda x: getFileFolder(x)) # Get the folder synapse Ids with open(args.folderInfo) as info: folderIds = yaml.load(info) # Link files for index, row in f.iterrows(): entityFile = File(parentId=folderIds[row['fileFolder']], name=row['fileName']) entityFile.annotations = dict(assay=row['assay'], fileFormat=row['fileFormat'], cellLine=row['cellLine'], catalogNumber=row['catalogNumber'], organ=row['organ'], diagnosis=row['diagnosis'], cellType=row['cellType'], tumorType=row['tumorType'], consortium="PSON") newEntity = storePSON(entityFile, row['filePath'], contentSize=row['fileSize'], md5=row['md5'], syn=syn)