Beispiel #1
0
def _manifest_upload(syn, df):
    """Upload every file listed in a manifest data frame.

    One ``_SyncUploadItem`` is built per row of ``df`` and the whole batch is
    handed to a ``_SyncUploader`` running on the executor obtained from
    ``_sync_executor(syn)``.

    :param syn: client object passed to ``_sync_executor`` and ``_SyncUploader``
    :param df:  manifest rows; each row must provide ``path`` and ``parent``

    :returns: ``True`` after ``uploader.upload`` has returned
    """

    def build_item(manifest_row):
        # Columns that are File constructor arguments are forwarded as-is.
        constructor_kwargs = {
            field: manifest_row[field]
            for field in FILE_CONSTRUCTOR_FIELDS
            if field in manifest_row
        }
        entity = File(path=manifest_row['path'],
                      parent=manifest_row['parent'],
                      **constructor_kwargs)

        # Whatever is not a known control column becomes an annotation.
        control_columns = (FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS +
                           REQUIRED_FIELDS + PROVENANCE_FIELDS)
        entity.annotations = dict(
            manifest_row.drop(control_columns, errors='ignore'))

        # Provenance columns are optional; default to "nothing used/executed".
        used = manifest_row['used'] if 'used' in manifest_row else []
        executed = manifest_row['executed'] if 'executed' in manifest_row else []
        store_kwargs = {
            field: manifest_row[field]
            for field in STORE_FUNCTION_FIELDS
            if field in manifest_row
        }
        return _SyncUploadItem(entity, used, executed, store_kwargs)

    upload_items = [build_item(row) for _, row in df.iterrows()]

    with _sync_executor(syn) as executor:
        _SyncUploader(syn, executor).upload(upload_items)

    return True
Beispiel #2
0
def upload(args, syn):
    """Store ``args.input`` as an annotated Synapse File and return its id.

    The destination folder, the pipeline entity used for provenance and the
    ``dataType`` annotation are selected from ``args.dataType``.  When
    ``args.workflow`` is given, the pipeline entity is first updated with that
    file and the id of the stored workflow is used as provenance instead.

    :param args: object with ``dataType``, ``workflow`` and ``input`` attributes
    :param syn:  client object providing ``get`` and ``store``

    :returns: the ``id`` of the stored file entity

    :raises ValueError: if ``args.dataType`` is not one of the known types
    """
    # dataType option -> (parent folder id, pipeline entity id, dataType annotation)
    destinations = {
        "rnaseq": ("syn6034916", "syn6126122", "RNASeq"),
        "dnaseq": ("syn6034751", "syn6126123", "TargDNASeq"),
        "snparray": ("syn6038475", "syn6126121", "SNParray"),
        "exparray": ("syn6038915", "syn6126120", "expression_microarray"),
        # NOTE(review): exome has no pipeline entity (empty id) -- confirm that
        # passing --workflow together with exome is never expected to work.
        "exome": ("syn6115597", "", "exome"),
    }
    destination = destinations.get(args.dataType)
    if destination is None:
        raise ValueError("dataType needs to be rnaseq/dnaseq/snparray/exparray/exome")
    parentId, pipeline, dataType = destination

    if args.workflow is not None:
        # Push the supplied workflow file onto the pipeline entity and use the
        # stored entity's id for provenance.
        workflow = syn.get(pipeline, downloadFile=False)
        workflow.path = args.workflow
        workflow.name = os.path.basename(args.workflow)
        workflow = syn.store(workflow)
        pipeline = workflow.id

    fileEnt = File(args.input, parent=parentId)
    # NOTE(review): `temp` and `sampleId` are not defined in this function;
    # presumably module-level values set by the caller -- confirm.
    # list(...) is required because dict views cannot be indexed on Python 3.
    fileEnt.annotations = list(temp.to_dict('index').values())[0]
    fileEnt.dataType = dataType
    fileEnt.sampleId = sampleId
    fileEnt = syn.store(fileEnt, used=pipeline)
    return fileEnt.id
Beispiel #3
0
def _manifest_upload(syn, df):
    """Store the files of a manifest data frame one row at a time.

    :param syn: client object providing ``store`` and ``_convertProvenanceList``
    :param df:  manifest rows; each row must provide ``path`` and ``parent``

    :returns: ``True`` after every row has been stored
    """
    for _, manifest_row in df.iterrows():
        constructor_args = {
            field: manifest_row[field]
            for field in FILE_CONSTRUCTOR_FIELDS
            if field in manifest_row
        }
        file_entity = File(manifest_row['path'],
                           parent=manifest_row['parent'],
                           **constructor_args)
        # Columns that are not known control fields are kept as annotations.
        file_entity.annotations = dict(
            manifest_row.drop(
                FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS + REQUIRED_FIELDS,
                errors='ignore'))

        # Re-resolve provenance right before storing so that references to
        # files uploaded by earlier rows are replaced.
        for provenance_column in ('used', 'executed'):
            if provenance_column in manifest_row:
                manifest_row[provenance_column] = syn._convertProvenanceList(
                    manifest_row[provenance_column])

        store_args = {
            field: manifest_row[field]
            for field in STORE_FUNCTION_FIELDS
            if field in manifest_row
        }
        syn.store(file_entity, **store_args)
    return True
def create_entity(syn: Synapse, name: str, link: str,
                  annotations: dict,
                  parent_id: str = "syn21897226") -> File:
    """Store an annotated File entity that points at a link.

    The entity is created with ``synapseStore=False`` and ``link`` as its
    path.

    Args:
        syn: Client object whose ``store`` method is used
        name: Name of the File entity
        link: Value passed as the entity's ``path``
        annotations: Annotations to set on the entity
        parent_id: Synapse id of the container to store the entity in;
                   defaults to the previously hard-coded "syn21897226"

    Returns:
        Whatever ``syn.store`` returns for the new File entity
    """
    file_ent = File(name=name,
                    path=link,
                    parentId=parent_id,
                    synapseStore=False)
    file_ent.annotations = annotations
    return syn.store(file_ent)
def upload_fastq(fastq, bid, annot, theFilepath):
    """Store one FASTQ file in the Synapse FASTQ folder under a templated name.

    The Synapse name is built from ``synapse_filename_template`` using ``bid``,
    ``annot['platform']`` and a barcode taken from ``fastq``.

    :param fastq:       FASTQ file name; its 5th to 7th ``_``-separated fields
                        form the barcode
    :param bid:         value substituted for ``bid`` in the name template
    :param annot:       annotations to set on the entity; must contain
                        ``platform``
    :param theFilepath: path handed to ``File`` as the file to upload
    """
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a SyntaxError on Python 3.
    print('Uploading {}'.format(fastq))
    barcode = '_'.join(fastq.split('_')[4:7])
    platform = annot['platform']
    filename = synapse_filename_template.format(bid=bid,
                                                platform=platform,
                                                barcode=barcode)
    fastq_synapse = File(theFilepath,
                         name=filename,
                         parent=FASTQ_FOLDER_SYNAPSE)

    # The entity name is set via `name=` above; the older approach of setting
    # properties.fileNameOverride is deprecated and errors out on upload.

    fastq_synapse.annotations = annot
    print("Uploading")
    syn.store(fastq_synapse)
Beispiel #6
0
 def upload_file(self, file_path, overwrite=False):
     """Create (or look up) the Synapse entity for a local file.

     The file's annotations are loaded from the JSON file located by
     ``_get_anno_p``; square brackets in annotation keys are replaced by the
     tokens ``LFTB`` / ``RGTB``.  With ``overwrite`` set, the file is stored
     again when its local MD5 differs from the MD5 of the returned entity.

     :param file_path: mapping whose ``'file_p'`` entry is the local file
                       (presumably a ``pathlib.Path`` -- confirm)
     :param overwrite: re-store the file on MD5 mismatch
     """
     local_path = file_path['file_p']
     annotation_path = _get_anno_p(local_path)
     local_md5 = calc_content_md5(local_path)
     parent_folder = self.get_or_create_folder(local_path.parent,
                                               self.parent_synid)
     entity = File(path=str(local_path), parent=parent_folder)

     raw_annotations = load_json(annotation_path)
     cleaned_annotations = {}
     for key, value in raw_annotations.items():
         cleaned_key = key.replace('[', 'LFTB').replace(']', 'RGTB')
         cleaned_annotations[cleaned_key] = value
     entity.annotations = cleaned_annotations

     remote_entity = self.get_or_create_entity(entity, returnid=False)
     if not overwrite:
         return

     print(local_md5)
     print(remote_entity.md5)
     if local_md5 == remote_entity.md5:
         return
     print('mismatch. overwriting.')
     self.syn.store(entity)
Beispiel #7
0
# Get parentFolder
def getFileFolder(fileName):
    """Return the folder category ("raw", "aligned", "analyzed" or "miRNA")
    for a file name.

    Suffix rules are tried in order and the first match wins; a name that
    contains ``_vs_`` is "analyzed" if no "raw"/"aligned" suffix matched, and
    anything else falls back to "miRNA".
    """
    suffix_rules = (
        (('.fastq', '.trimmed.fq'), "raw"),
        (('.bam', '.bai', '.unmapped.fq'), "aligned"),
        (('.readsLength.freq', '.txt', '.16_25nt.fq', '.miRanalyzer.zip'),
         "analyzed"),
    )
    for suffixes, folder in suffix_rules:
        if fileName.endswith(suffixes):
            return folder
    if re.search(r'_vs_', fileName):
        return "analyzed"
    return "miRNA"


# Classify every file by name into its destination folder category
f['fileFolder'] = f['fileName'].apply(getFileFolder)

# Get the folder synapse Ids
with open(args.folderInfo) as info:
    # safe_load: yaml.load() without an explicit Loader is deprecated and is
    # rejected by PyYAML >= 6.
    # NOTE(review): assumes the folder-info file is plain YAML (no
    # Python-specific tags) -- confirm.
    folderIds = yaml.safe_load(info)

# Link files
for index, row in f.iterrows():
    # One File entity per row, placed in the folder chosen above
    entityFile = File(parentId=folderIds[row['fileFolder']],
                      name=row['fileName'])
    entityFile.annotations = dict(assay=row['assay'],
                                  fileFormat=row['fileFormat'],
                                  cellLine=row['cellLine'])
    newEntity = storePSON(entityFile,
                          row['filePath'],
                          contentSize=row['fileSize'],
                          md5=row['md5'],
                          syn=syn)
Beispiel #8
0
def _copyFile(syn,
              entity,
              destinationId,
              version=None,
              updateExisting=False,
              setProvenance="traceback",
              skipCopyAnnotations=False):
    """
    Copy a File entity (latest version unless ``version`` is given) into
    another container.

    :param entity:              A synapse ID of a File entity

    :param destinationId:       Synapse ID of the folder/project to copy the file into

    :param version:             Version of the file to copy.
                                Default to None

    :param updateExisting:      When False, refuse to copy if an entity with the
                                same name already exists in the destination.
                                Default to False

    :param setProvenance:       How to set the provenance of the copied entity:
                                    traceback: Sets to the source entity
                                    existing: Sets to source entity's original provenance (if it exists)
                                    None: No provenance is set
    :param skipCopyAnnotations: Skips copying the annotations
                                Default is False

    :returns: the ``id`` of the stored copy

    :raises ValueError: on a name clash, an unknown ``setProvenance`` value or
                        a failed file handle copy
    """
    source = syn.get(entity,
                     downloadFile=False,
                     version=version,
                     followLink=False)

    # Name clash check (skipped entirely when updating is allowed)
    if not updateExisting and syn.findEntityId(
            source.name, parent=destinationId) is not None:
        raise ValueError(
            'An entity named "%s" already exists in this location. File could not be copied'
            % source.name)

    profile = syn.getUserProfile()

    # Resolve provenance up front so that a bad option fails before any copy
    activity = None
    if setProvenance == "traceback":
        activity = Activity("Copied file", used=source)
    elif setProvenance == "existing":
        try:
            activity = syn.getProvenance(source.id)
        except SynapseHTTPError as err:
            # 404 means the source has no provenance; anything else is fatal
            if err.response.status_code != 404:
                raise err
    elif not (setProvenance is None or setProvenance.lower() == 'none'):
        raise ValueError(
            'setProvenance must be one of None, existing, or traceback')

    # The bundle carries the entity together with its file handles
    wanted = {'includeEntity': True, 'includeFileHandles': True}
    bundle = syn._getEntityBundle(source.id,
                                  version=source.versionNumber,
                                  requestedObjects=wanted)
    handle = synapseclient.core.utils.find_data_file_handle(bundle)
    handle_owner = handle['createdBy']

    if profile.ownerId == handle_owner:
        # The caller created the file handle: reuse it directly
        target_handle_id = source.dataFileHandleId
    else:
        # Someone else's handle: ask for a copy of the handle
        copy_results = copyFileHandles(syn, [handle], ["FileEntity"],
                                       [bundle['entity']['id']],
                                       [handle['contentType']],
                                       [handle['fileName']])
        outcome = copy_results[0]
        if outcome.get("failureCode") is not None:
            raise ValueError("%s dataFileHandleId: %s" %
                             (outcome["failureCode"],
                              outcome['originalFileHandleId']))
        target_handle_id = outcome['newFileHandle']['id']

    copied = File(dataFileHandleId=target_handle_id,
                  name=source.name,
                  parentId=destinationId)
    if not skipCopyAnnotations:
        copied.annotations = source.annotations

    # Only pass `activity` when there is provenance to record
    store_kwargs = {} if activity is None else {'activity': activity}
    copied = syn.store(copied, **store_kwargs)
    # Leave this return statement for test
    return copied['id']
Beispiel #9
0
# Classify every file by path into its destination folder category
f['fileFolder'] = f['filePath'].apply(getFileFolder)

# Missing values become the string 'None' so they can be filtered out below
f.fillna('None', inplace=True)

# Get the folder synapse Ids
with open(args.folderInfo) as info:
    # safe_load: yaml.load() without an explicit Loader is deprecated and is
    # rejected by PyYAML >= 6.
    # NOTE(review): assumes the folder-info file is plain YAML (no
    # Python-specific tags) -- confirm.
    folderIds = yaml.safe_load(info)

# Link files
for index, row in f.iterrows():
    entityFile = File(parentId=folderIds[row['fileFolder']],
                      name=row['fileName'])
    annotations = dict(assay=row['assay'],
                       fileFormat=row['fileFormat'],
                       cellLine=row['cellLine'],
                       organ=row['organ'],
                       diagnosis=row['diagnosis'],
                       cellType=row['cellType'],
                       tumorType=row['tumorType'],
                       catalogNumber=row['catalogNumber'],
                       dataType="geneExpression",
                       consortium="PSON")
    # Drop annotations whose value was missing in the input.
    # .items() works on Python 2 and 3; .iteritems() exists on Python 2 only.
    entityFile.annotations = {
        k: v for k, v in annotations.items() if v != 'None'
    }
    newEntity = storePSON(entityFile,
                          row['filePath'],
                          contentSize=row['fileSize'],
                          md5=row['md5'],
                          syn=syn)
def _copyFile(syn, entity, destinationId, version=None, updateExisting=False, setProvenance="traceback",
              skipCopyAnnotations=False):
    """
    Copy a File entity (latest version unless ``version`` is given) into
    another container.

    :param entity:              A synapse ID of a File entity

    :param destinationId:       Synapse ID of the folder/project to copy the file into

    :param version:             Version of the file to copy.
                                Default to None

    :param updateExisting:      When False, refuse to copy if an entity with the
                                same name already exists in the destination.
                                Default to False

    :param setProvenance:       How to set the provenance of the copied entity:
                                    traceback: Sets to the source entity
                                    existing: Sets to source entity's original provenance (if it exists)
                                    None: No provenance is set
    :param skipCopyAnnotations: Skips copying the annotations
                                Default is False

    :returns: the ``id`` of the stored copy

    :raises ValueError: on a name clash, an unknown ``setProvenance`` value or
                        a failed file handle copy
    """
    original = syn.get(entity, downloadFile=False, version=version, followLink=False)

    # Unless updating is allowed, a same-named entity in the destination is an error
    if not updateExisting:
        clash = syn.findEntityId(original.name, parent=destinationId)
        if clash is not None:
            raise ValueError('An entity named "%s" already exists in this location. File could not be copied'
                             % original.name)

    user_profile = syn.getUserProfile()

    # Provenance is settled before copying so option errors surface early
    no_provenance = setProvenance is None or (
        setProvenance not in ("traceback", "existing") and setProvenance.lower() == 'none')
    if setProvenance == "traceback":
        provenance = Activity("Copied file", used=original)
    elif setProvenance == "existing":
        try:
            provenance = syn.getProvenance(original.id)
        except SynapseHTTPError as http_error:
            # A 404 means the source has no provenance; other errors propagate
            if http_error.response.status_code != 404:
                raise http_error
            provenance = None
    elif no_provenance:
        provenance = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')

    # Entity bundle requested with bit flags 0x800 | 0x1
    entity_bundle = syn._getEntityBundle(original.id, version=original.versionNumber, bitFlags=0x800 | 0x1)
    data_handle = synapseclient.utils.find_data_file_handle(entity_bundle)
    handle_creator = data_handle['createdBy']

    if user_profile.ownerId != handle_creator:
        # Not the caller's file handle: request a copy of it
        copy_response = copyFileHandles(syn, [data_handle], ["FileEntity"], [entity_bundle['entity']['id']],
                                        [data_handle['contentType']], [data_handle['fileName']])
        first_result = copy_response['copyResults'][0]
        if first_result.get("failureCode") is not None:
            raise ValueError("%s dataFileHandleId: %s" % (first_result["failureCode"],
                                                          first_result['originalFileHandleId']))
        handle_id_for_copy = first_result['newFileHandle']['id']
    else:
        # The caller created the handle, so it can be reused as-is
        handle_id_for_copy = original.dataFileHandleId

    duplicate = File(dataFileHandleId=handle_id_for_copy, name=original.name, parentId=destinationId)
    if not skipCopyAnnotations:
        duplicate.annotations = original.annotations

    # Attach provenance only when there is some
    if provenance is None:
        duplicate = syn.store(duplicate)
    else:
        duplicate = syn.store(duplicate, activity=provenance)
    # Leave this return statement for test
    return duplicate['id']
Beispiel #11
0
def _copyFile(syn, entity, destinationId, version=None, update=False, setProvenance="traceback"):
    """
    Copy a File entity (latest version unless ``version`` is given) into
    another container.

    :param entity:          A synapse ID of a File entity

    :param destinationId:   Synapse ID of the folder/project to copy the file into

    :param version:         Version of the file to copy.
                            Default to None

    :param update:          When False, refuse to copy if an entity with the
                            same name already exists in the destination.
                            Default to False

    :param setProvenance:   How to set the provenance of the copied entity:
                                traceback: Sets to the source entity
                                existing: Sets to source entity's original provenance (if it exists)
                                None: No provenance is set

    :returns: the ``id`` of the stored copy

    :raises ValueError: on a name clash or an unknown ``setProvenance`` value
    :raises SynapseHTTPError: if looking up existing provenance fails with
                              anything other than a 404
    """
    ent = syn.get(entity, downloadFile=False, version=version, followLink=False)
    # CHECK: a same-named item in the destination is an error unless updating is allowed
    if not update:
        search = syn.query('select name from entity where parentId =="%s"' % destinationId)
        if any(hit['entity.name'] == ent.name for hit in search['results']):
            raise ValueError('An item named "%s" already exists in this location. File could not be copied'
                             % ent.name)
    profile = syn.getUserProfile()
    # Resolve provenance early so option errors surface before anything is copied
    if setProvenance == "traceback":
        act = Activity("Copied file", used=ent)
    elif setProvenance == "existing":
        try:
            act = syn.getProvenance(ent.id)
        except SynapseHTTPError as e:
            # Only a 404 (no provenance recorded) means "copy without
            # provenance"; other HTTP failures must not be silently dropped.
            if e.response.status_code == 404:
                act = None
            else:
                raise
    elif setProvenance is None or setProvenance.lower() == 'none':
        act = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')
    # Find who created the data file handle. The handle list may hold other
    # handles too (e.g. a preview), so match on the entity's dataFileHandleId.
    fileHandleList = syn.restGET('/entity/%s/version/%s/filehandles' % (ent.id, ent.versionNumber))
    createdBy = next((fileHandle['createdBy']
                      for fileHandle in fileHandleList['list']
                      if fileHandle['id'] == ent.dataFileHandleId),
                     None)
    # CHECK: if the user created the file handle, reuse it; otherwise hard copy
    if profile.ownerId == createdBy:
        new_ent = File(name=ent.name, parentId=destinationId)
        new_ent.dataFileHandleId = ent.dataFileHandleId
    else:
        # Without an external URL the file has to be downloaded and re-uploaded
        # (ent.path is unset until the file has been downloaded); with one, the
        # copy just points at the same URL and nothing is uploaded.
        store = ent.externalURL is None
        ent = syn.get(entity, downloadFile=store, version=version)
        path = ent.path if store else ent.externalURL

        new_ent = File(path, name=ent.name, parentId=destinationId, synapseStore=store)
    # Set annotations here
    new_ent.annotations = ent.annotations
    # Store provenance if act is not None
    if act is not None:
        new_ent = syn.store(new_ent, activity=act)
    else:
        new_ent = syn.store(new_ent)
    # Leave this return statement for test
    return new_ent['id']
Beispiel #12
0
    if fileName.endswith(('.readsLength.freq', '.txt', '.16_25nt.fq',
                          '.miRanalyzer.zip')) or re.search(r'_vs_', fileName):
        return "analyzed"
    return "miRNA"


# Classify every file by name into its destination folder category
f['fileFolder'] = f['fileName'].apply(getFileFolder)

# Get the folder synapse Ids
with open(args.folderInfo) as info:
    # safe_load: yaml.load() without an explicit Loader is deprecated and is
    # rejected by PyYAML >= 6.
    # NOTE(review): assumes the folder-info file is plain YAML (no
    # Python-specific tags) -- confirm.
    folderIds = yaml.safe_load(info)

# Link files
for index, row in f.iterrows():
    # One File entity per row, placed in the folder chosen above
    entityFile = File(parentId=folderIds[row['fileFolder']],
                      name=row['fileName'])
    entityFile.annotations = dict(assay=row['assay'],
                                  fileFormat=row['fileFormat'],
                                  cellLine=row['cellLine'],
                                  catalogNumber=row['catalogNumber'],
                                  organ=row['organ'],
                                  diagnosis=row['diagnosis'],
                                  cellType=row['cellType'],
                                  tumorType=row['tumorType'],
                                  consortium="PSON")
    newEntity = storePSON(entityFile,
                          row['filePath'],
                          contentSize=row['fileSize'],
                          md5=row['md5'],
                          syn=syn)