コード例 #1
0
def loadOneSample(a):
    """Upload the data file described by one JSON annotation file, if needed.

    The data file is expected at the annotation file's own path with the
    trailing ``.json`` removed.  Processing goes through these steps:

        1) Find the parent Folder where to store the file (or make
           directories) through ``getParentFolder``.
        2) Fetch the md5 of any existing entity with the same name in that
           folder and compare it with ``meta['annotations']['md5']``.
        3) If the file is new or the md5 differs, upload it.  The upload only
           happens when ``args.push`` is set; otherwise the pending upload is
           just logged.

    Relies on the module-level ``args`` and ``syn`` objects.

    Args:
        a: Path to the JSON annotation file.

    Returns:
        None.  Returns early when the data file is empty or when
        ``args.acronym`` is set and differs from the file's acronym.
    """
    logging.debug("Loading:%s", a)
    with open(a) as handle:
        meta = json.load(handle)

    # The dot is escaped so that only a literal ".json" suffix is removed;
    # an unescaped dot would also strip e.g. "xjson".
    dpath = re.sub(r'\.json$', '', a)

    # Skip the rest if the data file is empty ...
    if os.stat(dpath).st_size == 0:
        return
    # ... or if an acronym filter is active and this file does not match it.
    if (args.acronym is not None
            and args.acronym != meta['annotations']['acronym']):
        return

    parentId = getParentFolder(syn, args.project, meta)

    # Determine if we are updating an existing file and, if so, whether the
    # md5 recorded in the annotations differs from the stored one.
    query = "select id from entity where parentId=='%s' and name=='%s'" % (
        parentId, meta['name'])
    res = list(syn.chunkedQuery(query))
    if res:
        tmp_ent = syn.get(res[0]['entity.id'], downloadFile=False)
        upload = (tmp_ent.md5 != meta['annotations']['md5'])
        logging.debug("\tFound: %s and upload (MD5 %s match)",
                      tmp_ent.id, 'DOESN\'T' if upload else 'does')
    else:
        logging.debug("\tNot found:%s", meta['name'])
        upload = True

    if not upload:
        return
    if not args.push:
        # Dry run: report what would be uploaded without touching Synapse.
        logging.info("\tWILL UPLOAD: %s", meta['name'])
        return

    # Prepare the entity for upload
    entity = File(dpath,
                  name=meta['name'],
                  parentId=parentId,
                  annotations=meta['annotations'])
    prov = None
    if 'provenance' in meta:
        # Fix labels for urls: a used item with a url but no name gets the
        # url as its name.
        for u in meta['provenance']['used']:
            if 'name' not in u and 'url' in u:
                u['name'] = u['url']
        prov = Activity(data=meta['provenance'])
        prov.executed('https://github.com/Sage-Bionetworks/tcgaImport')

    logging.debug('\tUploading:%s', entity.name)
    entity = syn.store(entity, activity=prov)
    logging.debug('\tCreated/Updated: **** %s ****', entity.id)
コード例 #2
0
ファイル: seq_loading.py プロジェクト: kdaily/synapse-seq
def add_workflow_step_to_synapse(inFilePath,
                                 stepDict,
                                 step='1',
                                 software=None,
                                 parentid=None,
                                 syn=None,
                                 stepIDs=None,
                                 inFilename=None):
    '''Uploads a file with provenance and annotations to Synapse.

    Args:
        inFilePath: Path of the file to upload.
        stepDict: Description of the workflow step.  Keys read here:
            'actName', 'description', 'executed', 'fileDescription' and
            'store' (required); 'softwareName' (required when ``software``
            is not given); 'used', 'depends' and 'annotations' (optional).
            'used' is a comma-separated string; 'executed' is a
            semicolon-separated string of ``target[,version]`` items.
        step: Unused by this function; kept for interface compatibility.
        software: Software name; defaults to ``stepDict['softwareName']``.
            Currently not used after being resolved.
        parentid: Passed as ``parentId`` of the new File.
        syn: Synapse client used to store the file; must be provided.
        stepIDs: Mapping indexed by ``stepDict['depends']`` whose value is
            added to the used list; needed only when 'depends' is present.
        inFilename: Entity name; defaults to the basename of ``inFilePath``.

    Returns:
        The id of the stored entity.
    '''
    if not inFilename:
        inFilename = os.path.basename(inFilePath.strip())
    if not software:
        # Lookup retained so a missing 'softwareName' still fails loudly.
        software = stepDict['softwareName']

    # Collect everything the step used.  Tokens are stripped and empty ones
    # dropped so "syn1, syn2" or a trailing comma do not produce bad targets.
    usedList = []
    if 'used' in stepDict:
        for token in stepDict['used'].split(','):
            token = token.strip()
            if token:
                usedList.append(token)
    if 'depends' in stepDict:
        usedList.append(stepIDs[stepDict['depends']])

    act = Activity(name=stepDict['actName'],
                   description=stepDict['description'])
    if usedList:
        act.used(usedList)

    for item in stepDict['executed'].split(';'):
        splitItem = [part.strip() for part in item.split(',')]
        target = splitItem[0]
        if not target:
            # Skip empty entries such as the one after a trailing ';'.
            continue
        # Version defaults to 1 when the item carries no explicit version.
        version = splitItem[1] if len(splitItem) > 1 else 1
        if target.startswith('http'):
            act.executed(url=target, name=os.path.basename(target))
        else:
            act.executed(target=target, targetVersion=version)

    step_file = File(path=inFilePath,
                     name=inFilename,
                     description=stepDict['fileDescription'],
                     parentId=parentid,
                     synapseStore=str2bool(stepDict['store']))
    step_file = syn.store(step_file, activity=act, forceVersion=False)
    if 'annotations' in stepDict:
        syn.setAnnotations(step_file, annotations=stepDict['annotations'])
    # Parenthesised single-argument form is valid on Python 2 and Python 3.
    print('new entity id %s' % step_file.id)
    return step_file.id
コード例 #3
0
def loadOneSample(a):
    """Goes through a single json annotation file a and:
        1) Finds the parent Folder where to store the file (or makes directories)
        2) Fetches the md5 of any existing file and compares
        3) If new or different md5 upload file.

    The data file is taken to be the annotation path with its trailing
    ``.json`` removed.  Uses the module-level ``args`` and ``syn`` objects;
    the actual upload is performed only when ``args.push`` is set, otherwise
    the pending upload is logged.

    Args:
        a: Path to the JSON annotation file.

    Returns:
        None.  Returns early when the data file is empty or when
        ``args.acronym`` is set and does not match the file's acronym.
    """
    logging.debug("Loading:%s", a)
    with open(a) as handle:
        meta = json.load(handle)

    # Escape the dot: the unescaped pattern would also strip suffixes such
    # as "xjson", not just a literal ".json".
    dpath = re.sub(r'\.json$', '', a)

    # Skip the rest if the data file is empty or we are not doing the
    # current acronym.
    if os.stat(dpath).st_size == 0:
        return
    wanted_acronym = args.acronym
    if (wanted_acronym is not None
            and wanted_acronym != meta['annotations']['acronym']):
        return

    parentId = getParentFolder(syn, args.project, meta)

    # Determine if we are updating an existing file and if we should update
    # based on md5.
    query = "select id from entity where parentId=='%s' and name=='%s'" % (
        parentId, meta['name'])
    res = list(syn.chunkedQuery(query))
    if res:
        tmp_ent = syn.get(res[0]['entity.id'], downloadFile=False)
        upload = (tmp_ent.md5 != meta['annotations']['md5'])
        logging.debug("\tFound: %s and upload (MD5 %s match)",
                      tmp_ent.id, 'DOESN\'T' if upload else 'does')
    else:
        logging.debug("\tNot found:%s", meta['name'])
        upload = True

    if not upload:
        return
    if not args.push:
        # Dry run: only announce the upload.
        logging.info("\tWILL UPLOAD: %s", meta['name'])
        return

    # Prepare the entity for upload
    entity = File(dpath, name=meta['name'], parentId=parentId,
                  annotations=meta['annotations'])
    prov = None
    if 'provenance' in meta:
        # Fix labels for urls: reuse the url as the name when none is given.
        for u in meta['provenance']['used']:
            if 'name' not in u and 'url' in u:
                u['name'] = u['url']
        prov = Activity(data=meta['provenance'])
        prov.executed('https://github.com/Sage-Bionetworks/tcgaImport')

    logging.debug('\tUploading:%s', entity.name)
    entity = syn.store(entity, activity=prov)
    logging.debug('\tCreated/Updated: **** %s ****', entity.id)
コード例 #4
0
def test_activity_used_execute_methods():
    """Test activity creation and the used and executed methods.

    Builds an Activity with one used entity (given as a dict) and one
    executed entity (given as an id plus version), then checks that both
    entries are recorded with the expected version and wasExecuted flag.
    """
    a = Activity(name='Fuzz', description='hipster beard dataset')
    a.used({
        'id': 'syn101',
        'versionNumber': 42,
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity'
    })
    a.executed('syn102', targetVersion=1)
    usedEntities = a['used']
    # One entry from used() and one from executed().  This was previously a
    # bare tuple expression that asserted nothing.
    assert len(usedEntities) == 2

    assert a['name'] == 'Fuzz'
    assert a['description'] == 'hipster beard dataset'

    used_syn101 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn101')
    assert used_syn101['reference']['targetVersionNumber'] == 42
    assert not used_syn101['wasExecuted']

    used_syn102 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn102')
    assert used_syn102['reference']['targetVersionNumber'] == 1
    assert used_syn102['wasExecuted']
コード例 #5
0
import synapseclient
from synapseclient import File, Activity

# Create a Synapse client and log in.
syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations applied to the uploaded count matrix after it is stored.
annotDict = {
    'fileType': 'count',
    'normalized': 'no',
    'summaryLevel': 'gene',
}

# Provenance: which entities were used and which one was executed.
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used([
    'syn2290932',  # BAM
    'syn2215531',  # GTF
])
act.executed('syn2243147')  # htseq

# Describe the local file, store it with its provenance, then annotate it.
counts_path = '/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.DataFreeze.CountMatrix_V7.ensemble.Clean.txt'
counts_description = (
    'Gene counts for all BAMs summarized using Ensembl gene models. '
    'QC counts (e.g. \"ambiguous\") from HTSeq are not included.'
)
counts = syn.store(
    File(path=counts_path,
         name='PFC_CountMatrix_ensembl.txt',
         description=counts_description,
         parentId='syn2290933',
         synapseStore=True),
    activity=act)
syn.setAnnotations(counts, annotations=annotDict)
コード例 #6
0
def test_activity_used_url():
    """Check that URL resources given to an Activity are recorded correctly.

    URLs are supplied as a plain string, as dicts (with and without an
    explicit wasExecuted flag), through the constructor's used/executed
    lists, and through the used()/executed() methods.  Each one is then
    looked up again and its url and wasExecuted flag are verified.
    """
    u1 = 'http://xkcd.com'
    u2 = {'name': 'The Onion', 'url': 'http://theonion.com'}
    u3 = {
        'name': 'Seriously advanced code',
        'url':
        'https://github.com/cbare/Pydoku/blob/ef88069f70823808f3462410e941326ae7ffbbe0/solver.py',
        'wasExecuted': True
    }
    u4 = {
        'name': 'Heavy duty algorithm',
        'url': 'https://github.com/cbare/Pydoku/blob/master/solver.py'
    }
    glmnet_url = 'http://cran.r-project.org/web/packages/glmnet/index.html'
    quake_url = 'http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day'

    a = Activity(name='Foobarbat',
                 description='Apply foo to a bar and a bat',
                 used=[u1, u2, u3],
                 executed=[u3, u4])
    a.executed(url=glmnet_url, name='glm.net')
    a.used(url=quake_url, name='earthquakes')

    def find_by_name(label):
        # Look up a recorded resource by its 'name' field.
        return utils._find_used(
            a, lambda res: 'name' in res and res['name'] == label)

    # Plain string URL passed via used=[...]
    found = utils._find_used(a, lambda res: 'url' in res and res['url'] == u1)
    assert found is not None
    assert found['url'] == u1
    assert not found['wasExecuted']

    # Dict without a wasExecuted flag passed via used=[...]
    found = find_by_name('The Onion')
    assert found is not None
    assert found['url'] == 'http://theonion.com'
    assert not found['wasExecuted']

    # Dict carrying its own wasExecuted flag
    found = find_by_name('Seriously advanced code')
    assert found is not None
    assert found['url'] == u3['url']
    assert found['wasExecuted'] == u3['wasExecuted']

    # Dict passed via executed=[...]
    found = find_by_name('Heavy duty algorithm')
    assert found is not None
    assert found['url'] == u4['url']
    assert found['wasExecuted']

    # Added through the executed() method
    found = find_by_name('glm.net')
    assert found is not None
    assert found['url'] == glmnet_url
    assert found['wasExecuted']

    # Added through the used() method
    found = find_by_name('earthquakes')
    assert found is not None
    assert found['url'] == quake_url
    assert not found['wasExecuted']
コード例 #7
0
import synapseclient
from synapseclient import File, Activity

# Create a Synapse client and log in.
syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations applied to the stored count matrix.
annotDict = dict()
annotDict['fileType'] = 'count'
annotDict['normalized'] = 'no'
annotDict['summaryLevel'] = 'gene'

# Provenance for the upload.
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq

# The call now starts on the same line as the assignment (a bare "counts ="
# followed by a line break is a syntax error), and the adjacent description
# literals end with a space so the words are not fused when concatenated.
counts = File(
    path='/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.'
         'DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
    name='PFC_CountMatrix_ensembl.txt',
    description='Gene counts for all BAMs '
                'summarized using Ensembl gene models. '
                'QC counts (e.g. \"ambiguous\") from HTSeq '
                'are not included.',
    parentId='syn2290933',
    synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)


# Need to check:
# - annotations: standards?
# - synapseclient: auto-return synapse id on upload
# - syn.store: specify activity independently?
# - Activity.executed: executable file, or just script?