def test_store_activity():
    """Store two Files with one Activity and check both report the same provenance."""
    # Build a local bogus file and the Activity that will be attached to it
    bogus_path = utils.make_bogus_binary_file()
    schedule_for_cleanup(bogus_path)
    first_file = File(bogus_path, name='Hinkle horn honking holes', parent=project)
    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    for photo_url in ('http://www.flickr.com/photos/bevanbfree/3482259379/',
                      'http://www.flickr.com/photos/bevanbfree/3482185673/'):
        honking.used(photo_url)

    # Storing the File does not put an ID on the local Activity object...
    first_file = syn.store(first_file, activity=honking)

    # ...so read the provenance back to get the Activity together with its ID
    honking = syn.getProvenance(first_file.id)

    # Check the Activity that came back
    used_url_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['name'] == 'Hinkle horn honking'
    used = honking['used']
    assert len(used) == 2
    assert used[0]['concreteType'] == used_url_type
    assert used[0]['wasExecuted'] == False
    assert used[0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert used[1]['concreteType'] == used_url_type
    assert used[1]['wasExecuted'] == False

    # Attach the very same Activity to a second, externally hosted File
    second_file = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
                       name='Nettlebed Cave', parent=project, synapseStore=False)
    second_file = syn.store(second_file, activity=honking)

    # Both Files must point at one and the same Activity
    honking2 = syn.getProvenance(second_file)
    assert honking['id'] == honking2['id']
def test_store_activity():
    """Check that an Activity given to syn.store is saved and reusable across entities."""
    # Local file to upload, cleaned up after the test run
    local_path = utils.make_bogus_binary_file()
    schedule_for_cleanup(local_path)
    uploaded = File(local_path, name='Hinkle horn honking holes', parent=project)

    # Activity that lists two URLs as used resources
    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.',
    )
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    # The local Activity has no ID after this call
    uploaded = syn.store(uploaded, activity=honking)

    # The Activity fetched from the entity's provenance does carry an ID
    honking = syn.getProvenance(uploaded.id)

    # Inspect the stored Activity
    expected_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    expected_url_prefix = 'http://www.flickr.com/photos/bevanbfree/3482'
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    first_used = honking['used'][0]
    assert first_used['concreteType'] == expected_type
    assert first_used['wasExecuted'] == False
    assert first_used['url'].startswith(expected_url_prefix)
    second_used = honking['used'][1]
    assert second_used['concreteType'] == expected_type
    assert second_used['wasExecuted'] == False

    # A second entity (link only, not uploaded) stored with the same Activity
    linked = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
                  name='Nettlebed Cave', parent=project, synapseStore=False)
    linked = syn.store(linked, activity=honking)

    # Its provenance must be the Activity we already have
    honking2 = syn.getProvenance(linked)
    assert honking['id'] == honking2['id']
# Example #3
# 0
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    Builds an Activity from ``args.name`` and ``args.description``, adds every
    item returned by ``_convertProvenanceList`` for ``args.used`` as a used
    resource and every item returned for ``args.executed`` as an executed
    resource, then attaches the Activity to the entity ``args.id`` through
    ``syn.setProvenance``.

    Output depends on ``args.output``:
      * ``'STDOUT'``: the stored activity is written to stdout as one JSON line;
      * any other truthy value: it is treated as a file path and the JSON line
        is written there (the file is overwritten);
      * falsy: a short confirmation message is printed instead.
    """
    activity = Activity(name=args.name, description=args.description)

    if args.used:
        for item in _convertProvenanceList(args.used, args.limitSearch, syn):
            activity.used(item)
    if args.executed:
        for item in _convertProvenanceList(args.executed, args.limitSearch,
                                           syn):
            activity.used(item, wasExecuted=True)
    # Keep the record returned by the call: that is what gets reported below
    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        if args.output == 'STDOUT':
            sys.stdout.write(json.dumps(activity))
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(json.dumps(activity))
                f.write('\n')
    else:
        # Single-argument print(...) runs unchanged on Python 2 and Python 3;
        # the former print statement was a SyntaxError on Python 3.
        print('Set provenance record %s on entity %s\n' % (str(
            activity['id']), str(args.id)))
def test_store_activity():
    """Verify that an Activity passed to syn.store is saved and can be shared by two Files."""
    project = create_project()

    data_path = utils.make_bogus_data_file()
    schedule_for_cleanup(data_path)

    first = File(data_path, name='Hinkle horn honking holes', parent=project)

    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    for photo_url in ('http://www.flickr.com/photos/bevanbfree/3482259379/',
                      'http://www.flickr.com/photos/bevanbfree/3482185673/'):
        honking.used(photo_url)

    ## the local activity object still has no ID after storing
    first = syn.store(first, activity=honking)

    ## reading the provenance back yields the activity with its ID
    honking = syn.getProvenance(first.id)

    used_url_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['name'] == 'Hinkle horn honking'
    used = honking['used']
    assert len(used) == 2
    assert used[0]['concreteType'] == used_url_type
    assert used[0]['wasExecuted'] == False
    assert used[0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert used[1]['concreteType'] == used_url_type
    assert used[1]['wasExecuted'] == False

    ## a second entity stored with the activity we just read back
    second = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg', name='Nettlebed Cave', parent=project)
    second = syn.store(second, activity=honking)

    ## both entities must report the same activity
    honking2 = syn.getProvenance(second)
    assert honking['id'] == honking2['id']
# Example #5
# 0
def add_workflow_step_to_synapse(inFilePath,
                                 stepDict,
                                 step='1',
                                 software=None,
                                 parentid=None,
                                 syn=None,
                                 stepIDs=None,
                                 inFilename=None):
    '''Uploads files with provenance and annotations to Synapse.

    Parameters:
        inFilePath: path of the file to store.
        stepDict: description of the workflow step. Keys read here:
            'actName', 'description' - name/description of the Activity;
            'executed' - required; ';'-separated items, each 'target' or
                'target,version';
            'used' - optional; ','-separated list of used resources;
            'depends' - optional; key into stepIDs for a prior step;
            'fileDescription', 'store' - File description and the value
                handed to str2bool for synapseStore;
            'softwareName' - read when software is not given;
            'annotations' - optional; passed to syn.setAnnotations.
        step: not used by this function.
        software: looked up from stepDict['softwareName'] when falsy, but not
            used afterwards.
        parentid: passed to File as parentId.
        syn: client object providing store() and setAnnotations(); despite
            the None default it is always dereferenced.
        stepIDs: mapping consulted only when stepDict has 'depends'.
        inFilename: name for the stored File; defaults to the basename of
            inFilePath.

    Returns:
        The id attribute of the stored File.
    '''
    usedList = None
    if not inFilename:
        inFilename = os.path.basename(inFilePath.strip())
    if not software:
        # NOTE(review): the value is never used below; the lookup is kept so
        # a missing 'softwareName' still raises KeyError as before.
        software = stepDict['softwareName']
    if 'used' in stepDict:
        usedList = stepDict['used'].strip().split(',')
        if 'depends' in stepDict:
            usedList.append(stepIDs[stepDict['depends']])
    elif 'depends' in stepDict:
        # NOTE(review): here usedList is whatever stepIDs holds (not wrapped
        # in a list, unlike the branch above) - confirm act.used accepts both.
        usedList = stepIDs[stepDict['depends']]
    execList = stepDict['executed'].strip().split(';')

    act = Activity(name=stepDict['actName'],
                   description=stepDict['description'])
    if usedList is not None:
        act.used(usedList)
    for item in execList:
        splitItem = item.split(',')
        target = splitItem[0]
        # NOTE(review): the default is the int 1 while an explicit version
        # stays a string - confirm targetVersion accepts both.
        version = 1
        if len(splitItem) > 1:
            version = splitItem[1]
        if target.startswith('http'):
            act.executed(url=target, name=os.path.basename(target))
        else:
            act.executed(target=target, targetVersion=version)

    step_file = File(path=inFilePath,
                     name=inFilename,
                     description=stepDict['fileDescription'],
                     parentId=parentid,
                     synapseStore=str2bool(stepDict['store']))
    step_file = syn.store(step_file, activity=act, forceVersion=False)
    if 'annotations' in stepDict:
        syn.setAnnotations(step_file, annotations=stepDict['annotations'])
    # Single-argument print(...) runs unchanged on Python 2 and Python 3;
    # the former print statement was a SyntaxError on Python 3.
    print('new entity id %s' % step_file.id)
    return step_file.id
def test_provenance(syn, project, schedule_for_cleanup):
    """Round-trip a provenance record: set it, read it back, update it, delete it."""
    parent_id = project['id']

    # Data file whose provenance is under test
    data_path = utils.make_bogus_data_file()
    schedule_for_cleanup(data_path)
    data_entity = syn.store(File(data_path, parent=parent_id))

    # A small script stored as a second File, to act as the executed code
    handle, code_path = tempfile.mkstemp(suffix=".py")
    with os.fdopen(handle, 'w') as code_file:
        source_text = utils.normalize_lines("""
            ## Chris's fabulous random data generator
            ############################################################
            import random
            random.seed(12345)
            data = [random.gauss(mu=0.0, sigma=1.0) for i in range(100)]
            """)
        code_file.write(source_text)
    schedule_for_cleanup(code_path)
    code_entity = syn.store(File(code_path, parent=parent_id))

    # Activity that marks both the code entity and an external URL as executed
    activity = Activity(name='random.gauss',
                        description='Generate some random numbers')
    activity.used(code_entity, wasExecuted=True)
    superhack = {
        'name': 'Superhack',
        'url': 'https://github.com/joe_coder/Superhack'
    }
    activity.used(superhack, wasExecuted=True)
    activity = syn.setProvenance(data_entity, activity)

    # What we read back must equal what setProvenance returned
    retrieved_activity = syn.getProvenance(data_entity)
    assert retrieved_activity == activity

    # Change the description and push the update
    new_description = 'Generate random numbers like a gangsta'
    retrieved_activity['description'] = new_description
    updated_activity = syn.updateActivity(retrieved_activity)
    assert updated_activity['name'] == retrieved_activity['name']
    assert updated_activity['description'] == new_description

    # After deletion, fetching the provenance must fail with an HTTP error
    syn.deleteProvenance(data_entity)
    entity_id = data_entity['id']
    with pytest.raises(SynapseHTTPError):
        syn.getProvenance(entity_id)
def test_store_activity():
    """Store a File with an Activity, then reuse that Activity for a second File."""
    project = create_project()

    local_path = utils.make_bogus_data_file()
    schedule_for_cleanup(local_path)

    uploaded = File(local_path, name='Hinkle horn honking holes', parent=project)

    cave_blurb = 'Nettlebed Cave is a limestone cave located on the South Island of New Zealand.'
    honking = Activity(name='Hinkle horn honking', description=cave_blurb)
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    ## storing does not give the local activity an ID
    uploaded = syn.store(uploaded, activity=honking)

    ## the activity read back from the entity has one
    honking = syn.getProvenance(uploaded.id)

    expected_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    expected_prefix = 'http://www.flickr.com/photos/bevanbfree/3482'

    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    first_used = honking['used'][0]
    assert first_used['concreteType'] == expected_type
    assert first_used['wasExecuted'] == False
    assert first_used['url'].startswith(expected_prefix)
    second_used = honking['used'][1]
    assert second_used['concreteType'] == expected_type
    assert second_used['wasExecuted'] == False

    ## second entity, same activity
    linked = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
                  name='Nettlebed Cave', parent=project)
    linked = syn.store(linked, activity=honking)

    honking2 = syn.getProvenance(linked)

    ## the two entities must share one activity
    assert honking['id'] == honking2['id']
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    Builds an Activity from ``args.name`` and ``args.description``, adds each
    item of ``args.used`` as a used resource and each item of
    ``args.executed`` as an executed resource, then attaches the Activity to
    the entity ``args.id`` through ``syn.setProvenance``.

    Output depends on ``args.output``:
      * ``'STDOUT'``: the stored activity is written to stdout as one JSON line;
      * any other truthy value: it is treated as a file path and the JSON line
        is written there (the file is overwritten);
      * falsy: a short confirmation message is printed instead.
    """

    activity = Activity(name=args.name, description=args.description)
    if args.used:
        for item in args.used:
            activity.used(item)
    if args.executed:
        for item in args.executed:
            activity.used(item, wasExecuted=True)
    # Keep the record returned by the call: that is what gets reported below
    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        if args.output == 'STDOUT':
            sys.stdout.write(json.dumps(activity))
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(json.dumps(activity))
                f.write('\n')
    else:
        # Single-argument print(...) runs unchanged on Python 2 and Python 3;
        # the former print statement was a SyntaxError on Python 3.
        print('Set provenance record %s on entity %s\n' % (str(activity['id']), str(args.id)))
# Example #9
# 0
def test_activity_used_execute_methods():
    """test activity creation and used and execute methods"""
    a = Activity(name='Fuzz', description='hipster beard dataset')
    a.used({
        'id': 'syn101',
        'versionNumber': 42,
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity'
    })
    a.executed('syn102', targetVersion=1)

    # One entry from used() plus one from executed(). This was previously the
    # bare expression `len(usedEntities), 2`, which checked nothing.
    usedEntities = a['used']
    assert len(usedEntities) == 2

    assert a['name'] == 'Fuzz'
    assert a['description'] == 'hipster beard dataset'

    # The entity passed to used() keeps its version and is not marked executed
    used_syn101 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn101')
    assert used_syn101['reference']['targetVersionNumber'] == 42
    assert not used_syn101['wasExecuted']

    # The entity passed to executed() keeps its version and is marked executed
    used_syn102 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn102')
    assert used_syn102['reference']['targetVersionNumber'] == 1
    assert used_syn102['wasExecuted']
# Close the commands file before storing it in Synapse
# (cfPath is presumably the path commandsFile was opened on - confirm).
commandsFile.close()
cf = File(path=cfPath,
          description='Job commands.',
          parentId=args.out,
          synapseStore=True)
cf = syn.store(
    cf,
    activityName='quant_evaluation',
    executed=[
        'https://github.com/Sage-Bionetworks/synapse-seq/blob/master/scripts/eval_quant_sailfish.py'
    ])

# Provenance for the quantitation output: the stored commands file is listed
# as executed; the submission entity (at its version) and args.idx as used.
act = Activity(
    name='transcript quantitation',
    description='Alignment-free transcript quantitation using Sailfish.',
    executed=['syn2325155', cf.id])
act.used(target=submission.entityId, targetVersion=submission.versionNumber)
act.used(args.idx)

# Load raw quant file
# The name and path are built once and reused for the message and the File.
quantName = prefix + '_quant.sf'
quantPath = os.path.join(wd, quantName)
# Single-argument print(...) runs unchanged on Python 2 and Python 3;
# the former print statement was a SyntaxError on Python 3.
print('Loading %s to Synapse.' % quantPath)
quantEntity = File(path=quantPath,
                   name=quantName,
                   description='Quantified transcript isoforms.',
                   parentId=args.out,
                   synapseStore=True)
quantEntity = syn.store(quantEntity, forceVersion=False, activity=act)
syn.setAnnotations(quantEntity,
                   annotations=dict(fileType='quantitation',
                                    normalized='TPM',
                                    summaryLevel='transcript',
                                    biasCorrection='False'))
import synapseclient
from synapseclient import File, Activity

# Log in to Synapse
syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations applied to the count matrix after it is stored
annotDict = {
    'fileType': 'count',
    'normalized': 'no',
    'summaryLevel': 'gene',
}

# Provenance: what the counting step used and what it executed
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq

# Store the count matrix with that provenance, then annotate it
counts = syn.store(
    File(
        path='/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
        name='PFC_CountMatrix_ensembl.txt',
        description='Gene counts for all BAMs summarized using Ensembl gene models. QC counts (e.g. \"ambiguous\") from HTSeq are not included.',
        parentId='syn2290933',
        synapseStore=True),
    activity=act)
syn.setAnnotations(counts, annotations=annotDict)
# Example #12
# 0
def test_activity_used_url():
    """Check Activity construction and the used/executed methods with URL resources."""
    xkcd_url = 'http://xkcd.com'
    onion = {'name': 'The Onion', 'url': 'http://theonion.com'}
    advanced_code = {
        'name': 'Seriously advanced code',
        'url':
        'https://github.com/cbare/Pydoku/blob/ef88069f70823808f3462410e941326ae7ffbbe0/solver.py',
        'wasExecuted': True
    }
    heavy_algorithm = {
        'name': 'Heavy duty algorithm',
        'url': 'https://github.com/cbare/Pydoku/blob/master/solver.py'
    }
    glmnet_url = 'http://cran.r-project.org/web/packages/glmnet/index.html'
    quake_url = 'http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day'

    a = Activity(name='Foobarbat',
                 description='Apply foo to a bar and a bat',
                 used=[xkcd_url, onion, advanced_code],
                 executed=[advanced_code, heavy_algorithm])

    a.executed(url=glmnet_url, name='glm.net')
    a.used(url=quake_url, name='earthquakes')

    def find_named(label):
        # Look up the used-resource entry whose 'name' equals label
        return utils._find_used(
            a, lambda res: 'name' in res and res['name'] == label)

    # A bare URL string passed via used=
    by_url = utils._find_used(
        a, lambda res: 'url' in res and res['url'] == xkcd_url)
    assert by_url is not None
    assert by_url['url'] == xkcd_url
    assert not by_url['wasExecuted']

    # A name/url dict passed via used=
    onion_entry = find_named('The Onion')
    assert onion_entry is not None
    assert onion_entry['url'] == 'http://theonion.com'
    assert not onion_entry['wasExecuted']

    # A dict that carries its own wasExecuted flag
    advanced_entry = find_named('Seriously advanced code')
    assert advanced_entry is not None
    assert advanced_entry['url'] == advanced_code['url']
    assert advanced_entry['wasExecuted'] == advanced_code['wasExecuted']

    # A name/url dict passed via executed=
    heavy_entry = find_named('Heavy duty algorithm')
    assert heavy_entry is not None
    assert heavy_entry['url'] == heavy_algorithm['url']
    assert heavy_entry['wasExecuted']

    # Added through the executed() method
    glmnet_entry = find_named('glm.net')
    assert glmnet_entry is not None
    assert glmnet_entry['url'] == glmnet_url
    assert glmnet_entry['wasExecuted']

    # Added through the used() method
    quake_entry = find_named('earthquakes')
    assert quake_entry is not None
    assert quake_entry['url'] == quake_url
    assert not quake_entry['wasExecuted']
import synapseclient
from synapseclient import File, Activity

# Log in to Synapse
syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations applied to the count matrix after it is stored
annotDict = dict()
annotDict['fileType'] = 'count'
annotDict['normalized'] = 'no'
annotDict['summaryLevel'] = 'gene'

# Provenance: what the counting step used and what it executed
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531']) # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147') # syn2243147 is htseq
# The call now opens on the assignment line (a bare `counts =` is a
# SyntaxError). Adjacent string literals are concatenated with no separator,
# so the description pieces carry explicit trailing spaces; the path pieces
# must not.
counts = File(
    path='/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.'
         'DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
    name='PFC_CountMatrix_ensembl.txt',
    description='Gene counts for all BAMs '
                'summarized using Ensembl gene models. QC counts (e.g. \"ambiguous\") from HTSeq '
                'are not included.',
    parentId='syn2290933',
    synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)


# Need to check:
# - annotations: standards?
# - synapseclient: auto-return synapse id on upload
# - syn.store: specify activity independently?
# - Activity.executed: executable file, or just script?