def test_store_activity():
    """Store two Files against one Activity and check the shared provenance.

    Uploads a local binary file with a freshly built Activity, reads the
    provenance record back, verifies its contents, then stores a second
    (link-only) File with the retrieved record and checks that both files
    report the same Activity id.
    """
    # Local file to upload; registered for cleanup straight away.
    local_path = utils.make_bogus_binary_file()
    schedule_for_cleanup(local_path)
    entity = File(local_path, name='Hinkle horn honking holes', parent=project)

    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    # Storing with the Activity doesn't set the Activity's ID ...
    entity = syn.store(entity, activity=honking)

    # ... but reading the provenance back does.
    honking = syn.getProvenance(entity.id)

    # Verify the Activity
    url_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    first_used, second_used = honking['used']
    assert first_used['concreteType'] == url_type
    assert first_used['wasExecuted'] == False
    assert first_used['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert second_used['concreteType'] == url_type
    assert second_used['wasExecuted'] == False

    # Store another Entity with the same Activity
    entity = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
                  name='Nettlebed Cave',
                  parent=project,
                  synapseStore=False)
    entity = syn.store(entity, activity=honking)

    # The Activities should match
    honking2 = syn.getProvenance(entity)
    assert honking['id'] == honking2['id']
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    Builds an Activity from ``args.name`` / ``args.description``, attaches
    every item resolved from ``args.used`` and ``args.executed`` (the latter
    flagged ``wasExecuted=True``), and saves it on the entity ``args.id``.

    :param args: parsed command-line arguments; reads ``name``,
                 ``description``, ``used``, ``executed``, ``limitSearch``,
                 ``id`` and ``output``
    :param syn:  client object providing ``setProvenance``
    """
    activity = Activity(name=args.name, description=args.description)

    if args.used:
        for item in _convertProvenanceList(args.used, args.limitSearch, syn):
            activity.used(item)
    if args.executed:
        for item in _convertProvenanceList(args.executed, args.limitSearch, syn):
            activity.used(item, wasExecuted=True)

    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        # Serialise once; both destinations receive the same JSON line.
        record = json.dumps(activity)
        if args.output == 'STDOUT':
            sys.stdout.write(record)
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(record)
                f.write('\n')
    else:
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3 (the bare print statement is a SyntaxError on 3).
        print('Set provenance record %s on entity %s\n' % (str(activity['id']), str(args.id)))
def test_store_activity():
    """Test storing entities with Activities

    A file is stored together with a new Activity, the provenance is read
    back and checked, and a second file is then stored with that retrieved
    Activity; both files must end up reporting the same Activity id.
    """
    USED_URL_TYPE = 'org.sagebionetworks.repo.model.provenance.UsedURL'
    FLICKR_PREFIX = 'http://www.flickr.com/photos/bevanbfree/3482'

    project = create_project()

    data_path = utils.make_bogus_data_file()
    schedule_for_cleanup(data_path)

    first_file = File(data_path, name='Hinkle horn honking holes', parent=project)

    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    ## doesn't set the ID of the activity
    first_file = syn.store(first_file, activity=honking)

    ## now, we have an activity ID
    honking = syn.getProvenance(first_file.id)

    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    assert honking['used'][0]['concreteType'] == USED_URL_TYPE
    assert honking['used'][0]['wasExecuted'] == False
    assert honking['used'][0]['url'].startswith(FLICKR_PREFIX)
    assert honking['used'][1]['concreteType'] == USED_URL_TYPE
    assert honking['used'][1]['wasExecuted'] == False

    ## store another entity with the same activity
    second_file = syn.store(
        File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
             name='Nettlebed Cave',
             parent=project),
        activity=honking)

    honking2 = syn.getProvenance(second_file)

    assert honking['id'] == honking2['id']
def add_workflow_step_to_synapse(inFilePath, stepDict, step='1', software=None, parentid=None, syn=None,
                                 stepIDs=None, inFilename=None):
    '''Uploads files with provenance and annotations to Synapse.

    Builds an Activity from ``stepDict``, stores ``inFilePath`` as a File
    with that Activity, optionally sets annotations, and returns the id of
    the stored file.

    Keys read from ``stepDict``: ``actName``, ``description``, ``executed``,
    ``fileDescription`` and ``store`` are always read; ``softwareName`` is
    read when ``software`` is not given; ``used``, ``depends`` and
    ``annotations`` are optional.

    :param inFilePath: path of the file to upload
    :param stepDict:   dictionary describing the workflow step (see above)
    :param step:       not used inside this function
    :param software:   falls back to ``stepDict['softwareName']``; the value
                       is not used further inside this function
    :param parentid:   passed to File as ``parentId``
    :param syn:        client object providing ``store`` / ``setAnnotations``;
                       the ``None`` default is not guarded, so a client must
                       be supplied
    :param stepIDs:    mapping indexed by ``stepDict['depends']``; must be
                       supplied whenever ``depends`` is present
    :param inFilename: name for the stored file; defaults to the basename of
                       ``inFilePath``

    :returns: ``id`` of the stored file
    '''
    usedList = None
    if not inFilename:
        inFilename = os.path.basename(inFilePath.strip())
    if not software:
        software = stepDict['softwareName']

    # 'used' is a comma-separated string; a 'depends' entry adds the id
    # recorded for the step this one depends on.
    if 'used' in stepDict:
        usedList = stepDict['used'].strip().split(',')
        if 'depends' in stepDict:
            usedList.append(stepIDs[stepDict['depends']])
    elif 'depends' in stepDict:
        usedList = stepIDs[stepDict['depends']]

    # 'executed' is a ';'-separated list of "target[,version]" items.
    execList = stepDict['executed'].strip().split(';')

    act = Activity(name=stepDict['actName'], description=stepDict['description'])
    if usedList is not None:
        act.used(usedList)
    for item in execList:
        splitItem = item.split(',')
        target = splitItem[0]
        version = 1
        if len(splitItem) > 1:
            version = splitItem[1]
        # Items starting with 'http' are recorded as URLs, anything else as
        # a target with a version.
        if target.startswith('http'):
            act.executed(url=target, name=os.path.basename(target))
        else:
            act.executed(target=target, targetVersion=version)

    step_file = File(path=inFilePath,
                     name=inFilename,
                     description=stepDict['fileDescription'],
                     parentId=parentid,
                     synapseStore=str2bool(stepDict['store']))
    step_file = syn.store(step_file, activity=act, forceVersion=False)
    if 'annotations' in stepDict:
        syn.setAnnotations(step_file, annotations=stepDict['annotations'])

    # Single-argument parenthesised print behaves the same on Python 2 and
    # Python 3 (the bare print statement is a SyntaxError on 3).
    print('new entity id %s' % step_file.id)
    return step_file.id
def test_provenance(syn, project, schedule_for_cleanup):
    """Exercise setting, reading, updating and deleting provenance.

    Stores a data file and a small code file, records an Activity on the
    data file that lists the code file and a URL as executed, then checks
    retrieval, update of the description, and deletion of the record.
    """
    parent_id = project['id']

    # Create a File Entity
    data_path = utils.make_bogus_data_file()
    schedule_for_cleanup(data_path)
    data_entity = syn.store(File(data_path, parent=parent_id))

    # Create a File Entity of Code
    fd, path = tempfile.mkstemp(suffix=".py")
    with os.fdopen(fd, 'w') as code_file:
        code_file.write(utils.normalize_lines("""
            ## Chris's fabulous random data generator
            ############################################################
            import random
            random.seed(12345)
            data = [random.gauss(mu=0.0, sigma=1.0) for i in range(100)]
            """))
    schedule_for_cleanup(path)
    code_entity = syn.store(File(path, parent=parent_id))

    # Create a new Activity asserting that the Code Entity was 'used'
    activity = Activity(name='random.gauss', description='Generate some random numbers')
    activity.used(code_entity, wasExecuted=True)
    superhack = {'name': 'Superhack', 'url': 'https://github.com/joe_coder/Superhack'}
    activity.used(superhack, wasExecuted=True)
    activity = syn.setProvenance(data_entity, activity)

    # Retrieve and verify the saved Provenance record
    retrieved_activity = syn.getProvenance(data_entity)
    assert retrieved_activity == activity

    # Test Activity update
    new_description = 'Generate random numbers like a gangsta'
    retrieved_activity['description'] = new_description
    updated_activity = syn.updateActivity(retrieved_activity)
    assert updated_activity['name'] == retrieved_activity['name']
    assert updated_activity['description'] == new_description

    # Test delete: reading the provenance afterwards must raise
    syn.deleteProvenance(data_entity)
    with pytest.raises(SynapseHTTPError):
        syn.getProvenance(data_entity['id'])
def test_store_activity():
    """Test storing entities with Activities

    Stores one file with a new Activity, verifies the provenance read back
    for it, then stores a second file with the retrieved Activity and
    checks that the two provenance records carry the same id.
    """
    project = create_project()

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    for photo in ('http://www.flickr.com/photos/bevanbfree/3482259379/',
                  'http://www.flickr.com/photos/bevanbfree/3482185673/'):
        honking.used(photo)

    ## doesn't set the ID of the activity
    f = syn.store(File(path, name='Hinkle horn honking holes', parent=project),
                  activity=honking)

    ## now, we have an activity ID
    honking = syn.getProvenance(f.id)

    assert honking['name'] == 'Hinkle horn honking'
    used = honking['used']
    assert len(used) == 2
    assert used[0]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert used[0]['wasExecuted'] == False
    assert used[0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert used[1]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert used[1]['wasExecuted'] == False

    ## store another entity with the same activity
    f2 = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
              name='Nettlebed Cave',
              parent=project)
    f2 = syn.store(f2, activity=honking)

    honking2 = syn.getProvenance(f2)

    assert honking['id'] == honking2['id']
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    Builds an Activity from ``args.name`` / ``args.description``, attaches
    each item of ``args.used`` and of ``args.executed`` (the latter flagged
    ``wasExecuted=True``), and saves it on the entity ``args.id``.

    :param args: parsed command-line arguments; reads ``name``,
                 ``description``, ``used``, ``executed``, ``id`` and
                 ``output``
    :param syn:  client object providing ``setProvenance``
    """
    activity = Activity(name=args.name, description=args.description)

    if args.used:
        for item in args.used:
            activity.used(item)
    if args.executed:
        for item in args.executed:
            activity.used(item, wasExecuted=True)

    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        # Serialise once; both destinations receive the same JSON line.
        record = json.dumps(activity)
        if args.output == 'STDOUT':
            sys.stdout.write(record)
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(record)
                f.write('\n')
    else:
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3 (the bare print statement is a SyntaxError on 3).
        print('Set provenance record %s on entity %s\n' % (str(activity['id']), str(args.id)))
def test_activity_used_execute_methods():
    """test activity creation and used and execute methods

    Adds one resource through ``used`` (given as an entity-style dict) and
    one through ``executed`` (given as an id plus version), then checks the
    Activity's name, description, number of used records, and the version
    and ``wasExecuted`` flag of each record.
    """
    a = Activity(name='Fuzz', description='hipster beard dataset')
    a.used({
        'id': 'syn101',
        'versionNumber': 42,
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity'
    })
    a.executed('syn102', targetVersion=1)

    usedEntities = a['used']
    # Previously the bare expression ``len(usedEntities), 2`` which built a
    # tuple and discarded it, so the count was never actually checked.
    assert len(usedEntities) == 2

    assert a['name'] == 'Fuzz'
    assert a['description'] == 'hipster beard dataset'

    used_syn101 = utils._find_used(a, lambda res: res['reference']['targetId'] == 'syn101')
    assert used_syn101['reference']['targetVersionNumber'] == 42
    assert not used_syn101['wasExecuted']

    used_syn102 = utils._find_used(a, lambda res: res['reference']['targetId'] == 'syn102')
    assert used_syn102['reference']['targetVersionNumber'] == 1
    assert used_syn102['wasExecuted']
commandsFile.close()

# Store the job-commands file, recording the evaluation script as executed.
cf = File(path=cfPath, description='Job commands.', parentId=args.out, synapseStore=True)
cf = syn.store(
    cf,
    activityName='quant_evaluation',
    executed=['https://github.com/Sage-Bionetworks/synapse-seq/blob/master/scripts/eval_quant_sailfish.py'])

# Provenance for the quantitation output: executed items are 'syn2325155'
# and the commands file stored above; used items are the submission (at its
# version) and args.idx.
act = Activity(name='transcript quantitation',
               description='Alignment-free transcript quantitation using Sailfish.',
               executed=['syn2325155', cf.id])
act.used(target=submission.entityId, targetVersion=submission.versionNumber)
act.used(args.idx)

# Load raw quant file
# Single-argument parenthesised print behaves the same on Python 2 and
# Python 3 (the bare print statement is a SyntaxError on 3).
print('Loading %s to Synapse.' % os.path.join(wd, prefix + '_quant.sf'))
quantEntity = File(path=os.path.join(wd, prefix + '_quant.sf'),
                   name=prefix + '_quant.sf',
                   description='Quantified transcript isoforms.',
                   parentId=args.out,
                   synapseStore=True)
quantEntity = syn.store(quantEntity, forceVersion=False, activity=act)
syn.setAnnotations(quantEntity,
                   annotations=dict(fileType='quantitation',
                                    normalized='TPM',
                                    summaryLevel='transcript',
                                    biasCorrection='False'))
import synapseclient
from synapseclient import File, Activity

# Log in before anything is uploaded.
syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations applied to the count matrix once it has been stored.
annotDict = {
    'fileType': 'count',
    'normalized': 'no',
    'summaryLevel': 'gene',
}

# Provenance for the upload.
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq

# Adjacent string literals are joined by the parser; every piece that is
# followed by another word ends with an explicit space.
counts = File(
    path='/projects/CommonMind/data/FROM_CORE/Production/readCounts/'
         'CMC.DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
    name='PFC_CountMatrix_ensembl.txt',
    description='Gene counts for all BAMs summarized using Ensembl gene models. '
                'QC counts (e.g. \"ambiguous\") from HTSeq are not included.',
    parentId='syn2290933',
    synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)
def test_activity_used_url():
    """test activity creation with UsedURLs

    URLs are supplied as a bare string, as name/url dicts (one carrying an
    explicit ``wasExecuted``), through the constructor's ``used`` and
    ``executed`` lists, and through the ``used`` / ``executed`` methods with
    keyword arguments. Each resulting record is looked up and its url and
    ``wasExecuted`` flag checked.
    """
    u1 = 'http://xkcd.com'
    u2 = {'name': 'The Onion', 'url': 'http://theonion.com'}
    u3 = {
        'name': 'Seriously advanced code',
        'url': 'https://github.com/cbare/Pydoku/blob/ef88069f70823808f3462410e941326ae7ffbbe0/solver.py',
        'wasExecuted': True
    }
    u4 = {
        'name': 'Heavy duty algorithm',
        'url': 'https://github.com/cbare/Pydoku/blob/master/solver.py'
    }

    a = Activity(name='Foobarbat',
                 description='Apply foo to a bar and a bat',
                 used=[u1, u2, u3],
                 executed=[u3, u4])

    a.executed(url='http://cran.r-project.org/web/packages/glmnet/index.html', name='glm.net')
    a.used(url='http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day', name='earthquakes')

    def used_named(wanted):
        # Look up the used record whose 'name' equals ``wanted``.
        return utils._find_used(a, lambda res: 'name' in res and res['name'] == wanted)

    # Bare-string URL is matched on its 'url' field.
    found = utils._find_used(a, lambda res: 'url' in res and res['url'] == u1)
    assert found is not None
    assert found['url'] == u1
    assert not found['wasExecuted']

    found = used_named('The Onion')
    assert found is not None
    assert found['url'] == 'http://theonion.com'
    assert not found['wasExecuted']

    found = used_named('Seriously advanced code')
    assert found is not None
    assert found['url'] == u3['url']
    assert found['wasExecuted'] == u3['wasExecuted']

    found = used_named('Heavy duty algorithm')
    assert found is not None
    assert found['url'] == u4['url']
    assert found['wasExecuted']

    found = used_named('glm.net')
    assert found is not None
    assert found['url'] == 'http://cran.r-project.org/web/packages/glmnet/index.html'
    assert found['wasExecuted']

    found = used_named('earthquakes')
    assert found is not None
    assert found['url'] == 'http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day'
    assert not found['wasExecuted']
import synapseclient
from synapseclient import File, Activity

# Log in before anything is uploaded.
syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
# Annotations applied to the count matrix once it has been stored.
annotDict = dict()
annotDict['fileType'] = 'count'
annotDict['normalized'] = 'no'
annotDict['summaryLevel'] = 'gene'

# Provenance for the upload.
act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq

# Adjacent string literals are concatenated with nothing in between, so each
# description fragment that is followed by another word must end with a
# space (previously this produced "BAMssummarized" and "HTSeqare").
counts = File(path='/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.'
                   'DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
              name='PFC_CountMatrix_ensembl.txt',
              description='Gene counts for all BAMs '
                          'summarized using Ensembl gene models. QC counts (e.g. \"ambiguous\") from HTSeq '
                          'are not included.',
              parentId='syn2290933',
              synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)

# Need to check:
# - annotations: standards?
# - synapseclient: auto-return synapse id on upload
# - syn.store: specify activity independently?
# - Activity.executed: executable file, or just script?