def test_store_activity():
    """Store a File with an Activity and verify the provenance round-trip.

    NOTE(review): relies on module-level test fixtures `syn`, `project`,
    `utils`, and `schedule_for_cleanup` — `project` is not created here,
    unlike the sibling variants of this test; confirm it is a module global.
    """
    # Create a File and an Activity
    path = utils.make_bogus_binary_file()
    schedule_for_cleanup(path)
    entity = File(path, name='Hinkle horn honking holes', parent=project)
    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    # This doesn't set the ID of the Activity
    entity = syn.store(entity, activity=honking)

    # But this does
    honking = syn.getProvenance(entity.id)

    # Verify the Activity
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    assert honking['used'][0]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][0]['wasExecuted'] == False
    assert honking['used'][0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert honking['used'][1]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][1]['wasExecuted'] == False

    # Store another Entity with the same Activity
    entity = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
                  name='Nettlebed Cave', parent=project, synapseStore=False)
    entity = syn.store(entity, activity=honking)

    # The Activities should match
    honking2 = syn.getProvenance(entity)
    assert honking['id'] == honking2['id']
def test_store_activity():
    """Test storing entities with Activities"""
    project = create_project()
    data_path = utils.make_bogus_data_file()
    schedule_for_cleanup(data_path)

    stored = File(data_path, name='Hinkle horn honking holes', parent=project)
    activity = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    for flickr_url in ('http://www.flickr.com/photos/bevanbfree/3482259379/',
                       'http://www.flickr.com/photos/bevanbfree/3482185673/'):
        activity.used(flickr_url)

    # Storing does not set the ID on the local Activity object ...
    stored = syn.store(stored, activity=activity)
    # ... but fetching provenance returns an Activity that now has an ID.
    activity = syn.getProvenance(stored.id)

    assert activity['name'] == 'Hinkle horn honking'
    assert len(activity['used']) == 2
    for used_entry in activity['used']:
        assert used_entry['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
        assert used_entry['wasExecuted'] == False
    assert activity['used'][0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')

    # A second entity stored with the same activity shares the activity ID.
    second = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
                  name='Nettlebed Cave', parent=project)
    second = syn.store(second, activity=activity)
    second_activity = syn.getProvenance(second)
    assert activity['id'] == second_activity['id']
def loadOneSample(a):
    """Goes through a single json annotation file a and:
    1) Finds the parent Folder where to store the file (or makes directories)
    2) Fetches the md5 of any existing file and compares
    3) If new or different md5 upload file.

    NOTE(review): relies on module-level `syn`, `args`, `getParentFolder`.
    """
    logging.debug("Loading:" + a)
    with open(a) as handle:
        meta = json.load(handle)
    # BUG FIX: escape the dot — r'.json$' matched ANY character before 'json'
    # (e.g. 'sample_json' would also be truncated); we only want '.json'.
    dpath = re.sub(r'\.json$', '', a)

    # Skip the rest of the loop if data file is empty or we are not doing the current acronyms
    if os.stat(dpath).st_size == 0 or (
            args.acronym != meta['annotations']['acronym'] and args.acronym is not None):
        return
    parentId = getParentFolder(syn, args.project, meta)

    # Determine if we are updating an existing file and if we should update based on md5
    query = "select id from entity where parentId=='%s' and name=='%s'" % (
        parentId, meta['name'])
    res = list(syn.chunkedQuery(query))
    if len(res) != 0:
        tmp_ent = syn.get(res[0]['entity.id'], downloadFile=False)
        upload = (tmp_ent.md5 != meta['annotations']['md5'])
        logging.debug("\tFound: %s and upload (MD5 %s match)" %
                      (tmp_ent.id, 'DOESN\'T' if upload else 'does'))
    else:
        logging.debug("\tNot found:" + meta['name'])
        upload = True

    # Prepare the entity for upload
    if upload and not args.push:
        logging.info("\tWILL UPLOAD: %s" % meta['name'])
    if upload and args.push:
        entity = File(dpath, name=meta['name'], parentId=parentId,
                      annotations=meta['annotations'])
        if 'provenance' in meta:
            # Fix labels for urls
            for u in meta['provenance']['used']:
                if 'name' not in u and 'url' in u:
                    u['name'] = u['url']
            prov = Activity(data=meta['provenance'])
            prov.executed('https://github.com/Sage-Bionetworks/tcgaImport')
        else:
            prov = None
        logging.debug('\tUploading:%s' % entity.name)
        entity = syn.store(entity, activity=prov)
        logging.debug('\tCreated/Updated: **** %s ****' % entity.id)
def test_activity_creation_from_dict():
    """test that activities are created correctly from a dictionary"""
    payload = {
        'name': 'Project Fuzz',
        'description': 'hipster beard dataset',
        'used': [{
            'reference': {'targetId': 'syn12345', 'versionNumber': 42},
            'wasExecuted': True,
        }],
    }
    activity = Activity(data=payload)

    # Top-level fields are copied straight through.
    assert activity['name'] == 'Project Fuzz'
    assert activity['description'] == 'hipster beard dataset'

    # The single 'used' record keeps its reference and execution flag.
    used_entries = activity['used']
    assert len(used_entries) == 1
    entry = used_entries[0]
    assert entry['wasExecuted']
    reference = entry['reference']
    assert reference['targetId'] == 'syn12345'
    assert reference['versionNumber'] == 42
def test_store_activity():
    """Test storing entities with Activities"""
    project = create_project()
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    f = File(path, name='Hinkle horn honking holes', parent=project)
    honking = Activity(
        name='Hinkle horn honking',
        description=
        'Nettlebed Cave is a limestone cave located on the South Island of New Zealand.'
    )
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    ## doesn't set the ID of the activity
    f = syn.store(f, activity=honking)
    honking = syn.getProvenance(f.id)
    ## now, we have an activity ID

    # Verify the stored provenance record round-trips intact.
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    assert honking['used'][0][
        'concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][0]['wasExecuted'] == False
    assert honking['used'][0]['url'].startswith(
        'http://www.flickr.com/photos/bevanbfree/3482')
    assert honking['used'][1][
        'concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][1]['wasExecuted'] == False

    ## store another entity with the same activity
    f2 = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
              name='Nettlebed Cave',
              parent=project)
    f2 = syn.store(f2, activity=honking)

    # Both entities should point at the same Activity record.
    honking2 = syn.getProvenance(f2)
    assert honking['id'] == honking2['id']
def loadOneSample(a):
    """Goes through a single json annotation file a and:
    1) Finds the parent Folder where to store the file (or makes directories)
    2) Fetches the md5 of any existing file and compares
    3) If new or different md5 upload file.

    NOTE(review): relies on module-level `syn`, `args`, `getParentFolder`.
    """
    logging.debug("Loading:" + a)
    with open(a) as handle:
        meta = json.load(handle)
    # BUG FIX: escape the dot — r'.json$' matched ANY character before 'json'
    # (e.g. 'sample_json' would also be truncated); we only want '.json'.
    dpath = re.sub(r'\.json$', '', a)

    #Skip the rest of the loop if data file is empty or we are not doing the current acronyms
    if os.stat(dpath).st_size == 0 or (args.acronym != meta['annotations']['acronym'] and
                                       args.acronym is not None):
        return
    parentId = getParentFolder(syn, args.project, meta)

    #Determine if we are updating an existing file and if we should update based on md5
    query = "select id from entity where parentId=='%s' and name=='%s'" % (parentId, meta['name'])
    res = list(syn.chunkedQuery(query))
    if len(res) != 0:
        tmp_ent = syn.get(res[0]['entity.id'], downloadFile=False)
        upload = (tmp_ent.md5 != meta['annotations']['md5'])
        logging.debug("\tFound: %s and upload (MD5 %s match)" %
                      (tmp_ent.id, 'DOESN\'T' if upload else 'does'))
    else:
        logging.debug("\tNot found:" + meta['name'])
        upload = True

    #Prepare the entity for upload
    if upload and not args.push:
        logging.info("\tWILL UPLOAD: %s" % meta['name'])
    if upload and args.push:
        entity = File(dpath, name=meta['name'], parentId=parentId,
                      annotations=meta['annotations'])
        if 'provenance' in meta:
            #Fix labels for urls
            for u in meta['provenance']['used']:
                if 'name' not in u and 'url' in u:
                    u['name'] = u['url']
            prov = Activity(data=meta['provenance'])
            prov.executed('https://github.com/Sage-Bionetworks/tcgaImport')
        else:
            prov = None
        logging.debug('\tUploading:%s' % entity.name)
        entity = syn.store(entity, activity=prov)
        logging.debug('\tCreated/Updated: **** %s ****' % entity.id)
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    (Python 2 — note the print statement.) Builds an Activity from the parsed
    command-line `args`, records used/executed references, stores it on entity
    `args.id`, and writes the resulting record to stdout or a file if
    `args.output` was given.
    """
    activity = Activity(name=args.name, description=args.description)
    if args.used:
        # _convertProvenanceList presumably resolves each reference
        # (path / URL / Synapse ID) before recording — TODO confirm.
        for item in _convertProvenanceList(args.used, args.limitSearch, syn):
            activity.used(item)
    if args.executed:
        for item in _convertProvenanceList(args.executed, args.limitSearch, syn):
            # Executed references are 'used' entries flagged wasExecuted.
            activity.used(item, wasExecuted=True)
    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        if args.output == 'STDOUT':
            sys.stdout.write(json.dumps(activity))
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(json.dumps(activity))
                f.write('\n')
    else:
        print 'Set provenance record %s on entity %s\n' % (str(
            activity['id']), str(args.id))
def setProvenance(args, syn):
    """Set provenance information on a synapse entity.

    (Python 2 — note the print statement.) Unlike the sibling variant, the
    used/executed lists are recorded verbatim without any reference resolution.
    """
    activity = Activity(name=args.name, description=args.description)
    if args.used:
        for item in args.used:
            activity.used(item)
    if args.executed:
        # Executed references are 'used' entries flagged wasExecuted.
        for item in args.executed:
            activity.used(item, wasExecuted=True)
    activity = syn.setProvenance(args.id, activity)

    # Display the activity record, if -o or -output specified
    if args.output:
        if args.output=='STDOUT':
            sys.stdout.write(json.dumps(activity))
            sys.stdout.write('\n')
        else:
            with open(args.output, 'w') as f:
                f.write(json.dumps(activity))
                f.write('\n')
    else:
        print 'Set provenance record %s on entity %s\n' % (str(activity['id']), str(args.id))
def test_activity_parameter_errors():
    """Test error handling in Activity.used()"""
    activity = Activity(name='Foobarbat', description='Apply foo to a bar and a bat')

    # A list target combined with a url keyword is rejected.
    with pytest.raises(SynapseMalformedEntityError):
        activity.used(['syn12345', 'http://google.com'], url='http://amazon.com')

    # A Synapse ID target combined with a url keyword is rejected.
    with pytest.raises(SynapseMalformedEntityError):
        activity.used('syn12345', url='http://amazon.com')

    # A URL target combined with targetVersion is rejected.
    with pytest.raises(SynapseMalformedEntityError):
        activity.used('http://amazon.com', targetVersion=1)
def add_workflow_step_to_synapse(inFilePath, stepDict, step='1', software=None,
                                 parentid=None, syn=None, stepIDs=None,
                                 inFilename=None):
    '''Uploads files with provenance and annotations to Synapse.

    (Python 2 — note the print statement.) Builds an Activity from stepDict's
    'used'/'depends'/'executed' entries, stores the file, and applies any
    annotations. Returns the new entity's Synapse ID.
    '''
    usedList = None
    if not inFilename:
        inFilename = os.path.basename(inFilePath.strip())
    if not software:
        software = stepDict['softwareName']
    if 'used' in stepDict:
        # 'used' is a comma-separated string of references.
        usedList = stepDict['used'].strip().split(',')
        if 'depends' in stepDict:
            usedList.append(stepIDs[stepDict['depends']])
    elif 'depends' in stepDict:
        # NOTE(review): this branch leaves usedList as a single ID (not a
        # list); Activity.used is presumably tolerant of that — confirm.
        usedList = stepIDs[stepDict['depends']]
    # 'executed' entries are ';'-separated; each entry is 'target[,version]'.
    execList = stepDict['executed'].strip().split(';')

    act = Activity(name=stepDict['actName'], description=stepDict['description'])
    if usedList is not None:
        act.used(usedList)
    for item in execList:
        splitItem = item.split(',')
        target = splitItem[0]
        version = 1  # default when no version given in the entry
        if (len(splitItem) > 1):
            version = splitItem[1]
        if target.startswith('http'):
            act.executed(url=target, name=os.path.basename(target))
        else:
            act.executed(target=target, targetVersion=version)

    step_file = File(path=inFilePath, name=inFilename,
                     description=stepDict['fileDescription'], parentId=parentid,
                     synapseStore=str2bool(stepDict['store']))
    step_file = syn.store(step_file, activity=act, forceVersion=False)
    if 'annotations' in stepDict:
        syn.setAnnotations(step_file, annotations=stepDict['annotations'])
    print 'new entity id %s' % step_file.id
    return (step_file.id)
def tmp(self, path, setter, parent):
    """Upload `path` to Synapse under `parent`, record the new Synapse ID via
    `setter`, attach provenance and study annotations, then remove the local
    file.

    Returns 0 on failure (after logging the exception); returns None on
    success. NOTE(review): relies on a module-level `syn` client.
    """
    try:
        result = syn.store(File(path, parentId=parent))
        synid = result.properties['id']
        setter(synid)
        # Tag the uploaded entity with a provenance record naming this tool.
        syn.setProvenance(synid, activity=Activity(name='gTap Archive Manager'))
        syn.setAnnotations(synid, annotations={
            'study_id': self.consent.study_id,
            'internal_id': self.consent.internal_id
        })
        self.__log_it(f'uploaded {path} data as {synid}')
        # Local copy is deleted only after a fully successful upload.
        os.remove(path)
    except Exception as e:
        self.__log_it(f'uploading {path} data failed with <{str(e)}>')
        return 0
def test_provenance(syn, project, schedule_for_cleanup):
    """End-to-end provenance test: set, get, update, and delete an Activity
    on a stored File entity."""
    # Create a File Entity
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)
    data_entity = syn.store(File(fname, parent=project['id']))

    # Create a File Entity of Code
    fd, path = tempfile.mkstemp(suffix=".py")
    with os.fdopen(fd, 'w') as f:
        f.write(
            utils.normalize_lines("""
            ## Chris's fabulous random data generator
            ############################################################
            import random
            random.seed(12345)
            data = [random.gauss(mu=0.0, sigma=1.0) for i in range(100)]
            """))
    schedule_for_cleanup(path)
    code_entity = syn.store(File(path, parent=project['id']))

    # Create a new Activity asserting that the Code Entity was 'used'
    activity = Activity(name='random.gauss',
                        description='Generate some random numbers')
    activity.used(code_entity, wasExecuted=True)
    activity.used(
        {
            'name': 'Superhack',
            'url': 'https://github.com/joe_coder/Superhack'
        },
        wasExecuted=True)
    activity = syn.setProvenance(data_entity, activity)

    # Retrieve and verify the saved Provenance record
    retrieved_activity = syn.getProvenance(data_entity)
    assert retrieved_activity == activity

    # Test Activity update
    new_description = 'Generate random numbers like a gangsta'
    retrieved_activity['description'] = new_description
    updated_activity = syn.updateActivity(retrieved_activity)
    assert updated_activity['name'] == retrieved_activity['name']
    assert updated_activity['description'] == new_description

    # Test delete — afterwards getProvenance must raise (no record exists).
    syn.deleteProvenance(data_entity)
    pytest.raises(SynapseHTTPError, syn.getProvenance, data_entity['id'])
def update_figure_and_table(sources, script_commit_url=None, replace_table=False,
                            force_update=False, dry_run=False):
    """(Python 2) Refresh the Pilot-63 progress table, attach provenance, and
    regenerate/upload the progress bar chart.

    When rows were added (or force_update), the generating script entity is
    optionally re-pointed at `script_commit_url`, an Activity covering all
    source folders is stored on the table, and the chart image is re-stored
    with that Activity. `dry_run` skips every write to Synapse.
    """
    df_all, df, df_progress = create_metadata_df(sources)
    print df_progress.groupby(["source", "variant_type"])["synapse_id"].count()
    table = add_new_rows_to_table(df_progress, replace_table, dry_run=dry_run)
    if table or force_update:
        script_entity = syn.get(THIS_SCRIPT_SYNAPSE_ID, downloadFile=False)
        if script_commit_url and not dry_run:
            # Point the script entity's external URL at the exact commit used.
            script_entity.externalURL = script_commit_url
            fileHandle = syn._addURLtoFileHandleService(
                script_commit_url, mimetype="text/x-python")
            script_entity.dataFileHandleId = fileHandle['id']
            script_entity = syn.store(script_entity)
        activity = Activity(
            name='Pilot-63-progress',
            description=
            'Track VCF files uploaded for the PCAWG Pilot-63 project',
            used=list(set(source.folder_id for source in sources)),
            executed=[script_entity])
        if not dry_run:
            activity = syn.setProvenance(TABLE_SYNAPSE_ID, activity)

        image_filename = "pilot-63-progress.png"
        plot_progress(df_progress, sources, image_filename)

        bar_chart = syn.get(BAR_CHART_SYNAPSE_ID, downloadFile=False)
        bar_chart.path = "pilot-63-progress.png"
        bar_chart.synapseStore = True
        if not dry_run:
            bar_chart = syn.store(bar_chart, activity=activity)
def test_activity_creation_by_constructor():
    """test activity creation adding used entities by the constructor"""
    used_reference = {
        'reference': {'targetId': 'syn101', 'targetVersionNumber': 42},
        'wasExecuted': False,
    }
    executed_entity = {
        'id': 'syn102',
        'versionNumber': 2,
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
    }
    used_id = 'syn103'

    activity = Activity(name='Fuzz',
                        description='hipster beard dataset',
                        used=[used_reference, used_id],
                        executed=[executed_entity])

    def find_by_target(target_id):
        # Locate the 'used' record whose reference points at target_id.
        return utils._find_used(
            activity, lambda res: res['reference']['targetId'] == target_id)

    record = find_by_target('syn101')
    assert record is not None
    assert record['reference']['targetVersionNumber'] == 42
    assert not record['wasExecuted']

    record = find_by_target('syn102')
    assert record is not None
    assert record['reference']['targetVersionNumber'] == 2
    assert record['wasExecuted']

    assert find_by_target('syn103') is not None
def test_activity_used_execute_methods():
    """test activity creation and used and execute methods"""
    a = Activity(name='Fuzz', description='hipster beard dataset')
    a.used({
        'id': 'syn101',
        'versionNumber': 42,
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity'
    })
    a.executed('syn102', targetVersion=1)
    usedEntities = a['used']
    # BUG FIX: was the bare expression `len(usedEntities), 2` — a no-op tuple
    # that asserted nothing. Make the intended length check real.
    assert len(usedEntities) == 2

    assert a['name'] == 'Fuzz'
    assert a['description'] == 'hipster beard dataset'

    used_syn101 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn101')
    assert used_syn101['reference']['targetVersionNumber'] == 42
    assert not used_syn101['wasExecuted']

    used_syn102 = utils._find_used(
        a, lambda res: res['reference']['targetId'] == 'syn102')
    assert used_syn102['reference']['targetVersionNumber'] == 1
    assert used_syn102['wasExecuted']
existingBAMDict[BAMentity.path] = BAMentity # Check for new BAM and submit BAMDir = os.listdir(args.bam) for dir in BAMDir: filesList = os.listdir(os.path.join(args.bam, dir)): for file in filesList: if file.endswith('.bam'): if file not in existingBAMDict: ### For qsub, the evaluation code for this submission will have to contain the provenance calls. filePath = os.path.join(args.bam, dir, file) newFile = File(filePath, description = 'BAM file of aligned reads.', parentId = foldersDict['BAM'], synapseStore = False) ## Try to extract this code to a function in seq_loading act = Activity(name='Alignment', description='Align reads to genome.') #act.executed(target='tophatid', targetVersion=version) newFile = syn.store(newFile, activity = act) submission = syn.submit(entity=newFile, evaluation = evalID, name = 'submissionTest', teamName = profile['displayName']) print 'Submitted %s to %s' % (newFile.name, countEval.name) ### Could have multiple other submissions here: count, fusion, ## Notes # Can evaluations be used or executed entities in an activity? # -->> Prefer to use/execute the code behind the eval? # How to break up data between projects? i.e. could data be generated for more than one project within the window that the cron job runs? # --> ?? Don't know yet...hope it doesn't come to parsing the sample sheet. If cron is hard-coded to check a specific directory per project, that will work. Not so much if data is dumped into same directory generically for all projects. # How are evaluations exposed to users? i.e. how will they know the eval id?
import synapseclient
from synapseclient import File, Activity

syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
annotDict = dict()
annotDict['fileType'] = 'count'
annotDict['normalized'] = 'no'
annotDict['summaryLevel'] = 'gene'

act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq

# BUG FIX: the implicit string concatenations were missing separating spaces,
# producing "...all BAMssummarized..." and "...HTSeqare not included." in the
# stored description (the corrected wording matches the duplicate of this
# script elsewhere in this file's history).
counts = File(path='/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.'
                   'DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
              name='PFC_CountMatrix_ensembl.txt',
              description='Gene counts for all BAMs '
                          'summarized using Ensembl gene models. QC counts (e.g. \"ambiguous\") from HTSeq '
                          'are not included.',
              parentId='syn2290933',
              synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)

# Need to check:
# - annotations: standards?
# - synapseclient: auto-return synapse id on upload
# - syn.store: specify activity independently?
# - Activity.executed: executable file, or just script?
# Upload the Ensembl raw gene-count matrix to Synapse with provenance
# (BAM + GTF used, htseq executed) and file-type annotations.
import synapseclient
from synapseclient import File, Activity

syn = synapseclient.Synapse()
syn.login()

### Ensembl raw counts
annotDict = dict()
annotDict['fileType'] = 'count'
annotDict['normalized'] = 'no'
annotDict['summaryLevel'] = 'gene'

act = Activity(name='Counting', description='Raw gene counts using HTSeq.')
act.used(['syn2290932', 'syn2215531'])  # syn2290932 is BAM, syn2215531 is GTF
act.executed('syn2243147')  # syn2243147 is htseq

counts = File(
    path=
    '/projects/CommonMind/data/FROM_CORE/Production/readCounts/CMC.DataFreeze.CountMatrix_V7.ensemble.Clean.txt',
    name='PFC_CountMatrix_ensembl.txt',
    description=
    'Gene counts for all BAMs summarized using Ensembl gene models. QC counts (e.g. \"ambiguous\") from HTSeq are not included.',
    parentId='syn2290933',
    synapseStore=True)
counts = syn.store(counts, activity=act)
syn.setAnnotations(counts, annotations=annotDict)
import sys
import os  # BUG FIX: needed for os.path.basename below (original wrote 'of.path')
import json

import synapseclient
from synapseclient import File, Activity, Wiki

syn = synapseclient.login()

input_path = sys.argv[1]

with open(input_path + ".json") as handle:
    meta_data = json.loads(handle.read())

DST_FOLDER = 'syn3079564'  # test upload folder

# Create Provenance log
# BUG FIX: the original Activity(...) call was missing commas between keyword
# arguments and misspelled the keywords 'desciption' and 'exectuted', so it
# could not even parse. NOTE(review): 'executed' is populated from
# meta_data['used'], as in the original — looks suspicious; confirm whether
# the metadata has a separate 'executed' key.
provenance = Activity(name=meta_data['activity'],
                      description=meta_data['description'],
                      used=meta_data['used'],
                      executed=meta_data['used'])
#prov = syn.store(prov)

name = os.path.basename(input_path)

#Add metadata to files to be uploaded
f = File(input_path, name=name, parentId=DST_FOLDER)
f.dataType = meta_data['dataType']
f.fileType = meta_data['dataType']
f.variant_workflow = meta_data['workflow']
f.variant_workflow_version = meta_data['workflowVersion']
# BUG FIX: bare name 'call_type' was undefined — presumably it comes from the
# metadata like the surrounding fields; TODO confirm the key name.
f.call_type = meta_data['call_type']
f.reference_build = meta_data['referenceBuild']
f.center_name = meta_data['center_name']
def test_syncFromSynase__manifest(syn):
    """Verify that we generate manifest files when syncing to a location outside of the cache."""

    # Build an in-memory project tree: project -> (file1, folder -> file2).
    project = Project(name="the project", parent="whatever", id="syn123")
    path1 = '/tmp/foo'
    file1 = File(name="file1", parent=project, id="syn456", path=path1)
    path2 = '/tmp/afolder/bar'
    file2 = File(name="file2", parent=project, id="syn789", parentId='syn098',
                 path=path2)
    folder = Folder(name="afolder", parent=project, id="syn098")
    entities = {
        file1.id: file1,
        file2.id: file2,
        folder.id: folder,
    }

    # Stand-in for syn.get: serve entities from the local dict by ID.
    def syn_get_side_effect(entity, *args, **kwargs):
        return entities[id_of(entity)]

    file_1_provenance = Activity(data={
        'used': '',
        'executed': '',
    })
    file_2_provenance = Activity(data={
        'used': '',
        'executed': '',
        'name': 'foo',
        'description': 'bar',
    })

    provenance = {
        file1.id: file_1_provenance,
        file2.id: file_2_provenance,
    }

    # Stand-in for syn.getProvenance keyed on entity ID.
    def getProvenance_side_effect(entity, *args, **kwargs):
        return provenance[id_of(entity)]

    # Expected tab-separated manifest contents (one per directory level).
    expected_project_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path1}\tsyn123\tfile1\tTrue\t\t\t\t\t
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""
    expected_folder_manifest = \
        f"""path\tparent\tname\tsynapseStore\tcontentType\tused\texecuted\tactivityName\tactivityDescription
{path2}\tsyn098\tfile2\tTrue\t\t\t\tfoo\tbar
"""
    expected_synced_files = [file2, file1]

    with tempfile.TemporaryDirectory() as sync_dir:
        with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
                patch.object(syn, "get", side_effect=syn_get_side_effect),\
                patch.object(syn, "getProvenance") as patch_syn_get_provenance:
            patch_syn_get_provenance.side_effect = getProvenance_side_effect

            synced_files = synapseutils.syncFromSynapse(syn, project, path=sync_dir)
            assert sorted([id_of(e) for e in expected_synced_files
                           ]) == sorted([id_of(e) for e in synced_files])

            # we only expect two calls to provenance even though there are three rows of provenance data
            # in the manifests (two in the outer project, one in the folder)
            # since one of the files is repeated in both manifests we expect only the single get provenance call
            assert len(
                expected_synced_files) == patch_syn_get_provenance.call_count

            # we should have two manifest files, one rooted at the project and one rooted in the sub folder
            _compareCsv(
                expected_project_manifest,
                os.path.join(sync_dir, synapseutils.sync.MANIFEST_FILENAME))
            _compareCsv(
                expected_folder_manifest,
                os.path.join(sync_dir, folder.name, synapseutils.sync.MANIFEST_FILENAME))
# (Python 2 script fragment) Run featureCounts on a local BAM, then upload the
# command log and the resulting gene-count matrix to Synapse with provenance.
cmd = ' '.join(['featureCounts -p -t exon -g gene_id -a', gtf,
                '-o', outputFile, '-s', args.strand,
                '-T', args.thread, localBAMfilePath])
print 'featurecounts start %s' % time.asctime()
print >> commandsFile, '%s' % cmd
# NOTE(review): shell=True with interpolated arguments — acceptable only for
# trusted inputs.
subprocess.call(cmd, shell = True)
print 'featurecounts end %s' % time.asctime()

## Load results to synapse
# Set up provenance.
print 'Loading %s to Synapse.' % cfPath
commandsFile.close()
cf = File(path=cfPath, description='Job commands.', parentId=args.out, synapseStore=True)
cf = syn.store(cf, activityName='count_evaluation',
               executed=['https://github.com/Sage-Bionetworks/synapse-seq/blob/master/scripts/eval_counts_featurecounts.py'])

act = Activity(name='Read counting',
               description='Counting aligned reads to GTF features using featurecounts.',
               executed=['syn2807330', cf.id])
act.used(target=submission.entityId, targetVersion=submission.versionNumber)
act.used(args.gtf)

# Load raw count file
print 'Loading %s to Synapse.' % outputFile
quantEntity = File(path=outputFile, name=prefix+'_gene_counts.txt',
                   description='Read counts summarized at gene level.',
                   parentId=args.out, synapseStore=True)
quantEntity = syn.store(quantEntity, forceVersion=False, activity=act)
syn.setAnnotations(quantEntity,
                   annotations=dict(fileType='count', normalized='no',
                                    summaryLevel='gene', biasCorrection='False'))
print 'new entity id %s' % quantEntity.id

## Use this code after related JIRA is resolved
## Load metrics to Synapse table
# table = syn.get(syncfg.featurecountsMetricsTable)
def test_copy():
    """Tests the copy function"""

    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    acl = syn.setPermissions(
        project_entity, other_user['principalId'],
        accessType=['READ', 'CREATE', 'UPDATE', 'DOWNLOAD'])
    # Create two Folders in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(
        File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)

    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn, file_entity.id,
                               destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn, externalURL_entity.id,
                                   destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    #Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id],
                             downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: set_Provenance = Traceback
    print("Test: setProvenance = Traceback")
    assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id
    assert copied_url_prov['used'][0]['reference'][
        'targetId'] == externalURL_entity.id

    # TEST: Make sure copied files are the same
    assert copied_ent_annot == annots
    assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId

    # TEST: Make sure copied URLs are the same
    assert copied_url_annot == {}
    assert copied_URL_ent.externalURL == repo_url
    assert copied_URL_ent.name == 'rand'
    assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id,
                  destinationId=third_folder.id, setProvenance="gib")
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id,
                  destinationId=file_entity.id)

    print("Test: setProvenance = None")
    output = synapseutils.copy(syn, file_entity.id,
                               destinationId=second_folder.id,
                               setProvenance=None)
    # With provenance stripped, fetching it must fail.
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    print("Test: setProvenance = Existing")
    output_URL = synapseutils.copy(syn, externalURL_entity.id,
                                   destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert output_prov['name'] == prov['name']
    assert output_prov['used'] == prov['used']

    if 'username' not in other_user or 'password' not in other_user:
        sys.stderr.write(
            '\nWarning: no test-authentication configured. skipping testing copy function when trying to copy file made by another user.\n'
        )
        return

    try:
        print(
            "Test: Other user copy should result in different data file handle"
        )
        syn_other = synapseclient.Synapse(skip_checks=True)
        syn_other.login(other_user['username'], other_user['password'])

        output = synapseutils.copy(syn_other, file_entity.id,
                                   destinationId=third_folder.id)
        new_copied_ent = syn.get(output[file_entity.id])
        new_copied_ent_annot = syn.getAnnotations(new_copied_ent)
        schedule_for_cleanup(new_copied_ent.id)

        copied_URL_ent.externalURL = "https://www.google.com"
        copied_URL_ent = syn.store(copied_URL_ent)
        output = synapseutils.copy(syn_other, copied_URL_ent.id,
                                   destinationId=third_folder.id, version=1)
        new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False)
        schedule_for_cleanup(new_copied_URL.id)

        assert new_copied_ent_annot == annots
        assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId
        #Test if copying different versions gets you the correct file
        assert new_copied_URL.versionNumber == 1
        assert new_copied_URL.externalURL == repo_url
        assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId
    finally:
        syn_other.logout()

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    print("Test: Copy Links")
    second_file = utils.make_bogus_data_file()
    #schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    #function under test uses queries which are eventually consistent but not immediately after creating the entities
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    link_entity.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    copied_link = synapseutils.copy(syn, link_entity.id,
                                    destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert old.linksTo['targetId'] == new.linksTo['targetId']
    assert old.linksTo['targetVersionNumber'] == new.linksTo[
        'targetVersionNumber']
    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)
    assert_raises(ValueError, synapseutils.copy, syn, link_entity.id,
                  destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    print("Test: Copy Tables")
    cols = [
        Column(name='n', columnType='DOUBLE', maximumSize=50),
        Column(name='c', columnType='STRING', maximumSize=50),
        Column(name='i', columnType='INTEGER')
    ]
    data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]]

    schema = syn.store(
        Schema(name='Testing', columns=cols, parent=project_entity.id))
    row_reference_set = syn.store(
        RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn, schema.id,
                                  destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert row['values'] == data[i]

    assert_raises(ValueError, synapseutils.copy, syn, schema.id,
                  destinationId=second_project.id)
    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    print("Test: Copy Folder")
    mapping = synapseutils.copy(syn, folder_entity.id,
                                destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    assert_raises(ValueError, synapseutils.copy, syn, folder_entity.id,
                  destinationId=second_project.id)
    # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id,
                  destinationId=second_project.id, excludeTypes=["foo"])
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id,
                  destinationId=second_project.id, excludeTypes="file")
    # TEST: excludeType = ["file"], only the folder is created
    second = synapseutils.copy(syn, second_folder.id,
                               destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])

    copied_folder = syn.get(second[second_folder.id])
    assert copied_folder.name == second_folder.name
    assert len(second) == 1
    # TEST: Make sure error is thrown if foldername already exists
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    copied_folder.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id,
                  destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    print("Test: Copy Project")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn, project_entity.id,
                                destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        # The copied Project itself gets a new name, so skip the name check
        # for it; everything else must match.
        if not isinstance(old, Project):
            assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    # TEST: Can't copy project to a folder
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id,
                  destinationId=second_folder.id)
def test_activity_used_url():
    """test activity creation with UsedURLs"""
    # Used/executed URLs in the three accepted shapes: bare string, dict
    # with name+url, and dict that also carries wasExecuted.
    u1 = 'http://xkcd.com'
    u2 = {'name': 'The Onion', 'url': 'http://theonion.com'}
    u3 = {
        'name': 'Seriously advanced code',
        'url':
        'https://github.com/cbare/Pydoku/blob/ef88069f70823808f3462410e941326ae7ffbbe0/solver.py',
        'wasExecuted': True
    }
    u4 = {
        'name': 'Heavy duty algorithm',
        'url': 'https://github.com/cbare/Pydoku/blob/master/solver.py'
    }

    a = Activity(name='Foobarbat',
                 description='Apply foo to a bar and a bat',
                 used=[u1, u2, u3],
                 executed=[u3, u4])
    # URLs can also be added after construction.
    a.executed(url='http://cran.r-project.org/web/packages/glmnet/index.html',
               name='glm.net')
    a.used(url='http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day',
           name='earthquakes')

    u = utils._find_used(a, lambda res: 'url' in res and res['url'] == u1)
    assert u is not None
    assert u['url'] == u1
    assert not u['wasExecuted']

    u = utils._find_used(
        a, lambda res: 'name' in res and res['name'] == 'The Onion')
    assert u is not None
    assert u['url'] == 'http://theonion.com'
    assert not u['wasExecuted']

    u = utils._find_used(
        a,
        lambda res: 'name' in res and res['name'] == 'Seriously advanced code')
    assert u is not None
    assert u['url'] == u3['url']
    assert u['wasExecuted'] == u3['wasExecuted']

    u = utils._find_used(
        a, lambda res: 'name' in res and res['name'] == 'Heavy duty algorithm')
    assert u is not None
    assert u['url'] == u4['url']
    # u4 came in via the executed= list, so it must be flagged as executed.
    assert u['wasExecuted']

    u = utils._find_used(
        a, lambda res: 'name' in res and res['name'] == 'glm.net')
    assert u is not None
    assert u[
        'url'] == 'http://cran.r-project.org/web/packages/glmnet/index.html'
    assert u['wasExecuted']

    u = utils._find_used(
        a, lambda res: 'name' in res and res['name'] == 'earthquakes')
    assert u is not None
    assert u[
        'url'] == 'http://earthquake.usgs.gov/earthquakes/feed/geojson/2.5/day'
    assert not u['wasExecuted']
def get_activity(self, entity: Entity, version=None) -> Activity:
    """Return the provenance Activity for ``entity`` (optionally at a specific
    ``version``), or an empty Activity when the lookup fails with
    SynapseHTTPError (no provenance record) or ValueError (bad input)."""
    try:
        return self.getProvenance(entity, version)
    except (SynapseHTTPError, ValueError):
        return Activity()