def uploadToSynapse(f):
    """Extract metadata from a file path and register the file in Synapse.

    The path is appended to ``URLBASE`` and stored as an external link
    (``synapseStore=False``), annotated with the metadata parsed from the
    path and file name.

    :param f: '/'-separated path.  Unless it contains 'OICR_BL' or
              'CRG/clindel/somatic', its fifth component names the center.
    :returns: None.  Paths belonging to an unrecognised center are reported
              on stdout and skipped.
    """
    sample_id = workflow_name = date = call_type = dataType = fileType = ''
    url = URLBASE + f
    path_parts = f.split('/')

    if 'OICR_BL' in f:
        center = 'oicr_bl'
    elif 'CRG/clindel/somatic' in f:
        center = 'crg_clindel'
    else:
        center = path_parts[4]
    filename = path_parts[-1]
    name_fields = filename.split('.')

    if center in ('yale', 'wustl', 'LOHcomplete'):
        # Naming scheme: <sample_id>.<dataType>.<...>.<fileType>[.gz]
        if filename == 'bd829214-f230-4331-b234-def10bbe7938CNV.vcf.gz':
            # This one file lacks the '.' between sample id and data type.
            sample_id, dataType, fileType = 'bd829214-f230-4331-b234-def10bbe7938', 'cnv', 'vcf'
        else:
            sample_id, dataType = filename.lower().split('.')[:2]
            fileType = [i for i in name_fields[2:] if i != 'gz'][-1]
    elif center in ('broad', 'BSC', 'oicr_sga', 'mda_kchen', 'MDA_HGSC', 'mcgill_popsv',
                    'sfu', 'UCSC', 'oicr_bl', 'Synteka_pgm21', 'crg_clindel'):
        # Naming scheme: <sample_id>.<workflow>.<date>.<call_type>.<dataType>.<...>.<fileType>[.gz]
        # 'indels' is normalised to 'indel' before splitting.  The original
        # passed split(...) as a third argument to replace(), which raised
        # NameError.
        sample_id, workflow_name, date, call_type, dataType = \
            filename.replace('indels', 'indel').split('.')[:5]
        fileType = [i for i in name_fields[5:] if i != 'gz'][-1]
    else:
        print('Not uploading: %s' % f)
        return

    # Single-argument print() emits the same text on Python 2 and Python 3.
    print(' '.join((center, workflow_name, date, call_type, dataType, fileType)))

    entity = File(url, parentId=DIRS[center], synapseStore=False)
    entity.center = center.lower()
    entity.sample_id = sample_id
    entity.workflow_name = workflow_name
    entity.date = date
    entity.call_type = call_type
    entity.dataType = 'DNA'
    entity.disease = 'Cancer'
    entity.dataSubType = dataType
    entity.fileType = fileType
    # TODO: analysis_id_tumor is not populated yet.
    syn.store(entity, forceVersion=False)
def test_Entity():
    """CRUD round trip for Project, Folder and File entities.

    Uses the createEntity / getEntity / updateEntity family of calls and
    checks that properties, annotations and file content survive each step.
    """
    project_name = str(uuid.uuid4())
    project = syn.createEntity(Project(project_name, description='Bogus testing project'))
    schedule_for_cleanup(project)

    folder = syn.createEntity(
        Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000))

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    birthday = Datetime(2013, 3, 15)
    numbers = [33, 44, 55]

    a_file = syn._createFileEntity(
        File(path, parent=folder, description='Random data for testing',
             foo='An arbitrary value', bar=[33, 44, 55], bday=Datetime(2013, 3, 15)))

    # The locally known path must survive entity creation.
    assert a_file.path == path

    # Project as stored on the server.
    project = syn.getEntity(project)
    assert project.name == project_name

    # Folder as stored on the server.
    folder = syn.getEntity(folder.id)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.foo[0] == 1000

    # File annotations as stored on the server.
    a_file = syn.getEntity(a_file)
    assert a_file['foo'][0] == 'An arbitrary value'
    assert a_file['bar'] == numbers
    assert a_file['bday'][0] == birthday

    # Downloaded content must equal what was uploaded.
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(path, a_file.path)

    # TODO: the local file path is lost at this point, so restore it by hand.
    a_file.path = path

    # Change one annotation, add another and push the update.
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert a_file['foo'][0] == 'Another arbitrary chunk of text data'
    assert a_file['bar'] == numbers
    assert a_file['bday'][0] == birthday
    assert a_file.new_key[0] == 'A newly created value'
    assert a_file.path == path

    # Replace the file content with a fresh file.
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)

    # The replacement content must round-trip too.
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(new_path, a_file.path)
def test_round_trip():
    """Chunk-upload a bogus binary file, download it again and compare.

    Cleanup (entity, local file, file handle) always runs; failures while
    deleting the entity or the local file are printed rather than raised so
    they cannot mask the test result.
    """
    fh = None
    junk = None
    filepath = utils.make_bogus_binary_file(6*MB + 777771, verbose=True)
    # Single-argument print() yields identical output on Python 2 and 3.
    print('Made bogus file:  %s' % filepath)

    try:
        fh = syn._chunkedUploadFile(filepath, verbose=False)

        # Download the file and compare it with the original.
        # NOTE(review): the download target is `filepath` itself, so this may
        # compare the file with itself -- confirm against _downloadFileEntity.
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)
    finally:
        try:
            if junk is not None:
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
        if fh:
            syn._deleteFileHandle(fh)
def test_uploadFileEntity(syn, project, schedule_for_cleanup):
    """Store a File entity, verify the download, then replace its content."""

    def _assert_round_trip(stored, local_path):
        # Fetch the entity again and check both file name and content.
        fetched = syn.get(stored)
        assert fetched['files'][0] == os.path.basename(local_path)
        assert filecmp.cmp(local_path, fetched['path'])
        return fetched

    # Create the File entity from a fresh temporary file.
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)
    entity = syn.store(File(name='fooUploadFileEntity',
                            path=fname,
                            parentId=project['id'],
                            description='A test file entity'))

    # Download and verify.
    entity = _assert_round_trip(entity, fname)

    # The upload must have produced an S3 file handle.
    handles = syn.restGET('/entity/%s/filehandles' % entity.id)['list']
    fh = handles[0]
    assert fh['concreteType'] == 'org.sagebionetworks.repo.model.file.S3FileHandle'

    # Point the existing entity at a different temporary file and store again.
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)
    entity.path = fname
    entity = syn.store(entity)

    # Download and verify that the new content is what comes back.
    entity = _assert_round_trip(entity, fname)
def test_store_activity():
    """Store a File with an Activity and reuse that Activity for a second File."""
    used_url_type = 'org.sagebionetworks.repo.model.provenance.UsedURL'

    # A File backed by local data plus an Activity that used two URLs.
    path = utils.make_bogus_binary_file()
    schedule_for_cleanup(path)
    entity = File(path, name='Hinkle horn honking holes', parent=project)
    honking = Activity(
        name='Hinkle horn honking',
        description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    for photo_url in ('http://www.flickr.com/photos/bevanbfree/3482259379/',
                      'http://www.flickr.com/photos/bevanbfree/3482185673/'):
        honking.used(photo_url)

    # Storing does not set the Activity's ID on the local object ...
    entity = syn.store(entity, activity=honking)

    # ... but fetching the provenance returns the stored Activity.
    honking = syn.getProvenance(entity.id)

    # Verify the Activity.
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    first, second = honking['used'][0], honking['used'][1]
    assert first['concreteType'] == used_url_type
    assert first['wasExecuted'] == False
    assert first['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert second['concreteType'] == used_url_type
    assert second['wasExecuted'] == False

    # Attach the same Activity to a second, externally stored File.
    entity = syn.store(
        File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg',
             name='Nettlebed Cave', parent=project, synapseStore=False),
        activity=honking)

    # Both entities must share one Activity.
    honking2 = syn.getProvenance(entity)
    assert honking['id'] == honking2['id']
def test_synapseStore_flag():
    """Store local paths and URLs with synapseStore=False and read them back."""
    # An existing local file stored by reference only.
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    bogus = syn.store(
        File(path, name='Totally bogus data', parent=project, synapseStore=False))

    # Fetch the metadata without downloading anything.
    bogus = syn.get(bogus, downloadFile=False)
    assert bogus.name == 'Totally bogus data'
    assert bogus.path == path, "Path: %s\nExpected: %s" % (bogus.path, path)
    assert bogus.synapseStore == False

    # Build the expected file URL; a Windows path starts with a drive letter
    # and becomes file:///c:/foo/bar/bat.txt.
    is_windows_path = path[0].isalpha() and path[1] == ':'
    if is_windows_path:
        expected_url = 'file:///' + path.replace("\\", "/")
    else:
        expected_url = 'file://' + path

    assert bogus.externalURL == expected_url, \
        'URL: %s\nExpected %s' % (bogus.externalURL, expected_url)

    # Storing a path that does not exist still works; getting it raises.
    bogus = syn.store(
        File('/path/to/local/file1.xyz', parentId=project.id, synapseStore=False))
    assert_raises(IOError, syn.get, bogus)
    assert bogus.synapseStore == False

    # Same exercise with a URL.
    bogus = syn.store(
        File('http://dev-versions.synapse.sagebase.org/synapsePythonClient',
             parent=project, synapseStore=False))
    bogus = syn.get(bogus)
    assert bogus.synapseStore == False
def test_syncFromSynapse():
    """Recursive download through syncFromSynapse without an explicit path.

    Most of the function is already covered by
    tests/integration/test_command_line_client::test_command_get_recursive_and_query,
    so only the path=None case is tested here.
    """
    uploaded_paths = []

    def _upload_bogus_file(parent):
        # Make a temp file, remember it, schedule its removal and store it.
        local_path = utils.make_bogus_data_file()
        uploaded_paths.append(local_path)
        schedule_for_cleanup(local_path)
        syn.store(File(local_path, parent=parent))

    # Project containing one Folder.
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Two files inside the Folder ...
    for _ in range(2):
        _upload_bogus_file(folder_entity)
    # ... and one directly under the Project.
    _upload_bogus_file(project_entity)

    # Recursive get must return exactly the uploaded files.
    output = synapseutils.syncFromSynapse(syn, project_entity)
    assert_equals(len(output), len(uploaded_paths))
    for synced in output:
        assert_in(synced.path, uploaded_paths)
def test_upload_string():
    """Upload a string (rather than a file on disk) and compare the download.

    A local temp file holding the same content serves as the reference for
    the comparison.  Cleanup failures are printed, not raised.
    """
    fh = None
    junk = None
    content = "My dog has fleas.\n"

    # NamedTemporaryFile opens in binary mode, so write bytes; the explicit
    # encode works on both Python 2 and 3.
    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
        f.write(content.encode('utf-8'))
        filepath = f.name
    # Single-argument print() yields identical output on Python 2 and 3.
    print('Made bogus file:  %s' % filepath)

    try:
        fh = syn._uploadStringToFile(content)

        # Download the file and compare it with the original.
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)
    finally:
        try:
            if junk is not None:
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_get_local_file():
    """syn.get() called with a local file path instead of a Synapse ID."""
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    folder = syn.createEntity(
        Folder('TestFindFileFolder', parent=project, description='A place to put my junk'))

    # The file is not in Synapse yet, so looking it up must fail.
    assert_raises(SynapseError, syn.get, new_path)

    # Once stored in the folder, the path resolves to that entity.
    ent_folder = syn.store(File(new_path, parent=folder))
    ent2 = syn.get(new_path)
    assert ent_folder.id == ent2.id and ent_folder.versionNumber == ent2.versionNumber

    # Stored in a second location: the lookup still works (a warning is
    # expected to be displayed).
    ent = syn.store(File(new_path, parent=project))
    ent = syn.get(new_path)

    # limitSearch narrows the lookup to the folder copy.
    ent = syn.get(new_path, limitSearch=folder.id)
    assert ent.id == ent_folder.id and ent.versionNumber == ent_folder.versionNumber

    # A limitSearch that excludes every copy raises.
    assert_raises(SynapseError, syn.get, new_path, limitSearch='syn1')
def test_getWithEntityBundle(*mocks):
    """_getWithEntityBundle downloads once and preserves local entity state.

    The three mocks are taken from the end of ``mocks``: last is the "is
    local" check, then the cache-location guess, then the file download.
    """
    remaining = list(mocks)
    is_loco_mock = remaining.pop()
    cache_location_guess_mock = remaining.pop()
    download_file_mock = remaining.pop()

    # -- Change downloadLocation but do not download more than once --
    is_loco_mock.return_value = False

    entity_json = {"name": "anonymous",
                   "dataFileHandleId": "-1337",
                   "concreteType": "org.sagebionetworks.repo.model.FileEntity",
                   "parentId": "syn12345"}
    file_handle_json = {u'concreteType': u'org.sagebionetworks.repo.model.file.S3FileHandle',
                        u'fileName': u'anonymous',
                        u'contentMd5': u'1698d26000d60816caab15169efcd23a',
                        u'id': u'-1337'}
    bundle = {"entity": entity_json,
              "fileHandles": [file_handle_json],
              "annotations": {}}

    # Cache directory for this entity.
    cacheDir = synapseclient.cache.determine_cache_directory(bundle['entity'])

    # Stand-in for the file that the first download would have produced; the
    # first _getWithEntityBundle() call should add it to the cache.
    fd, cachedFile = tempfile.mkstemp()
    os.close(fd)
    defaultLocation = os.path.join(cacheDir, bundle['entity']['name'])
    cache_location_guess_mock.return_value = (cacheDir, defaultLocation, cachedFile)

    def _fake_download(entity, path, submission):
        # No real download happens, but the requested path must be in the cache.
        assert path == defaultLocation
        return {"path": cachedFile}

    download_file_mock.side_effect = _fake_download

    # Start from a cache without a cache map.
    cacheMap = os.path.join(cacheDir, '.cacheMap')
    if os.path.exists(cacheMap):
        os.remove(cacheMap)

    syn._getWithEntityBundle(entityBundle=bundle, entity=None,
                             downloadLocation=cacheDir, ifcollision="overwrite.local")
    syn._getWithEntityBundle(entityBundle=bundle, entity=None,
                             ifcollision="overwrite.local")
    e = syn._getWithEntityBundle(entityBundle=bundle, entity=None,
                                 downloadLocation=cacheDir, ifcollision="overwrite.local")
    assert download_file_mock.call_count == 1

    assert e.name == bundle["entity"]["name"]
    assert e.parentId == bundle["entity"]["parentId"]
    assert e.cacheDir == cacheDir
    assert bundle['entity']['name'] in e.files
    assert e.path == os.path.join(cacheDir, bundle["entity"]["name"])

    # Local state set on an entity must survive the merge with the bundle.
    url = 'http://foo.com/secretstuff.txt'
    e = File(name='anonymous', parentId="syn12345", synapseStore=False, externalURL=url)
    e.local_state({'zap': 'pow'})
    e = syn._getWithEntityBundle(entityBundle=bundle, entity=e)
    assert e.local_state()['zap'] == 'pow'
    assert e.synapseStore == False
    assert e.externalURL == url
def test_store_with_create_or_update_flag():
    """createOrUpdate=True versions an existing File; False raises on a name clash."""
    project = create_project()
    shared_name = 'Bogus Test File'

    filepath = utils.make_bogus_binary_file()
    bogus1 = syn.store(File(filepath, name=shared_name, parent=project),
                       createOrUpdate=True)

    # Point the stored entity at a different file with the same name and parent.
    new_filepath = utils.make_bogus_binary_file()
    bogus1.path = new_filepath

    # Storing again is expected to create version 2 of the same File.
    bogus2 = syn.store(bogus1, createOrUpdate=True)
    assert bogus2.id == bogus1.id
    assert bogus2.versionNumber == 2
    assert not filecmp.cmp(bogus2.path, filepath)

    bogus2a = syn.get(bogus2.id)
    assert bogus2a.id == bogus1.id
    assert bogus2a.versionNumber == 2
    assert filecmp.cmp(bogus2.path, bogus2a.path)

    # A brand-new File object with the same name and parent ...
    newer_filepath = utils.make_bogus_binary_file()
    bogus3 = File(newer_filepath, name=shared_name, parent=project)

    # ... is expected to be rejected (409) when createOrUpdate is off.
    assert_raises(requests.exceptions.HTTPError, syn.store, bogus3, createOrUpdate=False)
def upload(args, syn):
    """Store ``args.input`` in the Synapse folder that matches ``args.dataType``.

    :param args: object with ``dataType``, ``workflow`` and ``input`` attributes
    :param syn:  Synapse client used for get/store
    :returns: Synapse ID of the stored file entity
    :raises ValueError: if ``args.dataType`` is not one of the known types

    When ``args.workflow`` is given, the pipeline entity is re-stored with that
    file first and the resulting ID is recorded as provenance.
    """
    # (accepted dataType, parent folder, pipeline entity, dataType annotation)
    destinations = (
        ("rnaseq", "syn6034916", "syn6126122", "RNASeq"),
        ("dnaseq", "syn6034751", "syn6126123", "TargDNASeq"),
        ("snparray", "syn6038475", "syn6126121", "SNParray"),
        ("exparray", "syn6038915", "syn6126120", "expression_microarray"),
        ("exome", "syn6115597", "", "exome"),
    )
    for accepted, parentId, pipeline, dataType in destinations:
        if args.dataType == accepted:
            break
    else:
        raise ValueError("dataType needs to be rnaseq/dnaseq/snparray/exparray/exome")

    if args.workflow is not None:
        # Upload the workflow file as a new revision of the pipeline entity.
        workflow = syn.get(pipeline, downloadFile=False)
        workflow.path = args.workflow
        workflow.name = os.path.basename(args.workflow)
        pipeline = syn.store(workflow).id

    fileEnt = File(args.input, parent=parentId)
    fileEnt.dataType = dataType
    # NOTE(review): `sampleId` is not defined inside this function; it must be
    # supplied by the enclosing module -- confirm where it comes from.
    fileEnt.sampleId = sampleId
    fileEnt = syn.store(fileEnt, used=pipeline)
    return fileEnt.id
def test_upload__error(self, syn):
    """Verify that if an item upload fails the error is raised in the main
    thread and any running Futures are cancelled."""
    items = [
        _SyncUploadItem(File(path='/tmp/foo', parentId='syn123'), [], [], {}),
        _SyncUploadItem(File(path='/tmp/bar', parentId='syn123'), [], [], {}),
    ]

    def syn_store_side_effect(entity, *args, **kwargs):
        # Every store attempt fails.  The original guarded this with the
        # always-true `entity.path == entity.path`.
        raise ValueError()

    uploader = _SyncUploader(syn, get_executor())
    original_abort = uploader._abort

    def abort_side_effect(futures):
        # Record the call on the mock but still run the real abort logic.
        return original_abort(futures)

    with patch.object(syn, 'store') as mock_syn_store, \
            patch.object(uploader, '_abort') as mock_abort:

        mock_syn_store.side_effect = syn_store_side_effect
        mock_abort.side_effect = abort_side_effect
        with pytest.raises(ValueError):
            uploader.upload(items)

        # _abort must have been called once, with a one-element list ...
        mock_abort.assert_called_once_with([ANY])
        # ... whose members are Futures.  The original evaluated isinstance()
        # on the args tuple and discarded the result, so nothing was checked.
        aborted_futures = mock_abort.call_args[0][0]
        assert all(isinstance(future, Future) for future in aborted_futures)
def test_ftp_download():
    """Download an Entity whose file handle points at an FTP server.

    Only FTP download needs testing, not upload, so the entity references an
    external file and no FTP server has to be maintained for the test.
    """
    # Entity that will point to a file on an FTP server.
    entity = File(parent=project['id'], name='1KB.zip')
    handle_request = {
        'externalURL': 'ftp://speedtest.tele2.net/1KB.zip',
        "fileName": entity.name,
        "contentType": "application/zip",
        "contentMd5": '0f343b0931126a20f133d67c2b018a3b',
        "contentSize": 1024,
        "concreteType": "org.sagebionetworks.repo.model.file.ExternalFileHandle",
    }
    fileHandle = syn.restPOST('/externalFileHandle', json.dumps(handle_request),
                              syn.fileHandleEndpoint)
    entity.dataFileHandleId = fileHandle['id']
    entity = syn.store(entity)

    # Download into the working directory and check the MD5 of what arrived.
    FTPfile = syn.get(entity.id, downloadLocation=os.getcwd(), downloadFile=True)
    downloaded_md5 = utils.md5_for_file(FTPfile.path).hexdigest()
    assert FTPfile.md5 == downloaded_md5

    schedule_for_cleanup(entity)
    os.remove(FTPfile.path)
def test_get_and_store_by_name_and_parent_id():
    """A File stored with an existing name and parent updates that entity."""
    project = create_project()
    entity_name = 'Foobarbat'

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    f = File(path, name=entity_name, parent=project)
    f2 = syn.store(f)
    f = syn.get(f)

    for attribute in ('id', 'name', 'parentId'):
        assert getattr(f, attribute) == getattr(f2, attribute)

    # A second local file ...
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    # ... stored under the same name and parent should become a new version
    # of the previous File entity.
    f3 = syn.store(File(path, name=entity_name, parent=project,
                        description='banana', junk=1234))

    # Same entity, carrying the new properties, annotations and content.
    assert f3.id == f.id
    assert f3.description == 'banana'
    assert f3.junk == [1234]
    assert filecmp.cmp(path, f3.path)
def test_extract_file_entity_metadata__ensure_correct_row_metadata(syn):
    """Each extracted row must carry the metadata of its own entity.

    Regression test for SYNPY-692, where 'contentType' was incorrectly set on
    all rows except for the very first row.
    """
    # Two file entities that differ in every metadata field.
    files = [
        File(parent='syn123', id='syn456', contentType='text/json',
             path='path1', name='entity1', synapseStore=True),
        File(parent='syn789', id='syn890', contentType='text/html',
             path='path2', name='entity2', synapseStore=False),
    ]

    # Provenance metadata is irrelevant here, so stub it out.
    with patch.object(synapseutils.sync, "_get_file_entity_provenance_dict", return_value={}):
        # method under test
        keys, data = synapseutils.sync._extract_file_entity_metadata(syn, files)

    # Compare every extracted value with the value on the source entity.
    for file_entity, file_row_data in zip(files, data):
        for key in keys:
            # Workaround for the parent/parentId inconsistency (SYNPY-697).
            entity_key = 'parentId' if key == 'parent' else key
            assert file_entity.get(entity_key) == file_row_data.get(key)
def _manifest_upload(syn, df):
    """Upload every row of the manifest DataFrame ``df`` through ``_SyncUploader``.

    For each row: constructor fields become ``File`` keyword arguments, the
    columns that are not constructor/store/required/provenance fields become
    annotations, and 'used'/'executed' plus the store fields travel with the
    upload item.

    :returns: True
    """

    def _to_upload_item(row):
        constructor_kwargs = {}
        for key in FILE_CONSTRUCTOR_FIELDS:
            if key in row:
                constructor_kwargs[key] = row[key]
        entity = File(path=row['path'], parent=row['parent'], **constructor_kwargs)

        # Whatever is left after dropping the known columns is an annotation.
        reserved = (FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS
                    + REQUIRED_FIELDS + PROVENANCE_FIELDS)
        entity.annotations = dict(row.drop(reserved, errors='ignore'))

        if 'used' in row:
            used = row['used']
        else:
            used = []
        if 'executed' in row:
            executed = row['executed']
        else:
            executed = []
        store_kwargs = {key: row[key] for key in STORE_FUNCTION_FIELDS if key in row}
        return _SyncUploadItem(entity, used, executed, store_kwargs)

    items = [_to_upload_item(row) for _, row in df.iterrows()]

    with _sync_executor(syn) as executor:
        _SyncUploader(syn, executor).upload(items)
    return True
def test_upload_string():
    """Upload a string (not a file on disk) and compare it with the download.

    The same content is written to a local temp file that serves as the
    reference.  Cleanup failures are printed rather than raised.
    """
    fh = None
    junk = None
    content = "My dog has fleas.\n"

    # The temp file is opened in binary mode; encoding explicitly keeps the
    # write valid on Python 3 and unchanged on Python 2.
    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as reference:
        reference.write(content.encode('utf-8'))
        filepath = reference.name
    # Single-argument print() produces the same text on Python 2 and 3.
    print('Made bogus file:  %s' % filepath)

    try:
        fh = syn._uploadStringToFile(content)

        # Download the file and compare it with the original.
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)
    finally:
        try:
            if junk is not None:
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def _manifest_upload(syn, df):
    """Upload every row of the manifest DataFrame ``df`` through ``_SyncUploader``.

    Constructor fields become ``File`` keyword arguments.  Columns that are
    not constructor/store/required/provenance fields become annotations,
    except those whose value is the empty string.

    :returns: True
    """

    def _to_upload_item(row):
        constructor_kwargs = {}
        for key in FILE_CONSTRUCTOR_FIELDS:
            if key in row:
                constructor_kwargs[key] = row[key]
        entity = File(path=row['path'], parent=row['parent'], **constructor_kwargs)

        reserved = (FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS
                    + REQUIRED_FIELDS + PROVENANCE_FIELDS)
        candidates = dict(row.drop(reserved, errors='ignore'))

        # An empty string in the manifest means "no value"; it must not be
        # uploaded as an empty-string annotation.
        kept = {}
        for name, value in candidates.items():
            if value != '':
                kept[name] = value
        entity.annotations = kept

        if 'used' in row:
            used = row['used']
        else:
            used = []
        if 'executed' in row:
            executed = row['executed']
        else:
            executed = []
        store_kwargs = {key: row[key] for key in STORE_FUNCTION_FIELDS if key in row}
        return _SyncUploadItem(entity, used, executed, store_kwargs)

    items = [_to_upload_item(row) for _, row in df.iterrows()]

    with _sync_executor(syn) as executor:
        _SyncUploader(syn, executor).upload(items)
    return True
def test_round_trip():
    """Chunk-upload a bogus binary file, download it again and compare.

    Cleanup of the entity and the local file always runs; failures there are
    printed rather than raised so they cannot mask the test result.
    """
    fh = None
    junk = None
    filepath = utils.make_bogus_binary_file(6 * MB + 777771)
    # Single-argument print() yields identical output on Python 2 and 3.
    print('Made bogus file:  %s' % filepath)

    try:
        fh = syn._chunkedUploadFile(filepath)

        # Download the file and compare it with the original.
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)
    finally:
        try:
            if junk is not None:
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_multipart_upload_big_string():
    """Multipart-upload a large non-ASCII string and read it back unchanged.

    The text is grown until its UTF-8 encoding reaches
    ``multipart_upload_module.MIN_PART_SIZE``.
    """
    import os  # local import: only needed to close the mkstemp descriptor

    cities = ["Seattle", "Portland", "Vancouver", "Victoria",
              "San Francisco", "Los Angeles", "New York",
              "Oaxaca", "Cancún", "Curaçao", "जोधपुर",
              "অসম", "ལྷ་ས།", "ཐིམ་ཕུ་", "دبي", "አዲስ አበባ",
              "São Paulo", "Buenos Aires", "Cartagena",
              "Amsterdam", "Venice", "Rome", "Dubrovnik",
              "Sarajevo", "Madrid", "Barcelona", "Paris",
              "Αθήνα", "Ρόδος", "København", "Zürich",
              "金沢市", "서울", "แม่ฮ่องสอน", "Москва"]

    text = "Places I wanna go:\n"
    while len(text.encode('utf-8')) < multipart_upload_module.MIN_PART_SIZE:
        text += ", ".join(random.choice(cities) for _ in range(5000)) + "\n"

    fhid = multipart_upload_string(syn, text)
    print('FileHandle: {fhid}'.format(fhid=fhid))

    # Download the file and compare it with the original.
    junk = File("message.txt", parent=project, dataFileHandleId=fhid)
    junk.properties.update(syn._createEntity(junk.properties))
    (tmp_f, tmp_path) = tempfile.mkstemp()
    # mkstemp() hands back an open descriptor that the caller must close;
    # only the path is needed here.
    os.close(tmp_f)
    schedule_for_cleanup(tmp_path)
    junk.update(syn._downloadFileEntity(junk, tmp_path))

    with open(junk.path, encoding='utf-8') as f:
        retrieved_text = f.read()

    assert retrieved_text == text
def test_round_trip():
    """Multipart-upload a bogus binary file, download it and compare.

    The download goes to a separate temp path so the comparison is against
    the untouched original.  Cleanup failures are printed, not raised.
    """
    fhid = None
    junk = None
    filepath = utils.make_bogus_binary_file(multipart_upload_module.MIN_PART_SIZE + 777771)
    print('Made bogus file: ', filepath)

    try:
        fhid = multipart_upload(syn, filepath)
        print('FileHandle: {fhid}'.format(fhid=fhid))

        # Download the file and compare it with the original.
        junk = File(filepath, parent=project, dataFileHandleId=fhid)
        junk.properties.update(syn._createEntity(junk.properties))

        (tmp_f, tmp_path) = tempfile.mkstemp()
        # mkstemp() returns an open descriptor that the caller must close;
        # only the path is needed here.
        os.close(tmp_f)
        schedule_for_cleanup(tmp_path)

        junk.update(syn._downloadFileEntity(junk, tmp_path))
        assert filecmp.cmp(filepath, junk.path)
    finally:
        try:
            if junk is not None:
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_download_file_URL_false():
    """Re-storing a URL-backed File must change neither its URL nor its version."""

    def _restore_and_check(entity, expected_url):
        # Fetch metadata only, store again without forcing a version and
        # check that path and version number are untouched.
        entity = syn.get(entity, downloadFile=False)
        originalVersion = entity.versionNumber
        entity = syn.store(entity, forceVersion=False)
        assert_equals(entity.path, expected_url, "Entity should still be pointing at a URL")
        assert_equals(originalVersion, entity.versionNumber)
        return entity

    # Upload an external file handle.
    fileThatExists = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient'
    reupload = syn.store(File(fileThatExists, synapseStore=False, parent=project))

    # Re-upload and check that the URL and version do not get mangled.
    reupload = _restore_and_check(reupload, fileThatExists)

    # Same check for a URL with an extra slash at the end.
    fileThatDoesntExist = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient/'
    reupload.synapseStore = False
    reupload.path = fileThatDoesntExist
    reupload = syn.store(reupload)
    reupload = _restore_and_check(reupload, fileThatDoesntExist)
def _view_setup(cls):
    """Create a folder with two external files and return a stored view schema over it.

    The view has two STRING columns ('foo' and 'bar') and no default view
    columns.
    """
    # Folder that the file view will be scoped to.
    folder = syn.store(
        Folder(name="PartialRowTestFolder" + str(uuid.uuid4()), parent=project))

    # Two link-only files; the paths are never read.
    for local_path, file_name in (("~/path/doesnt/matter", "f1"),
                                  ("~/path/doesnt/matter/again", "f2")):
        syn.store(File(local_path, name=file_name, parent=folder, synapseStore=False))

    cols = [Column(name='foo', columnType='STRING', maximumSize=1000),
            Column(name='bar', columnType='STRING')]
    schema = EntityViewSchema(name='PartialRowTestViews' + str(uuid.uuid4()),
                              columns=cols,
                              addDefaultViewColumns=False,
                              parent=project,
                              scopes=[folder])
    return syn.store(schema)
def test_getChildren(syn, schedule_for_cleanup):
    """getChildren on a project returns only its direct children.

    Hierarchy built by the test:

        PROJECT
          |-- File
          |-- Folder
                |-- File
    """
    test_project = syn.store(Project(name=str(uuid.uuid1())))
    folder = syn.store(Folder(name="firstFolder", parent=test_project))

    # A file nested inside the folder: not a direct child of the project.
    nested_file = File(path="~/doesntMatter.txt", name="file inside folders",
                       parent=folder, synapseStore=False)
    syn.store(nested_file)

    # A file directly under the project.
    project_file = syn.store(
        File(path="~/doesntMatterAgain.txt", name="file inside project",
             parent=test_project, synapseStore=False))
    schedule_for_cleanup(test_project)

    expected_id_set = set([project_file.id, folder.id])
    children_id_set = set(child['id'] for child in syn.getChildren(test_project.id))
    assert expected_id_set == children_id_set
def test_syncFromSynapse__manifest_is_root(
        mock__get_file_entity_provenance_dict, mock_generateManifest, syn):
    """With manifest="root", syncFromSynapse generates a single manifest.

    Nested structure used by the test:

        project
          |---> file1
          |---> folder
                  |---> file2
    """
    project = Project(name="the project", parent="whatever", id="syn123")
    file1 = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")
    file2 = File(name="a file2", parent=folder, id="syn789123")

    entities = dict((entity.id, entity) for entity in (file1, folder, file2))

    def syn_get_side_effect(entity, *args, **kwargs):
        return entities[id_of(entity)]

    mock__get_file_entity_provenance_dict.return_value = {}

    children_per_call = [[folder, file1], [file2]]
    with patch.object(syn, "getChildren", side_effect=children_per_call), \
            patch.object(syn, "get", side_effect=syn_get_side_effect) as patch_syn_get:

        synapseutils.syncFromSynapse(syn, project, path="./", downloadFile=False, manifest="root")

        # Each file is fetched once, into the directory mirroring its parent.
        expected_get_calls = [
            call(file1['id'], downloadLocation="./", ifcollision='overwrite.local',
                 followLink=False, downloadFile=False),
            call(file2['id'], downloadLocation="./a folder", ifcollision='overwrite.local',
                 followLink=False, downloadFile=False),
        ]
        assert patch_syn_get.call_args_list == expected_get_calls

        # Exactly one manifest, listing both files.
        assert mock_generateManifest.call_count == 1
        call_files = mock_generateManifest.call_args_list[0][0][1]
        assert len(call_files) == 2
        assert call_files[0].id == "syn456"
        assert call_files[1].id == "syn789123"
def test_walk():
    """synapseutils.walk visits every container of a small project tree.

    Tree built by the test:

        project
          |-- file (firstfile)
          |-- folder
          |     |-- nested_folder
          |           |-- file (secondfile)
          |-- second_folder
                |-- file (thirdfile)
    """
    walked = []

    def _sort_lists_in_place(entry):
        # Folder/file lists come back in a name-dependent order, so sort them
        # in place before comparing whole tuples.
        for member in entry:
            if isinstance(member, list):
                member.sort()

    firstfile = utils.make_bogus_data_file()
    schedule_for_cleanup(firstfile)
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(second_folder.id)
    file_entity = syn.store(File(firstfile, parent=project_entity))
    schedule_for_cleanup(file_entity.id)

    walked.append(((project_entity.name, project_entity.id),
                   [(folder_entity.name, folder_entity.id),
                    (second_folder.name, second_folder.id)],
                   [(file_entity.name, file_entity.id)]))

    nested_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=folder_entity))
    schedule_for_cleanup(nested_folder.id)
    secondfile = utils.make_bogus_data_file()
    schedule_for_cleanup(secondfile)
    second_file = syn.store(File(secondfile, parent=nested_folder))
    schedule_for_cleanup(second_file.id)
    thirdfile = utils.make_bogus_data_file()
    schedule_for_cleanup(thirdfile)
    third_file = syn.store(File(thirdfile, parent=second_folder))
    schedule_for_cleanup(third_file.id)

    walked.append(((os.path.join(project_entity.name, folder_entity.name), folder_entity.id),
                   [(nested_folder.name, nested_folder.id)],
                   []))
    walked.append(((os.path.join(os.path.join(project_entity.name, folder_entity.name),
                                 nested_folder.name),
                    nested_folder.id),
                   [],
                   [(second_file.name, second_file.id)]))
    walked.append(((os.path.join(project_entity.name, second_folder.name), second_folder.id),
                   [],
                   [(third_file.name, third_file.id)]))

    temp = list(synapseutils.walk(syn, project_entity.id))

    for expected in walked:
        _sort_lists_in_place(expected)
    # Without this check an empty walk() result would pass the loop below.
    assert len(temp) == len(walked)
    for actual in temp:
        _sort_lists_in_place(actual)
        assert actual in walked

    print("CHECK: synapseutils.walk on a file should return empty generator")
    temp = synapseutils.walk(syn, second_file.id)
    assert list(temp) == []
def test_File_update_file_handle__External_non_sftp():
    """Applying an external https file handle turns synapseStore off."""
    external_file_handle = dict([
        ('concreteType', 'org.sagebionetworks.repo.model.file.ExternalFileHandle'),
        ('externalURL', "https://some.website"),
    ])

    f = File(parent="idk")
    # A freshly constructed File has a truthy synapseStore.
    assert f.synapseStore

    f._update_file_handle(external_file_handle)
    assert not f.synapseStore
def test_download_file_entity__correct_local_state(syn):
    """A cache hit fills in path, cacheDir and files on the entity."""
    mock_cache_path = utils.normalize_path("/i/will/show/you/the/path/yi.txt")
    expected_dir, expected_name = os.path.dirname(mock_cache_path), os.path.basename(mock_cache_path)

    file_entity = File(parentId="syn123")
    file_entity.dataFileHandleId = 123

    # The cache lookup is stubbed to report the file at mock_cache_path.
    with patch.object(syn.cache, 'get', return_value=mock_cache_path):
        syn._download_file_entity(downloadLocation=None,
                                  entity=file_entity,
                                  ifcollision="overwrite.local",
                                  submission=None)

        assert mock_cache_path == file_entity.path
        assert expected_dir == file_entity.cacheDir
        assert 1 == len(file_entity.files)
        assert expected_name == file_entity.files[0]
def _manifest_upload(syn, df):
    """Store one File entity per row of the manifest DataFrame ``df``.

    Constructor fields become ``File`` keyword arguments, store fields become
    ``syn.store`` keyword arguments, and the columns that are neither
    constructor, store nor required fields become annotations.

    :returns: True
    """
    for _, row in df.iterrows():
        # TODO: extract known constructor variables
        constructor_kwargs = {}
        for key in FILE_CONSTRUCTOR_FIELDS:
            if key in row:
                constructor_kwargs[key] = row[key]
        entity = File(row['path'], parent=row['parent'], **constructor_kwargs)

        reserved = FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS + REQUIRED_FIELDS
        entity.annotations = dict(row.drop(reserved, errors='ignore'))

        # Convert the provenance lists again so that references to files
        # uploaded earlier are replaced.
        for provenance_key in ('used', 'executed'):
            if provenance_key in row:
                row[provenance_key] = syn._convertProvenanceList(row[provenance_key])

        store_kwargs = {}
        for key in STORE_FUNCTION_FIELDS:
            if key in row:
                store_kwargs[key] = row[key]
        syn.store(entity, **store_kwargs)
    return True
def test_synapseStore_flag():
    """Test storing entities while setting the synapseStore flag to False."""
    project = create_project()

    # Store a path to a local file (synapseStore=False).
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    f1 = File(path, name='Totally bogus data', parent=project, synapseStore=False)
    f1 = syn.store(f1)

    f1a = syn.get(f1.id, downloadFile=False)

    assert f1a.name == 'Totally bogus data'
    assert f1a.path == path, 'path=' + str(f1a.path) + '; expected=' + path
    assert f1a.synapseStore == False

    # Make sure the test runs on Windows and other OS's; a Windows file URL
    # looks like file:///c:/foo/bar/bat.txt.
    if path[0].isalpha() and path[1] == ':':
        expected_url = 'file:///' + path
    else:
        expected_url = 'file://' + path

    assert f1a.externalURL == expected_url, 'unexpected externalURL: ' + f1a.externalURL

    # A file path that doesn't exist should still be storable ...
    f2 = File('/path/to/local/file1.xyz', parentId=project.id, synapseStore=False)
    f2 = syn.store(f2)

    # ... but getting it must fail.  The failure for "no exception" sits in
    # the else clause so the except handler cannot swallow it, and str(err)
    # replaces the Python 2-only err.message.
    try:
        syn.get(f2)
    except Exception as err:
        assert str(err).startswith("Could not download non-existent file")
    else:
        raise AssertionError("syn.get() should fail for a non-existent local file")
    # Check the entity just stored; the original re-checked f1a by mistake.
    assert f2.synapseStore == False

    # Try a URL.
    f3 = File('http://dev-versions.synapse.sagebase.org/synapsePythonClient',
              parent=project, synapseStore=False)
    f3 = syn.store(f3)
    f3a = syn.get(f3)
    # Check the entity just fetched; the original re-checked f1a by mistake.
    assert f3a.synapseStore == False
def test_dispose(syn_client, syn_test_helper, new_temp_file):
    """Check that the test helper's trash empties and the disposed objects are gone.

    A project, a folder and a file are stored, handed to ``dispose_of`` and
    then ``dispose()`` is called; afterwards fetching any of them must raise
    SynapseHTTPError with one of the known "missing" messages.
    """
    project = syn_client.store(Project(name=syn_test_helper.uniq_name()))

    folder = syn_client.store(
        Folder(name=syn_test_helper.uniq_name(prefix='Folder '), parent=project))

    file = syn_client.store(
        File(name=syn_test_helper.uniq_name(prefix='File '), path=new_temp_file, parent=folder))

    syn_objects = [project, folder, file]

    for syn_obj in syn_objects:
        syn_test_helper.dispose_of(syn_obj)
        assert syn_obj in syn_test_helper._trash

    syn_test_helper.dispose()
    assert len(syn_test_helper._trash) == 0

    for syn_obj in syn_objects:
        with pytest.raises(synapseclient.exceptions.SynapseHTTPError) as ex:
            syn_client.get(syn_obj, downloadFile=False)

        # The wording of the error differs, so accept any of the known variants.
        err_str = str(ex.value)
        assert "Not Found" in err_str or "cannot be found" in err_str or "is in trash can" in err_str or "does not exist" in err_str

    try:
        os.remove(new_temp_file)
    except OSError:
        # Best-effort cleanup: the temp file may already be gone. Only
        # OS-level failures are ignored, so KeyboardInterrupt and SystemExit
        # are no longer swallowed as they were by a bare ``except``.
        pass
def test_synStore_sftpIntegration():
    """Store a File whose parent project keeps data on sftp and check the result.

    Verifies the sftp external URL, the file name override on download and
    that the downloaded entity carries the file handle's MD5.
    """
    filepath = utils.make_bogus_binary_file(1 * MB - 777771)
    try:
        stored = syn.store(File(filepath, parent=project))
        fetched = syn.get(stored)
        assert stored.externalURL == fetched.externalURL
        assert urlparse(fetched.externalURL).scheme == 'sftp'

        download_dir = tempfile.mkdtemp()
        schedule_for_cleanup(download_dir)

        ## test filename override
        fetched.fileNameOverride = "whats_new_in_baltimore.data"
        fetched = syn.store(fetched)
        ## TODO We haven't defined how filename override interacts with
        ## TODO previously cached files so, side-step that for now by
        ## TODO making sure the file is not in the cache!
        syn.cache.remove(fetched.dataFileHandleId, delete=True)
        downloaded = syn.get(stored, downloadLocation=download_dir)
        downloaded_name = os.path.basename(downloaded.path)
        assert downloaded_name == fetched.fileNameOverride

        ## test that we got an MD5 a la SYNPY-185
        assert_is_not_none(downloaded.md5)
        handle = syn._getFileHandle(downloaded.dataFileHandleId)
        assert_is_not_none(handle['contentMd5'])
        assert_equals(downloaded.md5, handle['contentMd5'])
    finally:
        # Remove the generated file; a failure here is reported, not raised.
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_store__changing_from_Synapse_to_externalURL_by_changing_path(syn, project, schedule_for_cleanup):
    """Flip ``synapseStore`` on a stored File and check the file handle type follows it."""
    s3_handle_type = "org.sagebionetworks.repo.model.file.S3FileHandle"
    external_handle_type = "org.sagebionetworks.repo.model.file.ExternalFileHandle"

    # create a temp file
    temp_path = utils.make_bogus_data_file()
    schedule_for_cleanup(temp_path)

    # start out stored in Synapse
    entity = syn.get(syn.store(File(temp_path, parent=project, synapseStore=True)))
    assert s3_handle_type == entity._file_handle.concreteType

    # switch to an external reference
    entity.synapseStore = False
    entity = syn.store(entity)

    # do a get to make sure filehandle has been updated correctly
    entity = syn.get(entity.id, downloadFile=True)
    assert external_handle_type == entity._file_handle.concreteType
    assert utils.as_url(temp_path) == entity.externalURL
    assert not entity.synapseStore

    # swap back to synapse storage
    entity.synapseStore = True
    entity = syn.store(entity)

    # do a get to make sure filehandle has been updated correctly
    entity = syn.get(entity.id, downloadFile=True)
    assert s3_handle_type == entity._file_handle.concreteType
    assert entity.externalURL is None
    assert entity.synapseStore
def test_store_file_handle_update_metadata():
    """Replace a stored File's dataFileHandleId and check its local state follows."""
    original_file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(original_file_path)

    # upload the original file as an entity
    stored = syn.store(File(original_file_path, parent=project))
    previous_handle = stored._file_handle

    # create file handle to replace the old one
    replacement_file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(replacement_file_path)
    replacement_handle = syn.uploadFileHandle(replacement_file_path, parent=project)

    stored.dataFileHandleId = replacement_handle['id']
    updated = syn.store(stored)

    # make sure _file_handle info was changed
    # (_file_handle values are all changed at once so just verifying id change is sufficient)
    assert_equal(replacement_handle['id'], updated._file_handle['id'])
    assert_not_equal(previous_handle['id'], updated._file_handle['id'])

    # check that local_state was updated
    replacement_dir, replacement_name = os.path.split(replacement_file_path)
    assert_equal(replacement_file_path, updated.path)
    assert_equal(replacement_dir, updated.cacheDir)
    assert_equal([replacement_name], updated.files)
def test_syncFromSynapse__given_file_id(test_state):
    """syncFromSynapse called with a File id returns exactly that one file."""
    file_path = utils.make_bogus_data_file()
    test_state.schedule_for_cleanup(file_path)

    # Stored with synapseStore=False under a random unique name.
    unsaved = File(file_path, name=str(uuid.uuid4()), parent=test_state.project, synapseStore=False)
    stored = test_state.syn.store(unsaved)

    synced = synapseutils.syncFromSynapse(test_state.syn, stored.id)
    assert 1 == len(synced)
    assert stored == synced[0]
def test_randomly_failing_parts():
    """Upload a multi-part file while one third of the part uploads fail artificially.

    ``_put_chunk`` in the multipart upload module is replaced by a wrapper
    that randomly raises IOError. The upload is expected to succeed anyway,
    and the downloaded copy must match the original file. Every module-level
    override made here is undone in the ``finally`` block.
    """
    FAILURE_RATE = 1.0/3.0
    fhid = None

    # Remember the settings this test overrides so they can be put back;
    # otherwise they leak into every test that runs afterwards.
    original_min_part_size = multipart_upload_module.MIN_PART_SIZE
    original_max_retries = multipart_upload_module.MAX_RETRIES
    multipart_upload_module.MIN_PART_SIZE = 5*MB
    multipart_upload_module.MAX_RETRIES = 20

    filepath = utils.make_bogus_binary_file(multipart_upload_module.MIN_PART_SIZE*2 + 777771)
    print('Made bogus file: ', filepath)

    normal_put_chunk = None

    def _put_chunk_or_fail_randomly(url, chunk, verbose=False):
        # Fail with probability FAILURE_RATE, otherwise defer to the real implementation.
        if random.random() < FAILURE_RATE:
            raise IOError("Ooops! Artificial upload failure for testing.")
        else:
            return normal_put_chunk(url, chunk, verbose)

    ## Mock _put_chunk to fail randomly
    normal_put_chunk = multipart_upload_module._put_chunk
    multipart_upload_module._put_chunk = _put_chunk_or_fail_randomly

    try:
        fhid = multipart_upload(syn, filepath)
        print('FileHandle: {fhid}'.format(fhid=fhid))

        # Download the file and compare it with the original
        junk = File(filepath, parent=project, dataFileHandleId=fhid)
        junk.properties.update(syn._createEntity(junk.properties))
        (tmp_f, tmp_path) = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor; only the path is needed,
        # so close it right away instead of leaking it.
        os.close(tmp_f)
        schedule_for_cleanup(tmp_path)
        junk.update(syn._downloadFileEntity(junk, tmp_path))
        assert filecmp.cmp(filepath, junk.path)

    finally:
        ## Un-mock _put_chunk
        if normal_put_chunk:
            multipart_upload_module._put_chunk = normal_put_chunk
        multipart_upload_module.MIN_PART_SIZE = original_min_part_size
        multipart_upload_module.MAX_RETRIES = original_max_retries

        # 'junk' only exists if the upload got far enough to create it.
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_ExternalFileHandle():
    """Store a File that points at an external URL, download it, then repoint it."""
    # Tests shouldn't have external dependencies, but this is a pretty picture of Singapore
    first_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg'
    picture = syn.store(File(first_url, parent=project, synapseStore=False))

    # Verify the file handle
    handle = syn._getFileHandle(picture.dataFileHandleId)
    assert handle['concreteType'] == 'org.sagebionetworks.repo.model.file.ExternalFileHandle'
    assert handle['externalURL'] == first_url

    # The download should occur only on the client side
    picture = syn.get(picture, downloadFile=True)
    assert picture.path is not None
    assert picture.externalURL == first_url
    assert os.path.exists(picture.path)

    # Update external URL
    second_url = 'https://upload.wikimedia.org/wikipedia/commons/a/a2/Singapore_Panorama_v2.jpg'
    picture.externalURL = second_url
    picture = syn.store(picture)
    refetched = syn.get(picture, downloadFile=False)
    assert refetched.externalURL == second_url
def test_download_file_false():
    """A metadata-only update made after get(downloadFile=False) must keep the stored file."""
    RENAME_SUFFIX = 'blah'

    # Upload a file
    filepath = utils.make_bogus_binary_file()
    hidden_path = filepath + RENAME_SUFFIX
    schedule_for_cleanup(filepath)
    schedule_for_cleanup(hidden_path)
    entity = syn.store(File(filepath, name='SYNR 619', parent=project))

    # Now hide the file from the cache and download with downloadFile=False
    os.rename(filepath, hidden_path)
    entity = syn.get(entity.id, downloadFile=False)

    # Change something and reupload the file's metadata
    entity.name = "Only change the name, not the file"
    reupload = syn.store(entity)
    assert reupload.path is None, "Path field should be null: %s" % reupload.path

    # This should still get the correct file
    reupload = syn.get(reupload.id)
    assert filecmp.cmp(hidden_path, reupload.path)
    assert reupload.name == entity.name
def test_round_trip(): fh = None filepath = utils.make_bogus_binary_file(6*MB + 777771, verbose=True) print 'Made bogus file: ', filepath try: fh = syn._chunkedUploadFile(filepath, verbose=False) print '=' * 60 print 'FileHandle:' syn.printEntity(fh) print 'creating project and file' project = create_project() junk = File(filepath, parent=project, dataFileHandleId=fh['id']) junk.properties.update(syn._createEntity(junk.properties)) print 'downloading file' junk.update(syn._downloadFileEntity(junk, filepath)) print 'comparing files' assert filecmp.cmp(filepath, junk.path) print 'ok!' finally: try: if 'junk' in locals(): syn.delete(junk) except Exception as ex: print ex try: os.remove(filepath) except Exception as ex: print ex if fh: print 'Deleting fileHandle', fh['id'] syn._deleteFileHandle(fh)
def test_download_file_URL_false():
    """Re-storing a URL-backed File fetched without download must keep its URL and version."""
    # Upload an external file handle
    fileThatExists = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient'
    entity = syn.store(File(fileThatExists, synapseStore=False, parent=project))
    entity = syn.get(entity, downloadFile=False)
    versionBefore = entity.versionNumber

    # Reupload and check that the URL and version does not get mangled
    entity = syn.store(entity, forceVersion=False)
    assert entity.path == fileThatExists, "Entity should still be pointing at a URL"
    assert versionBefore == entity.versionNumber

    # Try a URL with an extra slash at the end
    fileThatDoesntExist = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient/'
    entity.synapseStore = False
    entity.path = fileThatDoesntExist
    entity = syn.store(entity)
    entity = syn.get(entity, downloadFile=False)
    versionBefore = entity.versionNumber

    entity = syn.store(entity, forceVersion=False)
    assert entity.path == fileThatDoesntExist, "Entity should still be pointing at a URL"
    assert versionBefore == entity.versionNumber
def test_Entity():
    """Walk Project, Folder, File and Link entities through create, read, update and versioning.

    Also covers unicode annotation values, re-using an existing file handle,
    uploading a second version, the file name override on download and a
    repeated download into the same location (SYNPY-330).
    """
    # Update the project
    project_name = str(uuid.uuid4())
    project = Project(name=project_name)
    project = syn.store(project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == project_name

    # Create and get a Folder
    folder = Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000)
    folder = syn.createEntity(folder)
    folder = syn.getEntity(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'A place to put my junk'
    assert folder.foo[0] == 1000

    # Update and get the Folder
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    folder = syn.store(folder)
    folder = syn.get(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other folder'
    assert folder.pi[0] == 3.14159265359

    # Test CRUD on Files, check unicode
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, description=u'Description with funny characters: Déjà vu, ประเทศไทย, 中国',
                  contentType='text/flapdoodle',
                  foo='An arbitrary value',
                  bar=[33,44,55],
                  bday=Datetime(2013,3,15),
                  band=u"Motörhead",
                  lunch=u"すし")
    a_file = syn.store(a_file)
    assert a_file.path == path

    a_file = syn.getEntity(a_file)
    assert a_file.description == u'Description with funny characters: Déjà vu, ประเทศไทย, 中国', u'description= %s' % a_file.description
    assert a_file['foo'][0] == 'An arbitrary value', u'foo= %s' % a_file['foo'][0]
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.contentType == 'text/flapdoodle', u'contentType= %s' % a_file.contentType
    assert a_file['band'][0] == u"Motörhead", u'band= %s' % a_file['band'][0]
    assert a_file['lunch'][0] == u"すし", u'lunch= %s' % a_file['lunch'][0]

    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(path, a_file.path)

    # A File may not be constructed with both a path and a dataFileHandleId.
    assert_raises(ValueError,File,a_file.path,parent=folder,dataFileHandleId=56456)

    # A second entity can point at the same file handle.
    b_file = File(name="blah",parent=folder,dataFileHandleId=a_file.dataFileHandleId)
    b_file = syn.store(b_file)

    assert b_file.dataFileHandleId == a_file.dataFileHandleId

    # Update the File
    a_file.path = path
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert a_file['foo'][0] == 'Another arbitrary chunk of text data'
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.new_key[0] == 'A newly created value'
    assert a_file.path == path
    assert a_file.versionNumber == 1, "unexpected version number: " + str(a_file.versionNumber)

    #Test create, store, get Links
    link = Link(a_file['id'], targetVersion=a_file.versionNumber, parent=project)
    link = syn.store(link)
    assert link['linksTo']['targetId'] == a_file['id']
    assert link['linksTo']['targetVersionNumber'] == a_file.versionNumber
    assert link['linksToClassName'] == a_file['concreteType']

    testLink = syn.get(link)
    assert testLink == link

    # With followLink=True the target File is returned, so its annotations are checked.
    link = syn.get(link,followLink= True)
    assert link['foo'][0] == 'Another arbitrary chunk of text data'
    assert link['bar'] == [33,44,55]
    assert link['bday'][0] == Datetime(2013,3,15)
    assert link.new_key[0] == 'A newly created value'
    assert utils.equal_paths(link.path, path)
    # Report the value that was actually checked (the link's version, not a_file's).
    assert link.versionNumber == 1, "unexpected version number: " + str(link.versionNumber)

    # Upload a new File and verify
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(new_path, a_file.path)
    assert a_file.versionNumber == 2

    # Make sure we can still get the older version of file
    old_random_data = syn.get(a_file.id, version=1)
    assert filecmp.cmp(old_random_data.path, path)

    tmpdir = tempfile.mkdtemp()
    schedule_for_cleanup(tmpdir)

    ## test file name override
    a_file.fileNameOverride = "peaches_en_regalia.zoinks"
    syn.store(a_file)
    ## TODO We haven't defined how filename override interacts with
    ## TODO previously cached files so, side-step that for now by
    ## TODO making sure the file is not in the cache!
    syn.cache.remove(a_file.dataFileHandleId, delete=True)
    a_file_retreived = syn.get(a_file, downloadLocation=tmpdir)
    assert os.path.basename(a_file_retreived.path) == a_file.fileNameOverride, os.path.basename(a_file_retreived.path)

    ## test getting the file from the cache with downloadLocation parameter (SYNPY-330)
    a_file_cached = syn.get(a_file.id, downloadLocation=tmpdir)
    assert a_file_cached.path is not None
    assert os.path.basename(a_file_cached.path) == a_file.fileNameOverride, a_file_cached.path

    print("\n\nList of files in project:\n")
    syn._list(project, recursive=True)
def _copyFile(syn, entity, destinationId, version=None, update=False, setProvenance="traceback"):
    """
    Copies a File entity (the given version, or the one ``syn.get`` returns when
    version is None) to a specified synapse ID.

    :param entity:          A synapse ID of a File entity

    :param destinationId:   Synapse ID of a folder/project that the file wants to be copied to

    :param version:         Can specify version of a file.
                            Default to None

    :param update:          Can choose to update files that have the same name
                            Default to False

    :param setProvenance:   Has three values to set the provenance of the copied entity:
                                traceback: Sets to the source entity
                                existing: Sets to source entity's original provenance (if it exists)
                                None: No provenance is set

    :returns: the ``id`` of the stored copy

    :raises ValueError: if update is False and the destination already holds an
                        item with the same name, or if setProvenance is not one
                        of the accepted values
    """
    source = syn.get(entity, downloadFile=False, version=version, followLink=False)

    # CHECK: an item with the same name in the destination is an error unless
    # the caller explicitly allowed updating.
    if not update:
        siblings = syn.query('select name from entity where parentId =="%s"'%destinationId)
        if any(hit['entity.name'] == source.name for hit in siblings['results']):
            raise ValueError('An item named "%s" already exists in this location. File could not be copied'%source.name)

    profile = syn.getUserProfile()

    # Provenance is resolved before anything is stored, so a bad setProvenance
    # value fails early instead of after the copy has been made.
    if setProvenance == "traceback":
        # point the copy back at the entity it was copied from
        activity = Activity("Copied file", used=source)
    elif setProvenance == "existing":
        # reuse the source's own provenance, if it has any
        try:
            activity = syn.getProvenance(source.id)
        except SynapseHTTPError:
            # Should catch the 404
            activity = None
    elif setProvenance is None or setProvenance.lower() == 'none':
        activity = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')

    # Find who created the data file handle. The entity can list several
    # handles (e.g. a PreviewFileHandle), so match on the id instead of
    # taking the first entry; None when nothing matches.
    handles = syn.restGET('/entity/%s/version/%s/filehandles'%(source.id,source.versionNumber))
    creator = next(
        (handle['createdBy'] for handle in handles['list'] if handle['id'] == source.dataFileHandleId),
        None)

    if profile.ownerId == creator:
        # CHECK: the user created the file handle, so the copy can reference it directly.
        duplicate = File(name=source.name, parentId=destinationId)
        duplicate.dataFileHandleId = source.dataFileHandleId
    else:
        # Hard copy. A Synapse-stored file has to be downloaded first, because
        # there is no local path if it was never downloaded before; an
        # external URL is simply referenced again.
        store = source.externalURL is None
        source = syn.get(entity,downloadFile=store,version=version)
        path = source.path if store else source.externalURL
        duplicate = File(path, name=source.name, parentId=destinationId, synapseStore=store)

    # Set annotations here
    duplicate.annotations = source.annotations

    # Store provenance if activity is not None
    if activity is not None:
        duplicate = syn.store(duplicate, activity=activity)
    else:
        duplicate = syn.store(duplicate)

    # Leave this return statement for test
    return duplicate['id']
'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_rawCounts.tsv'},
'syn2920161':{'parentId' :'syn3157743', #'normalized.sex_race_age_RIN_PMI_batch_site.corrected.csv'
'dataType': 'mRNA',
'platform': 'IlluminaHiSeq2500',
'tissueType':['Frontal Pole', 'Superior Temporal Gyrus','Parahippocampal Gyrus'],
'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_normalized-sex-race-age-RIN-PMI-batch-site.corrected.csv'},
}

# Re-upload every entry of toMove under its new name and parent, with annotations.
# (Python 2 script: uses print statements.)
for id, v in toMove.items():
    ent = syn.get(id)
    print v['name']
    # Rename the downloaded file to the new name (a relative path) before re-uploading it.
    os.rename(ent.path, v['name'])
    # The entity name is the file name minus its first 7 and last 4 characters
    # (for the entries visible here: the 'AMP-AD_' prefix and the extension).
    f = File(v['name'], parentId=v['parentId'], name=v['name'][7:-4])
    print f.name
    f.consortium, f.study, f.center, f.disease = consortium, study, center, disease
    f.dataType = v['dataType']
    # NOTE(review): 'platfrom' looks like a typo for 'platform'; as written the
    # value is stored under the misspelled key. Confirm before renaming.
    f.platfrom = v['platform']
    # NOTE(review): the original nesting is not recoverable from this excerpt;
    # both tissue assignments are assumed to sit under the check. Verify.
    if 'tissueTypeAbrv' in v:
        f.tissueTypeAbrv = v['tissueTypeAbrv']
        f.tissueType = v['tissueType']
    f.fileType = fileType
    f.organism = organism
    # Provenance: the new file was derived from the old entity by this migration script.
    f = syn.store(f, used = [id], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/e71bbde262625e6999ea9defd98e10fce8f3c542/Mount-Sinai/migrateMSBBMetaAndRNASeq.py'], activityName='Data migration')
def test_Entity():
    """Walk Project, Folder and File entities through create, read, update and versioning."""
    # Update the project
    project_name = str(uuid.uuid4())
    project = syn.store(Project(name=project_name))
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == project_name

    # Create and get a Folder
    test_folder = syn.createEntity(
        Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000))
    test_folder = syn.getEntity(test_folder)
    assert test_folder.name == 'Test Folder'
    assert test_folder.parentId == project.id
    assert test_folder.description == 'A place to put my junk'
    assert test_folder.foo[0] == 1000

    # Update and get the Folder
    test_folder.pi = 3.14159265359
    test_folder.description = 'The rejects from the other folder'
    test_folder = syn.get(syn.store(test_folder))
    assert test_folder.name == 'Test Folder'
    assert test_folder.parentId == project.id
    assert test_folder.description == 'The rejects from the other folder'
    assert test_folder.pi[0] == 3.14159265359

    # Test CRUD on Files
    original_path = utils.make_bogus_data_file()
    schedule_for_cleanup(original_path)
    file_entity = File(
        original_path,
        parent=test_folder,
        description='Random data for testing',
        contentType='text/flapdoodle',
        foo='An arbitrary value',
        bar=[33,44,55],
        bday=Datetime(2013,3,15))
    file_entity = syn._createFileEntity(file_entity)
    # the local path must survive the round trip through the server
    assert file_entity.path == original_path

    file_entity = syn.getEntity(file_entity)
    assert file_entity['foo'][0] == 'An arbitrary value'
    assert file_entity['bar'] == [33,44,55]
    assert file_entity['bday'][0] == Datetime(2013,3,15)
    assert file_entity.contentType == 'text/flapdoodle'

    file_entity = syn.downloadEntity(file_entity)
    assert filecmp.cmp(original_path, file_entity.path)

    # Update the File
    file_entity.path = original_path
    file_entity['foo'] = 'Another arbitrary chunk of text data'
    file_entity['new_key'] = 'A newly created value'
    file_entity = syn.updateEntity(file_entity)
    assert file_entity['foo'][0] == 'Another arbitrary chunk of text data'
    assert file_entity['bar'] == [33,44,55]
    assert file_entity['bday'][0] == Datetime(2013,3,15)
    assert file_entity.new_key[0] == 'A newly created value'
    assert file_entity.path == original_path
    assert file_entity.versionNumber == 1

    # Upload a new File and verify
    replacement_path = utils.make_bogus_data_file()
    schedule_for_cleanup(replacement_path)
    file_entity = syn.uploadFile(file_entity, replacement_path)
    file_entity = syn.downloadEntity(file_entity)
    assert filecmp.cmp(replacement_path, file_entity.path)
    assert file_entity.versionNumber == 2

    # Make sure we can still get the older version of file
    first_version = syn.get(file_entity.id, version=1)
    assert filecmp.cmp(first_version.path, original_path)
# Maps the platform token found in the old entity names to the platform annotation value.
PLATFORM_MAP = {'133AB': 'AffymetrixU133AB',
                'Plus2': 'AffymetrixU133Plus2'}

# Fetch id and name of everything stored under the old parent.
query = 'select id, name from entity where parentId=="%s"' %OLDPARENTID
df = synapseHelpers.query2df(syn.chunkedQuery(query))

# Re-upload each entity under NEWPARENTID with a normalized name and annotations.
# NOTE(review): iteration starts at 1, so the row at index 0 is never migrated;
# confirm this is intentional.
for i in range(1,df.shape[0]):
    row = df.ix[i, :]
    ent = syn.get(row.id)
    # Old names are expected to consist of exactly five '_'-separated parts;
    # any other shape raises ValueError on unpacking.
    fStudy, fTissue, fPlatform, fDatatype, fRest = ent.name.split('_')
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform], TISSUEABRMAP[fTissue][0], fRest)
    # Single-argument print in call form behaves the same on Python 2 and 3.
    print(name)
    # Rename the downloaded file to the new name (a relative path) before re-uploading it.
    os.rename(ent.path, name)
    # The entity name drops the 7-character 'AMP-AD_' prefix of the file name.
    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType = 'mRNA'
    f.disease = 'Alzheimers Disease'
    # Was misspelled 'platfrom', which stored the value under a key that
    # nobody searching for 'platform' would find.
    f.platform = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'geneExp'
    f.fileType = 'genomicMatrix'
    f.organism = 'human'
    # Provenance: the new file was derived from the old entity by this migration script.
    f = syn.store(f, used = [ent], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/4d7d6b78b1e73058483354a1a18bff7422966a4b/Mount-Sinai/migrateMSBBExpression.py'], activityName='Data migration')
# NOTE(review): this excerpt starts in the middle of an enclosing loop that is
# not visible here. The assignment below presumably closes a preceding filter
# branch and sits at a deeper nesting level than the `if okay:` that follows;
# confirm the indentation against the full file.
okay = False
if okay:
    #-----------------------------------------------------------------
    # Upload the file to the correct path:
    #-----------------------------------------------------------------
    path = os.path.join(dirpath, filename)
    stat = os.stat(path)
    # Empty files are never uploaded.
    if stat.st_size > 0:
        mtime = stat.st_mtime
        # None when this path has no recorded upload.
        # NOTE(review): `mtime > None` is True on Python 2 but raises
        # TypeError on Python 3; check which interpreter this runs on.
        previous_mtime = previous_uploads.get(path, None)
        # Upload only when the file changed since the recorded upload.
        if mtime > previous_mtime:
            print('Uploading {0}...'.format(path))
            f = File(path, parent=parents[dirpath], name=filename)
            #-------------------------------------------------------------
            # Annotate the file on Synapse:
            #-------------------------------------------------------------
            # `types` and `type_names` are parallel sequences: a matching
            # suffix selects the fileType at the same position. The loop does
            # not stop at the first match, so the last matching suffix wins.
            for istr2, str2 in enumerate(types):
                if filename.endswith(str2):
                    f.fileType = type_names[istr2]
            # Optionally add "syn.store(f, used='http://..)"
            # to specify the source location
            syn.store(f)
            # Record the upload so an unchanged file is skipped next time.
            # NOTE(review): the statement is built by string formatting; a
            # path containing a double quote breaks it, and mtime is stored
            # via its string form. Consider '?' placeholders.
            c = conn.cursor()
            c.execute('INSERT OR REPLACE INTO files (path, mtime) VALUES ("%s", "%s")' % (path, mtime))
            conn.commit()
def test_Entity():
    """Walk Project, Folder, File and Link entities through create, read, update and versioning.

    Also covers unicode annotation values, re-using an existing file handle,
    Links with and without a target version, Links to Folders, uploading a
    second version and downloading into an explicit location (SYNPY-330).
    """
    # Update the project
    project_name = str(uuid.uuid4())
    project = Project(name=project_name)
    project = syn.store(project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert_equals(project.name, project_name)

    # Create and get a Folder
    folder = Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000)
    folder = syn.createEntity(folder)
    folder = syn.getEntity(folder)
    assert_equals(folder.name, 'Test Folder')
    assert_equals(folder.parentId, project.id)
    assert_equals(folder.description, 'A place to put my junk')
    assert_equals(folder.foo[0], 1000)

    # Update and get the Folder
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    folder = syn.store(folder)
    folder = syn.get(folder)
    assert_equals(folder.name, 'Test Folder')
    assert_equals(folder.parentId, project.id)
    assert_equals(folder.description, 'The rejects from the other folder')
    assert_equals(folder.pi[0], 3.14159265359)

    # Test CRUD on Files, check unicode
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, description=u'Description with funny characters: Déjà vu, ประเทศไทย, 中国',
                  contentType='text/flapdoodle',
                  foo='An arbitrary value',
                  bar=[33, 44, 55],
                  bday=Datetime(2013, 3, 15),
                  band=u"Motörhead",
                  lunch=u"すし")
    a_file = syn.store(a_file)
    assert_equals(a_file.path, path)

    a_file = syn.getEntity(a_file)
    assert_equals(a_file.description, u'Description with funny characters: Déjà vu, ประเทศไทย, 中国',
                  u'description= %s' % a_file.description)
    assert_equals(a_file['foo'][0], 'An arbitrary value', u'foo= %s' % a_file['foo'][0])
    assert_equals(a_file['bar'], [33, 44, 55])
    assert_equals(a_file['bday'][0], Datetime(2013, 3, 15))
    assert_equals(a_file.contentType, 'text/flapdoodle', u'contentType= %s' % a_file.contentType)
    assert_equals(a_file['band'][0], u"Motörhead", u'band= %s' % a_file['band'][0])
    assert_equals(a_file['lunch'][0], u"すし", u'lunch= %s' % a_file['lunch'][0])

    a_file = syn.downloadEntity(a_file)
    assert_true(filecmp.cmp(path, a_file.path))

    # A second entity can point at the same file handle.
    b_file = File(name="blah", parent=folder, dataFileHandleId=a_file.dataFileHandleId)
    b_file = syn.store(b_file)

    assert_equals(b_file.dataFileHandleId, a_file.dataFileHandleId)

    # Update the File
    a_file.path = path
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert_equals(a_file['foo'][0], 'Another arbitrary chunk of text data')
    assert_equals(a_file['bar'], [33, 44, 55])
    assert_equals(a_file['bday'][0], Datetime(2013, 3, 15))
    assert_equals(a_file.new_key[0], 'A newly created value')
    assert_equals(a_file.path, path)
    assert_equals(a_file.versionNumber, 1, "unexpected version number: " + str(a_file.versionNumber))

    # Test create, store, get Links
    # If version isn't specified, targetVersionNumber should not be set
    link = Link(a_file['id'],
                parent=project)
    link = syn.store(link)
    assert_equals(link['linksTo']['targetId'], a_file['id'])
    assert_is_none(link['linksTo'].get('targetVersionNumber'))
    assert_equals(link['linksToClassName'], a_file['concreteType'])

    # With an explicit targetVersion the link records that version.
    link = Link(a_file['id'],
                targetVersion=a_file.versionNumber,
                parent=project)
    link = syn.store(link)
    assert_equals(link['linksTo']['targetId'], a_file['id'])
    assert_equals(link['linksTo']['targetVersionNumber'], a_file.versionNumber)
    assert_equals(link['linksToClassName'], a_file['concreteType'])

    testLink = syn.get(link)
    assert_equals(testLink, link)

    # With followLink=True the target File is returned, so its annotations are checked.
    link = syn.get(link, followLink=True)
    assert_equals(link['foo'][0], 'Another arbitrary chunk of text data')
    assert_equals(link['bar'], [33, 44, 55])
    assert_equals(link['bday'][0], Datetime(2013, 3, 15))
    assert_equals(link.new_key[0], 'A newly created value')
    assert_true(utils.equal_paths(link.path, path))
    # Report the value that was actually checked (the link's version, not a_file's).
    assert_equals(link.versionNumber, 1, "unexpected version number: " + str(link.versionNumber))

    # Links may also target a Folder; no version number is recorded then.
    newfolder = Folder('Testing Folder', parent=project)
    newfolder = syn.store(newfolder)
    link = Link(newfolder, parent=folder.id)
    link = syn.store(link)
    assert_equals(link['linksTo']['targetId'], newfolder.id)
    assert_equals(link['linksToClassName'], newfolder['concreteType'])
    assert_is_none(link['linksTo'].get('targetVersionNumber'))

    # Upload a new File and verify
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)
    a_file = syn.downloadEntity(a_file)
    assert_true(filecmp.cmp(new_path, a_file.path))
    assert_equals(a_file.versionNumber, 2)

    # Make sure we can still get the older version of file
    old_random_data = syn.get(a_file.id, version=1)
    assert_true(filecmp.cmp(old_random_data.path, path))

    tmpdir = tempfile.mkdtemp()
    schedule_for_cleanup(tmpdir)

    # test getting the file from the cache with downloadLocation parameter (SYNPY-330)
    a_file_cached = syn.get(a_file.id, downloadLocation=tmpdir)
    assert_is_not_none(a_file_cached.path)
    assert_equal(os.path.basename(a_file_cached.path), os.path.basename(a_file.path))
def test_Entity():
    """Walk Project, Folder and File entities through create, read, update and versioning.

    The File carries unicode text in its description and annotations to check
    that those values survive the round trip.
    """
    # Update the project
    project_name = str(uuid.uuid4())
    project = syn.store(Project(name=project_name))
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == project_name

    # Create and get a Folder
    test_folder = syn.createEntity(
        Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000))
    test_folder = syn.getEntity(test_folder)
    assert test_folder.name == 'Test Folder'
    assert test_folder.parentId == project.id
    assert test_folder.description == 'A place to put my junk'
    assert test_folder.foo[0] == 1000

    # Update and get the Folder
    test_folder.pi = 3.14159265359
    test_folder.description = 'The rejects from the other folder'
    test_folder = syn.get(syn.store(test_folder))
    assert test_folder.name == 'Test Folder'
    assert test_folder.parentId == project.id
    assert test_folder.description == 'The rejects from the other folder'
    assert test_folder.pi[0] == 3.14159265359

    # Test CRUD on Files, check unicode
    original_path = utils.make_bogus_data_file()
    schedule_for_cleanup(original_path)
    file_entity = File(
        original_path,
        parent=test_folder,
        description=u'Description with funny characters: Déjà vu, ประเทศไทย, 中国',
        contentType='text/flapdoodle',
        foo='An arbitrary value',
        bar=[33,44,55],
        bday=Datetime(2013,3,15),
        band=u"Motörhead",
        lunch=u"すし")
    file_entity = syn.store(file_entity)
    # the local path must survive the round trip through the server
    assert file_entity.path == original_path

    file_entity = syn.getEntity(file_entity)
    assert file_entity.description == u'Description with funny characters: Déjà vu, ประเทศไทย, 中国', u'description= %s' % file_entity.description
    assert file_entity['foo'][0] == 'An arbitrary value', u'foo= %s' % file_entity['foo'][0]
    assert file_entity['bar'] == [33,44,55]
    assert file_entity['bday'][0] == Datetime(2013,3,15)
    assert file_entity.contentType == 'text/flapdoodle', u'contentType= %s' % file_entity.contentType
    assert file_entity['band'][0] == u"Motörhead", u'band= %s' % file_entity['band'][0]
    assert file_entity['lunch'][0] == u"すし", u'lunch= %s' % file_entity['lunch'][0]

    file_entity = syn.downloadEntity(file_entity)
    assert filecmp.cmp(original_path, file_entity.path)

    # Update the File
    file_entity.path = original_path
    file_entity['foo'] = 'Another arbitrary chunk of text data'
    file_entity['new_key'] = 'A newly created value'
    file_entity = syn.updateEntity(file_entity)
    assert file_entity['foo'][0] == 'Another arbitrary chunk of text data'
    assert file_entity['bar'] == [33,44,55]
    assert file_entity['bday'][0] == Datetime(2013,3,15)
    assert file_entity.new_key[0] == 'A newly created value'
    assert file_entity.path == original_path
    assert file_entity.versionNumber == 1, "unexpected version number: " + str(file_entity.versionNumber)

    # Upload a new File and verify
    replacement_path = utils.make_bogus_data_file()
    schedule_for_cleanup(replacement_path)
    file_entity = syn.uploadFile(file_entity, replacement_path)
    file_entity = syn.downloadEntity(file_entity)
    assert filecmp.cmp(replacement_path, file_entity.path)
    assert file_entity.versionNumber == 2

    # Make sure we can still get the older version of file
    first_version = syn.get(file_entity.id, version=1)
    assert filecmp.cmp(first_version.path, original_path)
def test_Entity():
    """Walk Project, Folder and File entities through create, fetch and update.

    All calls go through the module-level ``syn`` client; the project and the
    two generated data files are handed to ``schedule_for_cleanup``.
    """
    # --- Project: store, schedule cleanup, fetch ---
    expected_project_name = str(uuid.uuid4())
    new_project = Project(name=expected_project_name)
    project = syn.store(new_project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == expected_project_name

    # --- Folder: createEntity followed by getEntity ---
    new_folder = Folder("Test Folder", parent=project, description="A place to put my junk", foo=1000)
    folder = syn.getEntity(syn.createEntity(new_folder))
    assert folder.name == "Test Folder"
    assert folder.parentId == project.id
    assert folder.description == "A place to put my junk"
    assert folder.foo[0] == 1000

    # --- Folder: modify, store, get ---
    folder.pi = 3.14159265359
    folder.description = "The rejects from the other folder"
    stored_folder = syn.store(folder)
    folder = syn.get(stored_folder)
    assert folder.name == "Test Folder"
    assert folder.parentId == project.id
    assert folder.description == "The rejects from the other folder"
    assert folder.pi[0] == 3.14159265359

    # --- File: store with non-ASCII description and annotations ---
    upload_path = utils.make_bogus_data_file()
    schedule_for_cleanup(upload_path)
    new_file = File(
        upload_path,
        parent=folder,
        description="Description with funny characters: Déjà vu, ประเทศไทย, 中国",
        contentType="text/flapdoodle",
        foo="An arbitrary value",
        bar=[33, 44, 55],
        bday=Datetime(2013, 3, 15),
        band="Motörhead",
        lunch="すし",
    )
    file_entity = syn.store(new_file)
    assert file_entity.path == upload_path

    # --- File: fetch and compare every field that was set ---
    file_entity = syn.getEntity(file_entity)
    assert file_entity.description == "Description with funny characters: Déjà vu, ประเทศไทย, 中国", (
        "description= %s" % file_entity.description
    )
    assert file_entity["foo"][0] == "An arbitrary value", "foo= %s" % file_entity["foo"][0]
    assert file_entity["bar"] == [33, 44, 55]
    assert file_entity["bday"][0] == Datetime(2013, 3, 15)
    assert file_entity.contentType == "text/flapdoodle", "contentType= %s" % file_entity.contentType
    assert file_entity["band"][0] == "Motörhead", "band= %s" % file_entity["band"][0]
    assert file_entity["lunch"][0] == "すし", "lunch= %s" % file_entity["lunch"][0]

    # --- File: downloaded content equals uploaded content ---
    file_entity = syn.downloadEntity(file_entity)
    assert filecmp.cmp(upload_path, file_entity.path)

    # --- File: annotation update through updateEntity (version stays 1) ---
    file_entity.path = upload_path
    file_entity["foo"] = "Another arbitrary chunk of text data"
    file_entity["new_key"] = "A newly created value"
    file_entity = syn.updateEntity(file_entity)
    assert file_entity["foo"][0] == "Another arbitrary chunk of text data"
    assert file_entity["bar"] == [33, 44, 55]
    assert file_entity["bday"][0] == Datetime(2013, 3, 15)
    assert file_entity.new_key[0] == "A newly created value"
    assert file_entity.path == upload_path
    assert file_entity.versionNumber == 1

    # --- File: upload new content (version becomes 2) ---
    second_upload_path = utils.make_bogus_data_file()
    schedule_for_cleanup(second_upload_path)
    file_entity = syn.uploadFile(file_entity, second_upload_path)
    file_entity = syn.downloadEntity(file_entity)
    assert filecmp.cmp(second_upload_path, file_entity.path)
    assert file_entity.versionNumber == 2

    # --- File: the earlier version can still be fetched ---
    version_one = syn.get(file_entity.id, version=1)
    assert filecmp.cmp(version_one.path, upload_path)
# Read the JSON metadata stored next to the input file (<input_path>.json).
with open(input_path + ".json") as handle:
    meta_data = json.load(handle)

DST_FOLDER = 'syn3079564'  # test upload folder

# Create Provenance log
# NOTE(review): `executed` is filled from meta_data['used'], same as `used` --
# confirm whether a separate 'executed' key was intended.
provenance = Activity(name=meta_data['activity'],
                      description=meta_data['description'],
                      used=meta_data['used'],
                      executed=meta_data['used'])
# NOTE(review): the activity is built but never stored or attached to the File
# in this fragment; the original left the store call disabled:
#prov = syn.store(prov)

name = os.path.basename(input_path)

# Add metadata to files to be uploaded
f = File(input_path, name=name, parentId=DST_FOLDER)
f.dataType = meta_data['dataType']
# NOTE(review): fileType is copied from the 'dataType' key -- confirm this is
# not meant to read a 'fileType' key instead.
f.fileType = meta_data['dataType']
f.variant_workflow = meta_data['workflow']
f.variant_workflow_version = meta_data['workflowVersion']
f.call_type = call_type
f.reference_build = meta_data['referenceBuild']
f.center_name = meta_data['center_name']
f.file_md5 = synapseclient.utils.md5_for_file(input_path)
f.study = 'PCAWG 2.0'
f.submitter_donor_id = meta_data['donor_id']
f.alignment_workflow_name = 'Workflow_Bundle_BWA (UCSC Implementation)'
f.alignment_workflow_source_url = 'https://github.com/kellrott/tcga_realign'
f.alignment_workflow_version = '2.6.0'
def test_getWithEntityBundle(self, download_file_mock, get_file_URL_and_metadata_mock):
    """Check ``syn._getWithEntityBundle`` against a hand-built entity bundle.

    Four scenarios are run in order: download to an explicit location, get
    without a download location, download to a second location, and
    preservation of an existing entity's local state.

    :param download_file_mock: mock whose ``side_effect`` is replaced by a fake
                               download that touches the target file and
                               records it in ``syn.cache``
    :param get_file_URL_and_metadata_mock: mock whose ``side_effect`` returns
                                           the bundle's file handle together
                                           with a dummy pre-signed URL
    """
    import shutil  # local import: only needed to remove the temp directories

    # Note: one thing that remains unexplained is why the previous version of
    # this test worked if you had a .cacheMap file of the form:
    # {"/Users/chris/.synapseCache/663/-1337/anonymous": "2014-09-15T22:54:57.000Z",
    #  "/var/folders/ym/p7cr7rrx4z7fw36sxv04pqh00000gq/T/tmpJ4nz8U": "2014-09-15T23:27:25.000Z"}
    # ...but failed if you didn't.

    bundle = {
        'entity': {
            'id': 'syn10101',
            'name': 'anonymous',
            'dataFileHandleId': '-1337',
            'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
            'parentId': 'syn12345'},
        'fileHandles': [{
            'concreteType': 'org.sagebionetworks.repo.model.file.S3FileHandle',
            'fileName': 'anonymous',
            'contentType': 'application/flapdoodle',
            'contentMd5': '1698d26000d60816caab15169efcd23a',
            'id': '-1337'}],
        'annotations': {}}

    fileHandle = bundle['fileHandles'][0]['id']
    cacheDir = syn.cache.get_cache_dir(fileHandle)

    # Make sure the .cacheMap file does not already exist
    cacheMap = os.path.join(cacheDir, '.cacheMap')
    if os.path.exists(cacheMap):
        os.remove(cacheMap)

    def _downloadFileHandle(fileHandleId, objectId, objectType, path, retries=5):
        # touch file at path
        with open(path, 'a'):
            os.utime(path, None)
        syn.cache.add(fileHandle, path)
        return path

    def _getFileHandleDownload(fileHandleId, objectId, objectType='FileHandle'):
        return {'fileHandle': bundle['fileHandles'][0],
                'fileHandleId': fileHandleId,
                'preSignedURL': 'http://example.com'}

    download_file_mock.side_effect = _downloadFileHandle
    get_file_URL_and_metadata_mock.side_effect = _getFileHandleDownload

    # mkdtemp() leaves deletion to the caller, so track every directory made
    temp_dirs = []
    try:
        # 1. ----------------------------------------------------------------------
        # download file to an alternate location
        temp_dir1 = tempfile.mkdtemp()
        temp_dirs.append(temp_dir1)

        e = syn._getWithEntityBundle(entityBundle=bundle,
                                     downloadLocation=temp_dir1,
                                     ifcollision="overwrite.local")

        assert_equal(e.name, bundle["entity"]["name"])
        assert_equal(e.parentId, bundle["entity"]["parentId"])
        assert_equal(utils.normalize_path(os.path.abspath(os.path.dirname(e.path))),
                     utils.normalize_path(temp_dir1))
        assert_equal(bundle["fileHandles"][0]["fileName"], os.path.basename(e.path))
        assert_equal(utils.normalize_path(os.path.abspath(e.path)),
                     utils.normalize_path(os.path.join(temp_dir1, bundle["fileHandles"][0]["fileName"])))

        # 2. ----------------------------------------------------------------------
        # get without specifying downloadLocation
        e = syn._getWithEntityBundle(entityBundle=bundle, ifcollision="overwrite.local")

        assert_equal(e.name, bundle["entity"]["name"])
        assert_equal(e.parentId, bundle["entity"]["parentId"])
        assert_in(bundle["fileHandles"][0]["fileName"], e.files)

        # 3. ----------------------------------------------------------------------
        # download to another location
        temp_dir2 = tempfile.mkdtemp()
        temp_dirs.append(temp_dir2)
        assert_not_equals(temp_dir2, temp_dir1)
        e = syn._getWithEntityBundle(entityBundle=bundle,
                                     downloadLocation=temp_dir2,
                                     ifcollision="overwrite.local")

        assert_in(bundle["fileHandles"][0]["fileName"], e.files)
        assert_is_not_none(e.path)
        assert_true(utils.equal_paths(os.path.dirname(e.path), temp_dir2))

        # 4. ----------------------------------------------------------------------
        # test preservation of local state
        url = 'http://foo.com/secretstuff.txt'
        # need to create a bundle with externalURL
        # NOTE(review): dict(bundle) is a shallow copy, so the assignment below
        # also adds 'externalURL' to the file handle inside `bundle` (and thus
        # to what _getFileHandleDownload returns). Left unchanged because the
        # test may depend on it -- confirm before switching to a deep copy.
        externalURLBundle = dict(bundle)
        externalURLBundle['fileHandles'][0]['externalURL'] = url
        e = File(name='anonymous', parentId="syn12345", synapseStore=False, externalURL=url)
        e.local_state({'zap': 'pow'})
        e = syn._getWithEntityBundle(entityBundle=externalURLBundle, entity=e)
        assert_equal(e.local_state()['zap'], 'pow')
        assert_equal(e.synapseStore, False)
        assert_equal(e.externalURL, url)
    finally:
        # Remove the download directories whether or not an assertion failed
        for temp_dir in temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
def _copyFile(syn, entity, destinationId, version=None, updateExisting=False, setProvenance="traceback",
              skipCopyAnnotations=False):
    """
    Copy a File entity (latest version unless ``version`` is given) into another container.

    :param syn:                 Client object used for every Synapse call made here
    :param entity:              A synapse ID of a File entity
    :param destinationId:       Synapse ID of the folder/project that receives the copy
    :param version:             Version of the file to copy. Defaults to None
    :param updateExisting:      When False (the default), a same-named entity already present in the
                                destination causes a ValueError
    :param setProvenance:       Controls the provenance of the copied entity:
                                    traceback: an Activity "Copied file" that uses the source entity
                                    existing: the source entity's provenance (None if the lookup returns 404)
                                    None: no provenance is set
    :param skipCopyAnnotations: When True the source annotations are not copied. Defaults to False

    :returns: the ``id`` of the stored copy

    :raises ValueError: on a name clash in the destination, an unrecognised ``setProvenance`` value,
                        or a failure code reported by ``copyFileHandles``
    """
    source = syn.get(entity, downloadFile=False, version=version, followLink=False)

    # Refuse to clobber a same-named entity unless the caller opted in to updating
    if not updateExisting and syn.findEntityId(source.name, parent=destinationId) is not None:
        raise ValueError('An entity named "%s" already exists in this location. File could not be copied'
                         % source.name)

    current_user = syn.getUserProfile()

    # Resolve the provenance up front so a bad option fails before anything is stored
    if setProvenance == "traceback":
        activity = Activity("Copied file", used=source)
    elif setProvenance == "existing":
        try:
            activity = syn.getProvenance(source.id)
        except SynapseHTTPError as err:
            # A 404 means there is no provenance to carry over; anything else is re-raised
            if err.response.status_code != 404:
                raise err
            activity = None
    elif setProvenance is None or setProvenance.lower() == 'none':
        activity = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')

    # Fetch the entity bundle and locate its data file handle
    bundle = syn._getEntityBundle(source.id, version=source.versionNumber, bitFlags=0x800 | 0x1)
    source_handle = synapseclient.utils.find_data_file_handle(bundle)
    handle_creator = source_handle['createdBy']

    if current_user.ownerId == handle_creator:
        # The caller created the file handle, so it is reused directly
        target_handle_id = source.dataFileHandleId
    else:
        # Otherwise request a copy of the handle and check the outcome
        copy_response = copyFileHandles(syn, [source_handle], ["FileEntity"], [bundle['entity']['id']],
                                        [source_handle['contentType']], [source_handle['fileName']])
        outcome = copy_response['copyResults'][0]
        if outcome.get("failureCode") is not None:
            raise ValueError("%s dataFileHandleId: %s" % (outcome["failureCode"],
                                                          outcome['originalFileHandleId']))
        target_handle_id = outcome['newFileHandle']['id']

    copied = File(dataFileHandleId=target_handle_id, name=source.name, parentId=destinationId)
    if not skipCopyAnnotations:
        copied.annotations = source.annotations

    # Pass the activity keyword only when there is provenance to record
    store_kwargs = {} if activity is None else {'activity': activity}
    copied = syn.store(copied, **store_kwargs)

    # Leave this return statement for test
    return copied['id']
def test_getWithEntityBundle(download_file_mock):
    """Check ``syn._getWithEntityBundle`` against a hand-built entity bundle.

    Four scenarios are run in order: download to an explicit location, get
    without a download location, download to a second location, and
    preservation of an existing entity's local state.

    :param download_file_mock: mock whose ``side_effect`` is replaced by a fake
                               download that touches the target file and
                               returns its path, file name and directory
    """
    import shutil  # local import: only needed to remove the temp directories

    ## Note: one thing that remains unexplained is why the previous version of
    ## this test worked if you had a .cacheMap file of the form:
    ## {"/Users/chris/.synapseCache/663/-1337/anonymous": "2014-09-15T22:54:57.000Z",
    ##  "/var/folders/ym/p7cr7rrx4z7fw36sxv04pqh00000gq/T/tmpJ4nz8U": "2014-09-15T23:27:25.000Z"}
    ## ...but failed if you didn't.

    ## TODO: Uncomment failing asserts after SYNR-790 and SYNR-697 are fixed

    bundle = {
        'entity': {
            'id': 'syn10101',
            'name': 'anonymous',
            'dataFileHandleId': '-1337',
            'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
            'parentId': 'syn12345'},
        'fileHandles': [{
            'concreteType': 'org.sagebionetworks.repo.model.file.S3FileHandle',
            'fileName': 'anonymous',
            'contentType': 'application/flapdoodle',
            'contentMd5': '1698d26000d60816caab15169efcd23a',
            'id': '-1337'}],
        'annotations': {}}

    fileHandle = bundle['fileHandles'][0]['id']
    cacheDir = syn.cache.get_cache_dir(fileHandle)
    # Single-argument print(...) emits the same text on Python 2 and Python 3
    print("cacheDir= %s" % cacheDir)

    # Make sure the .cacheMap file does not already exist
    cacheMap = os.path.join(cacheDir, '.cacheMap')
    if os.path.exists(cacheMap):
        print("removing cacheMap file:  %s" % cacheMap)
        os.remove(cacheMap)

    def _downloadFileEntity(entity, path, submission):
        print("mock downloading file to: %s" % path)
        ## touch file at path
        with open(path, 'a'):
            os.utime(path, None)
        dest_dir, filename = os.path.split(path)
        return {"path": path,
                "files": [filename],
                "cacheDir": dest_dir}

    download_file_mock.side_effect = _downloadFileEntity

    # mkdtemp() leaves deletion to the caller, so track every directory made
    temp_dirs = []
    try:
        # 1. ----------------------------------------------------------------------
        # download file to an alternate location
        temp_dir1 = tempfile.mkdtemp()
        temp_dirs.append(temp_dir1)
        print("temp_dir1= %s" % temp_dir1)

        e = syn._getWithEntityBundle(entityBundle=bundle,
                                     downloadLocation=temp_dir1,
                                     ifcollision="overwrite.local")
        print(e)

        assert e.name == bundle["entity"]["name"]
        assert e.parentId == bundle["entity"]["parentId"]
        assert e.cacheDir == temp_dir1
        assert bundle["fileHandles"][0]["fileName"] in e.files
        assert e.path == os.path.join(temp_dir1, bundle["fileHandles"][0]["fileName"])

        # 2. ----------------------------------------------------------------------
        # get without specifying downloadLocation
        e = syn._getWithEntityBundle(entityBundle=bundle, ifcollision="overwrite.local")

        print(e)

        assert e.name == bundle["entity"]["name"]
        assert e.parentId == bundle["entity"]["parentId"]
        assert bundle["fileHandles"][0]["fileName"] in e.files

        # 3. ----------------------------------------------------------------------
        # download to another location
        temp_dir2 = tempfile.mkdtemp()
        temp_dirs.append(temp_dir2)
        assert temp_dir2 != temp_dir1
        e = syn._getWithEntityBundle(entityBundle=bundle,
                                     downloadLocation=temp_dir2,
                                     ifcollision="overwrite.local")
        print("temp_dir2= %s" % temp_dir2)
        print(e)

        assert_in(bundle["fileHandles"][0]["fileName"], e.files)
        assert e.path is not None
        assert_equal(os.path.dirname(e.path), temp_dir2)

        # 4. ----------------------------------------------------------------------
        ## test preservation of local state
        url = 'http://foo.com/secretstuff.txt'
        e = File(name='anonymous', parentId="syn12345", synapseStore=False, externalURL=url)
        e.local_state({'zap': 'pow'})
        e = syn._getWithEntityBundle(entityBundle=bundle, entity=e)
        assert e.local_state()['zap'] == 'pow'
        assert e.synapseStore == False
        assert e.externalURL == url
    finally:
        # Remove the download directories whether or not an assertion failed
        for temp_dir in temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
def test_get_and_store():
    """Round-trip a Project, a Folder and a File through syn.store and syn.get"""
    ## project: store it and register it for cleanup
    project = syn.store(Project(name=str(uuid.uuid4()), description='A bogus test project'))
    schedule_for_cleanup(project)

    ## folder: store, then fetch by id
    new_folder = Folder('Bad stuff', parent=project, description='The rejects from the other fauxldurr', pi=3)
    stored_folder = syn.store(new_folder)
    folder = syn.get(stored_folder.id)
    assert folder.name == 'Bad stuff'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other fauxldurr'
    assert folder.pi[0] == 3

    ## folder: change an annotation and the description, store again
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    syn.store(folder)

    ## folder: fetch once more and confirm the new values
    folder = syn.get(folder)
    assert folder.name == 'Bad stuff'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other folder'
    assert folder.pi[0] == 3.14159265359

    ## file: upload version 1
    first_path = utils.make_bogus_data_file()
    schedule_for_cleanup(first_path)
    file_entity = syn.store(File(first_path, parent=folder, description='Random data', foo=9844))

    ## file: downloaded bytes must equal the uploaded bytes
    downloaded = syn.downloadEntity(file_entity)
    assert filecmp.cmp(first_path, downloaded.path)
    assert file_entity.foo[0] == 9844

    ## file: point the entity at new content and a new annotation value
    second_path = utils.make_bogus_data_file()
    schedule_for_cleanup(second_path)
    file_entity.path = second_path
    file_entity.foo = 1266
    file_entity = syn.store(file_entity)

    ## storing new content is expected to produce version 2
    assert file_entity.versionNumber == 2

    ## file: fetch again and confirm content, annotation and version
    downloaded = syn.get(file_entity)
    assert downloaded.path is not None
    assert filecmp.cmp(second_path, downloaded.path)
    assert downloaded.foo[0] == 1266
    assert downloaded.versionNumber == 2

    ## file: version 1 must still be retrievable with its original content
    first_version = syn.get(file_entity.id, version=1)
    assert filecmp.cmp(first_version.path, first_path)