def test_ftp_download():
    """Test downloading an Entity that points to a file on an FTP server."""
    # Another test with an external reference: we only need to test FTP
    # download, not upload, and this way we don't have to maintain an FTP
    # server just for this purpose.

    # Make an entity that points to a file on an FTP server
    entity = File(parent=project['id'], name='1KB.zip')
    fileHandle = {}
    fileHandle['externalURL'] = 'ftp://speedtest.tele2.net/1KB.zip'
    fileHandle['fileName'] = entity.name
    fileHandle['contentType'] = 'application/zip'
    fileHandle['contentMd5'] = '0f343b0931126a20f133d67c2b018a3b'
    fileHandle['contentSize'] = 1024
    fileHandle['concreteType'] = 'org.sagebionetworks.repo.model.file.ExternalFileHandle'
    fileHandle = syn.restPOST('/externalFileHandle', json.dumps(fileHandle),
                              syn.fileHandleEndpoint)

    entity.dataFileHandleId = fileHandle['id']
    entity = syn.store(entity)
    # Register cleanup before asserting, so the entity is removed even if the test fails
    schedule_for_cleanup(entity)

    # Download the entity and check that the MD5 matches the expected value
    FTPfile = syn.get(entity.id, downloadLocation=os.getcwd(), downloadFile=True)
    assert FTPfile.md5 == utils.md5_for_file(FTPfile.path).hexdigest()
    os.remove(FTPfile.path)
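
# These tests lean on utils.md5_for_file throughout. For reference, this is a
# minimal sketch of such a helper, assuming it returns a hashlib md5 object
# (which is why callers invoke .hexdigest() on the result); the real
# synapseclient utility may differ in signature and chunk size.
import hashlib

def _md5_for_file_sketch(path, block_size=2**20):
    """Hash a file in fixed-size chunks so arbitrarily large files fit in memory."""
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(block_size), b''):
            md5.update(chunk)
    return md5  # callers use .hexdigest() to get the hex string
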
def test_resume_partial_download():
    original_file = utils.make_bogus_data_file(40000)
    original_md5 = utils.md5_for_file(original_file).hexdigest()

    entity = File(original_file, parent=project['id'])
    entity = syn.store(entity)

    ## stash the original file for comparison later
    shutil.move(original_file, original_file + '.original')
    original_file += '.original'
    schedule_for_cleanup(original_file)

    temp_dir = tempfile.gettempdir()
    url = '%s/entity/%s/file' % (syn.repoEndpoint, entity.id)
    path = syn._download(url, destination=temp_dir,
                         file_handle_id=entity.dataFileHandleId,
                         expected_md5=entity.md5)

    ## simulate an incomplete download by putting the
    ## complete file back into its temporary location...
    tmp_path = utils.temp_download_filename(temp_dir, entity.dataFileHandleId)
    shutil.move(path, tmp_path)

    ## ...and truncating it to some fraction of its original size
    with open(tmp_path, 'r+') as f:
        f.truncate(3 * os.path.getsize(original_file) // 7)

    ## this should complete the partial download
    path = syn._download(url, destination=temp_dir,
                         file_handle_id=entity.dataFileHandleId,
                         expected_md5=entity.md5)

    assert filecmp.cmp(original_file, path), "File comparison failed"
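
# For context, the resume behavior exercised above typically relies on HTTP
# Range requests. The following is a minimal sketch of that technique, assuming
# the server supports Range and that the requests library is available; it is
# not the client's actual _download implementation, which also handles retries,
# temp-file naming, and MD5 verification.
import requests

def _resume_download_sketch(url, tmp_path, chunk_size=1024 * 1024):
    # Start from however many bytes we already have on disk
    offset = os.path.getsize(tmp_path) if os.path.exists(tmp_path) else 0
    headers = {'Range': 'bytes=%d-' % offset} if offset else {}
    with requests.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        # 206 Partial Content means the server honored the Range header;
        # anything else means we must rewrite the file from scratch.
        mode = 'ab' if response.status_code == 206 else 'wb'
        with open(tmp_path, mode) as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    return tmp_path
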
def test_pool_provider_is_used_in__multipart_upload():
    mocked_get_chunk_function = MagicMock(side_effect=[1, 2, 3, 4])
    file_size = 1 * MB
    filepath = make_bogus_binary_file(n=file_size)
    md5 = md5_for_file(filepath).hexdigest()
    status = {'partsState': {}, 'uploadId': {}, 'state': 'COMPLETED'}

    pool = MagicMock()
    with patch.object(syn, "restPOST", return_value=status), \
            patch.object(pool_provider, "get_pool", return_value=pool) as mock_provider:
        _multipart_upload(syn, filepath, "application/octet-stream",
                          mocked_get_chunk_function, md5, file_size)
        mock_provider.assert_called()
        pool.map.assert_called()
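
# For context, the pool_provider being mocked above only needs to expose a
# get_pool() returning an object with a map() method. Below is a minimal sketch
# of that pattern, assuming a thread-based pool with a sequential fallback; the
# real synapseclient module's selection logic may differ.
import multiprocessing.dummy

class _SingleThreadPoolSketch:
    """Fallback 'pool' that runs work items sequentially."""
    def map(self, func, iterable):
        return [func(item) for item in iterable]

def _get_pool_sketch(single_threaded=False, pool_size=8):
    # Choose between the sequential fallback and a thread-based pool that
    # exposes the same map() interface (multiprocessing.dummy wraps threads
    # in the multiprocessing Pool API).
    if single_threaded:
        return _SingleThreadPoolSketch()
    return multiprocessing.dummy.Pool(pool_size)
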
def test_md5_query():
    # Add the same Entity several times
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    repeated = File(path, parent=project['id'],
                    description='Same data over and over again')

    num = 5
    stored = []
    for i in range(num):
        repeated.name = 'Repeated data %d.dat' % i
        stored.append(syn.store(repeated).id)

    # Retrieve the data via MD5. Although we expect num results, it is
    # possible for the MD5 to be non-unique.
    results = syn.md5Query(utils.md5_for_file(path).hexdigest())
    assert sorted(stored) == sorted([res['id'] for res in results])
    assert len(results) == num
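
# For context, a minimal sketch of the kind of REST call an MD5 lookup like
# syn.md5Query can be built on, assuming the Synapse GET /entity/md5/{md5}
# endpoint; treat the endpoint and response shape as assumptions rather than a
# definitive description of the client method.
def _md5_query_sketch(syn, md5_hex):
    # Returns the list of entity headers whose file content matches md5_hex
    response = syn.restGET('/entity/md5/%s' % md5_hex)
    return response.get('results', [])
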