def download_with_exception(self, output_file_path, expected_file_size):
    """
    Run a download that is expected to fail and verify how it fails.

    A mocked response of ``int(expected_file_size / 3 - 1000)`` bytes is
    registered for the initial GET request and for each of the two retry
    attempts. ``pyega3.download_file_retry`` must then raise an exception
    whose message matches "Slice error: received=..., requested=...", and
    nothing may exist at ``output_file_path`` afterwards.

    This helper does not touch ``pyega3.TEMPORARY_FILES_SHOULD_BE_DELETED``;
    presumably the calling test sets it beforehand -- confirm against callers.

    :param output_file_path: path handed to ``download_file_retry`` (used for
        the name arguments as well as for the output file).
    :param expected_file_size: file size announced to ``download_file_retry``.
    """
    number_of_retries = 2
    not_enough_bytes = int(expected_file_size / 3 - 1000)

    # One short response for the normal GET request, plus one per retry attempt.
    for _ in range(number_of_retries + 1):
        self.file_can_be_downloaded(self.create_input_file(not_enough_bytes))

    with self.assertRaises(Exception) as context_manager:
        pyega3.download_file_retry(
            ('', ''),
            'test_file_id1',
            output_file_path,
            output_file_path,
            expected_file_size,
            'check_sum',
            1,
            None,
            output_file_path,
            None,
            number_of_retries,
            0.1,
        )

    self.assertRegex(
        str(context_manager.exception),
        r'Slice error: received=\d+, requested=\d+',
    )
    self.assertFalse(os.path.exists(output_file_path))
def test_temp_files_are_deleted_automatically_if_there_are_no_exceptions(self):
    """
    The temporary files are deleted by the algorithm automatically, during
    the happy path, when the temporary files are assembled into the final,
    big file. There's no need for an extra deleting mechanism, which is why
    TEMPORARY_FILES_SHOULD_BE_DELETED is switched off here.

    The test downloads a single mocked payload and then checks that:
      * the path popped from ``pyega3.TEMPORARY_FILES`` no longer exists,
      * the output file exists and has the size of the payload
        (the announced size minus the 16 extra bytes).

    The output file is removed at the end, also when an assertion fails,
    so that a failing run does not leave the downloaded file behind.
    """
    pyega3.TEMPORARY_FILES_SHOULD_BE_DELETED = False

    file_size_without_iv = 92700
    file_size_with_iv = file_size_without_iv + 16

    self.server_config_file_loaded_successfully()
    self.user_has_authenticated_successfully()

    input_file = bytearray(os.urandom(file_size_without_iv))
    self.file_can_be_downloaded(input_file)

    output_file_path = self.create_output_file_path()

    try:
        pyega3.download_file_retry(
            ('', ''), 'test_file_id1', output_file_path, output_file_path,
            file_size_with_iv, 'check_sum', 1, None, output_file_path, None,
            2, 0.1)

        temp_file = pyega3.TEMPORARY_FILES.pop()
        # The temporary file should not exist because everything went fine,
        # and it was deleted automatically:
        self.assertFalse(os.path.exists(temp_file))

        self.assertTrue(os.path.exists(output_file_path))
        output_file_size = os.stat(output_file_path).st_size
        self.assertEqual(output_file_size, file_size_without_iv)
    finally:
        # Clean up regardless of the outcome; the existence check keeps a
        # failed download from being masked by a FileNotFoundError.
        if os.path.exists(output_file_path):
            os.remove(output_file_path)
def testhtsget(self, identifier, ref, start, end, type, saveTo):
    """
    Download a genomic range of the file ``identifier`` through ``ega``.

    The file's display name, name, size and checksum are looked up with
    ``self.token``; the display name is printed, and the download is started
    with ``self.credentials`` and ``self.key``, writing to ``saveTo``.

    :param identifier: id of the file to fetch.
    :param ref: first element of the genomic range tuple.
    :param start: third element of the genomic range tuple.
    :param end: fourth element of the genomic range tuple.
    :param type: fifth element of the genomic range tuple.
    :param saveTo: output file argument passed to ``download_file_retry``.
    """
    file_info = ega.get_file_name_size_md5(self.token, identifier)
    display_file_name, file_name, file_size, check_sum = file_info
    print(display_file_name)

    # The looked-up checksum is the second element of the range tuple.
    genomic_range_args = (ref, check_sum, start, end, type)

    ega.download_file_retry(
        self.credentials,
        identifier,
        display_file_name,
        file_name,
        file_size,
        check_sum,
        3,
        self.key,
        saveTo,
        genomic_range_args,
        -1,
        10,
    )
def test_second_attempt_succeeds(self):
    """
    It was not possible to download the whole file on the first download
    attempt, so the script retries for a second time and continues from
    where it stopped on the first attempt.

    Two mocked payloads are registered: the first one is 123 bytes short,
    the second one holds exactly the missing 123 bytes. The test checks the
    ``Range`` headers of the two download requests, that the path popped
    from ``pyega3.TEMPORARY_FILES`` no longer exists, and that the output
    file has the full size.

    The output file is removed at the end, also when an assertion fails,
    so that a failing run does not leave the downloaded file behind.
    """
    pyega3.TEMPORARY_FILES_SHOULD_BE_DELETED = False

    file_size_without_iv = 92700
    file_size_with_iv = file_size_without_iv + 16

    self.server_config_file_loaded_successfully()
    self.user_has_authenticated_successfully()

    amount_of_missing_bytes = 123
    file_size_with_missing_bytes = file_size_without_iv - amount_of_missing_bytes
    input_file_with_few_bytes_missing = bytearray(os.urandom(file_size_with_missing_bytes))
    self.file_can_be_downloaded(input_file_with_few_bytes_missing)

    rest_of_the_input_file = bytearray(os.urandom(amount_of_missing_bytes))
    self.file_can_be_downloaded(rest_of_the_input_file)

    output_file_path = self.create_output_file_path()

    try:
        pyega3.download_file_retry(
            ('', ''), 'test_file_id1', output_file_path, output_file_path,
            file_size_with_iv, 'check_sum', 1, None, output_file_path, None,
            2, 0.1)

        temp_file = pyega3.TEMPORARY_FILES.pop()
        # The temporary file should not exist because everything went fine,
        # and it was deleted automatically:
        self.assertFalse(os.path.exists(temp_file))

        # First attempt asks for the whole file ...
        self.assertEqual(responses.calls[1].request.headers.get('Range'), 'bytes=0-92699')
        # ... the retry only asks for the part that is still missing.
        self.assertEqual(responses.calls[2].request.headers.get('Range'), 'bytes=92577-92699')
        self.assertEqual(responses.calls[2].request.headers.get('Range'),
                         'bytes={}-92699'.format(file_size_with_missing_bytes))

        self.assertTrue(os.path.exists(output_file_path))
        output_file_size = os.stat(output_file_path).st_size
        self.assertEqual(output_file_size, file_size_without_iv)
    finally:
        # Clean up regardless of the outcome; the existence check keeps a
        # failed download from being masked by a FileNotFoundError.
        if os.path.exists(output_file_path):
            os.remove(output_file_path)
def test_download_file(self, mocked_remove):
    """
    Download a random payload through pyega3 against an in-memory file system.

    ``open``, ``os.makedirs``, ``os.path.exists``, ``os.stat`` and
    ``os.rename`` are replaced by fakes backed by the ``mocked_files`` dict
    (base name -> bytearray); HTTP GETs for the file URL are answered from
    ``file_contents`` according to the request's ``Range`` header.

    Checked scenarios:
      * a download whose result must equal the payload,
      * a second download with the same arguments,
      * a download with a wrong MD5, which must raise,
      * the expected calls on ``mocked_remove``,
      * a call with a key ("key"), which must raise ``ValueError``,
      * a call for "test.gpg", which must return without raising.

    :param mocked_remove: mock whose recorded calls are checked; presumably
        injected by a ``mock.patch('os.remove')`` decorator outside this
        block -- confirm.
    """
    file_id = "EGAF00000000001"
    url = "https://ega.ebi.ac.uk:8051/elixir/data/files/{}".format(file_id)
    good_token = rand_str()

    # Payload size is random, capped at 1/512 of the available memory.
    mem = virtual_memory().available
    file_sz = random.randint(1, mem // 512)
    file_name = "resulting.file"
    file_contents = os.urandom(file_sz)
    file_md5 = hashlib.md5(file_contents).hexdigest()

    # In-memory "file system": base name -> bytearray with the file's bytes.
    mocked_files = {}

    def open_wrapper(filename, mode):
        """Fake ``open``: creates missing files, reads a snapshot, appends on write."""
        base_name = os.path.basename(filename)
        if base_name not in mocked_files:
            mocked_files[base_name] = bytearray()

        # Readers see the content as it was when the file was opened.
        content = bytes(mocked_files[base_name])
        read_buf_sz = 65536

        file_object = mock.mock_open(read_data=content).return_value
        file_object.__iter__.return_value = [
            content[offset:offset + read_buf_sz]
            for offset in range(0, len(content), read_buf_sz)
        ]

        def append_to_mocked_file(write_buf):
            # Looked up at write time, exactly like the file name is known now.
            mocked_files[base_name].extend(write_buf)

        file_object.write.side_effect = append_to_mocked_file
        return file_object

    def parse_ranges(range_header):
        """Turn 'bytes=<start>-<end>' into the tuple (start, end)."""
        match = re.match(r'^bytes=(\d+)-(\d+)$', range_header)
        return tuple(int(group) for group in match.groups())

    def request_callback(request):
        """Serve the requested byte range, or 400 for a missing/wrong token."""
        # .get(): a request without the header must receive the 400 answer
        # below instead of failing with a KeyError inside the callback.
        auth_hdr = request.headers.get('Authorization')
        if auth_hdr is None or auth_hdr != 'Bearer ' + good_token:
            return (400, {}, json.dumps({"error_description": "invalid token"}))

        start, end = parse_ranges(request.headers['Range'])
        self.assertLess(start, end)
        # The Range header is inclusive on both ends.
        return (200, {}, file_contents[start:end + 1])

    responses.add_callback(responses.GET, url, callback=request_callback)

    def os_stat_mock(fn):
        """Fake ``os.stat``: only ``st_size`` carries a value."""
        fn = os.path.basename(fn)
        X = namedtuple('X', 'st_size f1 f2 f3 f4 f5 f6 f7 f8 f9')
        return X(len(mocked_files[fn]), *([None] * 9))

    def rename_mock(s, d):
        """Fake ``os.rename``: move the bytes from one base name to another."""
        mocked_files[os.path.basename(d)] = mocked_files.pop(os.path.basename(s))

    # Patchers are entered left to right, i.e. in the original nesting order.
    patched_open = mock.patch('builtins.open', new=open_wrapper)
    patched_makedirs = mock.patch('os.makedirs', lambda path: None)
    patched_exists = mock.patch(
        'os.path.exists', lambda path: os.path.basename(path) in mocked_files)
    patched_stat = mock.patch('os.stat', os_stat_mock)
    patched_rename = mock.patch('os.rename', rename_mock)

    with patched_open, patched_makedirs, patched_exists, patched_stat, patched_rename:
        pyega3.download_file(
            # add 16 bytes to file size ( IV adjustment )
            good_token, file_id, file_name + ".cip", file_sz + 16, file_md5,
            1, None, output_file=None)
        self.assertEqual(file_contents, mocked_files[file_name])

        # Same arguments again, now that the result is already in mocked_files.
        pyega3.download_file_retry(
            good_token, file_id, file_name + ".cip", file_sz + 16, file_md5,
            1, None, output_file=None)

        wrong_md5 = "wrong_md5_exactly_32_chars_longg"
        with self.assertRaises(Exception):
            pyega3.download_file(
                good_token, file_id, file_name + ".cip", file_sz + 16, wrong_md5,
                1, None, output_file=None)

        # Every mocked file except the first key is expected to have been removed.
        mocked_remove.assert_has_calls(
            [
                mock.call(os.path.join(os.getcwd(), file_id, os.path.basename(f)))
                for f in list(mocked_files.keys())[1:]
            ],
            any_order=True)

    with self.assertRaises(ValueError):
        pyega3.download_file_retry("", "", "", 0, 0, 1, "key", output_file=None)

    # Must complete without raising.
    pyega3.download_file("", "", "test.gpg", 0, 0, 1, None, output_file=None)
def test_download_file(self, mocked_remove):
    """
    Download a random payload through ``pyega3.download_file_retry`` against
    an in-memory file system.

    ``open``, ``os.makedirs``, ``os.path.exists``, ``os.stat`` and
    ``os.rename`` are replaced by fakes backed by the ``mocked_files`` dict
    (base name -> bytearray); HTTP GETs for the file URL are answered from
    ``file_contents`` according to the request's ``Range`` header.

    Checked scenarios:
      * a download whose result must equal the payload,
      * a second download with the same arguments ('local file exists'),
      * a download with a wrong MD5, which must raise,
      * the expected calls on ``mocked_remove``,
      * a genomic-range download, which must call ``htsget.get`` with the
        ticket URL and the range arguments,
      * a call with a key ("key"), which must raise ``ValueError``,
      * a call for "test.gpg", which must return without raising.

    :param mocked_remove: mock whose recorded calls are checked; presumably
        injected by a ``mock.patch('os.remove')`` decorator outside this
        block -- confirm.
    """
    file_id = "EGAF00000000001"
    url = "https://ega.ebi.ac.uk:8052/elixir/data/files/{}".format(file_id)
    good_creds = {
        "username": rand_str(),
        "password": rand_str(),
        "client_secret": rand_str()
    }

    # Payload size is random, capped at 1/512 of the available memory.
    mem = virtual_memory().available
    file_sz = random.randint(1, mem // 512)
    file_name = "resulting.file"
    file_contents = os.urandom(file_sz)
    file_md5 = hashlib.md5(file_contents).hexdigest()

    # In-memory "file system": base name -> bytearray with the file's bytes.
    mocked_files = {}

    def open_wrapper(filename, mode):
        """Fake ``open``: creates files on write, refuses to read missing ones."""
        base_name = os.path.basename(filename)
        if base_name not in mocked_files:
            if 'r' in mode:
                raise Exception(
                    "Attempt to read mock file before it was created.")
            mocked_files[base_name] = bytearray()

        # Readers see the content as it was when the file was opened.
        content = bytes(mocked_files[base_name])
        read_buf_sz = 65536

        file_object = mock.mock_open(read_data=content).return_value
        file_object.__iter__.return_value = [
            content[offset:offset + read_buf_sz]
            for offset in range(0, len(content), read_buf_sz)
        ]

        def append_to_mocked_file(write_buf):
            # Looked up at write time, exactly like the file name is known now.
            mocked_files[base_name].extend(write_buf)

        file_object.write.side_effect = append_to_mocked_file
        return file_object

    def parse_ranges(range_header):
        """Turn 'bytes=<start>-<end>' into the tuple (start, end)."""
        match = re.match(r'^bytes=(\d+)-(\d+)$', range_header)
        return tuple(int(group) for group in match.groups())

    def request_callback(request):
        """Serve the requested byte range, or 400 for a missing/wrong token."""
        # .get(): a request without the header must receive the 400 answer
        # below instead of failing with a KeyError inside the callback.
        auth_hdr = request.headers.get('Authorization')
        # NOTE(review): the token is a fixed literal here; presumably the token
        # lookup is patched outside this block to return it -- confirm.
        if auth_hdr is None or auth_hdr != 'Bearer ' + 'good_token':
            return (400, {}, json.dumps({"error_description": "invalid token"}))

        start, end = parse_ranges(request.headers['Range'])
        self.assertLess(start, end)
        # The Range header is inclusive on both ends.
        return (200, {}, file_contents[start:end + 1])

    responses.add_callback(responses.GET, url, callback=request_callback)

    def os_stat_mock(fn):
        """Fake ``os.stat``: only ``st_size`` carries a value."""
        fn = os.path.basename(fn)
        X = namedtuple('X', 'st_size f1 f2 f3 f4 f5 f6 f7 f8 f9')
        return X(len(mocked_files[fn]), *([None] * 9))

    def rename_mock(s, d):
        """Fake ``os.rename``: move the bytes from one base name to another."""
        mocked_files[os.path.basename(d)] = mocked_files.pop(os.path.basename(s))

    # Patchers are entered left to right, i.e. in the original nesting order.
    patched_open = mock.patch('builtins.open', new=open_wrapper)
    patched_makedirs = mock.patch('os.makedirs', lambda path: None)
    patched_exists = mock.patch(
        'os.path.exists', lambda path: os.path.basename(path) in mocked_files)
    patched_stat = mock.patch('os.stat', os_stat_mock)
    patched_rename = mock.patch('os.rename', rename_mock)

    with patched_open, patched_makedirs, patched_exists, patched_stat, patched_rename:
        pyega3.download_file_retry(
            # add 16 bytes to file size ( IV adjustment )
            good_creds, file_id, file_name + ".cip", file_sz + 16, file_md5,
            1, None,
            output_file=None, genomic_range_args=None,
            max_retries=5, retry_wait=5)
        self.assertEqual(file_contents, mocked_files[file_name])

        # to cover 'local file exists' case
        pyega3.download_file_retry(
            good_creds, file_id, file_name + ".cip", file_sz + 16, file_md5,
            1, None,
            output_file=None, genomic_range_args=None,
            max_retries=5, retry_wait=5)

        wrong_md5 = "wrong_md5_exactly_32_chars_longg"
        with self.assertRaises(Exception):
            pyega3.download_file_retry(
                good_creds, file_id, file_name + ".cip", file_sz + 16, wrong_md5,
                1, None,
                output_file=None, genomic_range_args=None)

        # Every mocked file whose name does not contain file_name is expected
        # to have been removed.
        mocked_remove.assert_has_calls(
            [
                mock.call(os.path.join(os.getcwd(), file_id, os.path.basename(f)))
                for f in list(mocked_files.keys())
                if file_name not in f
            ],
            any_order=True)

        with mock.patch('htsget.get') as mocked_htsget:
            pyega3.download_file_retry(
                good_creds, file_id, file_name + ".cip", file_sz + 16, file_md5,
                1, None,
                output_file=None,
                genomic_range_args=("chr1", None, 1, 100, None),
                max_retries=5, retry_wait=5)

        # The genomic range tuple must be forwarded to htsget.get unchanged.
        args, kwargs = mocked_htsget.call_args
        self.assertEqual(
            args[0],
            'https://ega.ebi.ac.uk:8052/elixir/tickets/tickets/files/EGAF00000000001'
        )
        self.assertEqual(kwargs.get('reference_name'), 'chr1')
        self.assertEqual(kwargs.get('reference_md5'), None)
        self.assertEqual(kwargs.get('start'), 1)
        self.assertEqual(kwargs.get('end'), 100)
        self.assertEqual(kwargs.get('data_format'), None)

    with self.assertRaises(ValueError):
        pyega3.download_file_retry(
            "", "", "", 0, 0, 1, "key",
            output_file=None, genomic_range_args=None,
            max_retries=5, retry_wait=5)

    # Must complete without raising.
    pyega3.download_file_retry(
        "", "", "test.gpg", 0, 0, 1, None,
        output_file=None, genomic_range_args=None,
        max_retries=5, retry_wait=5)