def download_with_exception(self, output_file_path, expected_file_size):
        """
        Runs a download in which an exception is expected to be raised.

        One mocked response is registered for the initial GET request and one more
        for each retry attempt. Each response is built by "create_input_file" from a
        size of about a third of "expected_file_size" minus 1000 (presumably too
        short to satisfy the request; confirm against that helper).

        The call to "pyega3.download_file_retry" must raise an exception whose message
        matches "Slice error: received=<n>, requested=<n>", and afterwards nothing
        may exist at "output_file_path".

        This method does not touch TEMPORARY_FILES_SHOULD_BE_DELETED itself;
        presumably the caller sets that flag beforehand (confirm).
        """

        max_retries = 2
        truncated_size = int(expected_file_size / 3 - 1000)

        # The initial GET request and every retry attempt each get their own response:
        for _attempt in range(1 + max_retries):
            self.file_can_be_downloaded(self.create_input_file(truncated_size))

        with self.assertRaises(Exception) as raised:
            pyega3.download_file_retry(
                ('', ''), 'test_file_id1', output_file_path, output_file_path,
                expected_file_size, 'check_sum', 1, None, output_file_path, None,
                max_retries, 0.1)

        self.assertRegex(str(raised.exception), r'Slice error: received=\d+, requested=\d+')

        self.assertFalse(os.path.exists(output_file_path))
    def test_temp_files_are_deleted_automatically_if_there_are_no_exceptions(self):
        """
        Happy path: after a successful download no temporary file is left behind,
        even though TEMPORARY_FILES_SHOULD_BE_DELETED is switched off.

        The download is expected to remove its temporary file on its own once the
        final output file has been produced, so no extra deleting-mechanism is needed.
        """
        pyega3.TEMPORARY_FILES_SHOULD_BE_DELETED = False

        payload_size = 92700
        # The size passed to the download is 16 bytes larger than the payload (the IV):
        payload_size_with_iv = payload_size + 16

        self.server_config_file_loaded_successfully()
        self.user_has_authenticated_successfully()

        self.file_can_be_downloaded(bytearray(os.urandom(payload_size)))

        destination = self.create_output_file_path()

        pyega3.download_file_retry(
            ('', ''), 'test_file_id1', destination, destination,
            payload_size_with_iv, 'check_sum', 1, None, destination, None, 2, 0.1)

        # Everything went fine, so the recorded temporary file must already be gone:
        leftover_temp_file = pyega3.TEMPORARY_FILES.pop()
        self.assertFalse(os.path.exists(leftover_temp_file))

        # The final file exists and has the size of the payload (without the IV):
        self.assertTrue(os.path.exists(destination))
        self.assertEqual(os.stat(destination).st_size, payload_size)
        os.remove(destination)
Example #3
0
 def testhtsget(self, identifier, ref, start, end, type, saveTo):
     """
     Looks up the name, size and checksum of the file "identifier", prints its
     display name and downloads the requested genomic range to "saveTo".

     The range is handed to "ega.download_file_retry" as the tuple
     (ref, checksum, start, end, type). The trailing -1 and 10 are presumably
     the maximum number of retries and the wait between retries (confirm
     against the signature of download_file_retry).
     """
     shown_name, stored_name, size, md5 = ega.get_file_name_size_md5(
         self.token, identifier)
     print(shown_name)
     region = (ref, md5, start, end, type)
     ega.download_file_retry(
         self.credentials, identifier, shown_name, stored_name, size,
         md5, 3, self.key, saveTo, region, -1, 10)
    def test_second_attempt_succeeds(self):
        """
        The first download attempt delivers all but the last few bytes of the file,
        so the script retries and asks only for the part that is still missing.

        Two responses are registered: the first one is 123 bytes short, the second
        one carries exactly those 123 bytes. The test then checks the "Range" headers
        of the recorded requests, that the temporary file is gone, and that the
        output file has the full size (without the IV).
        """

        pyega3.TEMPORARY_FILES_SHOULD_BE_DELETED = False

        payload_size = 92700
        payload_size_with_iv = payload_size + 16

        self.server_config_file_loaded_successfully()
        self.user_has_authenticated_successfully()

        missing_byte_count = 123
        first_part_size = payload_size - missing_byte_count

        # First response: the file with a few bytes missing at the end.
        self.file_can_be_downloaded(bytearray(os.urandom(first_part_size)))
        # Second response: the rest of the file.
        self.file_can_be_downloaded(bytearray(os.urandom(missing_byte_count)))

        destination = self.create_output_file_path()

        pyega3.download_file_retry(
            ('', ''), 'test_file_id1', destination, destination,
            payload_size_with_iv, 'check_sum', 1, None, destination, None, 2, 0.1)

        # Everything went fine, so the recorded temporary file must already be gone:
        leftover_temp_file = pyega3.TEMPORARY_FILES.pop()
        self.assertFalse(os.path.exists(leftover_temp_file))

        # The first attempt asks for the whole file ...
        first_range = responses.calls[1].request.headers.get('Range')
        self.assertEqual(first_range, 'bytes=0-92699')
        # ... and the retry continues from where the first attempt stopped.
        retry_range = responses.calls[2].request.headers.get('Range')
        self.assertEqual(retry_range, 'bytes=92577-92699')
        retry_range = responses.calls[2].request.headers.get('Range')
        self.assertEqual(retry_range, 'bytes={}-92699'.format(first_part_size))

        self.assertTrue(os.path.exists(destination))
        self.assertEqual(os.stat(destination).st_size, payload_size)
        os.remove(destination)
Example #5
0
    def test_download_file(self,mocked_remove):        
        """
        Exercises pyega3.download_file and pyega3.download_file_retry against a mocked
        HTTP endpoint, while open/os.makedirs/os.path.exists/os.stat/os.rename are
        redirected to an in-memory dictionary of files.

        "mocked_remove" is a mock injected from outside this method (the patch
        decorator is not visible here; presumably it replaces os.remove - confirm).
        """
        file_id = "EGAF00000000001"
        url     = "https://ega.ebi.ac.uk:8051/elixir/data/files/{}".format(file_id)        
        good_token = rand_str() 

        # Random payload whose size is bounded by the currently available memory.
        mem             = virtual_memory().available
        file_sz         = random.randint(1, mem//512)
        file_name       = "resulting.file"
        file_contents   = os.urandom(file_sz)         
        file_md5        = hashlib.md5(file_contents).hexdigest()

        # In-memory "file system": base file name -> bytearray with the file content.
        mocked_files = {}        
        def open_wrapper(filename, mode):
            # Replacement for the builtin open(): files are keyed by base name only,
            # and opening an unknown file creates it empty regardless of "mode".
            filename = os.path.basename(filename)
            if filename not in mocked_files :
                mocked_files[filename] = bytearray()
            # Reads see a snapshot of the content taken at the time of opening.
            content     = bytes(mocked_files[filename])
            content_len = len(content)
            read_buf_sz = 65536
            file_object = mock.mock_open(read_data=content).return_value
            # Iterating the file object yields the content in chunks of read_buf_sz bytes.
            file_object.__iter__.return_value = [content[i:min(i+read_buf_sz,content_len)] for i in range(0,content_len,read_buf_sz)]
            # Writes are appended to the in-memory file.
            file_object.write.side_effect = lambda write_buf: mocked_files[filename].extend(write_buf)
            return file_object

        def parse_ranges(s):
            # Turns a header value of the form 'bytes=<start>-<end>' into (start, end) integers.
            return tuple(map(int,re.match(r'^bytes=(\d+)-(\d+)$', s).groups()))

        def request_callback(request):
            # Mocked server: answers 400 unless the request carries 'Bearer <good_token>',
            # otherwise returns the requested byte range (the end offset is inclusive).
            auth_hdr = request.headers['Authorization']
            if auth_hdr is None or auth_hdr != 'Bearer ' + good_token:
                return ( 400, {}, json.dumps({"error_description": "invalid token"}) )

            start, end = parse_ranges( request.headers['Range'] )
            self.assertLess(start,end)                              
            return ( 200, {}, file_contents[start:end+1] )
                
        responses.add_callback(
            responses.GET, 
            url,
            callback=request_callback
            )                
        with mock.patch('builtins.open', new=open_wrapper): 
             with mock.patch('os.makedirs', lambda path: None):
                # A path "exists" if a file with the same base name is in mocked_files.
                with mock.patch('os.path.exists', lambda path: os.path.basename(path) in mocked_files):
                    def os_stat_mock(fn):
                        # Replacement for os.stat(): only st_size is filled in (with the
                        # length of the in-memory file), the other nine fields are None.
                        fn=os.path.basename(fn)                        
                        X = namedtuple('X','st_size f1 f2 f3 f4 f5 f6 f7 f8 f9')
                        sr = [None] * 10; sr[0]=len(mocked_files[fn]); return X(*sr)
                    with mock.patch('os.stat', os_stat_mock):
                        # Renaming moves the entry from the source base name to the destination base name.
                        with mock.patch( 'os.rename', lambda s,d: mocked_files.__setitem__(os.path.basename(d),mocked_files.pop(os.path.basename(s))) ):
                            pyega3.download_file( 
                                # add 16 bytes to file size ( IV adjustment )
                                good_token, file_id, file_name+".cip", file_sz+16, file_md5, 1, None, output_file=None ) 
                            # The result is expected under "file_name", i.e. without the ".cip" suffix.
                            self.assertEqual( file_contents, mocked_files[file_name] )

                            # Same download again through the retry wrapper; at this point the
                            # result file already exists in mocked_files (presumably this covers
                            # the "local file exists" path - confirm).
                            pyega3.download_file_retry( 
                                good_token, file_id, file_name+".cip", file_sz+16, file_md5, 1, None, output_file=None ) 

                            # A checksum that does not match the content must make the download fail.
                            wrong_md5 = "wrong_md5_exactly_32_chars_longg"
                            with self.assertRaises(Exception):
                                pyega3.download_file( 
                                    good_token, file_id, file_name+".cip", file_sz+16, wrong_md5, 1, None, output_file=None ) 

                            # Every mocked file except the first key of mocked_files must have been
                            # passed to mocked_remove as <cwd>/<file_id>/<base name>.
                            mocked_remove.assert_has_calls( 
                                [ mock.call(os.path.join( os.getcwd(), file_id, os.path.basename(f) )) for f in list(mocked_files.keys())[1:] ],
                                any_order=True )

        # Outside the patches: passing "key" in the position where the calls above pass None
        # must raise ValueError.
        with self.assertRaises(ValueError):
            pyega3.download_file_retry( "", "", "", 0, 0, 1, "key", output_file=None ) 

        # A "test.gpg" file name with empty token and file id is expected not to raise.
        pyega3.download_file( "", "", "test.gpg",  0, 0, 1, None, output_file=None ) 
Example #6
0
    def test_download_file(self, mocked_remove):
        """
        Exercises pyega3.download_file_retry against a mocked HTTP endpoint, while
        open/os.makedirs/os.path.exists/os.stat/os.rename are redirected to an
        in-memory dictionary of files. Also checks the arguments passed to
        htsget.get when a genomic range is requested.

        "mocked_remove" is a mock injected from outside this method (the patch
        decorator is not visible here; presumably it replaces os.remove - confirm).
        """
        file_id = "EGAF00000000001"
        url = "https://ega.ebi.ac.uk:8052/elixir/data/files/{}".format(file_id)
        good_creds = {
            "username": rand_str(),
            "password": rand_str(),
            "client_secret": rand_str()
        }

        # Random payload whose size is bounded by the currently available memory.
        mem = virtual_memory().available
        file_sz = random.randint(1, mem // 512)
        file_name = "resulting.file"
        file_contents = os.urandom(file_sz)
        file_md5 = hashlib.md5(file_contents).hexdigest()

        # In-memory "file system": base file name -> bytearray with the file content.
        mocked_files = {}

        def open_wrapper(filename, mode):
            # Replacement for the builtin open(): files are keyed by base name only.
            # Opening an unknown file for reading is an error; otherwise it is created empty.
            filename = os.path.basename(filename)
            if filename not in mocked_files:
                if 'r' in mode:
                    raise Exception(
                        "Attempt to read mock file before it was created.")
                mocked_files[filename] = bytearray()
            # Reads see a snapshot of the content taken at the time of opening.
            content = bytes(mocked_files[filename])
            content_len = len(content)
            read_buf_sz = 65536
            file_object = mock.mock_open(read_data=content).return_value
            # Iterating the file object yields the content in chunks of read_buf_sz bytes.
            file_object.__iter__.return_value = [
                content[i:min(i + read_buf_sz, content_len)]
                for i in range(0, content_len, read_buf_sz)
            ]
            # Writes are appended to the in-memory file.
            file_object.write.side_effect = lambda write_buf: mocked_files[
                filename].extend(write_buf)
            return file_object

        def parse_ranges(s):
            # Turns a header value of the form 'bytes=<start>-<end>' into (start, end) integers.
            return tuple(map(int,
                             re.match(r'^bytes=(\d+)-(\d+)$', s).groups()))

        def request_callback(request):
            # Mocked server: answers 400 unless the Authorization header is exactly
            # 'Bearer good_token', otherwise returns the requested byte range
            # (the end offset is inclusive).
            # NOTE(review): the comparison uses the literal string 'good_token' and never
            # consults good_creds - presumably the token request is mocked elsewhere to
            # return 'good_token'; confirm.
            auth_hdr = request.headers['Authorization']
            if auth_hdr is None or auth_hdr != 'Bearer ' + 'good_token':
                return (400, {},
                        json.dumps({"error_description": "invalid token"}))

            start, end = parse_ranges(request.headers['Range'])
            self.assertLess(start, end)
            return (200, {}, file_contents[start:end + 1])

        responses.add_callback(responses.GET, url, callback=request_callback)
        with mock.patch('builtins.open', new=open_wrapper):
            with mock.patch('os.makedirs', lambda path: None):
                # A path "exists" if a file with the same base name is in mocked_files.
                with mock.patch(
                        'os.path.exists',
                        lambda path: os.path.basename(path) in mocked_files):

                    def os_stat_mock(fn):
                        # Replacement for os.stat(): only st_size is filled in (with the
                        # length of the in-memory file), the other nine fields are None.
                        fn = os.path.basename(fn)
                        X = namedtuple('X',
                                       'st_size f1 f2 f3 f4 f5 f6 f7 f8 f9')
                        sr = [None] * 10
                        sr[0] = len(mocked_files[fn])
                        return X(*sr)

                    with mock.patch('os.stat', os_stat_mock):
                        # Renaming moves the entry from the source base name to the
                        # destination base name.
                        with mock.patch(
                                'os.rename',
                                lambda s, d: mocked_files.__setitem__(
                                    os.path.basename(d),
                                    mocked_files.pop(os.path.basename(s)))):
                            pyega3.download_file_retry(
                                # add 16 bytes to file size ( IV adjustment )
                                good_creds,
                                file_id,
                                file_name + ".cip",
                                file_sz + 16,
                                file_md5,
                                1,
                                None,
                                output_file=None,
                                genomic_range_args=None,
                                max_retries=5,
                                retry_wait=5)
                            # The result is expected under "file_name", i.e. without
                            # the ".cip" suffix.
                            self.assertEqual(file_contents,
                                             mocked_files[file_name])

                            # to cover 'local file exists' case
                            pyega3.download_file_retry(good_creds,
                                                       file_id,
                                                       file_name + ".cip",
                                                       file_sz + 16,
                                                       file_md5,
                                                       1,
                                                       None,
                                                       output_file=None,
                                                       genomic_range_args=None,
                                                       max_retries=5,
                                                       retry_wait=5)

                            # A checksum that does not match the content must make the
                            # download fail. max_retries and retry_wait are not passed
                            # here, so the defaults of download_file_retry apply.
                            wrong_md5 = "wrong_md5_exactly_32_chars_longg"
                            with self.assertRaises(Exception):
                                pyega3.download_file_retry(
                                    good_creds,
                                    file_id,
                                    file_name + ".cip",
                                    file_sz + 16,
                                    wrong_md5,
                                    1,
                                    None,
                                    output_file=None,
                                    genomic_range_args=None)

                            # Every mocked file whose name does not contain "file_name"
                            # must have been passed to mocked_remove as
                            # <cwd>/<file_id>/<base name>.
                            mocked_remove.assert_has_calls([
                                mock.call(
                                    os.path.join(os.getcwd(), file_id,
                                                 os.path.basename(f)))
                                for f in list(mocked_files.keys())
                                if not file_name in f
                            ],
                                                           any_order=True)

                            # With a genomic range the download is expected to go
                            # through htsget.get, which is replaced by a mock here.
                            with mock.patch('htsget.get') as mocked_htsget:
                                pyega3.download_file_retry(
                                    good_creds,
                                    file_id,
                                    file_name + ".cip",
                                    file_sz + 16,
                                    file_md5,
                                    1,
                                    None,
                                    output_file=None,
                                    genomic_range_args=("chr1", None, 1, 100,
                                                        None),
                                    max_retries=5,
                                    retry_wait=5)

                            # The mock keeps its recorded call after the patch has ended.
                            args, kwargs = mocked_htsget.call_args
                            self.assertEqual(
                                args[0],
                                'https://ega.ebi.ac.uk:8052/elixir/tickets/tickets/files/EGAF00000000001'
                            )

                            # The five entries of genomic_range_args must arrive as these
                            # keyword arguments, in this order.
                            self.assertEqual(kwargs.get('reference_name'),
                                             'chr1')
                            self.assertEqual(kwargs.get('reference_md5'), None)
                            self.assertEqual(kwargs.get('start'), 1)
                            self.assertEqual(kwargs.get('end'), 100)
                            self.assertEqual(kwargs.get('data_format'), None)

        # Outside the patches: passing "key" in the position where the calls above
        # pass None must raise ValueError.
        with self.assertRaises(ValueError):
            pyega3.download_file_retry("",
                                       "",
                                       "",
                                       0,
                                       0,
                                       1,
                                       "key",
                                       output_file=None,
                                       genomic_range_args=None,
                                       max_retries=5,
                                       retry_wait=5)

        # A "test.gpg" file name with empty credentials and file id is expected not to raise.
        pyega3.download_file_retry("",
                                   "",
                                   "test.gpg",
                                   0,
                                   0,
                                   1,
                                   None,
                                   output_file=None,
                                   genomic_range_args=None,
                                   max_retries=5,
                                   retry_wait=5)