def assert_data_transfer_ok(self, test_instances, max_retries=0):
    """Download the ticket URL and verify the bytes that were written.

    ``test_instances`` is handed to ``self.httpd``, the ticket at
    ``TestRequestHandler.ticket_url`` is fetched into ``self.output_file``
    (forwarding ``max_retries`` to ``htsget.get``), and the file contents
    are compared with the concatenation of every instance's ``data``.
    """
    self.httpd.test_instances = test_instances
    htsget.get(
        TestRequestHandler.ticket_url,
        self.output_file,
        max_retries=max_retries,
    )

    # Rewind so the comparison covers everything written by the transfer.
    self.output_file.seek(0)
    expected = b"".join([instance.data for instance in test_instances])
    received = self.output_file.read()
    self.assertEqual(received, expected)
def htsget_api(
        url, filename, reference_name=None, reference_md5=None, start=None,
        end=None, data_format=None):
    """Fetch ``url`` through htsget and store the payload in ``filename``.

    The request parameters are logged at INFO level, then ``filename`` is
    opened for binary writing (truncating any existing content) and passed
    to ``htsget.get`` together with the query arguments and a fixed
    ``timeout`` of 120.
    """
    message = "htsget-api: request (name='{}', md5='{}', start={}, end={})".format(
        reference_name, reference_md5, start, end)
    logging.info(message)

    query = {
        "reference_name": reference_name,
        "reference_md5": reference_md5,
        "start": start,
        "end": end,
        "data_format": data_format,
    }
    with open(filename, "wb") as sink:
        htsget.get(url, sink, timeout=120, **query)
def download_file_retry(creds, file_id, file_name, file_size, check_sum,
                        num_connections, key, output_file, genomic_range_args,
                        max_retries, retry_wait):
    """Download one file, retrying failed attempts.

    A token is obtained from ``get_token(creds)`` up front. Files whose name
    ends in ``.gpg`` are logged as unsupported and skipped. When
    ``output_file`` is ``None`` a name is produced by
    ``generate_output_filename`` relative to the current working directory;
    the directory part of the output path is created if needed.

    If ``genomic_range_args`` describes a genomic range (per
    ``is_genomic_range``) the data is fetched with ``htsget.get``, which is
    given the retry settings, and the function returns. Otherwise
    ``download_file`` is called in a loop: each failure is logged, followed
    by a sleep of ``retry_wait`` seconds, until ``max_retries`` retries have
    been used, at which point the last exception is re-raised.

    A negative ``max_retries`` means "retry without limit": htsget is given
    ``sys.maxsize`` and the loop's retry counter never equals a negative
    number.

    Returns:
        None.

    Raises:
        OSError: if the output directory cannot be created.
        Exception: whatever ``download_file`` raised on the final attempt.
    """
    token_issued_at = time.time()
    token = get_token(creds)

    if file_name.endswith(".gpg"):
        logging.info("GPG files are not supported")
        return

    logging.info("File Id: '{}'({} bytes).".format(file_id, file_size))

    if output_file is None:
        output_file = generate_output_filename(
            os.getcwd(), file_id, file_name, genomic_range_args)

    # Named ``output_dir`` (not ``dir``) so the builtin is not shadowed.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        # Create first and inspect the failure afterwards: checking for
        # existence before creating races with anything else that creates
        # the same directory in between.
        try:
            os.makedirs(output_dir)
        except OSError:
            if not os.path.isdir(output_dir):
                raise

    if is_genomic_range(genomic_range_args):
        with open(output_file, 'wb') as output:
            htsget.get(
                URL_API + "/tickets/files/{}".format(file_id),
                output,
                reference_name=genomic_range_args[0],
                reference_md5=genomic_range_args[1],
                start=genomic_range_args[2],
                end=genomic_range_args[3],
                data_format=genomic_range_args[4],
                max_retries=sys.maxsize if max_retries < 0 else max_retries,
                retry_wait=retry_wait,
                bearer_token=token)
        print_local_file_info_genomic_range(
            'Saved to : ', output_file, genomic_range_args)
        return

    num_retries = 0
    while True:
        try:
            if time.time() - token_issued_at > 1 * 60 * 60:  # token expires in 1 hour
                token_issued_at = time.time()
                token = get_token(creds)
            download_file(token, file_id, file_size, check_sum,
                          num_connections, key, output_file)
            return
        except Exception as e:
            logging.info(e)
            if num_retries == max_retries:
                # Bare ``raise`` keeps the original traceback intact.
                raise
            time.sleep(retry_wait)
            num_retries += 1
            logging.info("retry attempt {}".format(num_retries))
def test_404(self):
    """A 404 response must raise ``ClientError`` whose text contains the body.

    ``requests.get`` is patched to return a ``MockedErrorResponse`` with
    status 404; the test fails if ``htsget.get`` does not raise
    ``exceptions.ClientError`` or if the response body is missing from the
    exception message.
    """
    body = "XXXX"
    returned_response = MockedErrorResponse(404, body)
    with mock.patch("requests.get", return_value=returned_response):
        with tempfile.TemporaryFile("wb+") as f:
            # assertRaises reports "ClientError not raised" on failure,
            # instead of the opaque "True is not false".
            with self.assertRaises(exceptions.ClientError) as raised:
                htsget.get("http://some_url", f)
            self.assertIn(body, str(raised.exception))
def download_file_retry(token, file_id, file_name, file_size, check_sum,
                        num_connections, key, output_file, genomic_range_args,
                        max_retries=50, retry_wait=5):
    """Download one file, retrying failed attempts.

    Files whose name ends in ``.gpg`` are logged as unsupported and skipped
    before anything else happens. When ``output_file`` is ``None`` a name is
    produced by ``generate_output_filename`` relative to the current working
    directory; the directory part of the output path is created if needed.

    If ``genomic_range_args`` describes a genomic range (per
    ``is_genomic_range``) the data is fetched with ``htsget.get``, which is
    given the retry settings, and the function returns. Otherwise
    ``download_file`` is called in a loop: each failure is logged, followed
    by a sleep of ``retry_wait`` seconds, until ``max_retries`` retries have
    been used, at which point the last exception is re-raised.

    Args:
        max_retries: number of retries after the first failed attempt.
            Defaults to 50, the value that used to be hard-coded.
        retry_wait: seconds to sleep between attempts. Defaults to 5, the
            value that used to be hard-coded.

    Returns:
        None.

    Raises:
        OSError: if the output directory cannot be created.
        Exception: whatever ``download_file`` raised on the final attempt.
    """
    if file_name.endswith(".gpg"):
        logging.info("GPG files are not supported")
        return

    logging.info("File Id: '{}'({} bytes).".format(file_id, file_size))

    if output_file is None:
        output_file = generate_output_filename(
            os.getcwd(), file_id, file_name, genomic_range_args)

    # Named ``output_dir`` (not ``dir``) so the builtin is not shadowed.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        # Create first and inspect the failure afterwards: checking for
        # existence before creating races with anything else that creates
        # the same directory in between.
        try:
            os.makedirs(output_dir)
        except OSError:
            if not os.path.isdir(output_dir):
                raise

    if is_genomic_range(genomic_range_args):
        with open(output_file, 'wb') as output:
            htsget.get(
                "https://ega.ebi.ac.uk:8051/elixir/data/tickets/files/{}".format(file_id),
                output,
                reference_name=genomic_range_args[0],
                reference_md5=genomic_range_args[1],
                start=genomic_range_args[2],
                end=genomic_range_args[3],
                data_format=genomic_range_args[4],
                max_retries=max_retries,
                retry_wait=retry_wait,
                bearer_token=token)
        print_local_file_info_genomic_range(
            'Saved to : ', output_file, genomic_range_args)
        return

    num_retries = 0
    while True:
        try:
            download_file(token, file_id, file_size, check_sum,
                          num_connections, key, output_file)
            return
        except Exception as e:
            logging.info(e)
            if num_retries == max_retries:
                # Bare ``raise`` keeps the original traceback intact.
                raise
            time.sleep(retry_wait)
            num_retries += 1
            logging.info("retry attempt {}".format(num_retries))
def run(args):
    """Run the command-line transfer described by ``args`` and exit.

    Logging is configured from ``args.verbose`` (0: WARNING, 1: INFO,
    2 or more: DEBUG). Data is written to the file named by ``args.output``
    or, when that is ``None``, to standard output; in the latter case
    ``args.max_retries`` is forced to zero with a warning.

    Failures reported as ``exceptions.ExceptionWrapper`` or
    ``exceptions.HtsgetException``, and ``KeyboardInterrupt``, are passed to
    ``error_message``. The function always finishes by calling ``sys.exit``
    with status 0 after a successful transfer and 1 otherwise.
    """
    log_level = logging.WARNING
    if args.verbose == 1:
        log_level = logging.INFO
    elif args.verbose >= 2:
        log_level = logging.DEBUG
    logging.basicConfig(format='%(asctime)s %(message)s', level=log_level)

    # Remember whether the handle is ours to close. Comparing against
    # ``sys.stdout`` is not enough: on Python 3 the handle in use is
    # ``sys.stdout.buffer``, which would then be closed by mistake.
    owns_output = args.output is not None
    if owns_output:
        output = open(args.output, "wb")
    else:
        # This is an awkward hack to get things to work on Python 2 and 3. In
        # Python 3, if we want to write bytes directly, we need to get the
        # underlying buffer. This isn't a problem in Python 2, which doesn't
        # have a buffer. Also, to facilitate testing, we allow stdout to be
        # swapped out for a different file handle.
        try:
            output = sys.stdout.buffer
        except AttributeError:
            output = sys.stdout
        if args.max_retries != 0:
            logging.warning(
                "Cannot retry failed transfers when writing to stdout. Setting "
                "max_retries to zero")
            args.max_retries = 0

    exit_status = 1
    try:
        htsget.get(
            args.url, output,
            reference_name=args.reference_name,
            reference_md5=args.reference_md5,
            start=args.start,
            end=args.end,
            data_format=args.format,
            max_retries=args.max_retries,
            retry_wait=args.retry_wait,
            timeout=args.timeout,
            bearer_token=args.bearer_token)
        exit_status = 0
    except (exceptions.ExceptionWrapper, exceptions.HtsgetException) as err:
        error_message(str(err))
    except KeyboardInterrupt:
        error_message("interrupted")
    finally:
        if owns_output:
            output.close()
        else:
            # Leave stdout open for the rest of the process, but push out
            # anything still buffered.
            output.flush()
    sys.exit(exit_status)
def test_no_bearer_token(self):
    """Without a bearer token the ticket request carries empty headers.

    The mocked ticket lists no URLs, so nothing is written to the output
    file and ``requests.get`` is expected to be called exactly once, for
    the ticket itself, with ``stream=True``.
    """
    ticket_url = "http://ticket.com"
    empty_ticket = {"htsget": {"urls": []}}
    payload = json.dumps(empty_ticket).encode()
    returned_response = MockedTicketResponse(payload)

    with mock.patch("requests.get", return_value=returned_response) as mocked_get:
        with tempfile.NamedTemporaryFile("wb+") as f:
            htsget.get(ticket_url, f)
            f.seek(0)
            written = f.read()
            self.assertEqual(written, b"")

        # The returned ticket has no URLs, so only the ticket request is made.
        self.assertEqual(mocked_get.call_count, 1)

        # call_args only exposes the arguments of the most recent call.
        call_args, call_kwargs = mocked_get.call_args
        self.assertEqual(call_args[0], ticket_url)
        self.assertEqual(call_kwargs["headers"], {})
        self.assertEqual(call_kwargs["stream"], True)
def htsget_api(url, filename, reference_name=None, reference_md5=None,
               start=None, end=None, data_format=None):
    """Download the htsget resource at ``url`` into the file ``filename``.

    Logs the reference name, MD5 and coordinate range at INFO level, opens
    ``filename`` in binary write mode (replacing existing content) and
    delegates the transfer to ``htsget.get`` with a ``timeout`` of 120.
    """
    template = "htsget-api: request (name='{}', md5='{}', start={}, end={})"
    logging.info(template.format(reference_name, reference_md5, start, end))

    request_options = dict(
        reference_name=reference_name,
        reference_md5=reference_md5,
        start=start,
        end=end,
        data_format=data_format,
        timeout=120,
    )
    with open(filename, "wb") as destination:
        htsget.get(url, destination, **request_options)
def test_simple_case(self):
    """A ticket with a single data URL yields that URL's bytes in the output.

    ``requests.get`` is patched to return one ``MockedResponse`` built from
    the ticket and 1024 bytes of data. Two calls are expected, and the last
    one must target the data URL with the ticket-supplied headers and
    ``stream=True``.
    """
    ticket_url = "http://ticket.com"
    data_url = "http://data.url.com"
    headers = {"a": "a", "xyz": "ghj"}
    url_entry = {"url": data_url, "headers": headers}
    ticket = {"htsget": {"urls": [url_entry]}}
    data = b"0" * 1024
    returned_response = MockedResponse(ticket, data)

    with mock.patch("requests.get", return_value=returned_response) as mocked_get:
        with tempfile.NamedTemporaryFile("wb+") as f:
            htsget.get(ticket_url, f)
            f.seek(0)
            written = f.read()
            self.assertEqual(written, data)

        self.assertEqual(mocked_get.call_count, 2)

        # call_args only exposes the arguments of the most recent call.
        call_args, call_kwargs = mocked_get.call_args
        self.assertEqual(call_args[0], data_url)
        self.assertEqual(call_kwargs["headers"], headers)
        self.assertEqual(call_kwargs["stream"], True)
def download_file_retry(creds, file_id, display_file_name, file_name,
                        file_size, check_sum, num_connections, key,
                        output_file, genomic_range_args, max_retries,
                        retry_wait):
    """Download one file, retrying failed attempts.

    A token is obtained from ``get_token(creds)`` up front. Files whose
    ``file_name`` ends in ``.gpg`` are logged as unsupported and skipped.
    When ``output_file`` is ``None`` a name is produced by
    ``generate_output_filename`` from ``display_file_name`` relative to the
    current working directory; the directory part of the output path is
    created if needed. Disk usage of the current working directory is
    logged at INFO level.

    If ``genomic_range_args`` describes a genomic range (per
    ``is_genomic_range``) the data is fetched with ``htsget.get``, which is
    given the retry settings, and the function returns. Otherwise
    ``download_file`` is called in a loop: each failure is logged with its
    traceback, followed by a sleep of ``retry_wait`` seconds, until
    ``max_retries`` retries have been used. At that point temporary files
    are deleted when ``TEMPORARY_FILES_SHOULD_BE_DELETED`` is set, and the
    last exception is re-raised.

    A negative ``max_retries`` means "retry without limit": htsget is given
    ``sys.maxsize`` and the loop's retry counter never equals a negative
    number.

    Returns:
        None.

    Raises:
        OSError: if the output directory cannot be created.
        Exception: whatever ``download_file`` raised on the final attempt.
    """
    token_issued_at = time.time()
    token = get_token(creds)

    if file_name.endswith(".gpg"):
        logging.info(
            "GPG files are not supported, please use the Java client - https://ega-archive.org/download/using-ega-download-client"
        )
        return

    logging.info("File Id: '{}'({} bytes).".format(file_id, file_size))

    if output_file is None:
        output_file = generate_output_filename(
            os.getcwd(), file_id, display_file_name, genomic_range_args)

    # Named ``output_dir`` (not ``dir``) so the builtin is not shadowed.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        # Create first and inspect the failure afterwards: checking for
        # existence before creating races with anything else that creates
        # the same directory in between.
        try:
            os.makedirs(output_dir)
        except OSError:
            if not os.path.isdir(output_dir):
                raise

    hdd = psutil.disk_usage(os.getcwd())
    logging.info("Total space : {:.2f} GiB".format(hdd.total / (2**30)))
    logging.info("Used space : {:.2f} GiB".format(hdd.used / (2**30)))
    logging.info("Free space : {:.2f} GiB".format(hdd.free / (2**30)))

    if is_genomic_range(genomic_range_args):
        with open(output_file, 'wb') as output:
            htsget.get(
                URL_API_TICKET + "/files/{}".format(file_id),
                output,
                reference_name=genomic_range_args[0],
                reference_md5=genomic_range_args[1],
                start=genomic_range_args[2],
                end=genomic_range_args[3],
                data_format=genomic_range_args[4],
                max_retries=sys.maxsize if max_retries < 0 else max_retries,
                retry_wait=retry_wait,
                bearer_token=token)
        print_local_file_info_genomic_range(
            'Saved to : ', output_file, genomic_range_args)
        return

    num_retries = 0
    while True:
        try:
            if time.time() - token_issued_at > 1 * 60 * 60:  # token expires in 1 hour
                token_issued_at = time.time()
                token = get_token(creds)
            download_file(token, file_id, file_size, check_sum,
                          num_connections, key, output_file)
            return
        except Exception as e:
            logging.exception(e)
            if num_retries == max_retries:
                if TEMPORARY_FILES_SHOULD_BE_DELETED:
                    delete_temporary_files(TEMPORARY_FILES)
                # Bare ``raise`` keeps the original traceback intact.
                raise
            time.sleep(retry_wait)
            num_retries += 1
            logging.info("retry attempt {}".format(num_retries))