예제 #1
0
 def assert_data_transfer_ok(self, test_instances, max_retries=0):
     """
     Hand ``test_instances`` to ``self.httpd``, run ``htsget.get`` against the
     handler's ticket URL and check that ``self.output_file`` now holds the
     ``data`` of every instance, concatenated in order.
     """
     self.httpd.test_instances = test_instances
     htsget.get(
         TestRequestHandler.ticket_url,
         self.output_file,
         max_retries=max_retries,
     )
     # Rewind so the comparison reads everything that was just written.
     self.output_file.seek(0)
     expected = b"".join(instance.data for instance in test_instances)
     self.assertEqual(self.output_file.read(), expected)
def htsget_api(
        url, filename, reference_name=None, reference_md5=None, start=None, end=None,
        data_format=None):
    """Log the request, then fetch ``url`` with ``htsget.get`` into ``filename``.

    ``filename`` is opened for binary writing (truncating any existing file)
    and the open handle is what ``htsget.get`` writes to.  The reference and
    range arguments are forwarded unchanged; the timeout is fixed at 120.
    """
    message = "htsget-api: request (name='{}', md5='{}', start={}, end={})".format(
        reference_name, reference_md5, start, end)
    logging.info(message)
    with open(filename, "wb") as sink:
        htsget.get(
            url,
            sink,
            reference_name=reference_name,
            reference_md5=reference_md5,
            start=start,
            end=end,
            data_format=data_format,
            timeout=120,
        )
예제 #3
0
def download_file_retry(creds, file_id, file_name, file_size, check_sum,
                        num_connections, key, output_file, genomic_range_args,
                        max_retries, retry_wait):
    """Download a single file, retrying failed attempts.

    A token is obtained from ``get_token(creds)`` up front.  Files whose name
    ends in ".gpg" are skipped with a log message.  If ``genomic_range_args``
    is a genomic range according to ``is_genomic_range``, the slice is fetched
    with ``htsget.get`` (which is given the retry settings) and the function
    returns.  Otherwise ``download_file`` is called in a loop until it
    succeeds or the retries are used up.

    Args:
        creds: Passed to ``get_token`` to obtain the bearer token.
        file_id: Used in the ticket URL, the log output and ``download_file``.
        file_name: Checked for the ".gpg" suffix and used when generating a
            default output name.
        file_size, check_sum, num_connections, key: Forwarded unchanged to
            ``download_file``.
        output_file: Destination path.  When None, a name is generated under
            the current working directory by ``generate_output_filename``.
        genomic_range_args: Indexable; items 0-4 are passed to ``htsget.get``
            as reference_name, reference_md5, start, end and data_format.
        max_retries: Number of retries allowed after the first failure.  A
            negative value never stops retrying (``sys.maxsize`` is handed to
            htsget; the local loop's equality test is never met).
        retry_wait: Passed to ``time.sleep`` between attempts and to
            ``htsget.get`` as ``retry_wait``.

    Raises:
        Exception: Whatever the last ``download_file`` attempt raised once
            ``max_retries`` retries have been used.
    """
    time0 = time.time()
    token = get_token(creds)

    if file_name.endswith(".gpg"):
        logging.info("GPG files are not supported")
        return

    logging.info("File Id: '{}'({} bytes).".format(file_id, file_size))

    if output_file is None:
        output_file = generate_output_filename(os.getcwd(), file_id, file_name,
                                               genomic_range_args)
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError:
            # Something else may have created the directory between the
            # check and the call; only fail if it is still missing.
            if not os.path.isdir(output_dir):
                raise

    if is_genomic_range(genomic_range_args):
        with open(output_file, 'wb') as output:
            htsget.get(
                URL_API + "/tickets/files/{}".format(file_id),
                output,
                reference_name=genomic_range_args[0],
                reference_md5=genomic_range_args[1],
                start=genomic_range_args[2],
                end=genomic_range_args[3],
                data_format=genomic_range_args[4],
                max_retries=sys.maxsize if max_retries < 0 else max_retries,
                retry_wait=retry_wait,
                bearer_token=token)
        print_local_file_info_genomic_range('Saved to : ', output_file,
                                            genomic_range_args)
        return

    done = False
    num_retries = 0
    while not done:
        try:
            if time.time() - time0 > 1 * 60 * 60:  # token expires in 1 hour
                time0 = time.time()
                token = get_token(creds)
            download_file(token, file_id, file_size, check_sum,
                          num_connections, key, output_file)
            done = True
        except Exception as e:
            logging.info(e)
            if num_retries == max_retries:
                # Bare raise re-raises the active exception with its
                # original traceback.
                raise
            time.sleep(retry_wait)
            num_retries += 1
            logging.info("retry attempt {}".format(num_retries))
예제 #4
0
파일: test_errors.py 프로젝트: ga4gh/htsget
 def test_404(self):
     """
     When ``requests.get`` yields a mocked 404 response, ``htsget.get`` must
     raise ``ClientError`` and the error text must contain the response body.
     """
     body = "XXXX"
     returned_response = MockedErrorResponse(404, body)
     with mock.patch("requests.get", return_value=returned_response):
         with tempfile.TemporaryFile("wb+") as f:
             # assertRaises fails the test with a descriptive message when
             # no ClientError is raised.
             with self.assertRaises(exceptions.ClientError) as raised:
                 htsget.get("http://some_url", f)
             self.assertIn(body, str(raised.exception))
예제 #5
0
def download_file_retry(token, file_id, file_name, file_size, check_sum,
                        num_connections, key, output_file, genomic_range_args,
                        max_retries=50, retry_wait=5):
    """Download a single file, retrying failed attempts.

    Files whose name ends in ".gpg" are skipped with a log message.  If
    ``genomic_range_args`` is a genomic range according to
    ``is_genomic_range``, the slice is fetched with ``htsget.get`` (which is
    given the retry settings) and the function returns.  Otherwise
    ``download_file`` is called in a loop until it succeeds or the retries
    are used up.

    Args:
        token: Bearer token passed to ``htsget.get`` / ``download_file``.
        file_id: Used in the ticket URL, the log output and ``download_file``.
        file_name: Checked for the ".gpg" suffix and used when generating a
            default output name.
        file_size, check_sum, num_connections, key: Forwarded unchanged to
            ``download_file``.
        output_file: Destination path.  When None, a name is generated under
            the current working directory by ``generate_output_filename``.
        genomic_range_args: Indexable; items 0-4 are passed to ``htsget.get``
            as reference_name, reference_md5, start, end and data_format.
        max_retries: Number of retries allowed after the first failure
            (defaults to 50, the value that used to be hard-coded).
        retry_wait: Passed to ``time.sleep`` between attempts and to
            ``htsget.get`` as ``retry_wait`` (defaults to 5, the value that
            used to be hard-coded).

    Raises:
        Exception: Whatever the last ``download_file`` attempt raised once
            ``max_retries`` retries have been used.
    """
    if file_name.endswith(".gpg"):
        logging.info("GPG files are not supported")
        return

    logging.info("File Id: '{}'({} bytes).".format(file_id, file_size))

    if output_file is None:
        output_file = generate_output_filename(os.getcwd(), file_id, file_name,
                                               genomic_range_args)
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError:
            # Something else may have created the directory between the
            # check and the call; only fail if it is still missing.
            if not os.path.isdir(output_dir):
                raise

    if is_genomic_range(genomic_range_args):
        with open(output_file, 'wb') as output:
            htsget.get(
                "https://ega.ebi.ac.uk:8051/elixir/data/tickets/files/{}".
                format(file_id),
                output,
                reference_name=genomic_range_args[0],
                reference_md5=genomic_range_args[1],
                start=genomic_range_args[2],
                end=genomic_range_args[3],
                data_format=genomic_range_args[4],
                max_retries=max_retries,
                retry_wait=retry_wait,
                bearer_token=token)
        print_local_file_info_genomic_range('Saved to : ', output_file,
                                            genomic_range_args)
        return

    done = False
    num_retries = 0
    while not done:
        try:
            download_file(token, file_id, file_size, check_sum,
                          num_connections, key, output_file)
            done = True
        except Exception as e:
            logging.info(e)
            if num_retries == max_retries:
                # Bare raise re-raises the active exception with its
                # original traceback.
                raise
            time.sleep(retry_wait)
            num_retries += 1
            logging.info("retry attempt {}".format(num_retries))
예제 #6
0
파일: cli.py 프로젝트: ga4gh/htsget
def run(args):
    """Run one htsget download described by ``args`` and exit the process.

    The log level is chosen from ``args.verbose`` (0 -> WARNING, 1 -> INFO,
    2 or more -> DEBUG).  Data is written to the file named by ``args.output``
    or, when that is None, to standard output; in the latter case
    ``args.max_retries`` is forced to zero.  The function always ends by
    calling ``sys.exit`` with status 0 on success and 1 when one of the
    handled errors (``ExceptionWrapper``, ``HtsgetException``,
    ``KeyboardInterrupt``) occurred; those are reported via ``error_message``.
    """
    log_level = logging.WARNING
    if args.verbose == 1:
        log_level = logging.INFO
    elif args.verbose >= 2:
        log_level = logging.DEBUG
    logging.basicConfig(format='%(asctime)s %(message)s', level=log_level)

    # Only a file opened here is ours to close. Standard output (or its
    # underlying buffer) belongs to the process and must stay open.
    owns_output = args.output is not None
    if owns_output:
        output = open(args.output, "wb")
    else:
        # This is an awkward hack to get things to work on Python 2 and 3. In Python 3,
        # if we want to write bytes directly, we need to get the underlying buffer.
        # This isn't a problem in Python 2, which doesn't have a buffer. Also, to
        # facilitate testing, we allow stdout to be swapped out for a different file
        # handle.
        try:
            output = sys.stdout.buffer
        except AttributeError:
            output = sys.stdout
        if args.max_retries != 0:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning(
                "Cannot retry failed transfers when writing to stdout. Setting "
                "max_retries to zero")
            args.max_retries = 0
    exit_status = 1
    try:
        htsget.get(args.url,
                   output,
                   reference_name=args.reference_name,
                   reference_md5=args.reference_md5,
                   start=args.start,
                   end=args.end,
                   data_format=args.format,
                   max_retries=args.max_retries,
                   retry_wait=args.retry_wait,
                   timeout=args.timeout,
                   bearer_token=args.bearer_token)
        exit_status = 0
    except exceptions.ExceptionWrapper as ew:
        error_message(str(ew))
    except exceptions.HtsgetException as he:
        error_message(str(he))
    except KeyboardInterrupt:
        error_message("interrupted")
    finally:
        if owns_output:
            output.close()
        else:
            # Push out anything still buffered without closing stdout.
            output.flush()
    sys.exit(exit_status)
예제 #7
0
파일: test_io.py 프로젝트: ga4gh/htsget
 def test_no_bearer_token(self):
     """
     With a ticket that lists no URLs and no bearer token supplied, nothing
     is written and the single ``requests.get`` call carries empty headers.
     """
     ticket_url = "http://ticket.com"
     empty_ticket = json.dumps({"htsget": {"urls": []}})
     response = MockedTicketResponse(empty_ticket.encode())
     with mock.patch("requests.get", return_value=response) as requests_get:
         with tempfile.NamedTemporaryFile("wb+") as sink:
             htsget.get(ticket_url, sink)
             sink.seek(0)
             self.assertEqual(sink.read(), b"")
         # The returned ticket has no URLs, so only the ticket request
         # itself should have been made.
         self.assertEqual(requests_get.call_count, 1)
         # call_args only exposes the arguments of the most recent call.
         call_args, call_kwargs = requests_get.call_args
         self.assertEqual(call_args[0], ticket_url)
         self.assertEqual(call_kwargs["headers"], {})
         self.assertEqual(call_kwargs["stream"], True)
예제 #8
0
def htsget_api(url,
               filename,
               reference_name=None,
               reference_md5=None,
               start=None,
               end=None,
               data_format=None):
    """Log the request, then fetch ``url`` with ``htsget.get`` into ``filename``.

    ``filename`` is opened for binary writing (truncating any existing file)
    and the open handle is what ``htsget.get`` writes to.  The reference and
    range arguments are forwarded unchanged; the timeout is fixed at 120.
    """
    summary = "htsget-api: request (name='{}', md5='{}', start={}, end={})".format(
        reference_name, reference_md5, start, end)
    logging.info(summary)

    # Keyword arguments handed to htsget.get alongside the URL and file.
    request_options = {
        "reference_name": reference_name,
        "reference_md5": reference_md5,
        "start": start,
        "end": end,
        "data_format": data_format,
        "timeout": 120,
    }
    with open(filename, "wb") as destination:
        htsget.get(url, destination, **request_options)
예제 #9
0
파일: test_io.py 프로젝트: jmtcsngr/htsget
 def test_simple_case(self):
     """
     With a ticket holding one data URL, the downloaded bytes match the
     mocked payload and the second ``requests.get`` call targets the data
     URL with the ticket's headers.
     """
     ticket_url = "http://ticket.com"
     data_url = "http://data.url.com"
     headers = {"a": "a", "xyz": "ghj"}
     url_entry = {"url": data_url, "headers": headers}
     ticket = {"htsget": {"urls": [url_entry]}}
     payload = b"0" * 1024
     response = MockedResponse(ticket, payload)
     with mock.patch("requests.get", return_value=response) as requests_get:
         with tempfile.NamedTemporaryFile("wb+") as sink:
             htsget.get(ticket_url, sink)
             sink.seek(0)
             self.assertEqual(sink.read(), payload)
         # One request for the ticket, one for the data URL.
         self.assertEqual(requests_get.call_count, 2)
         # call_args only exposes the arguments of the most recent call.
         call_args, call_kwargs = requests_get.call_args
         self.assertEqual(call_args[0], data_url)
         self.assertEqual(call_kwargs["headers"], headers)
         self.assertEqual(call_kwargs["stream"], True)
예제 #10
0
def download_file_retry(creds, file_id, display_file_name, file_name,
                        file_size, check_sum, num_connections, key,
                        output_file, genomic_range_args, max_retries,
                        retry_wait):
    """Download a single file, retrying failed attempts.

    A token is obtained from ``get_token(creds)`` up front.  Files whose name
    ends in ".gpg" are skipped with a log message.  Disk usage of the current
    working directory is logged.  If ``genomic_range_args`` is a genomic range
    according to ``is_genomic_range``, the slice is fetched with
    ``htsget.get`` (which is given the retry settings) and the function
    returns.  Otherwise ``download_file`` is called in a loop until it
    succeeds or the retries are used up.

    Args:
        creds: Passed to ``get_token`` to obtain the bearer token.
        file_id: Used in the ticket URL, the log output and ``download_file``.
        display_file_name: Used when generating a default output name.
        file_name: Checked for the ".gpg" suffix.
        file_size, check_sum, num_connections, key: Forwarded unchanged to
            ``download_file``.
        output_file: Destination path.  When None, a name is generated under
            the current working directory by ``generate_output_filename``.
        genomic_range_args: Indexable; items 0-4 are passed to ``htsget.get``
            as reference_name, reference_md5, start, end and data_format.
        max_retries: Number of retries allowed after the first failure.  A
            negative value never stops retrying (``sys.maxsize`` is handed to
            htsget; the local loop's equality test is never met).
        retry_wait: Passed to ``time.sleep`` between attempts and to
            ``htsget.get`` as ``retry_wait``.

    Raises:
        Exception: Whatever the last ``download_file`` attempt raised once
            ``max_retries`` retries have been used.  Before re-raising,
            temporary files are deleted if
            ``TEMPORARY_FILES_SHOULD_BE_DELETED`` is set.
    """
    time0 = time.time()
    token = get_token(creds)

    if file_name.endswith(".gpg"):
        logging.info(
            "GPG files are not supported, please use the Java client - https://ega-archive.org/download/using-ega-download-client"
        )
        return

    logging.info("File Id: '{}'({} bytes).".format(file_id, file_size))

    if output_file is None:
        output_file = generate_output_filename(os.getcwd(), file_id,
                                               display_file_name,
                                               genomic_range_args)
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError:
            # Something else may have created the directory between the
            # check and the call; only fail if it is still missing.
            if not os.path.isdir(output_dir):
                raise

    # NOTE(review): usage is reported for the working directory, which may be
    # a different filesystem from output_file's directory - confirm intent.
    hdd = psutil.disk_usage(os.getcwd())
    logging.info("Total space : {:.2f} GiB".format(hdd.total / (2**30)))
    logging.info("Used space : {:.2f} GiB".format(hdd.used / (2**30)))
    logging.info("Free space : {:.2f} GiB".format(hdd.free / (2**30)))

    if is_genomic_range(genomic_range_args):
        with open(output_file, 'wb') as output:
            htsget.get(
                URL_API_TICKET + "/files/{}".format(file_id),
                output,
                reference_name=genomic_range_args[0],
                reference_md5=genomic_range_args[1],
                start=genomic_range_args[2],
                end=genomic_range_args[3],
                data_format=genomic_range_args[4],
                max_retries=sys.maxsize if max_retries < 0 else max_retries,
                retry_wait=retry_wait,
                bearer_token=token)
        print_local_file_info_genomic_range('Saved to : ', output_file,
                                            genomic_range_args)
        return

    done = False
    num_retries = 0
    while not done:
        try:
            if time.time() - time0 > 1 * 60 * 60:  # token expires in 1 hour
                time0 = time.time()
                token = get_token(creds)
            download_file(token, file_id, file_size, check_sum,
                          num_connections, key, output_file)
            done = True
        except Exception as e:
            logging.exception(e)
            if num_retries == max_retries:
                if TEMPORARY_FILES_SHOULD_BE_DELETED:
                    delete_temporary_files(TEMPORARY_FILES)

                # Bare raise re-raises the active exception with its
                # original traceback.
                raise
            time.sleep(retry_wait)
            num_retries += 1
            logging.info("retry attempt {}".format(num_retries))