def test_download_bad_request():
    """Check that a 403 response makes download_file raise DownloadError."""
    forbidden_url = "https://mock.uri"
    # The download is expected to fail, so this path is only a placeholder.
    placeholder_path = "file/path/does/not/matter"

    # Register a mocked GET endpoint that answers with HTTP 403.
    responses.add(responses.GET, forbidden_url, status=403)

    with pytest.raises(DownloadError):
        download_file(forbidden_url, placeholder_path, False)
Ejemplo n.º 2
0
    def _download_http(source_uri, dest_path, version):
        """Download the dataset from a public HTTP URL and verify its checksum.

        The expected checksum is looked up before the download starts, so an
        unsupported version fails immediately instead of after the transfer.
        If the downloaded file does not match the expected checksum it is
        deleted before the error is propagated.

        Args:
            source_uri (str): source url where the file should be downloaded
            dest_path (str): destination path of the file
            version: dataset version, used as the key into
                GroceriesReal.GROCERIES_REAL_DATASET_TABLES to find the
                expected checksum

        Raises:
            DownloadError if the download file failed
            ChecksumError if the download file checksum does not match
        """
        # Resolve the checksum first: a bad version should not cost a download
        # nor leave an unverified file behind at dest_path.
        expected_checksum = GroceriesReal.GROCERIES_REAL_DATASET_TABLES[
            version].checksum

        try:
            logger.info("Downloading the dataset.")
            download_file(source_uri=source_uri, dest_path=dest_path)
        except DownloadError:
            logger.info(
                f"The request download from {source_uri} -> {dest_path} can't "
                f"be completed.")
            # Bare raise re-raises the active exception unchanged.
            raise

        try:
            validate_checksum(dest_path, expected_checksum)
        except ChecksumError:
            logger.info("Checksum mismatch. Delete the downloaded files.")
            # Do not keep a file whose content failed validation.
            os.remove(dest_path)
            raise
Ejemplo n.º 3
0
    def download(data_root, version):
        """Download the synthetic dataset zip file and unzip it.

        A previously downloaded file is reused when its checksum is still
        valid; otherwise it is deleted and downloaded again.

        Args:
            data_root (str): Path where to download the dataset.
            version (str): version of the synthetic dataset; must be a key of
                SynDetection2D.SYNTHETIC_DATASET_TABLES.

        Raises:
             ValueError if the dataset version is not supported
             ChecksumError if the download file checksum does not match
             DownloadError if the download file failed

        Note: Synthetic dataset is downloaded and unzipped to
        data_root/<const.SYNTHETIC_SUBFOLDER>.
        """
        tables = SynDetection2D.SYNTHETIC_DATASET_TABLES
        if version not in tables:
            raise ValueError(
                "A valid dataset version is required. "
                f"Available versions are: {list(tables.keys())}"
            )

        # Look the table entry up once instead of once per attribute.
        entry = tables[version]
        source_uri = entry.source_uri
        expected_checksum = entry.checksum
        dataset_file = entry.filename

        extract_folder = os.path.join(data_root, const.SYNTHETIC_SUBFOLDER)
        dataset_path = os.path.join(extract_folder, dataset_file)

        if os.path.exists(dataset_path):
            try:
                validate_checksum(dataset_path, expected_checksum)
            except ChecksumError:
                logger.info(
                    "The checksum of the previous dataset mismatches. "
                    "Delete the previously downloaded dataset."
                )
                # Removing the stale file makes the block below re-download.
                os.remove(dataset_path)
            else:
                # Only announce the skip once the existing file is verified.
                logger.info("The dataset file exists. Skip download.")

        if not os.path.exists(dataset_path):
            logger.info(f"Downloading dataset to {extract_folder}.")
            download_file(source_uri, dataset_path)
            try:
                validate_checksum(dataset_path, expected_checksum)
            except ChecksumError:
                logger.info("Checksum mismatch. Delete the downloaded files.")
                os.remove(dataset_path)
                raise

        SynDetection2D.unzip_file(
            filepath=dataset_path, destination=extract_folder
        )
def test_download_file_from_url():
    """Check that download_file writes the mocked response body to disk."""
    source_uri = "https://mock.uri"
    body = b"some test string here"
    responses.add(responses.GET,
                  source_uri,
                  body=body,
                  content_type="text/plain")

    with tempfile.TemporaryDirectory() as tmp_dir:
        dest_path = os.path.join(tmp_dir, "test.txt")
        download_file(source_uri, dest_path, False)

        assert os.path.exists(dest_path)
        # Close the handle explicitly: a leaked open file emits a
        # ResourceWarning and can block removal of the temporary directory
        # on platforms that lock open files.
        with open(dest_path, "rb") as downloaded:
            assert downloaded.read() == body
Ejemplo n.º 5
0
def load_from_http(estimator, url):
    """Download a checkpoint file from a URL and load it into the estimator.

    The checkpoint is saved as "estimator_checkpoint" inside a temporary
    directory, which is removed once the estimator has loaded it.

    Args:
        estimator (datasetinsights.estimators.Estimator):
            datasetinsights estimator object. Its ``load`` method is called
            with the local path of the downloaded checkpoint.
        url: URL of the checkpoint file

    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        path = os.path.join(scratch_dir, "estimator_checkpoint")

        logger.debug(f"Downloading estimator from {url} to {path}")
        download_file(dest_path=path, source_uri=url)

        # Loading has to happen inside the with-block, because the file is
        # deleted together with the temporary directory on exit.
        logger.debug(f"Loading estimator from {path}")
        estimator.load(path)
Ejemplo n.º 6
0
    def download(self, source_uri, output, checksum_file=None, **kwargs):
        """Download a dataset from an HTTP or HTTPS url.

        When a checksum file is given, the downloaded file is validated
        against it and deleted again if the validation fails.

        Args:
            source_uri (str): The downloader-uri that indicates where the
                              dataset should be downloaded from.

            output (str): Path to the directory where the download will
                          store the dataset.

            checksum_file (str): Path of the txt file that contains the
                                 checksum of the dataset to be downloaded.
                                 It can be an HTTP or HTTPS url or a local
                                 path. Validation is skipped when omitted.

            **kwargs: Accepted but not used by this method.

        Raises:
            ChecksumError: Raised if the checksum does not match.

        """
        downloaded_path = download_file(source_uri, output)

        # Nothing to verify without a checksum file.
        if not checksum_file:
            return

        logger.debug("Reading checksum from checksum file.")
        expected = get_checksum_from_file(checksum_file)
        try:
            logger.debug("Validating checksum!!")
            validate_checksum(downloaded_path, int(expected))
        except ChecksumError as err:
            logger.info("Checksum mismatch. Deleting the downloaded file.")
            # Drop the file that failed validation before propagating.
            os.remove(downloaded_path)
            raise err