Ejemplo n.º 1
0
    def test_when_file_is_corrupt_but_checksum_not_provided(self):
        """
        Check the no-checksum path on an existing file with modified data.

        The file is pre-populated with ``self.file_contents`` repeated five
        times and ``download_file_if_missing`` is called without a checksum.
        Afterwards the file must still exist and must still fail
        ``self.good_data``.
        """
        fake = Faker()
        temp_fd, temp_file_path = mkstemp(
            suffix=fake.pystr(min_chars=15, max_chars=25))
        # mkstemp returns an open descriptor; only the path is needed here.
        os.close(temp_fd)

        try:
            # Write some modified data to the file.
            with open(temp_file_path, "wt") as g:
                g.write(self.file_contents * 5)

            self.assertTrue(os.path.isfile(temp_file_path))
            download_file_if_missing(FAKE_URL, temp_file_path)
            self.assertTrue(os.path.isfile(temp_file_path))
            self.assertFalse(self.good_data(temp_file_path))
        finally:
            # Clean up even when an assertion above fails, so a failing run
            # does not leave stray files behind in the temp directory.
            if os.path.isfile(temp_file_path):
                os.remove(temp_file_path)
Ejemplo n.º 2
0
    def test_when_file_is_ok(self):
        """
        Check the path where the local file already holds the good contents.

        The file is pre-populated with ``self.file_contents`` and
        ``download_file_if_missing`` is called with ``self.correct_sha256``.
        Afterwards the file must still exist and pass ``self.good_data``.
        """
        fake = Faker()
        temp_fd, temp_file_path = mkstemp(
            suffix=fake.pystr(min_chars=10, max_chars=15))
        # mkstemp returns an open descriptor; only the path is needed here.
        os.close(temp_fd)

        try:
            # Write the good contents to the file.
            with open(temp_file_path, "wt") as g:
                g.write(self.file_contents)

            self.assertTrue(os.path.isfile(temp_file_path))
            download_file_if_missing(FAKE_URL, temp_file_path,
                                     self.correct_sha256)
            self.assertTrue(os.path.isfile(temp_file_path))
            self.assertTrue(self.good_data(temp_file_path))
        finally:
            # Clean up even when an assertion above fails, so a failing run
            # does not leave stray files behind in the temp directory.
            if os.path.isfile(temp_file_path):
                os.remove(temp_file_path)
Ejemplo n.º 3
0
    def test_when_file_is_corrupt(self):
        """
        Check the path where the local file holds modified data.

        The file is pre-populated with ``self.file_contents`` repeated ten
        times, ``self.fake_download_response()`` is set up, and
        ``download_file_if_missing`` is called with ``self.correct_sha256``.
        Afterwards the file must exist and pass ``self.good_data``.
        """
        fake = Faker()
        temp_fd, temp_file_path = mkstemp(
            suffix=fake.pystr(min_chars=15, max_chars=25))
        # mkstemp returns an open descriptor; only the path is needed here.
        os.close(temp_fd)

        try:
            # Write some modified data to the file.
            with open(temp_file_path, "wb") as g:
                g.write(self.file_contents.encode() * 10)
            self.fake_download_response()

            self.assertTrue(os.path.isfile(temp_file_path))
            download_file_if_missing(FAKE_URL, temp_file_path,
                                     self.correct_sha256)
            self.assertTrue(os.path.isfile(temp_file_path))
            self.assertTrue(self.good_data(temp_file_path))
        finally:
            # Clean up even when an assertion above fails, so a failing run
            # does not leave stray files behind in the temp directory.
            if os.path.isfile(temp_file_path):
                os.remove(temp_file_path)
Ejemplo n.º 4
0
    def test_when_file_is_missing(self):
        """
        Check the path where the target file does not exist beforehand.

        After ``self.fake_download_response()`` is set up and
        ``download_file_if_missing`` is called with ``self.correct_sha256``,
        the file must exist and pass ``self.good_data``.
        """
        # A flaky way of generating a temporary file name without the file:
        # create a temp file, then delete it and reuse its path.
        fake = Faker()
        temp_fd, temp_file_path = mkstemp(
            suffix=fake.pystr(min_chars=15, max_chars=25))
        os.close(temp_fd)
        os.remove(temp_file_path)

        try:
            self.fake_download_response()

            self.assertFalse(os.path.isfile(temp_file_path))
            download_file_if_missing(FAKE_URL, temp_file_path,
                                     self.correct_sha256)
            self.assertTrue(os.path.isfile(temp_file_path))
            self.assertTrue(self.good_data(temp_file_path))
        finally:
            # The file may or may not exist at this point (depending on where
            # a failure happened), so only remove it when it is present.
            if os.path.isfile(temp_file_path):
                os.remove(temp_file_path)
Ejemplo n.º 5
0
def download_dataset(dataset_id, integrity_check):
    """
    Download a dataset identified by its dataset ID (a ``Collection``).

    The possibly already downloaded local copy is checked for integrity
    according to the specified integrity check. If the check passes, nothing
    is done. Otherwise, every requirement listed in the dataset's
    configuration is downloaded into the dataset's cache directory and
    unpacked if it is an archive.

    Args:
        dataset_id: a ``Collection`` instance; its ``name`` attribute is used
            as the key into the datasets configuration and as the name of
            the cache sub-directory.
        integrity_check: callable invoked as ``integrity_check(dataset_id)``;
            a truthy result means the local copy is usable.

    Returns a code (int) with the following semantics:
    * 1: dataset is available locally and the integrity check passed;
    * 2: the dataset has been downloaded (was not available locally).
    """
    assert (isinstance(dataset_id, Collection))
    if integrity_check(dataset_id):  # Dataset is already downloaded.
        return 1
    msg.info("Downloading {} ...".format(dataset_id.name))
    config = load_datasets_config()[dataset_id.name]
    dataset_dir = os.path.join(datamine_cache_dir(), dataset_id.name)
    # exist_ok avoids the check-then-create race: another process creating
    # the directory between an existence check and makedirs would otherwise
    # make this call fail.
    os.makedirs(dataset_dir, mode=0o755, exist_ok=True)

    # Download all the requirements.
    for requirement in config["requirements"]:
        url = requirement["URL"]
        expected_sha256 = requirement["SHA256"]

        # Attempt to guess the filename from the URL. In the future,
        # if it is required, we may have another field in the requirements.
        filename = url_to_filename(url)
        assert (filename is not None and len(filename) > 0)
        filepath = os.path.join(dataset_dir, filename)

        download_file_if_missing(url,
                                 filepath,
                                 expected_sha256=expected_sha256,
                                 desc="Downloading {}".format(filename))
        assert (os.path.isfile(filepath))

        # Unpack the file if it is archived or compressed.
        if is_archive(filepath):
            msg.info("Unpacking {} ...".format(filename))
            extract_archive(filepath, outdir=dataset_dir)
    msg.info("{} has been downloaded.".format(dataset_id.name))
    return 2
Ejemplo n.º 6
0
    def test_description_is_provided(self):
        """
        Check that the ``desc`` message shows up on stdout or stderr.

        The target file does not exist beforehand. While ``sys.stdout`` and
        ``sys.stderr`` are replaced by in-memory buffers,
        ``download_file_if_missing`` is called with ``desc=message``. The
        message must appear in one of the two buffers, and the file must
        exist and pass ``self.good_data`` afterwards.
        """
        # Create a temp file only to obtain a unique path, then delete it so
        # that the download starts from a missing file.
        fake = Faker()
        temp_fd, temp_file_path = mkstemp(
            suffix=fake.pystr(min_chars=15, max_chars=25))
        os.close(temp_fd)
        os.remove(temp_file_path)

        try:
            self.fake_download_response()

            message = "We are downloading some data.,!?=-"
            self.assertFalse(os.path.isfile(temp_file_path))
            with patch('sys.stdout', new_callable=StringIO) as mock_stdout, \
                    patch('sys.stderr', new_callable=StringIO) as mock_stderr:
                download_file_if_missing(FAKE_URL,
                                         temp_file_path,
                                         expected_sha256=self.correct_sha256,
                                         desc=message)
            self.assertTrue(message in mock_stdout.getvalue()
                            or message in mock_stderr.getvalue())
            self.assertTrue(os.path.isfile(temp_file_path))
            self.assertTrue(self.good_data(temp_file_path))
        finally:
            # The file may or may not exist at this point (depending on where
            # a failure happened), so only remove it when it is present.
            if os.path.isfile(temp_file_path):
                os.remove(temp_file_path)