def test_ida_download(testpath):
    """Download a file with ``download_file`` and verify its content.

    The Ida password of user ``testuser_1`` is asked interactively and
    stored in a configuration object parsed from the test configuration
    file. The file described by the hard-coded metadata is then
    downloaded into the temporary directory and its content is compared
    to the expected text.

    :param testpath: temporary directory fixture
    """
    # Parse the test configuration file
    conf = Configuration(tests.conftest.TEST_CONFIG_FILE)

    # Replace the Ida password read from the file with the real password
    # typed in by the user.
    # NOTE(review): ``download_file`` below receives the configuration
    # file path, not this object, so this override may never reach it --
    # confirm.
    # pylint: disable=protected-access
    parser = conf._parser
    parser.set(
        'siptools_research',
        'ida_password',
        getpass.getpass(prompt='Ida password for user \'testuser_1\':')
    )

    # Metadata of a file that should be available
    file_metadata = {
        'file_path': '/file',
        'identifier': 'pid:urn:111',
        'file_storage': {
            'identifier': 'urn:nbn:fi:att:file-storage-ida'
        }
    }
    download_path = os.path.join(testpath, 'ida_file')
    download_file(file_metadata, download_path,
                  tests.conftest.TEST_CONFIG_FILE)

    # The downloaded file must contain exactly the expected text
    with open(download_path) as downloaded:
        content = downloaded.read()
    assert content == 'foo\n'
def test_download_file(testpath, requests_mock, config_file, request_verified):
    """Test that a downloaded file is independent of its cached copy.

    The Ida download URL of file ``pid:urn:1`` is mocked to return
    ``foo\\n``. After the download the copy in ``file_cache`` is removed
    and the content of the target file is checked. Finally the ``verify``
    attribute of the last mocked request is compared to the expected
    value.

    :param testpath: Temporary directory fixture
    :param requests_mock: HTTP request mocker
    :param config_file: used configuration file
    :param request_verified: should HTTP request to Ida be verified?
    :returns: ``None``
    """
    requests_mock.get("https://ida.test/files/pid:urn:1/download",
                      content=b"foo\n")

    destination = os.path.join(testpath, 'new_file')
    download_file(_get_file_metadata('pid:urn:1'), destination, config_file)

    # Deleting the cached copy must not affect the file that was written
    # to the requested path
    cached_copy = os.path.join(testpath, "file_cache", "pid:urn:1")
    os.remove(cached_copy)

    # The target file should still be a text file that says: "foo\n"
    with open(destination, 'r') as downloaded:
        content = downloaded.read()
    assert content == 'foo\n'

    last_request = requests_mock.last_request
    assert last_request.verify is request_verified
def test_download_file_404(testpath, requests_mock):
    """Test downloading a file for which Ida responds with HTTP 404.

    ``download_file`` is expected to raise ``FileNotAvailableError``.

    :param testpath: Temporary directory fixture
    :param requests_mock: HTTP request mocker
    :returns: ``None``
    """
    destination = os.path.join(testpath, 'new_file')

    # Ida does not know the requested file
    requests_mock.get('https://ida.test/files/pid:urn:does_not_exist/download',
                      status_code=404)

    with pytest.raises(FileNotAvailableError):
        download_file(_get_file_metadata('pid:urn:does_not_exist'),
                      destination,
                      UNIT_TEST_CONFIG_FILE)
def test_download_file_502(testpath, requests_mock):
    """Test downloading a file when Ida responds with HTTP 502.

    ``download_file`` is expected to raise ``FileAccessError`` with a
    message telling that Ida is temporarily unavailable.

    :param testpath: Temporary directory fixture
    :param requests_mock: HTTP request mocker
    :returns: ``None``
    """
    destination = os.path.join(testpath, 'new_file')
    expected_message = ("Ida service temporarily unavailable. "
                        "Please, try again later.")

    # Every download request of this file fails with "Bad Gateway"
    requests_mock.get('https://ida.test/files/pid:urn:502/download',
                      status_code=502)

    with pytest.raises(FileAccessError) as error_info:
        download_file(_get_file_metadata('pid:urn:502'),
                      destination,
                      UNIT_TEST_CONFIG_FILE)

    assert str(error_info.value) == expected_message
    def run(self):
        """Read list of required files from Metax and download them.

        Files are written to path based on ``file_path`` in Metax. Each
        path is resolved relative to a temporary directory provided by
        ``self.output().temporary_path()``.

        :raises InvalidFileMetadataError: if the ``file_path`` of a file
            does not resolve to a location strictly inside the temporary
            download directory
        :returns: ``None``
        """
        upload_database = upload_rest_api.database.Database()

        # Find file identifiers from Metax dataset metadata.
        config_object = Configuration(self.config)
        metax_client = Metax(
            config_object.get('metax_url'),
            config_object.get('metax_user'),
            config_object.get('metax_password'),
            verify=config_object.getboolean('metax_ssl_verification'))
        dataset_files = metax_client.get_dataset_files(self.dataset_id)

        # Download files to temporary directory which will be moved to
        # output target path when all files have been downloaded
        with self.output().temporary_path() as temporary_directory:
            os.mkdir(temporary_directory)

            # Normalized download root WITH a trailing separator. A plain
            # ``startswith(temporary_directory)`` would also accept a
            # sibling directory whose name merely begins with the same
            # characters (e.g. "<tmp>x/file" reached via "../<tmp>x/file")
            # and a path that resolves to the directory itself.
            download_root = os.path.join(
                os.path.normpath(temporary_directory), '')

            for dataset_file in dataset_files:
                identifier = dataset_file["identifier"]

                # Full path to file
                target_path = os.path.normpath(
                    os.path.join(temporary_directory,
                                 dataset_file["file_path"].strip('/')))
                if not target_path.startswith(download_root):
                    raise InvalidFileMetadataError(
                        'The file path of file %s is invalid: %s' %
                        (identifier, dataset_file["file_path"]))

                # Create the download directory for file if it does not
                # exist already
                target_directory = os.path.dirname(target_path)
                if not os.path.isdir(target_directory):
                    # TODO: Use exist_ok -parameter when moving to
                    # python3
                    os.makedirs(target_directory)

                download_file(dataset_file, target_path, self.config,
                              upload_database)
# Example #6
# 0
def _download_files(metax_client, dataset_id, config_file, missing_files):
    """Download every file of a dataset and record the unavailable ones.

    The file metadata list is read with
    ``metax_client.get_dataset_files`` and ``download_file`` is called
    once per file. When a download raises ``FileNotAvailableError``, the
    identifier of that file is appended to ``missing_files`` and the
    remaining files are still processed.

    :param metax_client: metax access
    :param dataset_id: dataset identifier
    :param config_file: configuration file path
    :param missing_files: list that is extended in place with the
        identifiers of files that were not available
    :returns: A list of the metadata of all dataset files, as returned
        by ``metax_client.get_dataset_files``
    """
    database = upload_rest_api.database.Database()
    all_files = metax_client.get_dataset_files(dataset_id)

    for file_metadata in all_files:
        try:
            download_file(file_metadata,
                          config_file=config_file,
                          upload_database=database)
        except FileNotAvailableError:
            # Unavailable files are reported to the caller instead of
            # aborting the whole download
            missing_files.append(file_metadata['identifier'])

    return all_files